DryChem 1.0.0
A generic, compile-time C++ toolbox with no dependencies for the modern computational chemistry project.
Loading...
Searching...
No Matches
testTokenizer.hpp
Go to the documentation of this file.
1// Copyright (c) 2020-2025 Cody R. Drisko. All rights reserved.
2// Licensed under the MIT License. See the LICENSE file in the project root for more information.
3//
4// Name: testTokenizer.hpp
5// Author: crdrisko
6// Date: 11/04/2020-09:19:27
7// Description: Provides ~100% unit test coverage over the Tokenizer class functions
8
9#ifndef DRYCHEM_COMMON_UTILITIES_LIBS_STRINGS_TESTS_TESTUTILITIES_TESTTOKENIZER_HPP
10#define DRYCHEM_COMMON_UTILITIES_LIBS_STRINGS_TESTS_TESTUTILITIES_TESTTOKENIZER_HPP
11
#include <deque>
#include <forward_list>
#include <list>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <gtest/gtest.h>
25
26GTEST_TEST(testTokenizer, has_mapped_typeReturnsTrueForStdMapsAndOtherMapContainersOnly)
27{
28 ASSERT_TRUE((CppUtils::Strings::details::has_mapped_type<std::map<int, int>>::value));
29 ASSERT_TRUE((CppUtils::Strings::details::has_mapped_type<std::multimap<int, int>>::value));
30 ASSERT_TRUE((CppUtils::Strings::details::has_mapped_type<std::unordered_map<int, int>>::value));
31 ASSERT_TRUE((CppUtils::Strings::details::has_mapped_type<std::unordered_multimap<int, int>>::value));
32
33 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::deque<int>>::value));
34 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::forward_list<int>>::value));
35 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::list<int>>::value));
36 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::multiset<int>>::value));
37 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::set<int>>::value));
39 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::unordered_multiset<int>>::value));
40 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::unordered_set<int>>::value));
41 ASSERT_FALSE((CppUtils::Strings::details::has_mapped_type<std::vector<int>>::value));
42}
43
44GTEST_TEST(testTokenizer, memberTypesForATokenizerAreCorrect)
45{
46 using namespace DryChem;
47
48 // clang-format off
49 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::value_type, ci_string>), "value_type is incorrect.");
50 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::reference, ci_string&>), "reference is incorrect.");
51 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::const_reference, const ci_string&>), "const_reference is incorrect.");
52 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::iterator, ci_string::iterator>), "iterator is incorrect.");
53 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::const_iterator, ci_string::const_iterator>), "const_iterator is incorrect.");
54 // clang-format on
55}
56
57GTEST_TEST(testTokenizer, ctadWorksForTheStringConstructorOfAStdStringTokenizer)
58{
59 using DryChem::Tokenizer;
60
61 std::string str {"Hello, World!"};
62 std::string punctuation {" .,;:'\"?!"};
63 std::string keepChars {",!"};
64
65 Tokenizer tokenizer1 {str};
66 Tokenizer tokenizer2 {str, punctuation};
67 Tokenizer tokenizer3 {str, punctuation.c_str()};
68 Tokenizer tokenizer4 {str, punctuation, keepChars};
69 Tokenizer tokenizer5 {str, punctuation.c_str(), keepChars};
70 Tokenizer tokenizer6 {str, punctuation, keepChars.c_str()};
71 Tokenizer tokenizer7 {str, punctuation.c_str(), keepChars.c_str()};
72 Tokenizer<std::char_traits<char>> tokenizer8 {str.begin(), str.end() - 6};
73 Tokenizer<std::char_traits<char>> tokenizer9 {str.begin(), str.end() - 6, punctuation};
74 Tokenizer<std::char_traits<char>> tokenizer10 {str.begin() + 5, str.end(), punctuation.c_str()};
75 Tokenizer<std::char_traits<char>> tokenizer11 {str.begin(), str.end() - 6, punctuation, keepChars};
76 Tokenizer<std::char_traits<char>> tokenizer12 {str.begin(), str.end() - 6, punctuation.c_str(), keepChars};
77 Tokenizer<std::char_traits<char>> tokenizer13 {str.begin() + 5, str.end(), punctuation, keepChars.c_str()};
78 Tokenizer<std::char_traits<char>> tokenizer14 {str.begin() + 6, str.end(), punctuation.c_str(), keepChars.c_str()};
79
80 ASSERT_EQ(tokenizer1.split(), (std::vector<std::string> {"Hello,", "World!"}));
81 ASSERT_EQ(tokenizer2.split(), (std::vector<std::string> {"Hello", "World"}));
82 ASSERT_EQ(tokenizer3.split(), (std::vector<std::string> {"Hello", "World"}));
83 ASSERT_EQ(tokenizer4.split(), (std::vector<std::string> {"Hello", ",", "World", "!"}));
84 ASSERT_EQ(tokenizer5.split(), (std::vector<std::string> {"Hello", ",", "World", "!"}));
85 ASSERT_EQ(tokenizer6.split(), (std::vector<std::string> {"Hello", ",", "World", "!"}));
86 ASSERT_EQ(tokenizer7.split(), (std::vector<std::string> {"Hello", ",", "World", "!"}));
87 ASSERT_EQ(tokenizer8.split(), (std::vector<std::string> {"Hello,"}));
88 ASSERT_EQ(tokenizer9.split(), (std::vector<std::string> {"Hello"}));
89 ASSERT_EQ(tokenizer10.split(), (std::vector<std::string> {"World"}));
90 ASSERT_EQ(tokenizer11.split(), (std::vector<std::string> {"Hello", ","}));
91 ASSERT_EQ(tokenizer12.split(), (std::vector<std::string> {"Hello", ","}));
92 ASSERT_EQ(tokenizer13.split(), (std::vector<std::string> {",", "World", "!"}));
93 ASSERT_EQ(tokenizer14.split(), (std::vector<std::string> {"World", "!"}));
94}
95
96GTEST_TEST(testTokenizer, ctadWorksForTheStringConstructorOfACaseInsensitiveTokenizer)
97{
98 using DryChem::Tokenizer;
99
100 DryChem::ci_string str {"HeLlO, wOrLd!"};
101 DryChem::ci_string punctuation {" .,;:'\"?!"};
102 DryChem::ci_string keepChars {",!"};
103
104 Tokenizer tokenizer1 {str};
105 Tokenizer tokenizer2 {str, punctuation};
106 Tokenizer tokenizer3 {str, punctuation.c_str()};
107 Tokenizer tokenizer4 {str, punctuation, keepChars};
108 Tokenizer tokenizer5 {str, punctuation.c_str(), keepChars};
109 Tokenizer tokenizer6 {str, punctuation, keepChars.c_str()};
110 Tokenizer tokenizer7 {str, punctuation.c_str(), keepChars.c_str()};
111 Tokenizer<DryChem::ci_char_traits> tokenizer8 {str.begin(), str.end() - 6};
112 Tokenizer<DryChem::ci_char_traits> tokenizer9 {str.begin(), str.end() - 6, punctuation};
113 Tokenizer<DryChem::ci_char_traits> tokenizer10 {str.begin() + 5, str.end(), punctuation.c_str()};
114 Tokenizer<DryChem::ci_char_traits> tokenizer11 {str.begin(), str.end() - 6, punctuation, keepChars};
115 Tokenizer<DryChem::ci_char_traits> tokenizer12 {str.begin(), str.end() - 6, punctuation.c_str(), keepChars};
116 Tokenizer<DryChem::ci_char_traits> tokenizer13 {str.begin() + 5, str.end(), punctuation, keepChars.c_str()};
117 Tokenizer<DryChem::ci_char_traits> tokenizer14 {str.begin() + 6, str.end(), punctuation.c_str(), keepChars.c_str()};
118
119 ASSERT_EQ(tokenizer1.split(), (std::vector<DryChem::ci_string> {"hello,", "world!"}));
120 ASSERT_EQ(tokenizer2.split(), (std::vector<DryChem::ci_string> {"hello", "world"}));
121 ASSERT_EQ(tokenizer3.split(), (std::vector<DryChem::ci_string> {"hello", "world"}));
122 ASSERT_EQ(tokenizer4.split(), (std::vector<DryChem::ci_string> {"hello", ",", "world", "!"}));
123 ASSERT_EQ(tokenizer5.split(), (std::vector<DryChem::ci_string> {"hello", ",", "world", "!"}));
124 ASSERT_EQ(tokenizer6.split(), (std::vector<DryChem::ci_string> {"hello", ",", "world", "!"}));
125 ASSERT_EQ(tokenizer7.split(), (std::vector<DryChem::ci_string> {"hello", ",", "world", "!"}));
126 ASSERT_EQ(tokenizer8.split(), (std::vector<DryChem::ci_string> {"hello,"}));
127 ASSERT_EQ(tokenizer9.split(), (std::vector<DryChem::ci_string> {"hello"}));
128 ASSERT_EQ(tokenizer10.split(), (std::vector<DryChem::ci_string> {"world"}));
129 ASSERT_EQ(tokenizer11.split(), (std::vector<DryChem::ci_string> {"hello", ","}));
130 ASSERT_EQ(tokenizer12.split(), (std::vector<DryChem::ci_string> {"hello", ","}));
131 ASSERT_EQ(tokenizer13.split(), (std::vector<DryChem::ci_string> {",", "world", "!"}));
132 ASSERT_EQ(tokenizer14.split(), (std::vector<DryChem::ci_string> {"world", "!"}));
133}
134
135GTEST_TEST(testTokenizer, withoutATemplateParameterSplitReturnsAVectorOfStrings)
136{
137 std::string str {"This\n is,\ta test."};
138
139 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
140
141 auto tokens = tokenizer.split();
142
143 std::vector<std::string> expected {"This", "is", "a", "test"};
144
145 ASSERT_EQ(tokens, expected);
146}
147
148GTEST_TEST(testTokenizer, aDequeOfStringsSplitsTokensNormally)
149{
150 std::string str {"This\n is,\ta test."};
151
152 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
153
154 auto tokens = tokenizer.split<std::deque<std::string>>();
155
156 std::deque<std::string> expected {"This", "is", "a", "test"};
157
158 ASSERT_EQ(tokens, expected);
159}
160
161GTEST_TEST(testTokenizer, aForwardListOfStringsReversesTheTokens)
162{
163 std::string str {"This\n is,\ta test."};
164
165 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
166
167 auto tokens = tokenizer.split<std::forward_list<std::string>>();
168
169 std::forward_list<std::string> expected {"test", "a", "is", "This"};
170
171 ASSERT_EQ(tokens, expected);
172}
173
174GTEST_TEST(testTokenizer, aListOfStringsSplitsTokensNormally)
175{
176 std::string str {"This\n is,\ta test."};
177
178 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
179
180 auto tokens = tokenizer.split<std::list<std::string>>();
181
182 std::list<std::string> expected {"This", "is", "a", "test"};
183
184 ASSERT_EQ(tokens, expected);
185}
186
187GTEST_TEST(testTokenizer, aMultiSetOfStringsOrdersTokens)
188{
189 std::string str {"This\n is,\ta test."};
190
191 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
192
193 auto tokens = tokenizer.split<std::multiset<std::string>>();
194
195 std::multiset<std::string> expected {"This", "a", "is", "test"};
196
197 ASSERT_EQ(tokens, expected);
198}
199
200GTEST_TEST(testTokenizer, aSetOfStringsOrdersTokens)
201{
202 std::string str {"This\n is,\ta test."};
203
204 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
205
206 auto tokens = tokenizer.split<std::set<std::string>>();
207
208 std::set<std::string> expected {"This", "a", "is", "test"};
209
210 ASSERT_EQ(tokens, expected);
211}
212
213GTEST_TEST(testTokenizer, aSetOfCaseInsensitiveStringsOrdersTokensWithoutRegardToCase)
214{
215 DryChem::ci_string ci_str {"This\n is,\ta test."};
216
217 DryChem::Tokenizer tokenizer {ci_str, ",. \t\n"};
218
219 auto tokens = tokenizer.split<std::set<DryChem::ci_string>>();
220
221 std::set<DryChem::ci_string> expected {"a", "is", "test", "This"};
222
223 ASSERT_EQ(tokens, expected);
224}
225
226GTEST_TEST(testTokenizer, aStringTruncatesAllButTheFirstLetterOfTheToken)
227{
228 std::string str {"This\n is,\ta test."};
229
230 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
231
232 auto tokens = tokenizer.split<std::string>();
233
234 std::string expected {"Tiat"};
235
236 ASSERT_EQ(tokens, expected);
237}
238
239GTEST_TEST(testTokenizer, anUnorderedMultiSetHashesTheTokens)
240{
241 std::string str = "This\n is,\ta test.";
242
243 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
244
245 auto tokens = tokenizer.split<std::unordered_multiset<std::string>>();
246
247 // Not sure what the hash will be so make a version with only the tokens we expect
248 std::unordered_multiset<std::string> expected {"This", "is", "a", "test"};
249
250 ASSERT_EQ(tokens, expected);
251}
252
253GTEST_TEST(testTokenizer, anUnorderedSetHashesTheTokens)
254{
255 std::string str {"This\n is,\ta test."};
256
257 DryChem::Tokenizer tokenizer {str, ",. \t\n"};
258
259 auto tokens = tokenizer.split<std::unordered_set<std::string>>();
260
261 // Not sure what the hash will be so make a version with only the tokens we expect
262 std::unordered_set<std::string> expected {"This", "is", "a", "test"};
263
264 ASSERT_EQ(tokens, expected);
265}
266
267GTEST_TEST(testTokenizer, weCanCastStringsToAnyTypeJustBySpecifyingTheContainersTemplateParameter)
268{
269 testing::internal::CaptureStdout();
270
271 std::string str = "1.234\t2.345\t3.456";
272
273 DryChem::Tokenizer tokenizer {str};
274
275 auto tokens = tokenizer.split<std::list<long double>>();
276
277 for (const auto& token : tokens)
278 std::cout << token << std::endl;
279
280 std::string actualOutput = testing::internal::GetCapturedStdout();
281 ASSERT_EQ(actualOutput, "1.234\n2.345\n3.456\n");
282}
283
284#endif
Allow for a shorter namespace name for fewer using statements.
Definition errors.hpp:23
GTEST_TEST(testTokenizer, has_mapped_typeReturnsTrueForStdMapsAndOtherMapContainersOnly)
Definition testTokenizer.hpp:26