44GTEST_TEST(testTokenizer, memberTypesForATokenizerAreCorrect)
49 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::value_type, ci_string>),
"value_type is incorrect.");
50 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::reference, ci_string&>),
"reference is incorrect.");
51 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::const_reference,
const ci_string&>),
"const_reference is incorrect.");
52 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::iterator, ci_string::iterator>),
"iterator is incorrect.");
53 static_assert((std::is_same_v<Tokenizer<ci_char_traits>::const_iterator, ci_string::const_iterator>),
"const_iterator is incorrect.");
57GTEST_TEST(testTokenizer, ctadWorksForTheStringConstructorOfAStdStringTokenizer)
59 using DryChem::Tokenizer;
61 std::string str {
"Hello, World!"};
62 std::string punctuation {
" .,;:'\"?!"};
63 std::string keepChars {
",!"};
65 Tokenizer tokenizer1 {str};
66 Tokenizer tokenizer2 {str, punctuation};
67 Tokenizer tokenizer3 {str, punctuation.c_str()};
68 Tokenizer tokenizer4 {str, punctuation, keepChars};
69 Tokenizer tokenizer5 {str, punctuation.c_str(), keepChars};
70 Tokenizer tokenizer6 {str, punctuation, keepChars.c_str()};
71 Tokenizer tokenizer7 {str, punctuation.c_str(), keepChars.c_str()};
72 Tokenizer<std::char_traits<char>> tokenizer8 {str.begin(), str.end() - 6};
73 Tokenizer<std::char_traits<char>> tokenizer9 {str.begin(), str.end() - 6, punctuation};
74 Tokenizer<std::char_traits<char>> tokenizer10 {str.begin() + 5, str.end(), punctuation.c_str()};
75 Tokenizer<std::char_traits<char>> tokenizer11 {str.begin(), str.end() - 6, punctuation, keepChars};
76 Tokenizer<std::char_traits<char>> tokenizer12 {str.begin(), str.end() - 6, punctuation.c_str(), keepChars};
77 Tokenizer<std::char_traits<char>> tokenizer13 {str.begin() + 5, str.end(), punctuation, keepChars.c_str()};
78 Tokenizer<std::char_traits<char>> tokenizer14 {str.begin() + 6, str.end(), punctuation.c_str(), keepChars.c_str()};
80 ASSERT_EQ(tokenizer1.split(), (std::vector<std::string> {
"Hello,",
"World!"}));
81 ASSERT_EQ(tokenizer2.split(), (std::vector<std::string> {
"Hello",
"World"}));
82 ASSERT_EQ(tokenizer3.split(), (std::vector<std::string> {
"Hello",
"World"}));
83 ASSERT_EQ(tokenizer4.split(), (std::vector<std::string> {
"Hello",
",",
"World",
"!"}));
84 ASSERT_EQ(tokenizer5.split(), (std::vector<std::string> {
"Hello",
",",
"World",
"!"}));
85 ASSERT_EQ(tokenizer6.split(), (std::vector<std::string> {
"Hello",
",",
"World",
"!"}));
86 ASSERT_EQ(tokenizer7.split(), (std::vector<std::string> {
"Hello",
",",
"World",
"!"}));
87 ASSERT_EQ(tokenizer8.split(), (std::vector<std::string> {
"Hello,"}));
88 ASSERT_EQ(tokenizer9.split(), (std::vector<std::string> {
"Hello"}));
89 ASSERT_EQ(tokenizer10.split(), (std::vector<std::string> {
"World"}));
90 ASSERT_EQ(tokenizer11.split(), (std::vector<std::string> {
"Hello",
","}));
91 ASSERT_EQ(tokenizer12.split(), (std::vector<std::string> {
"Hello",
","}));
92 ASSERT_EQ(tokenizer13.split(), (std::vector<std::string> {
",",
"World",
"!"}));
93 ASSERT_EQ(tokenizer14.split(), (std::vector<std::string> {
"World",
"!"}));
96GTEST_TEST(testTokenizer, ctadWorksForTheStringConstructorOfACaseInsensitiveTokenizer)
98 using DryChem::Tokenizer;
100 DryChem::ci_string str {
"HeLlO, wOrLd!"};
101 DryChem::ci_string punctuation {
" .,;:'\"?!"};
102 DryChem::ci_string keepChars {
",!"};
104 Tokenizer tokenizer1 {str};
105 Tokenizer tokenizer2 {str, punctuation};
106 Tokenizer tokenizer3 {str, punctuation.c_str()};
107 Tokenizer tokenizer4 {str, punctuation, keepChars};
108 Tokenizer tokenizer5 {str, punctuation.c_str(), keepChars};
109 Tokenizer tokenizer6 {str, punctuation, keepChars.c_str()};
110 Tokenizer tokenizer7 {str, punctuation.c_str(), keepChars.c_str()};
111 Tokenizer<DryChem::ci_char_traits> tokenizer8 {str.begin(), str.end() - 6};
112 Tokenizer<DryChem::ci_char_traits> tokenizer9 {str.begin(), str.end() - 6, punctuation};
113 Tokenizer<DryChem::ci_char_traits> tokenizer10 {str.begin() + 5, str.end(), punctuation.c_str()};
114 Tokenizer<DryChem::ci_char_traits> tokenizer11 {str.begin(), str.end() - 6, punctuation, keepChars};
115 Tokenizer<DryChem::ci_char_traits> tokenizer12 {str.begin(), str.end() - 6, punctuation.c_str(), keepChars};
116 Tokenizer<DryChem::ci_char_traits> tokenizer13 {str.begin() + 5, str.end(), punctuation, keepChars.c_str()};
117 Tokenizer<DryChem::ci_char_traits> tokenizer14 {str.begin() + 6, str.end(), punctuation.c_str(), keepChars.c_str()};
119 ASSERT_EQ(tokenizer1.split(), (std::vector<DryChem::ci_string> {
"hello,",
"world!"}));
120 ASSERT_EQ(tokenizer2.split(), (std::vector<DryChem::ci_string> {
"hello",
"world"}));
121 ASSERT_EQ(tokenizer3.split(), (std::vector<DryChem::ci_string> {
"hello",
"world"}));
122 ASSERT_EQ(tokenizer4.split(), (std::vector<DryChem::ci_string> {
"hello",
",",
"world",
"!"}));
123 ASSERT_EQ(tokenizer5.split(), (std::vector<DryChem::ci_string> {
"hello",
",",
"world",
"!"}));
124 ASSERT_EQ(tokenizer6.split(), (std::vector<DryChem::ci_string> {
"hello",
",",
"world",
"!"}));
125 ASSERT_EQ(tokenizer7.split(), (std::vector<DryChem::ci_string> {
"hello",
",",
"world",
"!"}));
126 ASSERT_EQ(tokenizer8.split(), (std::vector<DryChem::ci_string> {
"hello,"}));
127 ASSERT_EQ(tokenizer9.split(), (std::vector<DryChem::ci_string> {
"hello"}));
128 ASSERT_EQ(tokenizer10.split(), (std::vector<DryChem::ci_string> {
"world"}));
129 ASSERT_EQ(tokenizer11.split(), (std::vector<DryChem::ci_string> {
"hello",
","}));
130 ASSERT_EQ(tokenizer12.split(), (std::vector<DryChem::ci_string> {
"hello",
","}));
131 ASSERT_EQ(tokenizer13.split(), (std::vector<DryChem::ci_string> {
",",
"world",
"!"}));
132 ASSERT_EQ(tokenizer14.split(), (std::vector<DryChem::ci_string> {
"world",
"!"}));