DryChem 1.0.0
A generic, compile-time C++ toolbox with no dependencies for the modern computational chemistry project.
Loading...
Searching...
No Matches
tokenizer.hpp
Go to the documentation of this file.
1// Copyright (c) 2020-2025 Cody R. Drisko. All rights reserved.
2// Licensed under the MIT License. See the LICENSE file in the project root for more information.
3//
4// Name: tokenizer.hpp
5// Author: crdrisko
6// Date: 11/04/2020-07:04:33
7// Description: A class dedicated to splitting strings into tokens on selected delimiters
8
9#ifndef DRYCHEM_COMMON_UTILITIES_INCLUDE_COMMON_UTILS_STRINGS_UTILS_TOKENIZER_HPP
10#define DRYCHEM_COMMON_UTILITIES_INCLUDE_COMMON_UTILS_STRINGS_UTILS_TOKENIZER_HPP
11
12#include <algorithm>
13#include <forward_list>
14#include <iterator>
15#include <optional>
16#include <string>
17#include <type_traits>
18#include <vector>
19
23
24namespace CppUtils::Strings
25{
26 namespace details
27 {
// Type trait reporting whether a type declares a nested `mapped_type`; split() uses it to reject
//  containers that have one.
// Primary template: chosen whenever the partial specialization below is not viable, so the trait is false.
34 template<typename, typename = std::void_t<>>
35 struct has_mapped_type : std::false_type
36 {
37 };
38
// Partial specialization: viable only when `typename T::mapped_type` is well-formed. std::void_t then
//  yields void, which matches the primary template's default second argument, so the trait is true.
45 template<typename T>
46 struct has_mapped_type<T, std::void_t<typename T::mapped_type>> : std::true_type
47 {
48 };
49 } // namespace details
50
60 template<typename CharTraits>
62 {
63 public:
65 using value_type = std::basic_string<char, CharTraits>;
66 using reference = std::basic_string<char, CharTraits>&;
67 using const_reference = const std::basic_string<char, CharTraits>&;
68 using iterator = typename std::basic_string<char, CharTraits>::iterator;
69 using const_iterator = typename std::basic_string<char, CharTraits>::const_iterator;
70
71 private:
75 std::optional<value_type> keepDelimiters;
76
// Returns the result of foundSubstr(ch_, delimiters), i.e. whether ch_ is treated as a delimiter.
// NOTE(review): the only foundSubstr signature visible in this listing takes two basic_string arguments,
//  so the char is presumably handled by another overload - confirm in stringUtils.hpp.
82 constexpr bool isDelimiter(const char ch_) const noexcept { return foundSubstr(ch_, delimiters); }
83
// Body of nextToken(). The cross-reference index gives its signature as `constexpr value_type nextToken()`
//  at source line 90, which this listing does not show.
// NOTE(review): source lines 96, 98 and 104 are also absent (the embedded numbering jumps 95->97, 97->99
//  and 103->105). They are presumably the loop/condition headers guarding the statements below; confirm
//  against the original header before relying on the control flow as printed here.
91 {
// Token being assembled; returned empty when nothing is appended (e.g. when str_current == str_end).
92 value_type str {};
93
94 if (str_current != str_end)
95 {
97 {
// Returns a one-character string built from the current character and advances past it.
99 return value_type {*str_current++};
100
// Steps over the current character without adding it to the token.
101 ++str_current;
102 }
103
// Appends the current character to the token and advances past it.
105 str += value_type {*str_current++};
106 }
107
108 return str;
109 }
110
111 public:
// Constructs a tokenizer over the iterator range [begin_, end_).
//  begin_, end_     - stored as str_current and str_end; only the iterators are kept, so the
//                     underlying characters must stay valid while the tokenizer is in use.
//  delimiters_      - copied into the delimiters member; defaults to " \t\n" (space, tab, newline).
//  keepDelimiters_  - copied into the keepDelimiters member; defaults to std::nullopt.
120 constexpr Tokenizer(const_iterator begin_, const_iterator end_, const_reference delimiters_ = " \t\n",
121 std::optional<value_type> keepDelimiters_ = std::nullopt)
122 : str_current {begin_}, str_end {end_}, delimiters {delimiters_}, keepDelimiters {keepDelimiters_} {}
123
// Constructs a tokenizer over a whole string. Marked explicit, so a string is never implicitly
//  converted into a Tokenizer.
//  str_             - only str_.begin() and str_.end() are stored (as str_current / str_end), not a copy
//                     of the string; str_ must therefore outlive the tokenizer, and passing a temporary
//                     leaves the stored iterators dangling.
//  delimiters_      - copied into the delimiters member; defaults to " \t\n" (space, tab, newline).
//  keepDelimiters_  - copied into the keepDelimiters member; defaults to std::nullopt.
131 constexpr explicit Tokenizer(const_reference str_, const_reference delimiters_ = " \t\n",
132 std::optional<value_type> keepDelimiters_ = std::nullopt)
133 : str_current {str_.begin()}, str_end {str_.end()}, delimiters {delimiters_}, keepDelimiters {keepDelimiters_} {}
134
// Extracts tokens with nextToken() and collects them, converted through lexical_cast<T>, into a Container.
//  Container - result type; defaults to std::vector<value_type>.
//  T         - element type handed to lexical_cast; defaults to Container::value_type.
//  The unnamed third template parameter removes this overload unless
//  Meta::is_allocator_aware_container<Container> holds and Container has no nested mapped_type
//  (Meta::is_allocator_aware_container is defined outside this listing).
//  Returns the filled container; it is empty when str_current == str_end on entry.
145 template<typename Container = std::vector<value_type>, typename T = typename Container::value_type,
146 typename = std::enable_if_t<std::conjunction_v<Meta::is_allocator_aware_container<Container>,
147 std::negation<details::has_mapped_type<Container>>>>>
148 constexpr Container split()
149 {
150 Container tokens;
151
152 if (str_current != str_end)
153 {
// std::forward_list provides push_front but no insert(), so it cannot be used with std::insert_iterator
//  and gets its own branch.
154 if constexpr (std::is_same_v<Container, std::forward_list<T>>)
155 {
// front_insert_iterator calls push_front, so the list ends up in reverse order of extraction.
156 std::front_insert_iterator<Container> tokensIter {tokens};
157
// for_each serves only as a repeat counter here: it receives copies of the two iterators and invokes the
//  lambda once per character of the remaining input, while the lambda ignores the character it is given
//  (unnamed parameter). nextToken() advances str_current itself; once the input is used up it returns an
//  empty string, which the empty() check discards.
158 std::for_each(str_current, str_end, [&](const auto) {
159 auto str = this->nextToken();
160
161 if (!str.empty())
162 *tokensIter++ = lexical_cast<T>(str);
163 });
164 }
165 else
166 {
// insert_iterator starts at tokens.begin() and inserts through Container::insert.
167 std::insert_iterator<Container> tokensIter {tokens, tokens.begin()};
168
// Same repeat-counter use of for_each as in the forward_list branch above.
169 std::for_each(str_current, str_end, [&](const auto) {
170 value_type str = this->nextToken();
171
172 if (!str.empty())
173 *tokensIter++ = lexical_cast<T>(str);
174 });
175 }
176 }
177
178 return tokens;
179 }
180 };
181
// Deduction guide: when the first constructor argument is a std::basic_string<char, CharTraits>, class
//  template argument deduction selects Tokenizer<CharTraits>. The remaining arguments (TArgs...) are
//  accepted by value and play no part in choosing CharTraits.
189 template<typename CharTraits, typename... TArgs>
190 Tokenizer(const std::basic_string<char, CharTraits>&, TArgs...) -> Tokenizer<CharTraits>;
191} // namespace CppUtils::Strings
192
193#endif
Definition tokenizer.hpp:62
const std::basic_string< char, CharTraits > & const_reference
Definition tokenizer.hpp:67
constexpr Container split()
Definition tokenizer.hpp:148
typename std::basic_string< char, CharTraits >::const_iterator const_iterator
Definition tokenizer.hpp:69
const_iterator str_current
Definition tokenizer.hpp:72
std::basic_string< char, CharTraits > value_type
Member types.
Definition tokenizer.hpp:65
constexpr Tokenizer(const_iterator begin_, const_iterator end_, const_reference delimiters_=" \t\n", std::optional< value_type > keepDelimiters_=std::nullopt)
Definition tokenizer.hpp:120
constexpr Tokenizer(const_reference str_, const_reference delimiters_=" \t\n", std::optional< value_type > keepDelimiters_=std::nullopt)
Definition tokenizer.hpp:131
value_type delimiters
Definition tokenizer.hpp:74
std::optional< value_type > keepDelimiters
Definition tokenizer.hpp:75
constexpr bool isDelimiter(const char ch_) const noexcept
Definition tokenizer.hpp:82
std::basic_string< char, CharTraits > & reference
Definition tokenizer.hpp:66
const_iterator str_end
Definition tokenizer.hpp:73
typename std::basic_string< char, CharTraits >::iterator iterator
Definition tokenizer.hpp:68
constexpr value_type nextToken()
Definition tokenizer.hpp:90
Definition tokenizer.hpp:27
Definition ciString.hpp:17
Tokenizer(const std::basic_string< char, CharTraits > &, TArgs...) -> Tokenizer< CharTraits >
auto lexical_cast(const std::basic_string< char, CharTraits > &str)
Definition lexicalCast.hpp:34
constexpr bool foundSubstr(const std::basic_string< char, CharTraits > &stringToFind, const std::basic_string< char, CharTraits > &stringToSearch) noexcept
Definition stringUtils.hpp:26