DryChem 1.0.0
A generic, compile-time C++ toolbox with no dependencies for the modern computational chemistry project.
Loading...
Searching...
No Matches
columnStrategy.hpp
Go to the documentation of this file.
1// Copyright (c) 2020-2025 Cody R. Drisko. All rights reserved.
2// Licensed under the MIT License. See the LICENSE file in the project root for more information.
3//
4// Name: columnStrategy.hpp
5// Author: crdrisko
6// Date: 01/01/2021-14:15:57
7// Description: An implementation of a file parser for parsing data as columns
8
9#ifndef DRYCHEM_COMMON_UTILITIES_INCLUDE_COMMON_UTILS_FILES_FILEPARSING_PARSERSTRATEGIES_COLUMNSTRATEGY_HPP
10#define DRYCHEM_COMMON_UTILITIES_INCLUDE_COMMON_UTILS_FILES_FILEPARSING_PARSERSTRATEGIES_COLUMNSTRATEGY_HPP
11
12#include <algorithm>
13#include <cstddef>
14#include <optional>
15#include <string>
16#include <vector>
17
19
20namespace CppUtils::Files
21{
23 {
24 private:
25 mutable std::vector<std::string> columnCache;
26
// Handles the end of a row: operator() calls this each time it meets a "\n" token.
//   tokenIndex_  - index of that newline token within the full token list
//   columnIndex_ - the caller's running column position; always set back to 0 here
// Declared const, but it can still resize columnCache because that member is mutable.
27 void resetColumnsOnNewLine(std::size_t tokenIndex_, std::size_t& columnIndex_) const
28 {
29 // We only need to resize the vector once after we know the number of columns
// In operator()'s loop the token index also counts the newline tokens while the column
// index does not, so the two can only be equal at the first newline token; at that point
// tokenIndex_ is the number of tokens seen on the first row, i.e. the column count.
// NOTE(review): if the first token is itself "\n" this resizes the cache to 0, and a later
// row with more tokens than the first would exceed the cache size; the caller indexes
// columnCache[j++] without a bounds check - confirm the input format rules these out.
30 if (tokenIndex_ == columnIndex_)
31 columnCache.resize(tokenIndex_);
32
33 columnIndex_ = 0; // the next row starts filling from the first column again
34 }
35
37 {
38 // clang-format off
39 std::transform(columnCache.begin(), columnCache.end(), columnCache.begin(),
40 [](const std::string& column) { return column.substr(0, column.size() - 1); });
41 // clang-format on
42 }
43
44 public:
54 const std::string& fileContents_, const std::optional<std::string>& fieldSeparator_ = std::nullopt) const
55 {
56 if (columnCache.empty())
57 {
58 std::string delimiters = fieldSeparator_.value_or("") + " \t\n";
59
60 Strings::Tokenizer tok {fileContents_, delimiters, "\n"};
61
62 auto tokens = tok.split();
63 columnCache.resize(tokens.size());
64
65 for (std::size_t i {}, j {}; i < tokens.size(); ++i)
66 {
67 if (tokens[i] == "\n")
68 {
69 this->resetColumnsOnNewLine(i, j);
70 continue;
71 }
72
73 columnCache[j++].append(tokens[i]).append(fieldSeparator_.value_or("\t"));
74 }
75
77 }
78
79 return columnCache;
80 }
81 };
82} // namespace CppUtils::Files
83
84
85#endif
Definition columnStrategy.hpp:23
std::vector< std::string > columnCache
Definition columnStrategy.hpp:25
void trimTrainingSeparators() const
Definition columnStrategy.hpp:36
void resetColumnsOnNewLine(std::size_t tokenIndex_, std::size_t &columnIndex_) const
Definition columnStrategy.hpp:27
auto operator()(const std::string &fileContents_, const std::optional< std::string > &fieldSeparator_=std::nullopt) const
Definition columnStrategy.hpp:53
Definition tokenizer.hpp:62
constexpr Container split()
Definition tokenizer.hpp:148
Definition fileParser.hpp:23