Create custom string tokenizer for use in placefile parsing

- Avoids the use of regular expressions, and is expected to be more efficient with large placefiles
This commit is contained in:
Dan Paulat 2023-07-16 23:59:28 -05:00
parent 88475f5b0e
commit 6767c0c50a
4 changed files with 148 additions and 0 deletions

View file

@ -8,6 +8,25 @@ namespace scwx
namespace util
{
/**
 * @brief Parse a list of tokens from a string
 *
 * This function takes an input string and applies the sets of delimiters in
 * the delimiters vector, in order, to split the string into tokens. Each set of
 * delimiters is used once, and matches any single character it contains rather
 * than a sequence of characters.
 *
 * Whitespace before a token is skipped, and every token is trimmed of leading
 * and trailing whitespace. If a token begins with a double quotation mark, the
 * delimiter search starts after the closing quotation mark, so delimiter
 * characters inside the quoted text do not split the token. The quotation
 * marks remain part of the token.
 *
 * If a delimiter cannot be found, no further delimiters are applied. Any text
 * remaining after the last matched delimiter is returned as a final token, so
 * at most delimiters.size() + 1 tokens are returned.
 *
 * @param [in] s Input string to tokenize
 * @param [in] delimiters A vector of delimiter sets, one set per token
 * boundary, applied in order
 * @param [in] pos Search begin position. Default is 0.
 *
 * @return Tokenized string
 */
std::vector<std::string> ParseTokens(const std::string& s,
std::vector<std::string> delimiters,
std::size_t pos = 0);
/**
 * @brief Convert a vector of strings to a single string
 *
 * NOTE(review): the exact output format (separators, bracketing) is determined
 * by the implementation, which is not shown alongside this declaration -
 * confirm against the definition before relying on it.
 *
 * @param [in] v Vector of strings to convert
 *
 * @return String built from the contents of v
 */
std::string ToString(const std::vector<std::string>& v);
} // namespace util

View file

@ -1,10 +1,76 @@
#include <scwx/util/strings.hpp>
#include <boost/algorithm/string/trim.hpp>
namespace scwx
{
namespace util
{
/**
 * Split a string into tokens, consuming one set of delimiters per token.
 *
 * Each entry of the delimiters vector is used once, in order, and matches any
 * single character it contains. Tokens are trimmed of surrounding whitespace.
 * A token that begins with a double quotation mark is not searched for
 * delimiters until after its closing quotation mark. If a delimiter is not
 * found, the remaining delimiters are not applied. Any text left after the last
 * matched delimiter is appended as a final token.
 *
 * @param s Input string to tokenize
 * @param delimiters Sets of delimiter characters, one set per token boundary
 * @param pos Position in s at which to begin
 *
 * @return Tokens found, at most delimiters.size() + 1 entries
 */
std::vector<std::string> ParseTokens(const std::string&       s,
                                     std::vector<std::string> delimiters,
                                     std::size_t              pos)
{
   // std::isspace has undefined behavior when given a negative value other
   // than EOF. Plain char may be signed, so convert through unsigned char
   // before classifying (relevant for non-ASCII bytes in the input).
   const auto isSpaceAt = [&s](std::size_t index)
   {
      return std::isspace(static_cast<unsigned char>(s[index])) != 0;
   };

   std::vector<std::string> tokens {};

   // One token per delimiter set, plus the remainder of the string
   tokens.reserve(delimiters.size() + 1);

   // Iterate through each set of delimiters
   for (const std::string& delimiterSet : delimiters)
   {
      // Skip leading spaces
      while (pos < s.size() && isSpaceAt(pos))
      {
         ++pos;
      }

      // By default, search for the delimiter starting at the current position
      std::size_t findPos = pos;

      if (pos < s.size() && s[pos] == '"')
      {
         // Do not search for a delimiter within a quoted string. Start the
         // search one past the closing quotation mark instead.
         findPos = s.find('"', pos + 1);

         // An unterminated quote leaves findPos at npos, which makes the
         // delimiter search below fail and ends tokenization
         if (findPos != std::string::npos)
         {
            ++findPos;
         }
      }

      // Search for any character of the current delimiter set
      const std::size_t nextPos = s.find_first_of(delimiterSet, findPos);

      // If the delimiter was not found, stop processing tokens. The text from
      // pos onward is added as the final token below.
      if (nextPos == std::string::npos)
      {
         break;
      }

      // Add the current substring as a token
      auto& newToken = tokens.emplace_back(s.substr(pos, nextPos - pos));
      boost::trim(newToken);

      // Continue after the delimiter, at the next non-space character. This
      // ensures trailing whitespace alone does not produce an empty token.
      pos = nextPos + 1;
      while (pos < s.size() && isSpaceAt(pos))
      {
         ++pos;
      }
   }

   // Add the remainder of the string as a token
   if (pos < s.size())
   {
      auto& newToken = tokens.emplace_back(s.substr(pos));
      boost::trim(newToken);
   }

   return tokens;
}
std::string ToString(const std::vector<std::string>& v)
{
std::string value {};