mirror of
https://github.com/ciphervance/supercell-wx.git
synced 2025-10-30 15:40:04 +00:00
Create custom string tokenizer for use in placefile parsing
- Avoids the use of regular expressions, and is expected to be more efficient with large placefiles
This commit is contained in:
parent
88475f5b0e
commit
6767c0c50a
4 changed files with 148 additions and 0 deletions
|
|
@ -8,6 +8,25 @@ namespace scwx
|
|||
namespace util
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief Parse a list of tokens from a string
|
||||
*
|
||||
* This function will take an input string, and apply the delimiters vector in
|
||||
* order to tokenize the string. Each set of delimiters in the delimiters vector
|
||||
* will be used once. A set of delimiters will be used to match any character,
|
||||
* rather than a sequence of characters. Tokens are automatically trimmed of any
|
||||
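* whitespace. If a token begins with a double quote, delimiters are not
* matched until after the closing quote; the quotes remain part of the token.
* Processing stops at the first set of delimiters that is not found, and any
* text remaining in the string is returned as a final token.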
|
||||
*
|
||||
* @param [in] s Input string to tokenize
|
||||
* @param [in] delimiters Sets of delimiter characters, applied in order. Any
* single character in a set ends the corresponding token.
|
||||
* @param [in] pos Search begin position. Default is 0.
|
||||
*
|
||||
* @return Vector of whitespace-trimmed tokens, in order of appearance
|
||||
*/
|
||||
std::vector<std::string> ParseTokens(const std::string& s,
|
||||
std::vector<std::string> delimiters,
|
||||
std::size_t pos = 0);
|
||||
|
||||
std::string ToString(const std::vector<std::string>& v);
|
||||
|
||||
} // namespace util
|
||||
|
|
|
|||
|
|
@ -1,10 +1,76 @@
|
|||
#include <scwx/util/strings.hpp>

#include <cctype>

#include <boost/algorithm/string/trim.hpp>
|
||||
|
||||
namespace scwx
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
|
||||
// Returns true when c is a whitespace character according to std::isspace.
// The cast to unsigned char is required: passing a negative value (any byte
// >= 0x80 on platforms where char is signed) to std::isspace is undefined
// behavior.
static bool IsTokenSpace(char c)
{
   return std::isspace(static_cast<unsigned char>(c)) != 0;
}

// Returns a copy of s[first, last) with leading and trailing whitespace
// removed. Requires first <= last <= s.size().
static std::string TrimmedToken(const std::string& s,
                                std::size_t        first,
                                std::size_t        last)
{
   while (first < last && IsTokenSpace(s[first]))
   {
      ++first;
   }

   while (last > first && IsTokenSpace(s[last - 1]))
   {
      --last;
   }

   return s.substr(first, last - first);
}

/**
 * @brief Parse a list of tokens from a string
 *
 * Each entry of delimiters is a set of characters and is used once, in order:
 * the first occurrence of any character in the set ends the current token.
 * Tokens are trimmed of surrounding whitespace. When a token begins with a
 * double quote, the delimiter search resumes only after the closing quote, so
 * delimiters inside the quotes are ignored; the quotes are kept in the token.
 * If a set of delimiters is not found, processing stops. Any text left over is
 * appended as a final token.
 *
 * @param [in] s Input string to tokenize
 * @param [in] delimiters Sets of delimiter characters, one set per token
 * @param [in] pos Search begin position
 *
 * @return Vector of trimmed tokens, in order of appearance
 */
std::vector<std::string> ParseTokens(const std::string&       s,
                                     std::vector<std::string> delimiters,
                                     std::size_t              pos)
{
   std::vector<std::string> tokens {};

   // At most one token per delimiter set, plus the remainder
   tokens.reserve(delimiters.size() + 1);

   // Iterate through each set of delimiters
   for (std::size_t i = 0; i < delimiters.size() && pos != std::string::npos;
        ++i)
   {
      // Skip leading spaces
      while (pos < s.size() && IsTokenSpace(s[pos]))
      {
         ++pos;
      }

      // By default, search for the delimiter starting at the current position
      std::size_t findPos = pos;

      if (pos < s.size() && s[pos] == '"')
      {
         // Do not search for a delimiter within a quoted string
         findPos = s.find('"', pos + 1);

         // Start the search one past the closing quotation mark. If there is
         // no closing quote, findPos stays npos and the search below fails,
         // leaving the rest of the string to be returned as the remainder.
         if (findPos != std::string::npos)
         {
            ++findPos;
         }
      }

      // Search for any character in the current set of delimiters
      const std::size_t delimiterPos = s.find_first_of(delimiters[i], findPos);

      // If the delimiter was not found, stop processing tokens
      if (delimiterPos == std::string::npos)
      {
         break;
      }

      // Add the current substring as a token
      tokens.push_back(TrimmedToken(s, pos, delimiterPos));

      // Continue after the delimiter, at the next non-space character
      pos = delimiterPos + 1;
      while (pos < s.size() && IsTokenSpace(s[pos]))
      {
         ++pos;
      }
   }

   // Add the remainder of the string as a token
   if (pos < s.size())
   {
      tokens.push_back(TrimmedToken(s, pos, s.size()));
   }

   return tokens;
}
|
||||
|
||||
std::string ToString(const std::vector<std::string>& v)
|
||||
{
|
||||
std::string value {};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue