mirror of
https://github.com/ciphervance/supercell-wx.git
synced 2025-10-30 08:00:06 +00:00
Create custom string tokenizer for use in placefile parsing
- Avoids the use of regular expressions, and is expected to be more efficient with large placefiles
This commit is contained in:
parent
88475f5b0e
commit
6767c0c50a
4 changed files with 148 additions and 0 deletions
62
test/source/scwx/util/strings.test.cpp
Normal file
62
test/source/scwx/util/strings.test.cpp
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
#include <scwx/util/strings.hpp>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace scwx
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
|
||||
// A "Color:" statement is split on the colon and then on each space; the text
// left after the final delimiter comes back as one last token.
TEST(StringsTest, ParseTokensColor)
{
   const std::string input {"Color: red green blue alpha discarded"};
   const std::vector<std::string> separators {":", " ", " ", " ", " "};
   const std::vector<std::string> expected {
      "Color", "red", "green", "blue", "alpha", "discarded"};

   const std::vector<std::string> result = ParseTokens(input, separators);

   // Abort on a size mismatch so the element checks never index out of range
   ASSERT_EQ(result.size(), expected.size());

   for (std::size_t i = 0; i < expected.size(); ++i)
   {
      EXPECT_EQ(result[i], expected[i]);
   }
}
|
||||
|
||||
// Parsing begins just past the "Color:" keyword, so only the four values are
// expected; the last one is the remainder left when no further space is found.
TEST(StringsTest, ParseTokensColorOffset)
{
   const std::string input {"Color: red green blue alpha"};
   const std::vector<std::string> separators {" ", " ", " ", " "};
   const std::size_t startPos = std::string {"Color:"}.size();
   const std::vector<std::string> expected {"red", "green", "blue", "alpha"};

   const std::vector<std::string> result =
      ParseTokens(input, separators, startPos);

   // Abort on a size mismatch so the element checks never index out of range
   ASSERT_EQ(result.size(), expected.size());

   for (std::size_t i = 0; i < expected.size(); ++i)
   {
      EXPECT_EQ(result[i], expected[i]);
   }
}
|
||||
|
||||
// A "Text:" statement whose quoted fields contain commas: the commas inside
// the quotes must not split the field, and the quotes stay part of the token.
TEST(StringsTest, ParseTokensText)
{
   const std::string input {
      "Text: lat, lon, fontNumber, \"string, string\", \"hover, hover\", "
      "discarded"};
   const std::vector<std::string> separators {":", ",", ",", ",", ",", ","};
   const std::vector<std::string> expected {"Text",
                                            "lat",
                                            "lon",
                                            "fontNumber",
                                            "\"string, string\"",
                                            "\"hover, hover\"",
                                            "discarded"};

   const std::vector<std::string> result = ParseTokens(input, separators);

   // Abort on a size mismatch so the element checks never index out of range
   ASSERT_EQ(result.size(), expected.size());

   for (std::size_t i = 0; i < expected.size(); ++i)
   {
      EXPECT_EQ(result[i], expected[i]);
   }
}
|
||||
|
||||
} // namespace util
|
||||
} // namespace scwx
|
||||
|
|
@ -31,6 +31,7 @@ set(SRC_QT_UTIL_TESTS source/scwx/qt/util/q_file_input_stream.test.cpp)
|
|||
set(SRC_UTIL_TESTS source/scwx/util/float.test.cpp
|
||||
source/scwx/util/rangebuf.test.cpp
|
||||
source/scwx/util/streams.test.cpp
|
||||
source/scwx/util/strings.test.cpp
|
||||
source/scwx/util/vectorbuf.test.cpp)
|
||||
set(SRC_WSR88D_TESTS source/scwx/wsr88d/ar2v_file.test.cpp
|
||||
source/scwx/wsr88d/level3_file.test.cpp
|
||||
|
|
|
|||
|
|
@ -8,6 +8,25 @@ namespace scwx
|
|||
namespace util
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief Parse a list of tokens from a string
|
||||
*
|
||||
* This function will take an input string, and apply the delimiters vector in
|
||||
* order to tokenize the string. Each set of delimiters in the delimiters vector
|
||||
* will be used once. A set of delimiters will be used to match any character,
|
||||
* rather than a sequence of characters. Tokens are automatically trimmed of any
|
||||
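* whitespace. A token that begins with a double quote extends through its
* closing quote, so delimiter characters inside the quotes are not matched.
* Parsing stops at the first delimiter set that is not found; any text left
* over is returned as a final token.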
|
||||
*
|
||||
* @param [in] s Input string to tokenize
|
||||
* @param [in] delimiters Delimiter character sets, applied in order: any one character of element i terminates token i.
|
||||
* @param [in] pos Search begin position. Default is 0.
|
||||
*
|
||||
* @return Whitespace-trimmed tokens, in the order they appear in the input
|
||||
*/
|
||||
std::vector<std::string> ParseTokens(const std::string& s,
|
||||
std::vector<std::string> delimiters,
|
||||
std::size_t pos = 0);
|
||||
|
||||
std::string ToString(const std::vector<std::string>& v);
|
||||
|
||||
} // namespace util
|
||||
|
|
|
|||
|
|
@ -1,10 +1,76 @@
|
|||
#include <scwx/util/strings.hpp>
|
||||
|
||||
#include <boost/algorithm/string/trim.hpp>
|
||||
|
||||
namespace scwx
|
||||
{
|
||||
namespace util
|
||||
{
|
||||
|
||||
std::vector<std::string> ParseTokens(const std::string& s,
|
||||
std::vector<std::string> delimiters,
|
||||
std::size_t pos)
|
||||
{
|
||||
std::vector<std::string> tokens {};
|
||||
std::size_t findPos {};
|
||||
|
||||
// Iterate through each delimiter
|
||||
for (std::size_t i = 0; i < delimiters.size() && pos != std::string::npos;
|
||||
++i)
|
||||
{
|
||||
// Skip leading spaces
|
||||
while (pos < s.size() && std::isspace(s[pos]))
|
||||
{
|
||||
++pos;
|
||||
}
|
||||
|
||||
if (pos < s.size() && s[pos] == '"')
|
||||
{
|
||||
// Do not search for a delimeter within a quoted string
|
||||
findPos = s.find('"', pos + 1);
|
||||
|
||||
// Increment search start to one after quotation mark
|
||||
if (findPos != std::string::npos)
|
||||
{
|
||||
++findPos;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Search starting at the current position
|
||||
findPos = pos;
|
||||
}
|
||||
|
||||
// Search for delimiter
|
||||
std::size_t nextPos = s.find_first_of(delimiters[i], findPos);
|
||||
|
||||
// If the delimiter was not found, stop processing tokens
|
||||
if (nextPos == std::string::npos)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// Add the current substring as a token
|
||||
auto& newToken = tokens.emplace_back(s.substr(pos, nextPos - pos));
|
||||
boost::trim(newToken);
|
||||
|
||||
// Increment nextPos until the next non-space character
|
||||
while (++nextPos < s.size() && std::isspace(s[nextPos])) {}
|
||||
|
||||
// Store new position value
|
||||
pos = nextPos;
|
||||
}
|
||||
|
||||
// Add the remainder of the string as a token
|
||||
if (pos < s.size())
|
||||
{
|
||||
auto& newToken = tokens.emplace_back(s.substr(pos));
|
||||
boost::trim(newToken);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
std::string ToString(const std::vector<std::string>& v)
|
||||
{
|
||||
std::string value {};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue