Migrate UGC from std::regex to RE2

This commit is contained in:
Dan Paulat 2023-12-10 21:28:17 -06:00
parent ad87fc77fe
commit 4f309ecb39

View file

@ -2,12 +2,12 @@
#include <scwx/util/logger.hpp> #include <scwx/util/logger.hpp>
#include <map> #include <map>
#include <regex>
#include <boost/assign.hpp> #include <boost/assign.hpp>
#include <boost/bimap.hpp> #include <boost/bimap.hpp>
#include <boost/bimap/unordered_set_of.hpp> #include <boost/bimap/unordered_set_of.hpp>
#include <boost/tokenizer.hpp> #include <boost/tokenizer.hpp>
#include <re2/re2.h>
namespace scwx namespace scwx
{ {
@ -104,10 +104,10 @@ bool Ugc::Parse(const std::vector<std::string>& ugcString)
bool dataValid = false; bool dataValid = false;
// UGC takes the form SSFNNN-NNN>NNN-SSFNNN-DDHHMM- (NWSI 10-1702) // UGC takes the form SSFNNN-NNN>NNN-SSFNNN-DDHHMM- (NWSI 10-1702)
static const std::regex reStart {"[A-Z]{2}[CZ]([0-9]{3}|ALL)"}; static constexpr LazyRE2 reStart = {"[A-Z]{2}[CZ]([0-9]{3}|ALL)"};
static const std::regex reAnyFipsId {"([0-9]{3}|ALL)"}; static constexpr LazyRE2 reAnyFipsId = {"([0-9]{3}|ALL)"};
static const std::regex reSpecificFipsId {"(?!0{3})[0-9]{3}"}; static constexpr LazyRE2 reSpecificFipsId = {"[0-9]{3}"};
static const std::regex reProductExpiration {"[0-9]{6}"}; static constexpr LazyRE2 reProductExpiration = {"[0-9]{6}"};
std::stringstream ugcStream; std::stringstream ugcStream;
for (auto& line : ugcString) for (auto& line : ugcString)
@ -131,7 +131,7 @@ bool Ugc::Parse(const std::vector<std::string>& ugcString)
for (auto& token : tokens) for (auto& token : tokens)
{ {
// Product Expiration is the final token // Product Expiration is the final token
if (std::regex_match(token, reProductExpiration)) if (RE2::FullMatch(token, *reProductExpiration))
{ {
p->productExpiration_ = token; p->productExpiration_ = token;
dataValid = true; dataValid = true;
@ -153,7 +153,7 @@ bool Ugc::Parse(const std::vector<std::string>& ugcString)
// Look for the start of the UGC string (may be multiple per UGC string // Look for the start of the UGC string (may be multiple per UGC string
// for multiple states, territories, or marine area) // for multiple states, territories, or marine area)
if (std::regex_match(firstToken, reStart)) if (RE2::FullMatch(firstToken, *reStart))
{ {
currentState = firstToken.substr(0, 2); currentState = firstToken.substr(0, 2);
currentFormat = ugcFormatMap_.right.at(firstToken.at(2)); currentFormat = ugcFormatMap_.right.at(firstToken.at(2));
@ -167,7 +167,7 @@ bool Ugc::Parse(const std::vector<std::string>& ugcString)
} }
// Look for additional FIPS IDs in the UGC string // Look for additional FIPS IDs in the UGC string
else if (!currentState.empty() && else if (!currentState.empty() &&
std::regex_match(firstToken, reAnyFipsId)) RE2::FullMatch(firstToken, *reAnyFipsId))
{ {
firstFipsId = firstToken; firstFipsId = firstToken;
} }
@ -188,7 +188,8 @@ bool Ugc::Parse(const std::vector<std::string>& ugcString)
{ {
std::string secondToken {(++tokenIt).current_token()}; std::string secondToken {(++tokenIt).current_token()};
if (std::regex_match(secondToken, reSpecificFipsId)) if (RE2::FullMatch(secondToken, *reSpecificFipsId) &&
secondToken != "000")
{ {
secondFipsId = secondToken; secondFipsId = secondToken;
} }