From d3b3ac6be6ed00c08e5e9e78f33f9344425b4c3c Mon Sep 17 00:00:00 2001
From: Dan Paulat
Date: Fri, 14 Oct 2022 23:31:35 -0500
Subject: [PATCH] Adding UGC class for structured UGC

---
Reviewer notes (ignored when the patch is applied):
- This patch was rebuilt from a copy that had lost its line breaks,
  indentation and every <...> span. Include targets and template arguments
  were re-derived from how the names are used. The scwx/util/logger.hpp path
  and the leading whitespace of the wxdata.cmake context lines are
  assumptions -- confirm them (or apply with --ignore-whitespace).
- The blob IDs on the "index" lines predate this review.
- Ugc::Parse() now starts from a clean state, clears all results when parsing
  fails, and no longer fills an unused std::stringstream.

 wxdata/include/scwx/awips/ugc.hpp |  54 ++++++
 wxdata/source/scwx/awips/ugc.cpp  | 261 ++++++++++++++++++++++++++++++
 wxdata/wxdata.cmake               |   2 +
 3 files changed, 317 insertions(+)
 create mode 100644 wxdata/include/scwx/awips/ugc.hpp
 create mode 100644 wxdata/source/scwx/awips/ugc.cpp

diff --git a/wxdata/include/scwx/awips/ugc.hpp b/wxdata/include/scwx/awips/ugc.hpp
new file mode 100644
index 00000000..a47e0ad5
--- /dev/null
+++ b/wxdata/include/scwx/awips/ugc.hpp
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace scwx
+{
+namespace awips
+{
+
+class UgcImpl;
+
+/**
+ * @brief Parsed form of a UGC string (NWSI 10-1702).
+ *
+ * Move-only; all state is held behind a pointer to UgcImpl.
+ */
+class Ugc
+{
+public:
+   explicit Ugc();
+   ~Ugc();
+
+   Ugc(const Ugc&) = delete;
+   Ugc& operator=(const Ugc&) = delete;
+
+   Ugc(Ugc&&) noexcept;
+   Ugc& operator=(Ugc&&) noexcept;
+
+   /** @brief Two-character SS prefix of every section that was parsed. */
+   std::vector<std::string> states() const;
+
+   /** @brief Every parsed ID rendered as SSFNNN (NNN is 000 for "all"). */
+   std::vector<std::string> fips_ids() const;
+
+   /** @brief The six-digit DDHHMM token that ended the UGC string. */
+   std::string product_expiration() const;
+
+   /**
+    * @brief Parses a UGC string that may be split across several lines.
+    *
+    * @param ugcString Lines that are concatenated before parsing
+    *
+    * @return true if the expiration token was reached before an invalid token
+    */
+   bool Parse(const std::vector<std::string>& ugcString);
+
+private:
+   std::unique_ptr<UgcImpl> p;
+};
+
+} // namespace awips
+} // namespace scwx
diff --git a/wxdata/source/scwx/awips/ugc.cpp b/wxdata/source/scwx/awips/ugc.cpp
new file mode 100644
index 00000000..f49ea36a
--- /dev/null
+++ b/wxdata/source/scwx/awips/ugc.cpp
@@ -0,0 +1,261 @@
+#include <scwx/awips/ugc.hpp>
+#include <scwx/util/logger.hpp>
+
+#include <cstdint>
+#include <format>
+#include <iterator>
+#include <map>
+#include <regex>
+
+#include <boost/assign/list_of.hpp>
+#include <boost/bimap.hpp>
+#include <boost/bimap/unordered_set_of.hpp>
+#include <boost/tokenizer.hpp>
+
+namespace scwx
+{
+namespace awips
+{
+
+static const std::string logPrefix_ = "scwx::awips::ugc";
+static const auto logger_ = util::Logger::Create(logPrefix_);
+
+// Kind of area named by the third character of an SSFNNN token
+enum class UgcFormat
+{
+   Counties,
+   Zones,
+   Unknown
+};
+
+using UgcFormatBimap =
+   boost::bimap<boost::bimaps::unordered_set_of<UgcFormat>,
+                boost::bimaps::unordered_set_of<char>>;
+
+// Two-way lookup between a format and its UGC format character
+static const UgcFormatBimap ugcFormatMap_ =
+   boost::assign::list_of<UgcFormatBimap::relation> //
+   (UgcFormat::Counties, 'C') //
+   (UgcFormat::Zones, 'Z') //
+   (UgcFormat::Unknown, '?');
+
+class UgcImpl
+{
+public:
+   explicit UgcImpl() :
+       ugcString_ {},
+       format_ {UgcFormat::Unknown},
+       fipsIdMap_ {},
+       productExpiration_ {},
+       valid_ {false}
+   {
+   }
+
+   ~UgcImpl() = default;
+
+   // Returns the parse results to their freshly constructed values
+   void Reset()
+   {
+      format_ = UgcFormat::Unknown;
+      fipsIdMap_.clear();
+      productExpiration_.clear();
+      valid_ = false;
+   }
+
+   std::vector<std::string> ugcString_;
+
+   UgcFormat format_;
+   std::map<std::string, std::vector<uint16_t>> fipsIdMap_;
+   std::string productExpiration_;
+
+   bool valid_;
+};
+
+Ugc::Ugc() : p(std::make_unique<UgcImpl>()) {}
+Ugc::~Ugc() = default;
+
+Ugc::Ugc(Ugc&&) noexcept = default;
+Ugc& Ugc::operator=(Ugc&&) noexcept = default;
+
+std::vector<std::string> Ugc::states() const
+{
+   std::vector<std::string> states {};
+   states.reserve(p->fipsIdMap_.size());
+
+   for (const auto& entry : p->fipsIdMap_)
+   {
+      states.push_back(entry.first);
+   }
+
+   return states;
+}
+
+std::vector<std::string> Ugc::fips_ids() const
+{
+   std::vector<std::string> fipsIds {};
+
+   // One character serves every ID: Parse() rejects mixed counties/zones
+   const char formatChar = ugcFormatMap_.left.at(p->format_);
+
+   for (const auto& [state, ids] : p->fipsIdMap_)
+   {
+      for (const uint16_t id : ids)
+      {
+         fipsIds.push_back(std::format("{}{}{:03}", state, formatChar, id));
+      }
+   }
+
+   return fipsIds;
+}
+
+std::string Ugc::product_expiration() const
+{
+   return p->productExpiration_;
+}
+
+bool Ugc::Parse(const std::vector<std::string>& ugcString)
+{
+   bool dataValid = false;
+
+   // UGC takes the form SSFNNN-NNN>NNN-SSFNNN-DDHHMM- (NWSI 10-1702)
+   static const std::regex reStart {"[A-Z]{2}[CZ]([0-9]{3}|ALL)"};
+   static const std::regex reAnyFipsId {"([0-9]{3}|ALL)"};
+   static const std::regex reSpecificFipsId {"(?!0{3})[0-9]{3}"};
+   static const std::regex reProductExpiration {"[0-9]{6}"};
+
+   // Discard the results of any earlier call so they are not mixed in
+   p->Reset();
+
+   // Concatenate UGC lines into a single string
+   std::string ugc {};
+   for (const std::string& line : ugcString)
+   {
+      ugc += line;
+   }
+
+   boost::char_separator<char> sectionDelimiter("-");
+   boost::char_separator<char> rangeDelimiter(">");
+   boost::tokenizer tokens(ugc, sectionDelimiter);
+
+   std::string currentState {};
+
+   for (const std::string& token : tokens)
+   {
+      // Product Expiration is the final token
+      if (std::regex_match(token, reProductExpiration))
+      {
+         p->productExpiration_ = token;
+         dataValid = true;
+         break;
+      }
+
+      // Tokenize string again by ">" (two range tokens indicate NNN>XXX)
+      boost::tokenizer rangeTokens(token, rangeDelimiter);
+      const size_t numRangeTokens =
+         std::distance(rangeTokens.begin(), rangeTokens.end());
+      bool tokenValid = true;
+      bool allFipsIds = false;
+      auto tokenIt = rangeTokens.begin();
+      std::string firstToken {tokenIt.current_token()};
+      UgcFormat currentFormat {p->format_};
+      std::string firstFipsId {};
+      std::string secondFipsId {};
+
+      // Look for the start of the UGC string (may be multiple per UGC string
+      // for multiple states, territories, or marine area)
+      if (std::regex_match(firstToken, reStart))
+      {
+         currentState = firstToken.substr(0, 2);
+         currentFormat = ugcFormatMap_.right.at(firstToken.at(2));
+         firstFipsId = firstToken.substr(3, 3);
+
+         // The UGC string must contain counties or zones, but not both
+         if (p->format_ != UgcFormat::Unknown && p->format_ != currentFormat)
+         {
+            tokenValid = false;
+         }
+      }
+      // Look for additional FIPS IDs in the UGC string
+      else if (!currentState.empty() &&
+               std::regex_match(firstToken, reAnyFipsId))
+      {
+         firstFipsId = firstToken;
+      }
+      // If we see anything else, the UGC token is invalid
+      else
+      {
+         tokenValid = false;
+      }
+
+      // All counties or zones are specified by using "000" or "ALL"
+      if (firstFipsId == "000" || firstFipsId == "ALL")
+      {
+         allFipsIds = true;
+      }
+
+      // Parse the second token in a range (i.e., NNN>XXX)
+      if (numRangeTokens == 2)
+      {
+         std::string secondToken {(++tokenIt).current_token()};
+
+         if (std::regex_match(secondToken, reSpecificFipsId))
+         {
+            secondFipsId = secondToken;
+         }
+         else
+         {
+            tokenValid = false;
+         }
+      }
+
+      // Check validity before using parsed data
+      if (!tokenValid || numRangeTokens > 2 ||
+          (allFipsIds && numRangeTokens > 1))
+      {
+         logger_->warn("Invalid token: {}", token);
+         break;
+      }
+
+      p->format_ = currentFormat;
+      auto& fipsIds = p->fipsIdMap_[currentState];
+
+      if (allFipsIds)
+      {
+         fipsIds.push_back(0);
+      }
+      else
+      {
+         // Insert the FIPS ID (NNN) from the token
+         fipsIds.push_back(static_cast<uint16_t>(std::stoul(firstFipsId)));
+
+         if (numRangeTokens == 2)
+         {
+            // Insert the remainder of the FIPS IDs in the range given by the
+            // token (NNN>XXX)
+            const uint16_t first = fipsIds.back();
+            const uint16_t last =
+               static_cast<uint16_t>(std::stoul(secondFipsId));
+
+            for (uint16_t i = first + 1; i <= last; i++)
+            {
+               fipsIds.push_back(i);
+            }
+         }
+      }
+   }
+
+   if (dataValid)
+   {
+      p->valid_ = true;
+   }
+   else
+   {
+      // Never expose the results of a partial parse
+      p->Reset();
+   }
+
+   return dataValid;
+}
+
+} // namespace awips
+} // namespace scwx
diff --git a/wxdata/wxdata.cmake b/wxdata/wxdata.cmake
index e2234524..7a2703f4 100644
--- a/wxdata/wxdata.cmake
+++ b/wxdata/wxdata.cmake
@@ -14,6 +14,7 @@ set(HDR_AWIPS include/scwx/awips/coded_location.hpp
               include/scwx/awips/significance.hpp
               include/scwx/awips/text_product_file.hpp
               include/scwx/awips/text_product_message.hpp
+              include/scwx/awips/ugc.hpp
               include/scwx/awips/wmo_header.hpp)
 set(SRC_AWIPS source/scwx/awips/coded_location.cpp
               source/scwx/awips/coded_time_motion_location.cpp
@@ -23,6 +24,7 @@ set(SRC_AWIPS source/scwx/awips/coded_location.cpp
               source/scwx/awips/significance.cpp
               source/scwx/awips/text_product_file.cpp
               source/scwx/awips/text_product_message.cpp
+              source/scwx/awips/ugc.cpp
               source/scwx/awips/wmo_header.cpp)
 set(HDR_COMMON include/scwx/common/characters.hpp
                include/scwx/common/color_table.hpp