NOTE(review): this patch was rebuilt from a copy in which line breaks,
indentation and every angle-bracketed token (#include targets, template
arguments) had been lost. Confirm the following before applying:
  * whitespace and blank lines of the context lines in the CMake hunks are a
    best guess (try "git apply --ignore-whitespace" if they do not match);
  * the #include targets and sys_time<minutes> are inferred from how the code
    uses them;
  * the "index" blob ids predate the edits made to the new files.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6e92d732..bffe974f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,15 +14,19 @@ include(${PROJECT_SOURCE_DIR}/external/cmake-conan/conan.cmake)
 
 conan_cmake_configure(REQUIRES aws-sdk-cpp/1.9.234
                                boost/1.78.0
+                               cpr/1.9.0
                                freetype/2.12.1
                                geographiclib/1.52
                                glew/2.2.0
                                glm/cci.20220420
                                gtest/cci.20210126
+                               libcurl/7.85.0
+                               libxml2/2.9.14
                                openssl/1.1.1q
                                spdlog/1.10.0
                                sqlite3/3.39.4
                                vulkan-loader/1.3.221
+                               zlib/1.2.13
                       GENERATORS cmake
                                  cmake_find_package
                                  cmake_paths
diff --git a/test/source/scwx/network/dir_list.test.cpp b/test/source/scwx/network/dir_list.test.cpp
new file mode 100644
index 00000000..9bac666a
--- /dev/null
+++ b/test/source/scwx/network/dir_list.test.cpp
@@ -0,0 +1,28 @@
+#include <scwx/network/dir_list.hpp>
+
+#include <gtest/gtest.h>
+
+namespace scwx
+{
+namespace network
+{
+
+static const std::string kDefaultUrl {"https://warnings.allisonhouse.com"};
+static const std::string kAlternateUrl {"http://warnings.cod.edu"};
+
+TEST(DirList, GetDefaultUrl)
+{
+   auto records = DirList(kDefaultUrl);
+
+   EXPECT_GT(records.size(), 0u);
+}
+
+TEST(DirList, GetAlternateUrl)
+{
+   auto records = DirList(kAlternateUrl);
+
+   EXPECT_GT(records.size(), 0u);
+}
+
+} // namespace network
+} // namespace scwx
diff --git a/test/test.cmake b/test/test.cmake
index 31b7bc50..efc9300d 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -15,6 +15,7 @@ set(SRC_AWIPS_TESTS source/scwx/awips/coded_location.test.cpp
                     source/scwx/awips/ugc.test.cpp)
 set(SRC_COMMON_TESTS source/scwx/common/color_table.test.cpp
                      source/scwx/common/products.test.cpp)
+set(SRC_NETWORK_TESTS source/scwx/network/dir_list.test.cpp)
 set(SRC_PROVIDER_TESTS source/scwx/provider/aws_level2_data_provider.test.cpp
                        source/scwx/provider/aws_level3_data_provider.test.cpp)
 set(SRC_QT_CONFIG_TESTS source/scwx/qt/config/county_database.test.cpp
@@ -33,6 +34,7 @@ set(CMAKE_FILES test.cmake)
 add_executable(wxtest ${SRC_MAIN}
                       ${SRC_AWIPS_TESTS}
                       ${SRC_COMMON_TESTS}
+                      ${SRC_NETWORK_TESTS}
                       ${SRC_PROVIDER_TESTS}
                       ${SRC_QT_CONFIG_TESTS}
                       ${SRC_QT_MANAGER_TESTS}
@@ -43,6 +45,7 @@ add_executable(wxtest ${SRC_MAIN}
 source_group("Source Files\\main" FILES ${SRC_MAIN})
 source_group("Source Files\\awips" FILES ${SRC_AWIPS_TESTS})
 source_group("Source Files\\common" FILES ${SRC_COMMON_TESTS})
+source_group("Source Files\\network" FILES ${SRC_NETWORK_TESTS})
 source_group("Source Files\\provider" FILES ${SRC_PROVIDER_TESTS})
 source_group("Source Files\\qt\\config" FILES ${SRC_QT_CONFIG_TESTS})
 source_group("Source Files\\qt\\manager" FILES ${SRC_QT_MANAGER_TESTS})
diff --git a/wxdata/include/scwx/network/dir_list.hpp b/wxdata/include/scwx/network/dir_list.hpp
new file mode 100644
index 00000000..3804b4e6
--- /dev/null
+++ b/wxdata/include/scwx/network/dir_list.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <chrono>
+#include <filesystem>
+#include <string>
+#include <vector>
+
+namespace scwx
+{
+namespace network
+{
+
+struct DirListRecord
+{
+   std::string                filename_ = {};
+   std::filesystem::file_type type_ = std::filesystem::file_type::unknown;
+   std::chrono::system_clock::time_point mtime_ =
+      {};             ///< Modified time (server time)
+   size_t size_ = 0u; ///< Approximate file size in bytes
+};
+
+/**
+ * @brief Retrieve Directory Listing
+ *
+ * Retrieves a directory listing. Supports default Apache-style directory
+ * listings only.
+ *
+ * @param baseUrl URL of the directory listing to retrieve
+ *
+ * @return Records found in the listing. Empty if the server does not answer
+ * with HTTP 200 (OK) or no entries are found.
+ */
+std::vector<DirListRecord> DirList(const std::string& baseUrl);
+
+} // namespace network
+} // namespace scwx
diff --git a/wxdata/source/scwx/network/dir_list.cpp b/wxdata/source/scwx/network/dir_list.cpp
new file mode 100644
index 00000000..9f3e9c38
--- /dev/null
+++ b/wxdata/source/scwx/network/dir_list.cpp
@@ -0,0 +1,332 @@
+#define LIBXML_HTML_ENABLED
+
+#include <scwx/network/dir_list.hpp>
+#include <scwx/util/logger.hpp>
+
+#include <chrono>
+#include <cmath>
+#include <cstdarg>
+#include <cstdio>
+#include <cstring>
+#include <limits>
+#include <sstream>
+#include <utility>
+
+#pragma warning(push, 0)
+#include <boost/algorithm/string/trim.hpp>
+#include <cpr/cpr.h>
+#include <libxml/HTMLparser.h>
+#pragma warning(pop)
+
+namespace scwx
+{
+namespace network
+{
+
+static const std::string logPrefix_ = "scwx::network::dir_list";
+static const auto        logger_    = util::Logger::Create(logPrefix_);
+
+static const cpr::SslOptions  kSslOptions_ = cpr::Ssl(cpr::ssl::TLSv1_2 {});
+static const cpr::HttpVersion kHttpVersion_ {
+   cpr::HttpVersionCode::VERSION_2_0_TLS};
+
+class DirListSAXHandler
+{
+public:
+   DirListSAXHandler() = delete;
+   static void StartDocument(void* userData);
+   static void EndDocument(void* userData);
+   static void
+   StartElement(void* userData, const xmlChar* name, const xmlChar** attrs);
+   static void EndElement(void* userData, const xmlChar* name);
+   static void Characters(void* userData, const xmlChar* ch, int len);
+   static void Warning(void* userData, const char* msg, ...);
+   static void Error(void* userData, const char* msg, ...);
+   static void Critical(void* userData, const char* msg, ...);
+};
+
+struct DirListSAXData
+{
+   enum class State
+   {
+      FindingLink,
+      FoundLink,
+      UpdateLinkTimestamp,
+      UpdateLinkSize
+   };
+   State  state_ {State::FindingLink};
+   size_t warningCount_ {0u};
+   size_t errorCount_ {0u};
+   size_t criticalCount_ {0u};
+
+   std::vector<DirListRecord> records_;
+};
+
+static htmlSAXHandler saxHandler_ //
+   {.startElement = &DirListSAXHandler::StartElement,
+    .endElement   = &DirListSAXHandler::EndElement,
+    .characters   = &DirListSAXHandler::Characters,
+    .warning      = &DirListSAXHandler::Warning,
+    .error        = &DirListSAXHandler::Error,
+    .fatalError   = &DirListSAXHandler::Critical};
+
+/**
+ * @brief Expands a printf-style libxml2 diagnostic into a string
+ *
+ * @param msg printf-style format string supplied to the SAX callback
+ * @param args Variadic arguments that accompany the format string
+ *
+ * @return Formatted message with trailing whitespace removed, or the raw
+ * format string if it could not be expanded
+ */
+static std::string FormatSAXMessage(const char* msg, va_list args)
+{
+   if (msg == nullptr)
+   {
+      return {};
+   }
+
+   // Measure with a copy, since a va_list can only be traversed once
+   va_list measureArgs;
+   va_copy(measureArgs, args);
+   const int length = std::vsnprintf(nullptr, 0, msg, measureArgs);
+   va_end(measureArgs);
+
+   if (length < 0)
+   {
+      return msg;
+   }
+
+   std::string message(static_cast<size_t>(length) + 1u, '\0');
+   std::vsnprintf(message.data(), message.size(), msg, args);
+   message.resize(static_cast<size_t>(length));
+   boost::trim_right(message);
+
+   return message;
+}
+
+std::vector<DirListRecord> DirList(const std::string& baseUrl)
+{
+   logger_->trace("DirList: {}", baseUrl);
+
+   cpr::Response response =
+      cpr::Get(cpr::Url {baseUrl}, kSslOptions_, kHttpVersion_);
+   DirListSAXData saxData {};
+
+   if (response.status_code != cpr::status::HTTP_OK)
+   {
+      logger_->warn("Bad response from {}: {} ({})",
+                    baseUrl,
+                    response.error.message,
+                    response.status_code);
+   }
+   else
+   {
+      htmlDocPtr doc = htmlSAXParseDoc(
+         reinterpret_cast<const xmlChar*>(response.text.c_str()),
+         nullptr,
+         &saxHandler_,
+         &saxData);
+      if (doc != nullptr)
+      {
+         xmlFreeDoc(doc);
+      }
+   }
+
+   // Move the records out rather than copying the member of a local
+   return std::move(saxData.records_);
+}
+
+void DirListSAXHandler::StartElement(void*           userData,
+                                     const xmlChar*  name,
+                                     const xmlChar** attrs)
+{
+   logger_->trace("SAX: Start Element: {}", name);
+
+   DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
+
+   if (attrs != nullptr &&
+       strcmp(reinterpret_cast<const char*>(name), "a") == 0)
+   {
+      // If an "a" element is found, search for an "href" attribute. The
+      // attributes are alternating name/value entries ended by a null name,
+      // and an attribute without a value has a null value entry.
+      for (int i = 0; attrs[i] != nullptr; i += 2)
+      {
+         const xmlChar* attrName  = attrs[i];
+         const xmlChar* attrValue = attrs[i + 1];
+
+         if (attrValue != nullptr &&
+             strcmp(reinterpret_cast<const char*>(attrName), "href") == 0)
+         {
+            // If the "href" attribute is found, treat this as a new file
+            std::string filename {reinterpret_cast<const char*>(attrValue)};
+            std::filesystem::file_type fileType;
+
+            // Determine if the file is a directory
+            if (filename.ends_with("/"))
+            {
+               filename.pop_back();
+               fileType = std::filesystem::file_type::directory;
+            }
+            else
+            {
+               fileType = std::filesystem::file_type::regular;
+            }
+
+            // If the filename is valid, add it as a record
+            if (filename.size() > 0 && !filename.starts_with("?") &&
+                // And the filename is not a duplicate of the previous record
+                (data->records_.size() == 0 ||
+                 data->records_.back().filename_ != filename))
+            {
+               data->records_.emplace_back(filename, fileType);
+               data->state_ = DirListSAXData::State::FoundLink;
+               break;
+            }
+         }
+      }
+   }
+   for (int i = 0; attrs != nullptr && attrs[i] != nullptr; ++i)
+   {
+      logger_->trace(" Attribute: {}", attrs[i]);
+   }
+}
+
+void DirListSAXHandler::EndElement(void* userData, const xmlChar* name)
+{
+   logger_->trace("SAX: End Element: {}", name);
+
+   DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
+
+   if (data->state_ == DirListSAXData::State::FoundLink &&
+       strcmp(reinterpret_cast<const char*>(name), "a") == 0)
+   {
+      // The "a" element is closed, so begin looking for the timestamp
+      data->state_ = DirListSAXData::State::UpdateLinkTimestamp;
+   }
+}
+
+void DirListSAXHandler::Characters(void* userData, const xmlChar* ch, int len)
+{
+   std::string characters(reinterpret_cast<const char*>(ch), len);
+   logger_->trace("SAX: Characters: {}", characters);
+
+   DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
+
+   if (data->state_ == DirListSAXData::State::UpdateLinkTimestamp)
+   {
+      using namespace std::chrono;
+
+      // Date time format: yyyy-mm-dd hh:mm
+      static const std::string kDateTimeFormat {"%Y-%m-%d %H:%M"};
+
+      // Attempt to parse the date time
+      std::istringstream ssCharacters {characters};
+      sys_time<minutes>  mtime;
+      ssCharacters >> parse(kDateTimeFormat, mtime);
+
+      if (!ssCharacters.fail())
+      {
+         // Date time parsing succeeded, look for link size
+         auto& record  = data->records_.back();
+         record.mtime_ = mtime;
+
+         if (record.type_ == std::filesystem::file_type::directory)
+         {
+            // If the record is a directory, there is no size, skip to next link
+            data->state_ = DirListSAXData::State::FindingLink;
+         }
+         else
+         {
+            // After the time is parsed, get the file size
+            data->state_ = DirListSAXData::State::UpdateLinkSize;
+         }
+      }
+   }
+   else if (data->state_ == DirListSAXData::State::UpdateLinkSize)
+   {
+      // Trim the file size string
+      std::string fileSizeString {characters};
+      boost::trim(fileSizeString);
+
+      size_t multiplier = 1u;
+
+      // Look for size suffix
+      if (fileSizeString.ends_with("K"))
+      {
+         fileSizeString.pop_back();
+         multiplier = 1024u;
+      }
+      else if (fileSizeString.ends_with("M"))
+      {
+         fileSizeString.pop_back();
+         multiplier = 1024u * 1024u;
+      }
+      else if (fileSizeString.ends_with("G"))
+      {
+         fileSizeString.pop_back();
+         multiplier = 1024u * 1024u * 1024u;
+      }
+      else if (fileSizeString.ends_with("T"))
+      {
+         fileSizeString.pop_back();
+         multiplier = 1024ull * 1024ull * 1024ull * 1024ull;
+      }
+
+      try
+      {
+         // Parse the remaining file size string, and multiply by the suffix
+         const double scaledSize = std::stod(fileSizeString) * multiplier;
+
+         // Converting a negative, non-finite or oversized value to size_t is
+         // undefined, so treat it as something other than a file size
+         if (std::isfinite(scaledSize) && scaledSize >= 0.0 &&
+             scaledSize <
+                static_cast<double>((std::numeric_limits<size_t>::max)()))
+         {
+            data->records_.back().size_ = static_cast<size_t>(scaledSize);
+
+            // Look for the next link
+            data->state_ = DirListSAXData::State::FindingLink;
+         }
+      }
+      catch (const std::exception&)
+      {
+         // This was something other than a file size
+      }
+   }
+}
+
+void DirListSAXHandler::Warning(void* /* userData */, const char* msg, ...)
+{
+   va_list args;
+   va_start(args, msg);
+   const std::string message = FormatSAXMessage(msg, args);
+   va_end(args);
+
+   logger_->warn("SAX: {}", message);
+}
+
+void DirListSAXHandler::Error(void* /* userData */, const char* msg, ...)
+{
+   va_list args;
+   va_start(args, msg);
+   const std::string message = FormatSAXMessage(msg, args);
+   va_end(args);
+
+   logger_->error("SAX: {}", message);
+}
+
+void DirListSAXHandler::Critical(void* /* userData */, const char* msg, ...)
+{
+   va_list args;
+   va_start(args, msg);
+   const std::string message = FormatSAXMessage(msg, args);
+   va_end(args);
+
+   logger_->critical("SAX: {}", message);
+}
+
+} // namespace network
+} // namespace scwx
diff --git a/wxdata/wxdata.cmake b/wxdata/wxdata.cmake
index e69496db..3bbfb561 100644
--- a/wxdata/wxdata.cmake
+++ b/wxdata/wxdata.cmake
@@ -4,6 +4,8 @@ project(scwx-data)
 
 find_package(AWSSDK)
 find_package(Boost)
+find_package(cpr)
+find_package(LibXml2)
 find_package(spdlog)
 
 set(HDR_AWIPS include/scwx/awips/coded_location.hpp
@@ -40,6 +42,8 @@ set(SRC_COMMON source/scwx/common/characters.cpp
                source/scwx/common/products.cpp
                source/scwx/common/sites.cpp
                source/scwx/common/vcp.cpp)
+set(HDR_NETWORK include/scwx/network/dir_list.hpp)
+set(SRC_NETWORK source/scwx/network/dir_list.cpp)
 set(HDR_PROVIDER include/scwx/provider/aws_level2_data_provider.hpp
                  include/scwx/provider/aws_level3_data_provider.hpp
                  include/scwx/provider/aws_nexrad_data_provider.hpp
@@ -186,6 +190,8 @@ add_library(wxdata OBJECT ${HDR_AWIPS}
                           ${SRC_AWIPS}
                           ${HDR_COMMON}
                           ${SRC_COMMON}
+                          ${HDR_NETWORK}
+                          ${SRC_NETWORK}
                           ${HDR_PROVIDER}
                           ${SRC_PROVIDER}
                           ${HDR_UTIL}
@@ -202,6 +208,8 @@ source_group("Header Files\\awips" FILES ${HDR_AWIPS})
 source_group("Source Files\\awips" FILES ${SRC_AWIPS})
 source_group("Header Files\\common" FILES ${HDR_COMMON})
 source_group("Source Files\\common" FILES ${SRC_COMMON})
+source_group("Header Files\\network" FILES ${HDR_NETWORK})
+source_group("Source Files\\network" FILES ${SRC_NETWORK})
 source_group("Header Files\\provider" FILES ${HDR_PROVIDER})
 source_group("Source Files\\provider" FILES ${SRC_PROVIDER})
 source_group("Header Files\\util" FILES ${HDR_UTIL})
@@ -225,6 +233,8 @@ target_compile_options(wxdata PRIVATE
 )
 
 target_link_libraries(wxdata PUBLIC AWS::s3
+                                    cpr::cpr
+                                    LibXml2::LibXml2
                                     spdlog::spdlog)
 target_link_libraries(wxdata INTERFACE Boost::iostreams
                                        BZip2::BZip2