mirror of
https://github.com/ciphervance/supercell-wx.git
synced 2025-10-29 23:10:06 +00:00
Directory listing
This commit is contained in:
parent
b9fc22d63e
commit
d5d9285736
6 changed files with 337 additions and 0 deletions
|
|
@ -14,15 +14,19 @@ include(${PROJECT_SOURCE_DIR}/external/cmake-conan/conan.cmake)
|
|||
|
||||
conan_cmake_configure(REQUIRES aws-sdk-cpp/1.9.234
|
||||
boost/1.78.0
|
||||
cpr/1.9.0
|
||||
freetype/2.12.1
|
||||
geographiclib/1.52
|
||||
glew/2.2.0
|
||||
glm/cci.20220420
|
||||
gtest/cci.20210126
|
||||
libcurl/7.85.0
|
||||
libxml2/2.9.14
|
||||
openssl/1.1.1q
|
||||
spdlog/1.10.0
|
||||
sqlite3/3.39.4
|
||||
vulkan-loader/1.3.221
|
||||
zlib/1.2.13
|
||||
GENERATORS cmake
|
||||
cmake_find_package
|
||||
cmake_paths
|
||||
|
|
|
|||
28
test/source/scwx/network/dir_list.test.cpp
Normal file
28
test/source/scwx/network/dir_list.test.cpp
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#include <scwx/network/dir_list.hpp>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace scwx
|
||||
{
|
||||
namespace network
|
||||
{
|
||||
|
||||
// Directory listing endpoints requested by the tests in this file. Held by
// value instead of as references bound to lifetime-extended temporaries.
static const std::string kDefaultUrl {"https://warnings.allisonhouse.com"};
static const std::string kAlternateUrl {"http://warnings.cod.edu"};
|
||||
|
||||
// Requests the listing at kDefaultUrl and expects at least one record.
// NOTE: this performs a real HTTP request, so it needs network access.
TEST(DirList, GetDefaultUrl)
{
   const auto records = DirList(kDefaultUrl);

   // Unsigned literal keeps the comparison with size() sign-consistent
   EXPECT_GT(records.size(), 0u);
}
|
||||
|
||||
// Requests the listing at kAlternateUrl and expects at least one record.
// NOTE: this performs a real HTTP request, so it needs network access.
TEST(DirList, GetAlternateUrl)
{
   const auto records = DirList(kAlternateUrl);

   // Unsigned literal keeps the comparison with size() sign-consistent
   EXPECT_GT(records.size(), 0u);
}
|
||||
|
||||
} // namespace network
|
||||
} // namespace scwx
|
||||
|
|
@ -15,6 +15,7 @@ set(SRC_AWIPS_TESTS source/scwx/awips/coded_location.test.cpp
|
|||
source/scwx/awips/ugc.test.cpp)
|
||||
set(SRC_COMMON_TESTS source/scwx/common/color_table.test.cpp
|
||||
source/scwx/common/products.test.cpp)
|
||||
set(SRC_NETWORK_TESTS source/scwx/network/dir_list.test.cpp)
|
||||
set(SRC_PROVIDER_TESTS source/scwx/provider/aws_level2_data_provider.test.cpp
|
||||
source/scwx/provider/aws_level3_data_provider.test.cpp)
|
||||
set(SRC_QT_CONFIG_TESTS source/scwx/qt/config/county_database.test.cpp
|
||||
|
|
@ -33,6 +34,7 @@ set(CMAKE_FILES test.cmake)
|
|||
add_executable(wxtest ${SRC_MAIN}
|
||||
${SRC_AWIPS_TESTS}
|
||||
${SRC_COMMON_TESTS}
|
||||
${SRC_NETWORK_TESTS}
|
||||
${SRC_PROVIDER_TESTS}
|
||||
${SRC_QT_CONFIG_TESTS}
|
||||
${SRC_QT_MANAGER_TESTS}
|
||||
|
|
@ -43,6 +45,7 @@ add_executable(wxtest ${SRC_MAIN}
|
|||
source_group("Source Files\\main" FILES ${SRC_MAIN})
|
||||
source_group("Source Files\\awips" FILES ${SRC_AWIPS_TESTS})
|
||||
source_group("Source Files\\common" FILES ${SRC_COMMON_TESTS})
|
||||
source_group("Source Files\\network" FILES ${SRC_NETWORK_TESTS})
|
||||
source_group("Source Files\\provider" FILES ${SRC_PROVIDER_TESTS})
|
||||
source_group("Source Files\\qt\\config" FILES ${SRC_QT_CONFIG_TESTS})
|
||||
source_group("Source Files\\qt\\manager" FILES ${SRC_QT_MANAGER_TESTS})
|
||||
|
|
|
|||
30
wxdata/include/scwx/network/dir_list.hpp
Normal file
30
wxdata/include/scwx/network/dir_list.hpp
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
namespace scwx
|
||||
{
|
||||
namespace network
|
||||
{
|
||||
|
||||
/**
 * @brief One entry of a directory listing
 *
 * Plain aggregate; every member has a default so a value-initialized record
 * is empty, of unknown type, with an epoch timestamp and zero size.
 */
struct DirListRecord
{
   /// Name of the entry
   std::string filename_ {};

   /// Kind of entry (unknown until determined)
   std::filesystem::file_type type_ {std::filesystem::file_type::unknown};

   /// Modified time (server time)
   std::chrono::system_clock::time_point mtime_ {};

   /// Approximate file size in bytes
   size_t size_ {0u};
};
|
||||
|
||||
/**
 * @brief Retrieve Directory Listing
 *
 * Retrieves a directory listing. Supports default Apache-style directory
 * listings only.
 *
 * The page at the given URL is requested over HTTP and its HTML is scanned for
 * links; each accepted link becomes one record.
 *
 * @param baseUrl URL of the directory listing page to request
 *
 * @return Records found in the listing. Empty when the server does not answer
 * with an HTTP OK status (a warning is logged in that case).
 */
|
||||
std::vector<DirListRecord> DirList(const std::string& baseUrl);
|
||||
|
||||
} // namespace network
|
||||
} // namespace scwx
|
||||
262
wxdata/source/scwx/network/dir_list.cpp
Normal file
262
wxdata/source/scwx/network/dir_list.cpp
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
#define LIBXML_HTML_ENABLED
|
||||
|
||||
#include <scwx/network/dir_list.hpp>
#include <scwx/util/logger.hpp>

#include <cstdarg>
#include <cstdio>
#include <cstring>
#include <limits>
#include <optional>
#include <sstream>
#include <utility>

#pragma warning(push, 0)
#include <boost/algorithm/string/trim.hpp>
#include <cpr/cpr.h>
#include <libxml/HTMLparser.h>
#pragma warning(pop)
|
||||
|
||||
namespace scwx
|
||||
{
|
||||
namespace network
|
||||
{
|
||||
|
||||
static const std::string logPrefix_ = "scwx::network::dir_list";
|
||||
static const auto logger_ = util::Logger::Create(logPrefix_);
|
||||
|
||||
static const cpr::SslOptions kSslOptions_ = cpr::Ssl(cpr::ssl::TLSv1_2 {});
|
||||
static const cpr::HttpVersion kHttpVersion_ {
|
||||
cpr::HttpVersionCode::VERSION_2_0_TLS};
|
||||
|
||||
class DirListSAXHandler
|
||||
{
|
||||
public:
|
||||
DirListSAXHandler() = delete;
|
||||
static void StartDocument(void* userData);
|
||||
static void EndDocument(void* userData);
|
||||
static void
|
||||
StartElement(void* userData, const xmlChar* name, const xmlChar** attrs);
|
||||
static void EndElement(void* userData, const xmlChar* name);
|
||||
static void Characters(void* userData, const xmlChar* ch, int len);
|
||||
static void Warning(void* userData, const char* msg, ...);
|
||||
static void Error(void* userData, const char* msg, ...);
|
||||
static void Critical(void* userData, const char* msg, ...);
|
||||
};
|
||||
|
||||
struct DirListSAXData
|
||||
{
|
||||
enum class State
|
||||
{
|
||||
FindingLink,
|
||||
FoundLink,
|
||||
UpdateLinkTimestamp,
|
||||
UpdateLinkSize
|
||||
};
|
||||
State state_ {State::FindingLink};
|
||||
size_t warningCount_ {0u};
|
||||
size_t errorCount_ {0u};
|
||||
size_t criticalCount_ {0u};
|
||||
|
||||
std::vector<DirListRecord> records_;
|
||||
};
|
||||
|
||||
// SAX callback table handed to the HTML parser. Only the callbacks named here
// are set; all remaining members of the table are left zero-initialized.
// Non-const because the parser API takes a mutable handler pointer.
static htmlSAXHandler saxHandler_ = {
   .startElement = DirListSAXHandler::StartElement,
   .endElement   = DirListSAXHandler::EndElement,
   .characters   = DirListSAXHandler::Characters,
   .warning      = DirListSAXHandler::Warning,
   .error        = DirListSAXHandler::Error,
   .fatalError   = DirListSAXHandler::Critical,
};
|
||||
|
||||
std::vector<DirListRecord> DirList(const std::string& baseUrl)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
logger_->trace("DirList: {}", baseUrl);
|
||||
|
||||
cpr::Response response =
|
||||
cpr::Get(cpr::Url {baseUrl}, kSslOptions_, kHttpVersion_);
|
||||
DirListSAXData saxData {};
|
||||
|
||||
if (response.status_code != cpr::status::HTTP_OK)
|
||||
{
|
||||
logger_->warn("Bad response from {}: {} ({})",
|
||||
baseUrl,
|
||||
response.error.message,
|
||||
response.status_code);
|
||||
}
|
||||
else
|
||||
{
|
||||
htmlDocPtr doc = htmlSAXParseDoc(
|
||||
reinterpret_cast<const xmlChar*>(response.text.c_str()),
|
||||
nullptr,
|
||||
&saxHandler_,
|
||||
&saxData);
|
||||
if (doc != nullptr)
|
||||
{
|
||||
xmlFreeDoc(doc);
|
||||
}
|
||||
}
|
||||
|
||||
return saxData.records_;
|
||||
}
|
||||
|
||||
void DirListSAXHandler::StartElement(void* userData,
|
||||
const xmlChar* name,
|
||||
const xmlChar** attrs)
|
||||
{
|
||||
logger_->trace("SAX: Start Element: {}", name);
|
||||
|
||||
DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
|
||||
|
||||
if (strcmp(reinterpret_cast<const char*>(name), "a") == 0)
|
||||
{
|
||||
// If an "a" element is found, search for an "href" attribute
|
||||
for (int i = 0; attrs != nullptr && attrs[i] != nullptr; ++i)
|
||||
{
|
||||
if (i > 0 &&
|
||||
strcmp(reinterpret_cast<const char*>(attrs[i - 1]), "href") == 0)
|
||||
{
|
||||
// If the "href" attribute is found, treat this as a new file
|
||||
std::string filename {reinterpret_cast<const char*>(attrs[i])};
|
||||
std::filesystem::file_type fileType;
|
||||
|
||||
// Determine if the file is a directory
|
||||
if (filename.ends_with("/"))
|
||||
{
|
||||
filename.pop_back();
|
||||
fileType = std::filesystem::file_type::directory;
|
||||
}
|
||||
else
|
||||
{
|
||||
fileType = std::filesystem::file_type::regular;
|
||||
}
|
||||
|
||||
// If the filename is valid, add it as a record
|
||||
if (filename.size() > 0 && !filename.starts_with("?") &&
|
||||
// And the filename is not a duplicate of the previous record
|
||||
(data->records_.size() == 0 ||
|
||||
data->records_.back().filename_ != filename))
|
||||
{
|
||||
data->records_.emplace_back(filename, fileType);
|
||||
data->state_ = DirListSAXData::State::FoundLink;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; attrs != nullptr && attrs[i] != nullptr; ++i)
|
||||
{
|
||||
logger_->trace(" Attribute: {}", attrs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void DirListSAXHandler::EndElement(void* userData, const xmlChar* name)
|
||||
{
|
||||
logger_->trace("SAX: End Element: {}", name);
|
||||
|
||||
DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
|
||||
|
||||
if (data->state_ == DirListSAXData::State::FoundLink &&
|
||||
strcmp(reinterpret_cast<const char*>(name), "a") == 0)
|
||||
{
|
||||
// The "a" element is closed, so begin looking for the timestamp
|
||||
data->state_ = DirListSAXData::State::UpdateLinkTimestamp;
|
||||
}
|
||||
}
|
||||
|
||||
void DirListSAXHandler::Characters(void* userData, const xmlChar* ch, int len)
|
||||
{
|
||||
std::string characters(reinterpret_cast<const char*>(ch), len);
|
||||
logger_->trace("SAX: Characters: {}", characters);
|
||||
|
||||
DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
|
||||
|
||||
if (data->state_ == DirListSAXData::State::UpdateLinkTimestamp)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
// Date time format: yyyy-mm-dd hh:mm
|
||||
static const std::string kDateTimeFormat {"%Y-%m-%d %H:%M"};
|
||||
static constexpr size_t kDateTimeSize {16u};
|
||||
|
||||
// Attempt to parse the date time
|
||||
std::istringstream ssCharacters {characters};
|
||||
sys_time<minutes> mtime;
|
||||
ssCharacters >> parse(kDateTimeFormat, mtime);
|
||||
|
||||
if (!ssCharacters.fail())
|
||||
{
|
||||
// Date time parsing succeeded, look for link size
|
||||
auto& record = data->records_.back();
|
||||
record.mtime_ = mtime;
|
||||
|
||||
if (record.type_ == std::filesystem::file_type::directory)
|
||||
{
|
||||
// If the record is a directory, there is no size, skip to next link
|
||||
data->state_ = DirListSAXData::State::FindingLink;
|
||||
}
|
||||
else
|
||||
{
|
||||
// After the time is parsed, get the file size
|
||||
data->state_ = DirListSAXData::State::UpdateLinkSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (data->state_ == DirListSAXData::State::UpdateLinkSize)
|
||||
{
|
||||
// Trim the file size string
|
||||
std::string fileSizeString {characters};
|
||||
boost::trim(fileSizeString);
|
||||
|
||||
size_t fileSize = 0u;
|
||||
size_t multiplier = 1u;
|
||||
|
||||
// Look for size suffix
|
||||
if (fileSizeString.ends_with("K"))
|
||||
{
|
||||
fileSizeString.pop_back();
|
||||
multiplier = 1024u;
|
||||
}
|
||||
else if (fileSizeString.ends_with("M"))
|
||||
{
|
||||
fileSizeString.pop_back();
|
||||
multiplier = 1024u * 1024u;
|
||||
}
|
||||
else if (fileSizeString.ends_with("G"))
|
||||
{
|
||||
fileSizeString.pop_back();
|
||||
multiplier = 1024u * 1024u * 1024u;
|
||||
}
|
||||
else if (fileSizeString.ends_with("T"))
|
||||
{
|
||||
fileSizeString.pop_back();
|
||||
multiplier = 1024ull * 1024ull * 1024ull * 1024ull;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
// Parse the remaining file size string, and multiply by the suffix
|
||||
fileSize = static_cast<size_t>(std::stod(fileSizeString) * multiplier);
|
||||
data->records_.back().size_ = fileSize;
|
||||
|
||||
// Look for the next link
|
||||
data->state_ = DirListSAXData::State::FindingLink;
|
||||
}
|
||||
catch (const std::exception&)
|
||||
{
|
||||
// This was something other than a file size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DirListSAXHandler::Warning(void* /* userData */, const char* msg, ...)
|
||||
{
|
||||
logger_->warn("SAX: {}", msg);
|
||||
}
|
||||
|
||||
void DirListSAXHandler::Error(void* /* userData */, const char* msg, ...)
|
||||
{
|
||||
logger_->error("SAX: {}", msg);
|
||||
}
|
||||
|
||||
void DirListSAXHandler::Critical(void* /* userData */, const char* msg, ...)
|
||||
{
|
||||
logger_->critical("SAX: {}", msg);
|
||||
}
|
||||
|
||||
} // namespace network
|
||||
} // namespace scwx
|
||||
|
|
@ -4,6 +4,8 @@ project(scwx-data)
|
|||
|
||||
find_package(AWSSDK)
|
||||
find_package(Boost)
|
||||
find_package(cpr)
|
||||
find_package(LibXml2)
|
||||
find_package(spdlog)
|
||||
|
||||
set(HDR_AWIPS include/scwx/awips/coded_location.hpp
|
||||
|
|
@ -40,6 +42,8 @@ set(SRC_COMMON source/scwx/common/characters.cpp
|
|||
source/scwx/common/products.cpp
|
||||
source/scwx/common/sites.cpp
|
||||
source/scwx/common/vcp.cpp)
|
||||
set(HDR_NETWORK include/scwx/network/dir_list.hpp)
|
||||
set(SRC_NETWORK source/scwx/network/dir_list.cpp)
|
||||
set(HDR_PROVIDER include/scwx/provider/aws_level2_data_provider.hpp
|
||||
include/scwx/provider/aws_level3_data_provider.hpp
|
||||
include/scwx/provider/aws_nexrad_data_provider.hpp
|
||||
|
|
@ -186,6 +190,8 @@ add_library(wxdata OBJECT ${HDR_AWIPS}
|
|||
${SRC_AWIPS}
|
||||
${HDR_COMMON}
|
||||
${SRC_COMMON}
|
||||
${HDR_NETWORK}
|
||||
${SRC_NETWORK}
|
||||
${HDR_PROVIDER}
|
||||
${SRC_PROVIDER}
|
||||
${HDR_UTIL}
|
||||
|
|
@ -202,6 +208,8 @@ source_group("Header Files\\awips" FILES ${HDR_AWIPS})
|
|||
source_group("Source Files\\awips" FILES ${SRC_AWIPS})
|
||||
source_group("Header Files\\common" FILES ${HDR_COMMON})
|
||||
source_group("Source Files\\common" FILES ${SRC_COMMON})
|
||||
source_group("Header Files\\network" FILES ${HDR_NETWORK})
|
||||
source_group("Source Files\\network" FILES ${SRC_NETWORK})
|
||||
source_group("Header Files\\provider" FILES ${HDR_PROVIDER})
|
||||
source_group("Source Files\\provider" FILES ${SRC_PROVIDER})
|
||||
source_group("Header Files\\util" FILES ${HDR_UTIL})
|
||||
|
|
@ -225,6 +233,8 @@ target_compile_options(wxdata PRIVATE
|
|||
)
|
||||
|
||||
target_link_libraries(wxdata PUBLIC AWS::s3
|
||||
cpr::cpr
|
||||
LibXml2::LibXml2
|
||||
spdlog::spdlog)
|
||||
target_link_libraries(wxdata INTERFACE Boost::iostreams
|
||||
BZip2::BZip2
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue