mirror of
				https://github.com/ciphervance/supercell-wx.git
				synced 2025-10-30 21:30:05 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			264 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			264 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
#define LIBXML_HTML_ENABLED

#include <scwx/network/dir_list.hpp>
#include <scwx/util/logger.hpp>

#include <cstdarg>
#include <cstdio>
#include <utility>

#pragma warning(push, 0)
#include <boost/algorithm/string/trim.hpp>
#include <cpr/cpr.h>
#include <libxml/HTMLparser.h>
#pragma warning(pop)
 | |
| 
 | |
| namespace scwx
 | |
| {
 | |
| namespace network
 | |
| {
 | |
| 
 | |
| static const std::string logPrefix_ = "scwx::network::dir_list";
 | |
| static const auto        logger_    = util::Logger::Create(logPrefix_);
 | |
| 
 | |
| static const cpr::SslOptions  kSslOptions_ = cpr::Ssl(cpr::ssl::TLSv1_2 {});
 | |
| static const cpr::HttpVersion kHttpVersion_ {
 | |
|    cpr::HttpVersionCode::VERSION_2_0_TLS};
 | |
| 
 | |
| class DirListSAXHandler
 | |
| {
 | |
| public:
 | |
|    DirListSAXHandler() = delete;
 | |
|    static void StartDocument(void* userData);
 | |
|    static void EndDocument(void* userData);
 | |
|    static void
 | |
|    StartElement(void* userData, const xmlChar* name, const xmlChar** attrs);
 | |
|    static void EndElement(void* userData, const xmlChar* name);
 | |
|    static void Characters(void* userData, const xmlChar* ch, int len);
 | |
|    static void Warning(void* userData, const char* msg, ...);
 | |
|    static void Error(void* userData, const char* msg, ...);
 | |
|    static void Critical(void* userData, const char* msg, ...);
 | |
| };
 | |
| 
 | |
| struct DirListSAXData
 | |
| {
 | |
|    enum class State
 | |
|    {
 | |
|       FindingLink,
 | |
|       FoundLink,
 | |
|       UpdateLinkTimestamp,
 | |
|       UpdateLinkSize
 | |
|    };
 | |
|    State  state_ {State::FindingLink};
 | |
|    size_t warningCount_ {0u};
 | |
|    size_t errorCount_ {0u};
 | |
|    size_t criticalCount_ {0u};
 | |
| 
 | |
|    std::vector<DirListRecord> records_;
 | |
| };
 | |
| 
 | |
| static htmlSAXHandler saxHandler_ //
 | |
|    {.startElement = &DirListSAXHandler::StartElement,
 | |
|     .endElement   = &DirListSAXHandler::EndElement,
 | |
|     .characters   = &DirListSAXHandler::Characters,
 | |
|     .warning      = &DirListSAXHandler::Warning,
 | |
|     .error        = &DirListSAXHandler::Error,
 | |
|     .fatalError   = &DirListSAXHandler::Critical};
 | |
| 
 | |
| std::vector<DirListRecord> DirList(const std::string& baseUrl)
 | |
| {
 | |
|    using namespace std::chrono;
 | |
| 
 | |
|    logger_->trace("DirList: {}", baseUrl);
 | |
| 
 | |
|    cpr::Response response =
 | |
|       cpr::Get(cpr::Url {baseUrl}, kSslOptions_, kHttpVersion_);
 | |
|    DirListSAXData saxData {};
 | |
| 
 | |
|    if (response.status_code != cpr::status::HTTP_OK)
 | |
|    {
 | |
|       logger_->warn("Bad response from {}: {} ({})",
 | |
|                     baseUrl,
 | |
|                     response.error.message,
 | |
|                     response.status_code);
 | |
|    }
 | |
|    else
 | |
|    {
 | |
|       htmlDocPtr doc = htmlSAXParseDoc(
 | |
|          reinterpret_cast<const xmlChar*>(response.text.c_str()),
 | |
|          nullptr,
 | |
|          &saxHandler_,
 | |
|          &saxData);
 | |
|       if (doc != nullptr)
 | |
|       {
 | |
|          xmlFreeDoc(doc);
 | |
|       }
 | |
|    }
 | |
| 
 | |
|    return saxData.records_;
 | |
| }
 | |
| 
 | |
| void DirListSAXHandler::StartElement(void*           userData,
 | |
|                                      const xmlChar*  name,
 | |
|                                      const xmlChar** attrs)
 | |
| {
 | |
|    logger_->trace("SAX: Start Element: {}",
 | |
|                   reinterpret_cast<const char*>(name));
 | |
| 
 | |
|    DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
 | |
| 
 | |
|    if (strcmp(reinterpret_cast<const char*>(name), "a") == 0)
 | |
|    {
 | |
|       // If an "a" element is found, search for an "href" attribute
 | |
|       for (int i = 0; attrs != nullptr && attrs[i] != nullptr; ++i)
 | |
|       {
 | |
|          if (i > 0 &&
 | |
|              strcmp(reinterpret_cast<const char*>(attrs[i - 1]), "href") == 0)
 | |
|          {
 | |
|             // If the "href" attribute is found, treat this as a new file
 | |
|             std::string filename {reinterpret_cast<const char*>(attrs[i])};
 | |
|             std::filesystem::file_type fileType;
 | |
| 
 | |
|             // Determine if the file is a directory
 | |
|             if (filename.ends_with("/"))
 | |
|             {
 | |
|                filename.pop_back();
 | |
|                fileType = std::filesystem::file_type::directory;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                fileType = std::filesystem::file_type::regular;
 | |
|             }
 | |
| 
 | |
|             // If the filename is valid, add it as a record
 | |
|             if (filename.size() > 0 && !filename.starts_with("?") &&
 | |
|                 // And the filename is not a duplicate of the previous record
 | |
|                 (data->records_.size() == 0 ||
 | |
|                  data->records_.back().filename_ != filename))
 | |
|             {
 | |
|                data->records_.emplace_back(filename, fileType);
 | |
|                data->state_ = DirListSAXData::State::FoundLink;
 | |
|                break;
 | |
|             }
 | |
|          }
 | |
|       }
 | |
|    }
 | |
|    for (int i = 0; attrs != nullptr && attrs[i] != nullptr; ++i)
 | |
|    {
 | |
|       logger_->trace("     Attribute: {}",
 | |
|                      reinterpret_cast<const char*>(attrs[i]));
 | |
|    }
 | |
| }
 | |
| 
 | |
| void DirListSAXHandler::EndElement(void* userData, const xmlChar* name)
 | |
| {
 | |
|    logger_->trace("SAX: End Element: {}", reinterpret_cast<const char*>(name));
 | |
| 
 | |
|    DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
 | |
| 
 | |
|    if (data->state_ == DirListSAXData::State::FoundLink &&
 | |
|        strcmp(reinterpret_cast<const char*>(name), "a") == 0)
 | |
|    {
 | |
|       // The "a" element is closed, so begin looking for the timestamp
 | |
|       data->state_ = DirListSAXData::State::UpdateLinkTimestamp;
 | |
|    }
 | |
| }
 | |
| 
 | |
| void DirListSAXHandler::Characters(void* userData, const xmlChar* ch, int len)
 | |
| {
 | |
|    std::string characters(reinterpret_cast<const char*>(ch), len);
 | |
|    logger_->trace("SAX: Characters: {}", characters);
 | |
| 
 | |
|    DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData);
 | |
| 
 | |
|    if (data->state_ == DirListSAXData::State::UpdateLinkTimestamp)
 | |
|    {
 | |
|       using namespace std::chrono;
 | |
| 
 | |
|       // Date time format: yyyy-mm-dd hh:mm
 | |
|       static const std::string kDateTimeFormat {"%Y-%m-%d %H:%M"};
 | |
|       static constexpr size_t  kDateTimeSize {16u};
 | |
| 
 | |
|       // Attempt to parse the date time
 | |
|       std::istringstream ssCharacters {characters};
 | |
|       sys_time<minutes>  mtime;
 | |
|       ssCharacters >> parse(kDateTimeFormat, mtime);
 | |
| 
 | |
|       if (!ssCharacters.fail())
 | |
|       {
 | |
|          // Date time parsing succeeded, look for link size
 | |
|          auto& record  = data->records_.back();
 | |
|          record.mtime_ = mtime;
 | |
| 
 | |
|          if (record.type_ == std::filesystem::file_type::directory)
 | |
|          {
 | |
|             // If the record is a directory, there is no size, skip to next link
 | |
|             data->state_ = DirListSAXData::State::FindingLink;
 | |
|          }
 | |
|          else
 | |
|          {
 | |
|             // After the time is parsed, get the file size
 | |
|             data->state_ = DirListSAXData::State::UpdateLinkSize;
 | |
|          }
 | |
|       }
 | |
|    }
 | |
|    else if (data->state_ == DirListSAXData::State::UpdateLinkSize)
 | |
|    {
 | |
|       // Trim the file size string
 | |
|       std::string fileSizeString {characters};
 | |
|       boost::trim(fileSizeString);
 | |
| 
 | |
|       size_t fileSize   = 0u;
 | |
|       size_t multiplier = 1u;
 | |
| 
 | |
|       // Look for size suffix
 | |
|       if (fileSizeString.ends_with("K"))
 | |
|       {
 | |
|          fileSizeString.pop_back();
 | |
|          multiplier = 1024u;
 | |
|       }
 | |
|       else if (fileSizeString.ends_with("M"))
 | |
|       {
 | |
|          fileSizeString.pop_back();
 | |
|          multiplier = 1024u * 1024u;
 | |
|       }
 | |
|       else if (fileSizeString.ends_with("G"))
 | |
|       {
 | |
|          fileSizeString.pop_back();
 | |
|          multiplier = 1024u * 1024u * 1024u;
 | |
|       }
 | |
|       else if (fileSizeString.ends_with("T"))
 | |
|       {
 | |
|          fileSizeString.pop_back();
 | |
|          multiplier = 1024ull * 1024ull * 1024ull * 1024ull;
 | |
|       }
 | |
| 
 | |
|       try
 | |
|       {
 | |
|          // Parse the remaining file size string, and multiply by the suffix
 | |
|          fileSize = static_cast<size_t>(std::stod(fileSizeString) * multiplier);
 | |
|          data->records_.back().size_ = fileSize;
 | |
| 
 | |
|          // Look for the next link
 | |
|          data->state_ = DirListSAXData::State::FindingLink;
 | |
|       }
 | |
|       catch (const std::exception&)
 | |
|       {
 | |
|          // This was something other than a file size
 | |
|       }
 | |
|    }
 | |
| }
 | |
| 
 | |
| void DirListSAXHandler::Warning(void* /* userData */, const char* msg, ...)
 | |
| {
 | |
|    logger_->warn("SAX: {}", msg);
 | |
| }
 | |
| 
 | |
| void DirListSAXHandler::Error(void* /* userData */, const char* msg, ...)
 | |
| {
 | |
|    logger_->error("SAX: {}", msg);
 | |
| }
 | |
| 
 | |
| void DirListSAXHandler::Critical(void* /* userData */, const char* msg, ...)
 | |
| {
 | |
|    logger_->critical("SAX: {}", msg);
 | |
| }
 | |
| 
 | |
| } // namespace network
 | |
| } // namespace scwx
 | 
