mirror of
				https://github.com/ciphervance/supercell-wx.git
				synced 2025-10-31 19:50:06 +00:00 
			
		
		
		
	Directory listing
This commit is contained in:
		
							parent
							
								
									b9fc22d63e
								
							
						
					
					
						commit
						d5d9285736
					
				
					 6 changed files with 337 additions and 0 deletions
				
			
		
							
								
								
									
										262
									
								
								wxdata/source/scwx/network/dir_list.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								wxdata/source/scwx/network/dir_list.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,262 @@ | |||
| #define LIBXML_HTML_ENABLED | ||||
| 
 | ||||
| #include <scwx/network/dir_list.hpp> | ||||
| #include <scwx/util/logger.hpp> | ||||
| 
 | ||||
| #pragma warning(push, 0) | ||||
| #include <boost/algorithm/string/trim.hpp> | ||||
| #include <cpr/cpr.h> | ||||
| #include <libxml/HTMLparser.h> | ||||
| #pragma warning(pop) | ||||
| 
 | ||||
| namespace scwx | ||||
| { | ||||
| namespace network | ||||
| { | ||||
| 
 | ||||
| static const std::string logPrefix_ = "scwx::network::dir_list"; | ||||
| static const auto        logger_    = util::Logger::Create(logPrefix_); | ||||
| 
 | ||||
| static const cpr::SslOptions  kSslOptions_ = cpr::Ssl(cpr::ssl::TLSv1_2 {}); | ||||
| static const cpr::HttpVersion kHttpVersion_ { | ||||
|    cpr::HttpVersionCode::VERSION_2_0_TLS}; | ||||
| 
 | ||||
| class DirListSAXHandler | ||||
| { | ||||
| public: | ||||
|    DirListSAXHandler() = delete; | ||||
|    static void StartDocument(void* userData); | ||||
|    static void EndDocument(void* userData); | ||||
|    static void | ||||
|    StartElement(void* userData, const xmlChar* name, const xmlChar** attrs); | ||||
|    static void EndElement(void* userData, const xmlChar* name); | ||||
|    static void Characters(void* userData, const xmlChar* ch, int len); | ||||
|    static void Warning(void* userData, const char* msg, ...); | ||||
|    static void Error(void* userData, const char* msg, ...); | ||||
|    static void Critical(void* userData, const char* msg, ...); | ||||
| }; | ||||
| 
 | ||||
| struct DirListSAXData | ||||
| { | ||||
|    enum class State | ||||
|    { | ||||
|       FindingLink, | ||||
|       FoundLink, | ||||
|       UpdateLinkTimestamp, | ||||
|       UpdateLinkSize | ||||
|    }; | ||||
|    State  state_ {State::FindingLink}; | ||||
|    size_t warningCount_ {0u}; | ||||
|    size_t errorCount_ {0u}; | ||||
|    size_t criticalCount_ {0u}; | ||||
| 
 | ||||
|    std::vector<DirListRecord> records_; | ||||
| }; | ||||
| 
 | ||||
| static htmlSAXHandler saxHandler_ //
 | ||||
|    {.startElement = &DirListSAXHandler::StartElement, | ||||
|     .endElement   = &DirListSAXHandler::EndElement, | ||||
|     .characters   = &DirListSAXHandler::Characters, | ||||
|     .warning      = &DirListSAXHandler::Warning, | ||||
|     .error        = &DirListSAXHandler::Error, | ||||
|     .fatalError   = &DirListSAXHandler::Critical}; | ||||
| 
 | ||||
| std::vector<DirListRecord> DirList(const std::string& baseUrl) | ||||
| { | ||||
|    using namespace std::chrono; | ||||
| 
 | ||||
|    logger_->trace("DirList: {}", baseUrl); | ||||
| 
 | ||||
|    cpr::Response response = | ||||
|       cpr::Get(cpr::Url {baseUrl}, kSslOptions_, kHttpVersion_); | ||||
|    DirListSAXData saxData {}; | ||||
| 
 | ||||
|    if (response.status_code != cpr::status::HTTP_OK) | ||||
|    { | ||||
|       logger_->warn("Bad response from {}: {} ({})", | ||||
|                     baseUrl, | ||||
|                     response.error.message, | ||||
|                     response.status_code); | ||||
|    } | ||||
|    else | ||||
|    { | ||||
|       htmlDocPtr doc = htmlSAXParseDoc( | ||||
|          reinterpret_cast<const xmlChar*>(response.text.c_str()), | ||||
|          nullptr, | ||||
|          &saxHandler_, | ||||
|          &saxData); | ||||
|       if (doc != nullptr) | ||||
|       { | ||||
|          xmlFreeDoc(doc); | ||||
|       } | ||||
|    } | ||||
| 
 | ||||
|    return saxData.records_; | ||||
| } | ||||
| 
 | ||||
| void DirListSAXHandler::StartElement(void*           userData, | ||||
|                                      const xmlChar*  name, | ||||
|                                      const xmlChar** attrs) | ||||
| { | ||||
|    logger_->trace("SAX: Start Element: {}", name); | ||||
| 
 | ||||
|    DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData); | ||||
| 
 | ||||
|    if (strcmp(reinterpret_cast<const char*>(name), "a") == 0) | ||||
|    { | ||||
|       // If an "a" element is found, search for an "href" attribute
 | ||||
|       for (int i = 0; attrs != nullptr && attrs[i] != nullptr; ++i) | ||||
|       { | ||||
|          if (i > 0 && | ||||
|              strcmp(reinterpret_cast<const char*>(attrs[i - 1]), "href") == 0) | ||||
|          { | ||||
|             // If the "href" attribute is found, treat this as a new file
 | ||||
|             std::string filename {reinterpret_cast<const char*>(attrs[i])}; | ||||
|             std::filesystem::file_type fileType; | ||||
| 
 | ||||
|             // Determine if the file is a directory
 | ||||
|             if (filename.ends_with("/")) | ||||
|             { | ||||
|                filename.pop_back(); | ||||
|                fileType = std::filesystem::file_type::directory; | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                fileType = std::filesystem::file_type::regular; | ||||
|             } | ||||
| 
 | ||||
|             // If the filename is valid, add it as a record
 | ||||
|             if (filename.size() > 0 && !filename.starts_with("?") && | ||||
|                 // And the filename is not a duplicate of the previous record
 | ||||
|                 (data->records_.size() == 0 || | ||||
|                  data->records_.back().filename_ != filename)) | ||||
|             { | ||||
|                data->records_.emplace_back(filename, fileType); | ||||
|                data->state_ = DirListSAXData::State::FoundLink; | ||||
|                break; | ||||
|             } | ||||
|          } | ||||
|       } | ||||
|    } | ||||
|    for (int i = 0; attrs != nullptr && attrs[i] != nullptr; ++i) | ||||
|    { | ||||
|       logger_->trace("     Attribute: {}", attrs[i]); | ||||
|    } | ||||
| } | ||||
| 
 | ||||
| void DirListSAXHandler::EndElement(void* userData, const xmlChar* name) | ||||
| { | ||||
|    logger_->trace("SAX: End Element: {}", name); | ||||
| 
 | ||||
|    DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData); | ||||
| 
 | ||||
|    if (data->state_ == DirListSAXData::State::FoundLink && | ||||
|        strcmp(reinterpret_cast<const char*>(name), "a") == 0) | ||||
|    { | ||||
|       // The "a" element is closed, so begin looking for the timestamp
 | ||||
|       data->state_ = DirListSAXData::State::UpdateLinkTimestamp; | ||||
|    } | ||||
| } | ||||
| 
 | ||||
| void DirListSAXHandler::Characters(void* userData, const xmlChar* ch, int len) | ||||
| { | ||||
|    std::string characters(reinterpret_cast<const char*>(ch), len); | ||||
|    logger_->trace("SAX: Characters: {}", characters); | ||||
| 
 | ||||
|    DirListSAXData* data = reinterpret_cast<DirListSAXData*>(userData); | ||||
| 
 | ||||
|    if (data->state_ == DirListSAXData::State::UpdateLinkTimestamp) | ||||
|    { | ||||
|       using namespace std::chrono; | ||||
| 
 | ||||
|       // Date time format: yyyy-mm-dd hh:mm
 | ||||
|       static const std::string kDateTimeFormat {"%Y-%m-%d %H:%M"}; | ||||
|       static constexpr size_t  kDateTimeSize {16u}; | ||||
| 
 | ||||
|       // Attempt to parse the date time
 | ||||
|       std::istringstream ssCharacters {characters}; | ||||
|       sys_time<minutes>  mtime; | ||||
|       ssCharacters >> parse(kDateTimeFormat, mtime); | ||||
| 
 | ||||
|       if (!ssCharacters.fail()) | ||||
|       { | ||||
|          // Date time parsing succeeded, look for link size
 | ||||
|          auto& record  = data->records_.back(); | ||||
|          record.mtime_ = mtime; | ||||
| 
 | ||||
|          if (record.type_ == std::filesystem::file_type::directory) | ||||
|          { | ||||
|             // If the record is a directory, there is no size, skip to next link
 | ||||
|             data->state_ = DirListSAXData::State::FindingLink; | ||||
|          } | ||||
|          else | ||||
|          { | ||||
|             // After the time is parsed, get the file size
 | ||||
|             data->state_ = DirListSAXData::State::UpdateLinkSize; | ||||
|          } | ||||
|       } | ||||
|    } | ||||
|    else if (data->state_ == DirListSAXData::State::UpdateLinkSize) | ||||
|    { | ||||
|       // Trim the file size string
 | ||||
|       std::string fileSizeString {characters}; | ||||
|       boost::trim(fileSizeString); | ||||
| 
 | ||||
|       size_t fileSize   = 0u; | ||||
|       size_t multiplier = 1u; | ||||
| 
 | ||||
|       // Look for size suffix
 | ||||
|       if (fileSizeString.ends_with("K")) | ||||
|       { | ||||
|          fileSizeString.pop_back(); | ||||
|          multiplier = 1024u; | ||||
|       } | ||||
|       else if (fileSizeString.ends_with("M")) | ||||
|       { | ||||
|          fileSizeString.pop_back(); | ||||
|          multiplier = 1024u * 1024u; | ||||
|       } | ||||
|       else if (fileSizeString.ends_with("G")) | ||||
|       { | ||||
|          fileSizeString.pop_back(); | ||||
|          multiplier = 1024u * 1024u * 1024u; | ||||
|       } | ||||
|       else if (fileSizeString.ends_with("T")) | ||||
|       { | ||||
|          fileSizeString.pop_back(); | ||||
|          multiplier = 1024ull * 1024ull * 1024ull * 1024ull; | ||||
|       } | ||||
| 
 | ||||
|       try | ||||
|       { | ||||
|          // Parse the remaining file size string, and multiply by the suffix
 | ||||
|          fileSize = static_cast<size_t>(std::stod(fileSizeString) * multiplier); | ||||
|          data->records_.back().size_ = fileSize; | ||||
| 
 | ||||
|          // Look for the next link
 | ||||
|          data->state_ = DirListSAXData::State::FindingLink; | ||||
|       } | ||||
|       catch (const std::exception&) | ||||
|       { | ||||
|          // This was something other than a file size
 | ||||
|       } | ||||
|    } | ||||
| } | ||||
| 
 | ||||
| void DirListSAXHandler::Warning(void* /* userData */, const char* msg, ...) | ||||
| { | ||||
|    logger_->warn("SAX: {}", msg); | ||||
| } | ||||
| 
 | ||||
| void DirListSAXHandler::Error(void* /* userData */, const char* msg, ...) | ||||
| { | ||||
|    logger_->error("SAX: {}", msg); | ||||
| } | ||||
| 
 | ||||
| void DirListSAXHandler::Critical(void* /* userData */, const char* msg, ...) | ||||
| { | ||||
|    logger_->critical("SAX: {}", msg); | ||||
| } | ||||
| 
 | ||||
| } // namespace network
 | ||||
| } // namespace scwx
 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dan Paulat
						Dan Paulat