# Provenance: recovered from a whitespace-collapsed git patch.
#   commit:  25fb7168e3a0b99c607af9644e152ceab443c069
#   From:    Dan Paulat
#   Date:    Sun, 23 Oct 2022 08:19:14 -0500
#   Subject: [PATCH] Python script to generate counties database
# The patch created this file as scwx-qt/tools/generate_counties_db.py and,
# in the same commit, registered a "data" submodule in .gitmodules
# (path = data, url = ../supercell-wx-data) pinned at subproject commit
# 19a3854e7986215bf6282ccbfd173635f4ac03eb.
"""Generate the counties SQLite database.

Reads any number of county and zone DBF files and writes one ``counties``
table that maps an identifier compatible with the UGC format
(NWSI 10-1702), e.g. ``NEC055`` or ``NEZ001``, to a display name.

Usage:
    generate_counties_db.py -c county.dbf [...] -z zone.dbf [...] -o out.db
"""

import argparse
import pathlib
import sqlite3

# NOTE: geopandas is imported inside _ReadDbf() rather than here, so that
# "--help" and argument errors do not require loading it.


class DatabaseInfo:
    """Holder for the output database connection and its cursor."""

    def __init__(self):
        # Both are populated by Prepare()
        self.sqlConnection_ = None
        self.sqlCursor_ = None


def ParseArguments(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``inputCountyDbs_`` and ``inputZoneDbs_``
        (lists of ``pathlib.Path``, possibly empty) and ``outputDb_``
        (``pathlib.Path``, required).
    """
    parser = argparse.ArgumentParser(description='Generate counties SQLite database')
    parser.add_argument("-c", "--county_dbf",
                        metavar="filename",
                        help="input county database",
                        dest="inputCountyDbs_",
                        action="extend",
                        nargs="+",
                        default=[],
                        type=pathlib.Path)
    parser.add_argument("-z", "--zone_dbf",
                        metavar="filename",
                        help="input zone database",
                        dest="inputZoneDbs_",
                        action="extend",
                        nargs="+",
                        default=[],
                        type=pathlib.Path)
    parser.add_argument("-o", "--output_db",
                        metavar="filename",
                        help="output sqlite database",
                        dest="outputDb_",
                        type=pathlib.Path,
                        required=True)
    return parser.parse_args(argv)


def Prepare(dbInfo, outputDb):
    """Create an empty output database containing the ``counties`` table.

    Args:
        dbInfo: DatabaseInfo that receives the new connection and cursor.
        outputDb: Path of the SQLite file. Any existing content is discarded.
    """
    # Truncate existing database
    with open(outputDb, 'w'):
        pass

    # Establish SQLite database connection
    dbInfo.sqlConnection_ = sqlite3.connect(outputDb)

    # Set row factory for name-based access to columns
    dbInfo.sqlConnection_.row_factory = sqlite3.Row

    dbInfo.sqlCursor_ = dbInfo.sqlConnection_.cursor()

    # Create database table
    dbInfo.sqlCursor_.execute("""CREATE TABLE counties(
                                 id TEXT NOT NULL PRIMARY KEY,
                                 name TEXT)""")


def _FormatUgcId(state, areaType, number):
    """Build an ID compatible with UGC format (NWSI 10-1702).

    Only the last three decimal digits of ``number`` are kept, zero-padded,
    e.g. ``("NE", "C", "31055")`` gives ``"NEC055"``.
    """
    return "{}{}{:03}".format(state, areaType, (int(number) % 1000))


def _ReadDbf(dbfFilename, fields):
    """Read the requested fields of a DBF file, dropping duplicate rows."""
    import geopandas as gpd

    dbfTable = gpd.read_file(filename=dbfFilename,
                             include_fields=fields,
                             ignore_geometry=True)
    dbfTable.drop_duplicates(inplace=True)
    return dbfTable


def _InsertCounties(dbInfo, rows):
    """Insert county rows (objects with STATE, FIPS and COUNTYNAME)."""
    # County area type
    areaType = 'C'

    for row in rows:
        fipsId = _FormatUgcId(row.STATE, areaType, row.FIPS)

        # Insert FIPS ID and name pair into database
        try:
            dbInfo.sqlCursor_.execute("INSERT INTO counties VALUES (?, ?)",
                                      (fipsId, row.COUNTYNAME))
        except sqlite3.IntegrityError:
            # Only a constraint violation means "already present"; any other
            # database error is a real failure and must propagate.
            print("Skipping duplicate county:", fipsId, row.COUNTYNAME)


def _InsertZones(dbInfo, rows, hasIdColumn):
    """Insert zone rows (objects with NAME and either ID or STATE and ZONE).

    Args:
        dbInfo: DatabaseInfo prepared by Prepare().
        rows: Iterable of row objects.
        hasIdColumn: True when rows carry a ready-made ``ID``; otherwise the
            ID is built from ``STATE`` and ``ZONE``.
    """
    # Zone area type
    areaType = 'Z'

    for row in rows:
        if hasIdColumn:
            fipsId = row.ID
        else:
            fipsId = _FormatUgcId(row.STATE, areaType, row.ZONE)

        # Insert FIPS ID and name pair into database
        try:
            dbInfo.sqlCursor_.execute("INSERT INTO counties VALUES (?, ?)",
                                      (fipsId, row.NAME))
        except sqlite3.IntegrityError:
            # Only print warning if FIPS ID has multiple names
            result = dbInfo.sqlCursor_.execute(
                "SELECT name FROM counties WHERE id = :fipsId",
                {"fipsId": fipsId})
            resultRow = result.fetchone()
            if resultRow is not None and resultRow["name"] != row.NAME:
                print("Skipping duplicate zone:", fipsId, row.NAME)


def ProcessCountiesDbf(dbInfo, dbfFilename):
    """Load one county DBF file into the ``counties`` table.

    Args:
        dbInfo: DatabaseInfo prepared by Prepare().
        dbfFilename: Path of the county DBF file.

    Raises:
        sqlite3.Error: For database failures other than a duplicate ID.
    """
    print("Processing county file:", dbfFilename)

    dbfTable = _ReadDbf(dbfFilename, ["STATE", "FIPS", "COUNTYNAME"])
    _InsertCounties(dbInfo, dbfTable.itertuples())


def ProcessZoneDbf(dbInfo, dbfFilename):
    """Load one zone DBF file into the ``counties`` table.

    Args:
        dbInfo: DatabaseInfo prepared by Prepare().
        dbfFilename: Path of the zone DBF file.

    Raises:
        sqlite3.Error: For database failures other than a duplicate ID.
    """
    print("Processing zone file:", dbfFilename)

    dbfTable = _ReadDbf(dbfFilename, ["ID", "STATE", "ZONE", "NAME"])

    # The presence of an ID column is a property of the file, not of a row,
    # so it is checked once instead of on every iteration.
    hasIdColumn = "ID" in dbfTable.columns
    _InsertZones(dbInfo, dbfTable.itertuples(), hasIdColumn)


def PostProcess(dbInfo):
    """Commit changes and close the database."""
    dbInfo.sqlConnection_.commit()
    dbInfo.sqlConnection_.close()


def Main():
    """Script entry point: build the output database from the inputs."""
    dbInfo = DatabaseInfo()
    args = ParseArguments()
    Prepare(dbInfo, args.outputDb_)

    try:
        for countyDb in args.inputCountyDbs_:
            ProcessCountiesDbf(dbInfo, countyDb)

        for zoneDb in args.inputZoneDbs_:
            ProcessZoneDbf(dbInfo, zoneDb)
    except BaseException:
        # Release the database file without committing partial results
        dbInfo.sqlConnection_.close()
        raise

    PostProcess(dbInfo)


if __name__ == "__main__":
    Main()