refactor wfs module, remove out_file option from dump, add standard options to cat
smnorris · Dec 13, 2024 · afa8bb6 · afa8bb6
1 parent ed65760
commit afa8bb6
Show file tree

Hide file tree

Showing 5 changed files with 134 additions and 325 deletions.
diff --git a/src/bcdata/__init__.py b/src/bcdata/__init__.py
@@ -4,7 +4,11 @@
 from .bcdc import get_table_definition as get_table_definition
 from .bcdc import get_table_name as get_table_name
 from .wcs import get_dem as get_dem
-from .wfs import BCWFS
+from .wfs import get_count as get_count
+from .wfs import get_data as get_data
+from .wfs import get_sortkey as get_sortkey
+from .wfs import list_tables as list_tables
+from .wfs import validate_name as validate_name
 
 PRIMARY_KEY_DB_URL = "https://raw.githubusercontent.com/smnorris/bcdata/main/data/primary_keys.json"
 
@@ -19,109 +23,3 @@
     primary_keys = {}
 
 __version__ = "0.14.0dev0"
-
-# abstract away the WFS object
-
-
-def define_requests(
-    dataset,
-    query=None,
-    crs="epsg:4326",
-    bounds=None,
-    bounds_crs="EPSG:3005",
-    count=None,
-    sortby=None,
-    check_count=True,
-):
-    WFS = BCWFS()
-    return WFS.define_requests(
-        dataset,
-        query=query,
-        crs=crs,
-        bounds=bounds,
-        bounds_crs=bounds_crs,
-        count=count,
-        sortby=sortby,
-        check_count=check_count,
-    )
-
-
-def get_count(dataset, query=None, bounds=None, bounds_crs="EPSG:3005"):
-    WFS = BCWFS()
-    table = WFS.validate_name(dataset)
-    geom_column = WFS.get_schema(table)["geometry_column"]
-    return WFS.get_count(
-        dataset,
-        query=query,
-        bounds=bounds,
-        bounds_crs=bounds_crs,
-        geom_column=geom_column,
-    )
-
-
-def get_data(
-    dataset,
-    query=None,
-    crs="epsg:4326",
-    bounds=None,
-    bounds_crs="epsg:3005",
-    count=None,
-    sortby=None,
-    as_gdf=False,
-    lowercase=False,
-    clean=True,
-):
-    WFS = BCWFS()
-    return WFS.get_data(
-        dataset,
-        query=query,
-        crs=crs,
-        bounds=bounds,
-        bounds_crs=bounds_crs,
-        count=count,
-        sortby=sortby,
-        as_gdf=as_gdf,
-        lowercase=lowercase,
-        clean=clean,
-    )
-
-
-def get_features(
-    dataset,
-    query=None,
-    crs="epsg:4326",
-    bounds=None,
-    bounds_crs="epsg:3005",
-    count=None,
-    sortby=None,
-    lowercase=False,
-    check_count=True,
-):
-    WFS = BCWFS()
-    return WFS.get_features(
-        dataset,
-        query=query,
-        crs=crs,
-        bounds=bounds,
-        bounds_crs=bounds_crs,
-        count=count,
-        sortby=sortby,
-        lowercase=lowercase,
-        check_count=check_count,
-    )
-
-
-def get_sortkey(dataset):
-    WFS = BCWFS()
-    table = WFS.validate_name(dataset)
-    return WFS.get_sortkey(table)
-
-
-def list_tables(refresh=False):
-    WFS = BCWFS(refresh)
-    return WFS.list_tables()
-
-
-def validate_name(dataset):
-    WFS = BCWFS()
-    return WFS.validate_name(dataset)
diff --git a/src/bcdata/bc2pg.py b/src/bcdata/bc2pg.py
@@ -73,14 +73,8 @@ def bc2pg(  # noqa: C901
     WFS = BCWFS()
 
     # define requests
-    urls = bcdata.define_requests(
-        dataset,
-        query=query,
-        bounds=bounds,
-        bounds_crs=bounds_crs,
-        count=count,
-        sortby=sortby,
-        crs="epsg:3005",
+    urls = WFS.define_requests(
+        dataset, query=query, bounds=bounds, bounds_crs=bounds_crs, count=count, sortby=sortby
     )
 
     df = None  # just for tracking if first download is done by geometry type check
@@ -101,7 +95,7 @@ def bc2pg(  # noqa: C901
 
         # if geometry type is not provided, determine type by making the first request
         if not geometry_type:
-            df = WFS.make_requests(dataset=dataset, urls=[urls[0]], as_gdf=True, crs="epsg:3005", lowercase=True)
+            df = WFS.request_features(url=urls[0], as_gdf=True, crs="epsg:3005", lowercase=True)
             geometry_type = df.geom_type.unique()[0]  # keep only the first type
             if numpy.any(df.has_z.unique()[0]):  # geopandas does not include Z in geom_type string
                 geometry_type = geometry_type + "Z"
@@ -110,9 +104,8 @@ def bc2pg(  # noqa: C901
         # (in case all entrys with geom are near the bottom)
         if not geometry_type:
             if not urls[-1] == urls[0]:
-                df_temp = WFS.make_requests(
-                    dataset=dataset,
-                    urls=[urls[-1]],
+                df_temp = WFS.request_features(
+                    url=urls[-1],
                     as_gdf=True,
                     crs="epsg:3005",
                     lowercase=True,
@@ -165,7 +158,7 @@ def bc2pg(  # noqa: C901
         for n, url in enumerate(urls):
             # if first url not downloaded above when checking geom type, do now
             if df is None:
-                df = WFS.make_requests(dataset=dataset, urls=[url], as_gdf=True, crs="epsg:3005", lowercase=True)
+                df = WFS.request_features(url=url, as_gdf=True, crs="epsg:3005", lowercase=True)
             # tidy the resulting dataframe
             df = df.rename_geometry("geom")
             # lowercasify

diff --git a/src/bcdata/cli.py b/src/bcdata/cli.py
@@ -8,7 +8,6 @@
 from cligj import compact_opt, indent_opt, quiet_opt, verbose_opt
 
 import bcdata
-from bcdata import BCWFS
 from bcdata.database import Database
 
 LOG_FORMAT = "%(asctime)s:%(levelname)s:%(name)s: %(message)s"
@@ -237,25 +236,18 @@ def dump(dataset, query, count, bounds, bounds_crs, lowercase, promote_to_multi,
     verbosity = verbose - quiet
     configure_logging(verbosity)
     table = bcdata.validate_name(dataset)
-
-    urls = bcdata.define_requests(
+    data = bcdata.get_data(
         table,
         query=query,
         count=count,
         bounds=bounds,
         bounds_crs=bounds_crs,
+        lowercase=lowercase,
+        promote_to_multi=promote_to_multi,
+        as_gdf=False,
     )
-    WFS = BCWFS()
-    for url in urls:
-        gdf = WFS.make_requests(
-            dataset=dataset,
-            urls=[url],
-            as_gdf=True,
-            lowercase=lowercase,
-            silent=True,
-        )
-        sink = click.get_text_stream("stdout")
-        sink.write(json.dumps(json.loads(gdf.to_json())))
+    sink = click.get_text_stream("stdout")
+    sink.write(json.dumps(data))
 
 
 @cli.command()
@@ -264,30 +256,46 @@ def dump(dataset, query, count, bounds, bounds_crs, lowercase, promote_to_multi,
     "--query",
     help="A valid CQL or ECQL query",
 )
+@click.option(
+    "--count",
+    "-c",
+    default=None,
+    type=int,
+    help="Number of features to request and dump",
+)
 @bounds_opt
-@indent_opt
-@compact_opt
-@dst_crs_opt
-@click.option("--sortby", "-s", help="Name of sort field")
 @click.option(
     "--bounds-crs",
     "--bounds_crs",
     help="CRS of provided bounds",
     default="EPSG:3005",
 )
+@indent_opt
+@compact_opt
+@dst_crs_opt
+@click.option("--sortby", "-s", help="Name of sort field")
 @lowercase_opt
+@click.option(
+    "--promote-to-multi",
+    "-m",
+    help="Promote features to multipart",
+    is_flag=True,
+    default=False,
+)
 @verbose_opt
 @quiet_opt
 def cat(
     dataset,
     query,
+    count,
     bounds,
     bounds_crs,
     indent,
     compact,
     dst_crs,
     sortby,
     lowercase,
+    promote_to_multi,
     verbose,
     quiet,
 ):
@@ -303,16 +311,23 @@ def cat(
     if compact:
         dump_kwds["separators"] = (",", ":")
     table = bcdata.validate_name(dataset)
-    for feat in bcdata.get_features(
+    WFS = bcdata.wfs.BCWFS()
+    for url in WFS.define_requests(
         table,
         query=query,
+        count=count,
         bounds=bounds,
         bounds_crs=bounds_crs,
-        sortby=sortby,
-        crs=dst_crs,
-        lowercase=lowercase,
     ):
-        click.echo(json.dumps(feat, **dump_kwds))
+        featurecollection = WFS.request_features(
+            url=url,
+            as_gdf=False,
+            lowercase=lowercase,
+            crs=dst_crs,
+            promote_to_multi=promote_to_multi,
+        )
+        for feat in featurecollection["features"]:
+            click.echo(json.dumps(feat, **dump_kwds))
 
 
 @cli.command()