Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #193 and simplify get_table_definition function #194

Merged
merged 1 commit into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions bcdata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from .bc2pg import bc2pg, get_primary_keys
import requests

from .bc2pg import bc2pg
from .bcdc import get_table_definition, get_table_name
from .wcs import get_dem
from .wfs import (
Expand All @@ -15,4 +17,14 @@
"https://raw.githubusercontent.com/smnorris/bcdata/main/data/primary_keys.json"
)

__version__ = "0.11.1dev0"
# BCDC does not indicate which column in the schema is the primary key.
# In its absence, bcdata maintains its own dictionary of {table: primary_key},
# served via github; it is retrieved once here at import time.
response = requests.get(PRIMARY_KEY_DB_URL)
if response.status_code == 200:
primary_keys = response.json()
else:
raise Exception(f"Failed to download primary key database at {PRIMARY_KEY_DB_URL}")
primary_keys = {}

__version__ = "0.12.0dev0"
18 changes: 2 additions & 16 deletions bcdata/bc2pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,6 @@
]


def get_primary_keys():
    """Fetch bcdata's primary key lookup and return it as a dict.

    The BC Data Catalogue does not record primary keys, so bcdata
    maintains its own {table: primary_key} mapping, served via github.
    On a non-200 response, log a warning and fall back to an empty dict
    rather than failing.
    """
    resp = requests.get(bcdata.PRIMARY_KEY_DB_URL)
    if resp.status_code != 200:
        log.warning(
            f"Failed to download primary key database at {bcdata.PRIMARY_KEY_DB_URL}"
        )
        return {}
    return resp.json()


def bc2pg( # noqa: C901
dataset,
db_url,
Expand Down Expand Up @@ -148,9 +135,8 @@ def bc2pg( # noqa: C901
raise ValueError("Geometry type {geometry_type} is not supported")

# if primary key is not supplied, use default (if present in list)
primary_keys = get_primary_keys()
if not primary_key and dataset.lower() in primary_keys:
primary_key = primary_keys[dataset.lower()]
if not primary_key and dataset.lower() in bcdata.primary_keys:
primary_key = bcdata.primary_keys[dataset.lower()]

# fail if specified primary key is not in the table
if primary_key and primary_key.upper() not in [
Expand Down
104 changes: 36 additions & 68 deletions bcdata/bcdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def _package_show(package):

@stamina.retry(on=requests.HTTPError, timeout=60)
def _table_definition(table_name):
r = requests.get(BCDC_API_URL + "package_search", params={"q": table_name})
r = requests.get(
BCDC_API_URL + "package_search",
params={"q": "res_extras_object_name:" + table_name},
)
if r.status_code != 200:
log.warning(r.headers)
if r.status_code in [400, 401, 404]:
Expand Down Expand Up @@ -66,7 +69,7 @@ def get_table_name(package):
return layer_names[0]


def get_table_definition(table_name): # noqa: C901
def get_table_definition(table_name):
"""
Given a table/object name, search BCDC for the first package/resource with a matching "object_name",
returns dict: {"comments": <>, "notes": <>, "schema": {<schema dict>} }
Expand All @@ -77,81 +80,46 @@ def get_table_definition(table_name): # noqa: C901
raise ValueError(
f"Only tables available via WFS are supported, {table_name} not found"
)

# search the api for the provided table
r = _table_definition(table_name)

# start with an empty table definition dict
table_definition = {
"description": None,
"comments": None,
"schema": [],
"primary_key": None,
}

# if there are no matching results, let the user know
if r.json()["result"]["count"] == 0:
log.warning(
f"BC Data Catalouge API search provides no results for: {table_name}"
)
return []
else:
matches = []
# iterate through results of search (packages)
for result in r.json()["result"]["results"]:
notes = result["notes"]
# description is at top level, same for all resources
table_definition["description"] = result["notes"]
# iterate through resources associated with each package
for resource in result["resources"]:
# where to find schema details depends on format type
if resource["format"] == "wms":
if urlparse(resource["url"]).path.split("/")[3] == table_name:
if "object_table_comments" in resource.keys():
table_comments = resource["object_table_comments"]
else:
table_comments = None
# only add to matches if schema details found
if "details" in resource.keys() and resource["details"] != "":
table_details = resource["details"]
matches.append((notes, table_comments, table_details))
log.debug(resource)
# oracle sde format type
if resource["format"] == "oracle_sde":
if resource["object_name"] == table_name:
if "object_table_comments" in resource.keys():
table_comments = resource["object_table_comments"]
else:
table_comments = None
# only add to matches if schema details found
if "details" in resource.keys() and resource["details"] != "":
table_details = resource["details"]
matches.append((notes, table_comments, table_details))
log.debug(resource)

# multiple format resource
elif resource["format"] == "multiple":
# if multiple format, check for table name match in this location
if resource["preview_info"]:
# check that layer_name key is present
if "layer_name" in json.loads(resource["preview_info"]):
# then check if it matches the table name
if (
json.loads(resource["preview_info"])["layer_name"]
== table_name
):
if "object_table_comments" in resource.keys():
table_comments = resource["object_table_comments"]
else:
table_comments = None
# only add to matches if schema details found
if (
"details" in resource.keys()
and resource["details"] != ""
):
table_details = resource["details"]
matches.append(
(notes, table_comments, table_details)
)
log.debug(resource)

# uniquify the result
if len(matches) > 0:
matched = list(set(matches))[0]
return {
"description": matched[0], # notes=description
"comments": matched[1],
"schema": json.loads(matched[2]),
}
else:
raise ValueError(
f"BCDC search for {table_name} does not return a table schema"
)
# presume description and details are the same for all resources
# (below only retains the final schema/comments if there is more than one
# package with this information)
if "details" in resource.keys() and resource["details"] != "":
table_definition["schema"] = json.loads(resource["details"])
# look for comments only if details/schema is present
if "object_table_comments" in resource.keys():
table_definition["comments"] = resource["object_table_comments"]

if not table_definition["schema"]:
raise log.warning(
f"BC Data Catalouge API search provides no schema for: {table_name}"
)

# add primary key if present in bcdata.primary_keys
if table_name.lower() in bcdata.primary_keys:
table_definition["primary_key"] = bcdata.primary_keys[table_name.lower()]

return table_definition
7 changes: 1 addition & 6 deletions bcdata/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,9 @@ def info(dataset, indent, meta_member, verbose, quiet):
verbosity = verbose - quiet
configure_logging(verbosity)
dataset = bcdata.validate_name(dataset)
info = {}
info = bcdata.get_table_definition(dataset)
info["name"] = dataset
info["count"] = bcdata.get_count(dataset)
table_definition = bcdata.get_table_definition(dataset)
info["description"] = table_definition["description"]
info["table_comments"] = table_definition["comments"]
info["schema"] = table_definition["schema"]

if meta_member:
click.echo(info[meta_member])
else:
Expand Down
5 changes: 2 additions & 3 deletions bcdata/wfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,8 @@ def get_sortkey(self, table):
"""Check data for unique columns available for sorting paged requests"""
columns = list(self.get_schema(table)["properties"].keys())
# use known primary key if it is present in the bcdata repository
known_primary_keys = bcdata.get_primary_keys()
if table.lower() in known_primary_keys:
return known_primary_keys[table.lower()].upper()
if table.lower() in bcdata.primary_keys:
return bcdata.primary_keys[table.lower()].upper()
# if pk not known, use OBJECTID as default sort key when present
elif "OBJECTID" in columns:
return "OBJECTID"
Expand Down
3 changes: 1 addition & 2 deletions tests/test_bc2pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ def test_bc2pg_primary_key():


def test_bc2pg_get_primary_keys():
primary_keys = bcdata.get_primary_keys()
assert primary_keys[ASSESSMENTS_TABLE] == "stream_crossing_id"
assert bcdata.primary_keys[ASSESSMENTS_TABLE] == "stream_crossing_id"


def test_bc2pg_primary_key_default():
Expand Down