Skip to content

Commit

Permalink
sql changes
Browse files Browse the repository at this point in the history
  • Loading branch information
keegansmith21 committed Jul 21, 2023
1 parent 2c3cac8 commit debb25d
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 32 deletions.
51 changes: 19 additions & 32 deletions oaebu_workflows/database/sql/create_book_products.sql.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -160,24 +160,6 @@ CREATE TEMP FUNCTION group_items_irus_location(items ARRAY<STRUCT<latitude FLOAT
)
);

# Helper Function: collapse UCL Discovery download records to one entry per country code
{#
Groups the input structs by country_code. Within each group the download counts
are summed and MAX(country_name) supplies a single name for that code.

Output Schema:
country_code STRING NULLABLE
country_name STRING NULLABLE
download_count INTEGER NULLABLE
#}
CREATE TEMP FUNCTION group_items_ucl_country(items ARRAY<STRUCT<download_count INT64, country_name STRING, country_code STRING>>) AS (
  ARRAY(
    SELECT AS STRUCT
      item.country_code,
      MAX(item.country_name) AS country_name,
      SUM(item.download_count) AS download_count
    FROM UNNEST(items) AS item
    GROUP BY item.country_code
  )
);

# Helper Function:
{#
Output Schema:
Expand Down Expand Up @@ -364,11 +346,10 @@ Providng an empty row enable simplicity of the downstream queries and also means
#}
empty_ucl_discovery as (
-- Placeholder row: every column is a typed NULL (or a one-element array of typed-NULL structs),
-- so queries that read this CTE in place of a real UCL Discovery table still see named, typed columns.
SELECT
CAST(NULL as STRING) as isbn,
CAST(NULL as STRING) as book_title,
CAST(NULL as STRING) as ISBN13,
CAST(NULL as DATE) as release_date,
CAST(NULL as INT64) as total_downloads,
[STRUCT(CAST(null as INT64) as download_count, CAST(NULL as STRING) as country_name, CAST(NULL as STRING) as country_code)] as downloads_per_country,
-- The STRUCT( call must be closed before the array literal's closing bracket.
[STRUCT(CAST(NULL as INT64) as count)] as total,
[STRUCT(CAST(null as INT64) as count, CAST(NULL as STRING) as country_code)] as country
),

{#
Expand Down Expand Up @@ -770,24 +751,30 @@ fulcrum_metadata as (

# UCL Discovery Metrics
{#
The purpose of this block of SQL is to organise the metrics from OAPEN IRUS-UK for easier consumption of downstream queries.
The purpose of this block of SQL is to organise the metrics from UCL Discovery for easier consumption of downstream queries.
Defined in the create_oaebu_book_product_table method, in onix_workflow.py, is the value of 'ucl_table_id'.
This will either point to 'empty_ucl_discovery' (defined above as an empty row) or the name of the real data table in bigquery.
The reason for the choice of selecting an empty row, is that some partners will not have corresponding data to query.
Providing an empty row enables simplicity of the downstream queries and also means the resulting schema across all publishers is the same.
Providing an empty row simplifies the downstream queries and also means the resulting schema across all publishers is the same.
#}
ucl_discovery_metrics as (
-- Aggregates the rows of the table named by ucl_table_id into one `metrics` struct per ISBN / release_date group.
SELECT
isbn as ISBN13,
ISBN13,
release_date,
STRUCT(
SUM(total_downloads) as total_downloads, group_items_ucl_country(ARRAY_CONCAT_AGG(downloads_per_country)) as downloads_per_country
) as metrics
FROM `{{ ucl_table_id }}`
WHERE isbn IS NOT NULL
GROUP BY isbn, release_date
),
-- Three arrays aggregated from the unnested `country` records;
-- IFNULL substitutes "N/A" when the LEFT JOIN finds no matching row in the country table.
STRUCT( ARRAY_AGG(ucl_country.value ) AS country_code,
ARRAY_AGG(IFNULL(countries.iso_name, "N/A")) AS country_name,
ARRAY_AGG(ucl_country.count ) AS country_downloads) AS metrics
FROM
`{{ ucl_table_id }}`,
-- The comma join with UNNEST yields one row per element of the `country` array.
UNNEST(country) AS ucl_country
LEFT JOIN
`{{ country_table_id }}` AS countries
ON
-- NOTE(review): presumably `value` holds an ISO alpha-2 code, since it is matched against countries.alpha2 - confirm against the source table schema.
ucl_country.value = countries.alpha2
GROUP BY
ISBN13, release_date
)

# Crossref Events
{#
Expand Down
2 changes: 2 additions & 0 deletions oaebu_workflows/workflows/onix_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,7 @@ def create_oaebu_book_product_table(
self.bq_onix_table_name,
release.onix_snapshot_date,
)
country_table_id = bq_table_id(self.bq_country_project_id, self.bq_country_dataset_id, "country")
template_path = os.path.join(sql_folder(), "create_book_products.sql.jinja2")
sql = render_template(
template_path,
Expand All @@ -749,6 +750,7 @@ def create_oaebu_book_product_table(
ucl_table_id=ucl_table_id,
fulcrum_table_id=fulcrum_table_id,
book_table_id=book_table_id,
country_table_id = country_table_id,
workid_table_id=workid_table_id,
workfamilyid_table_id=workfamilyid_table_id,
onix_workflow=True,
Expand Down

0 comments on commit debb25d

Please sign in to comment.