Skip to content

Commit

Permalink
Update the 'updated_on' column during popularity refresh (#4460)
Browse files Browse the repository at this point in the history
* Update the updated_on column during popularity refresh

* Update tests

* Require updated_on be updated in batched_update
  • Loading branch information
stacimc authored Jun 13, 2024
1 parent 6812cb5 commit d0c0226
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
4 changes: 2 additions & 2 deletions catalog/dags/database/batched_update/batched_update_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,13 @@
pattern="^WHERE",
),
"update_query": Param(
- default="SET...",
+ default="SET updated_on = NOW(), ...",
type="string",
description=(
"The part of the SQL `UPDATE` command, beginning with `SET`, that"
" will be run for each batch."
),
- pattern="^SET",
+ pattern="^SET updated_on = NOW()",
),
"batch_size": Param(
default=constants.DEFAULT_BATCH_SIZE,
Expand Down
3 changes: 2 additions & 1 deletion catalog/dags/popularity/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ def format_update_standardized_popularity_query(
by a `batched_update` DagRun.
"""
return (
- f"SET {col.STANDARDIZED_POPULARITY.db_name} ="
+ f"SET {col.UPDATED_ON.db_name} = NOW(),"
+ f" {col.STANDARDIZED_POPULARITY.db_name} ="
f" {sql_info.standardized_popularity_fn}({sql_info.media_table}.{PARTITION},"
f" {sql_info.media_table}.{METADATA_COLUMN})"
)
Expand Down
6 changes: 3 additions & 3 deletions catalog/tests/dags/popularity/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ def test_standardized_popularity_function_calculates(
"query_id": "foo_provider_popularity_refresh_20230101",
"table_name": "image",
"select_query": "WHERE provider='foo_provider' AND updated_on < '2023-01-01 00:00:00'",
- "update_query": "SET standardized_popularity = standardized_image_popularity(image.provider, image.meta_data)",
+ "update_query": "SET updated_on = NOW(), standardized_popularity = standardized_image_popularity(image.provider, image.meta_data)",
"batch_size": 10000,
"update_timeout": 3600.0,
"dry_run": False,
Expand All @@ -446,7 +446,7 @@ def test_standardized_popularity_function_calculates(
"query_id": "my_provider_popularity_refresh_20230101",
"table_name": "audio",
"select_query": "WHERE provider='my_provider' AND updated_on < '2023-01-01 00:00:00'",
- "update_query": "SET standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)",
+ "update_query": "SET updated_on = NOW(), standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)",
"batch_size": 10000,
"update_timeout": 3600.0,
"dry_run": False,
Expand All @@ -456,7 +456,7 @@ def test_standardized_popularity_function_calculates(
"query_id": "your_provider_popularity_refresh_20230101",
"table_name": "audio",
"select_query": "WHERE provider='your_provider' AND updated_on < '2023-01-01 00:00:00'",
- "update_query": "SET standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)",
+ "update_query": "SET updated_on = NOW(), standardized_popularity = standardized_audio_popularity(audio.provider, audio.meta_data)",
"batch_size": 10000,
"update_timeout": 3600.0,
"dry_run": False,
Expand Down

0 comments on commit d0c0226

Please sign in to comment.