Commit

Modified documentation for assert_table_count
shashambhavi committed Nov 6, 2024
1 parent 0f0aeb5 commit bcb6476
Showing 2 changed files with 15 additions and 5 deletions.
11 changes: 10 additions & 1 deletion cloudbuild/nightly/scripts/python-scripts/assert_table_count.py
@@ -1,3 +1,11 @@
"""Python script that validates the data written to a BigQuery table by
the Flink job.
It compares the total row count and unique key count in the source
(either a BigQuery table or a GCS URI) with the destination BigQuery table.
The source is a BigQuery table for bounded read-write tests and a GCS file for
unbounded read-write tests.
"""

import argparse
from collections.abc import Sequence
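
As a rough illustration of the comparison the new docstring describes, a minimal
sketch of the bounded-mode row-count check might look like the following. The
helper name, project/dataset/table arguments, and usage are assumptions for
illustration only; the actual script receives its arguments via argparse.

from google.cloud import bigquery

def get_total_row_count(bq_client, project_name, dataset_name, table_name):
    # Count every row in a BigQuery table.
    query = (f"SELECT COUNT(*) AS total_rows "
             f"FROM `{project_name}.{dataset_name}.{table_name}`")
    return next(iter(bq_client.query(query).result())).total_rows

# Hypothetical usage: compare a bounded source table with the destination
# table written by the Flink job and fail the test on a mismatch.
client = bigquery.Client()
source_rows = get_total_row_count(client, "my-project", "my_dataset", "source_table")
destination_rows = get_total_row_count(client, "my-project", "my_dataset", "destination_table")
assert source_rows == destination_rows, "Row count mismatch between source and destination"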

@@ -74,13 +82,14 @@ def assert_unique_key_count(bq_client, storage_client, project_name, dataset_name,
mode,
is_exactly_once):
source_unique_key_count = 0
    # The rows in the source for unbounded mode are unique, so the total
    # row count equals the unique key count.
if mode == "unbounded":
source_unique_key_count = get_total_row_count_unbounded(storage_client, source)
else:
source_unique_key_count = get_unique_key_count(bq_client, project_name, dataset_name,
source)
logging.info(
f"Unique Key Count for Source Table {source}: {source_unique_key_count}")
f"Unique Key Count for Source {source}: {source_unique_key_count}")
destination_unique_key_count = get_unique_key_count(bq_client, project_name, dataset_name,
destination_table_name)
logging.info(
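Neither get_unique_key_count nor get_total_row_count_unbounded is shown in this
diff, so the following is only a hedged sketch of what they plausibly do. The
key column name (unique_key) and the newline-delimited file format are
assumptions, not details confirmed by the commit.

def get_unique_key_count(bq_client, project_name, dataset_name, table_name):
    # Sketch: count distinct values of an assumed key column; the real
    # column name is not visible in this diff.
    query = (f"SELECT COUNT(DISTINCT unique_key) AS unique_keys "
             f"FROM `{project_name}.{dataset_name}.{table_name}`")
    return next(iter(bq_client.query(query).result())).unique_keys

def get_total_row_count_unbounded(storage_client, source_uri):
    # Sketch: sum line counts of newline-delimited files under a
    # gs://bucket/prefix source URI. Because rows in the unbounded source
    # are unique, this total also serves as the unique key count.
    bucket_name, _, prefix = source_uri.removeprefix("gs://").partition("/")
    total = 0
    for blob in storage_client.bucket(bucket_name).list_blobs(prefix=prefix):
        total += blob.download_as_text().count("\n")
    return total

This mirrors the comment in the hunk above: counting rows in the unbounded
source doubles as counting unique keys.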
@@ -1,7 +1,8 @@
# The following operations are performed for internal, unbounded read-write tests:
# 1. Copying source files to a temporary GCS directory which acts as a new source.
# 2. Creating a destination table with a hardcoded schema.
# 3. Running the Flink job in unbounded mode while dynamically adding new files to the source.
"""The following operations are performed for internal, unbounded read-write tests:
1. Copying source files to a temporary GCS directory which acts as a new source.
2. Creating a destination table with a hardcoded schema.
3. Running the Flink job in unbounded mode while dynamically adding new files to the source.
"""

import argparse
from collections.abc import Sequence
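The rewritten docstring lists three setup steps. Step 1, copying source files to
a temporary GCS directory, might be sketched as follows; the function name,
bucket, and prefix layout are assumptions for illustration, not the script's
actual code.

import uuid
from google.cloud import storage

def copy_to_temporary_source(storage_client, bucket_name, source_prefix):
    # Copy every blob under source_prefix into a fresh temporary prefix that
    # the unbounded Flink job can watch as its source directory.
    bucket = storage_client.bucket(bucket_name)
    temp_prefix = f"temp-source-{uuid.uuid4().hex}/"
    for blob in bucket.list_blobs(prefix=source_prefix):
        bucket.copy_blob(blob, bucket, new_name=temp_prefix + blob.name.rsplit("/", 1)[-1])
    return f"gs://{bucket_name}/{temp_prefix}"

New files can later be copied into the returned prefix while the job runs,
which is what step 3's "dynamically adding new files to the source" refers to.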
