Commit

Modified documentation for assert_table_count
shashambhavi committed Nov 6, 2024
1 parent 0f0aeb5 commit bcb6476
Showing 2 changed files with 15 additions and 5 deletions.
11 changes: 10 additions & 1 deletion cloudbuild/nightly/scripts/python-scripts/assert_table_count.py
@@ -1,3 +1,11 @@
"""Python script that validates the data written to a BigQuery table by
the Flink job.
It compares the total row count and unique key count in the source
(either a BigQuery table or a GCS URI) with the destination BigQuery table.
The source is a BigQuery table for bounded read-write tests and a GCS file for
unbounded read-write tests.
"""

import argparse
from collections.abc import Sequence
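
As a rough illustration of the comparison the new docstring describes, a minimal
sketch of the bounded-mode row-count check might look like the following. The
helper name, project/dataset/table arguments, and usage are assumptions for
illustration only; the actual script receives its arguments via argparse.

from google.cloud import bigquery

def get_total_row_count(bq_client, project_name, dataset_name, table_name):
    # Count every row in a BigQuery table.
    query = (f"SELECT COUNT(*) AS total_rows "
             f"FROM `{project_name}.{dataset_name}.{table_name}`")
    return next(iter(bq_client.query(query).result())).total_rows

# Hypothetical usage: compare a bounded source table with the destination
# table written by the Flink job and fail the test on a mismatch.
client = bigquery.Client()
source_rows = get_total_row_count(client, "my-project", "my_dataset", "source_table")
destination_rows = get_total_row_count(client, "my-project", "my_dataset", "destination_table")
assert source_rows == destination_rows, "Row count mismatch between source and destination"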

@@ -74,13 +82,14 @@ def assert_unique_key_count(bq_client, storage_client, project_name, dataset_name,
mode,
is_exactly_once):
source_unique_key_count = 0
    # The rows in the source for unbounded mode are unique, so the total
    # row count equals the unique key count.
if mode == "unbounded":
source_unique_key_count = get_total_row_count_unbounded(storage_client, source)
else:
source_unique_key_count = get_unique_key_count(bq_client, project_name, dataset_name,
source)
logging.info(
f"Unique Key Count for Source Table {source}: {source_unique_key_count}")
f"Unique Key Count for Source {source}: {source_unique_key_count}")
destination_unique_key_count = get_unique_key_count(bq_client, project_name, dataset_name,
destination_table_name)
logging.info(
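Neither get_unique_key_count nor get_total_row_count_unbounded is shown in this
diff, so the following is only a hedged sketch of what they plausibly do. The
key column name (unique_key) and the newline-delimited file format are
assumptions, not details confirmed by the commit.

def get_unique_key_count(bq_client, project_name, dataset_name, table_name):
    # Sketch: count distinct values of an assumed key column; the real
    # column name is not visible in this diff.
    query = (f"SELECT COUNT(DISTINCT unique_key) AS unique_keys "
             f"FROM `{project_name}.{dataset_name}.{table_name}`")
    return next(iter(bq_client.query(query).result())).unique_keys

def get_total_row_count_unbounded(storage_client, source_uri):
    # Sketch: sum line counts of newline-delimited files under a
    # gs://bucket/prefix source URI. Because rows in the unbounded source
    # are unique, this total also serves as the unique key count.
    bucket_name, _, prefix = source_uri.removeprefix("gs://").partition("/")
    total = 0
    for blob in storage_client.bucket(bucket_name).list_blobs(prefix=prefix):
        total += blob.download_as_text().count("\n")
    return total

This mirrors the comment in the hunk above: counting rows in the unbounded
source doubles as counting unique keys.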
@@ -1,7 +1,8 @@
# The following operations are performed for internal, unbounded read-write tests:
# 1. Copying source files to a temporary GCS directory which acts as a new source.
# 2. Creating a destination table with a hardcoded schema.
# 3. Running the Flink job in unbounded mode while dynamically adding new files to the source.
"""The following operations are performed for internal, unbounded read-write tests:
1. Copying source files to a temporary GCS directory which acts as a new source.
2. Creating a destination table with a hardcoded schema.
3. Running the Flink job in unbounded mode while dynamically adding new files to the source.
"""

import argparse
from collections.abc import Sequence
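The rewritten docstring lists three setup steps. Step 1, copying source files to
a temporary GCS directory, might be sketched as follows; the function name,
bucket, and prefix layout are assumptions for illustration, not the script's
actual code.

import uuid
from google.cloud import storage

def copy_to_temporary_source(storage_client, bucket_name, source_prefix):
    # Copy every blob under source_prefix into a fresh temporary prefix that
    # the unbounded Flink job can watch as its source directory.
    bucket = storage_client.bucket(bucket_name)
    temp_prefix = f"temp-source-{uuid.uuid4().hex}/"
    for blob in bucket.list_blobs(prefix=source_prefix):
        bucket.copy_blob(blob, bucket, new_name=temp_prefix + blob.name.rsplit("/", 1)[-1])
    return f"gs://{bucket_name}/{temp_prefix}"

New files can later be copied into the returned prefix while the job runs,
which is what step 3's "dynamically adding new files to the source" refers to.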
