From bcb6476cfe69844202e15e39f4539cf6825ad609 Mon Sep 17 00:00:00 2001 From: shashambhavi Date: Wed, 6 Nov 2024 10:43:24 +0530 Subject: [PATCH] Modified documentation for assert_table_count --- .../scripts/python-scripts/assert_table_count.py | 11 ++++++++++- .../python-scripts/create_unbounded_sink_table.py | 9 +++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cloudbuild/nightly/scripts/python-scripts/assert_table_count.py b/cloudbuild/nightly/scripts/python-scripts/assert_table_count.py index be0453ee..251eb8da 100644 --- a/cloudbuild/nightly/scripts/python-scripts/assert_table_count.py +++ b/cloudbuild/nightly/scripts/python-scripts/assert_table_count.py @@ -1,3 +1,11 @@ +"""Python script that validates the data written to a BigQuery table by +the Flink job. +It compares the total row count and unique key count in the source +(either a BigQuery table or a GCS URI) with the destination BigQuery table. +The source is a BigQuery table for bounded read-write tests and a GCS file for +unbounded read-write tests.
+""" + import argparse from collections.abc import Sequence @@ -74,13 +82,14 @@ def assert_unique_key_count(bq_client, storage_client, project_name, dataset_nam mode, is_exactly_once): source_unique_key_count = 0 + # The rows in the source for unbounded mode are unique if mode == "unbounded": source_unique_key_count = get_total_row_count_unbounded(storage_client, source) else: source_unique_key_count = get_unique_key_count(bq_client, project_name, dataset_name, source) logging.info( - f"Unique Key Count for Source Table {source}: {source_unique_key_count}") + f"Unique Key Count for Source {source}: {source_unique_key_count}") destination_unique_key_count = get_unique_key_count(bq_client, project_name, dataset_name, destination_table_name) logging.info( diff --git a/cloudbuild/nightly/scripts/python-scripts/create_unbounded_sink_table.py b/cloudbuild/nightly/scripts/python-scripts/create_unbounded_sink_table.py index bd70d420..492ae14e 100644 --- a/cloudbuild/nightly/scripts/python-scripts/create_unbounded_sink_table.py +++ b/cloudbuild/nightly/scripts/python-scripts/create_unbounded_sink_table.py @@ -1,7 +1,8 @@ -# The following operations are performed for internal, unbounded read-write tests: -# 1. Copying source files to a temporary GCS directory which acts as a new source. -# 2. Creating a destination table with a hardcoded schema. -# 3. Running the Flink job in unbounded mode while dynamically adding new files to the source. +"""The following operations are performed for internal, unbounded read-write tests: +1. Copying source files to a temporary GCS directory which acts as a new source. +2. Creating a destination table with a hardcoded schema. +3. Running the Flink job in unbounded mode while dynamically adding new files to the source. +""" import argparse from collections.abc import Sequence