From c1441c52f90703b254afc21323856a697796f439 Mon Sep 17 00:00:00 2001 From: Praveena2607 Date: Fri, 17 Jan 2025 04:38:23 +0000 Subject: [PATCH] e2e additional BigQuery source scenarios --- .../source/BigQuerySourceError.feature | 18 ++ .../source/BigQueryToBigQuery.feature | 35 ++++ .../source/BigQueryToGCS_WithMacro.feature | 154 ++++++++++++++++++ .../utils/CdfPluginPropertyLocator.java | 8 +- .../resources/errorMessage.properties | 4 + .../resources/pluginParameters.properties | 11 ++ 6 files changed, 229 insertions(+), 1 deletion(-) diff --git a/src/e2e-test/features/bigquery/source/BigQuerySourceError.feature b/src/e2e-test/features/bigquery/source/BigQuerySourceError.feature index eb475837ee..951d22aa11 100644 --- a/src/e2e-test/features/bigquery/source/BigQuerySourceError.feature +++ b/src/e2e-test/features/bigquery/source/BigQuerySourceError.feature @@ -55,3 +55,21 @@ Feature: BigQuery source - Validate BigQuery source plugin error scenarios Then Enter BigQuery source property table name Then Enter BigQuery property temporary bucket name "bqInvalidTemporaryBucket" Then Verify the BigQuery validation error message for invalid property "bucket" + + @BQ_SOURCE_TEST @BigQuery_Source_Required + Scenario:Verify BigQuery Source properties validation errors for incorrect reference name, Partition Start date and Partition end Date + Given Open Datafusion Project to configure pipeline + When Expand Plugin group in the LHS plugins list: "Source" + When Select plugin: "BigQuery" from the plugins list as: "Source" + Then Navigate to the properties page of plugin: "BigQuery" + Then Replace input plugin property: "project" with value: "projectId" + And Enter input plugin property: "referenceName" with value: "bqInvalidReferenceName" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + And Enter input plugin property: "partitionFrom" with value: "bqIncorrectFormatStartDate" + And 
Enter input plugin property: "partitionTo" with value: "bqIncorrectFormatEndDate" + Then Click on the Get Schema button + And Click on the Validate button + Then Verify that the Plugin Property: "referenceName" is displaying an in-line error message: "errorMessageIncorrectReferenceName" + Then Verify that the Plugin Property: "partitionFrom" is displaying an in-line error message: "errorMessageIncorrectPartitionStartDate" + Then Verify that the Plugin Property: "partitionTo" is displaying an in-line error message: "errorMessageIncorrectPartitionEndDate" diff --git a/src/e2e-test/features/bigquery/source/BigQueryToBigQuery.feature b/src/e2e-test/features/bigquery/source/BigQueryToBigQuery.feature index 0a87779c3b..423dcbc00b 100644 --- a/src/e2e-test/features/bigquery/source/BigQueryToBigQuery.feature +++ b/src/e2e-test/features/bigquery/source/BigQueryToBigQuery.feature @@ -354,3 +354,38 @@ Feature: BigQuery source - Verification of BigQuery to BigQuery successful data Then Open and capture logs Then Verify the pipeline status is "Succeeded" Then Validate the values of records transferred to BQ sink is equal to the values from source BigQuery table + + @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario:Validate that pipeline run fails when incorrect filter values are provided + Given Open Datafusion Project to configure pipeline + When Expand Plugin group in the LHS plugins list: "Source" + When Select plugin: "BigQuery" from the plugins list as: "Source" + Then Navigate to the properties page of plugin: "BigQuery" + Then Enter BigQuery property reference name + Then Enter BigQuery property projectId "projectId" + Then Enter BigQuery property datasetProjectId "projectId" + Then Override Service account details if set in environment variables + Then Enter BigQuery property dataset "dataset" + Then Enter BigQuery source property table name + Then Enter input plugin property: "filter" with value: "incorrectFilter" + Then Validate output schema with expectedSchema
"bqSourceSchema" + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "BigQuery" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "BigQuery2" + Then Override Service account details if set in environment variables + Then Enter the BigQuery sink mandatory properties + Then Validate "BigQuery2" plugin properties + Then Close the BigQuery properties + Then Connect source as "BigQuery" and sink as "BigQuery" to establish connection + Then Save the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Failed" + Then Close the pipeline logs + Then Open Pipeline logs and verify Log entries having below listed Level and Message: + | Level | Message | + | ERROR | errorLogsMessageInvalidFilter | diff --git a/src/e2e-test/features/bigquery/source/BigQueryToGCS_WithMacro.feature b/src/e2e-test/features/bigquery/source/BigQueryToGCS_WithMacro.feature index bf90df79ae..10ef895b5f 100644 --- a/src/e2e-test/features/bigquery/source/BigQueryToGCS_WithMacro.feature +++ b/src/e2e-test/features/bigquery/source/BigQueryToGCS_WithMacro.feature @@ -69,3 +69,157 @@ Feature: BigQuery source - Verification of BigQuery to GCS successful data trans Then Verify the pipeline status is "Succeeded" Then Verify data is transferred to target GCS bucket Then Validate the cmek key "cmekGCS" of target GCS bucket if cmek is enabled + + @CMEK @BQ_SOURCE_TEST @GCS_SINK_TEST + Scenario:Validate successful records transfer from BigQuery to GCS with macro arguments for partition start date and partition end date + Given Open Datafusion Project to configure pipeline + When Expand Plugin group in the LHS plugins list: "Source" + When Select plugin: "BigQuery" from the plugins list as: "Source" + Then Navigate to the properties page of 
plugin: "BigQuery" + Then Enter BigQuery property reference name + Then Enter BigQuery property "projectId" as macro argument "bqProjectId" + Then Enter BigQuery property "datasetProjectId" as macro argument "bqDatasetProjectId" + Then Enter BigQuery property "partitionFrom" as macro argument "bqStartDate" + Then Enter BigQuery property "partitionTo" as macro argument "bqEndDate" + Then Enter BigQuery property "serviceAccountType" as macro argument "serviceAccountType" + Then Enter BigQuery property "serviceAccountFilePath" as macro argument "serviceAccount" + Then Enter BigQuery property "serviceAccountJSON" as macro argument "serviceAccount" + Then Enter BigQuery property "dataset" as macro argument "bqDataset" + Then Enter BigQuery property "table" as macro argument "bqSourceTable" + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "GCS" + Then Enter GCS property reference name + Then Enter GCS property "projectId" as macro argument "gcsProjectId" + Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType" + Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount" + Then Enter GCS property "serviceAccountJSON" as macro argument "serviceAccount" + Then Enter GCS property "path" as macro argument "gcsSinkPath" + Then Enter GCS sink property "pathSuffix" as macro argument "gcsPathSuffix" + Then Enter GCS property "format" as macro argument "gcsFormat" + Then Enter GCS sink cmek property "encryptionKeyName" as macro argument "cmekGCS" if cmek is enabled + Then Validate "GCS" plugin properties + Then Close the GCS properties + Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Save the pipeline + Then Preview and run the pipeline + Then Enter runtime argument value "projectId" for key 
"bqProjectId" + Then Enter runtime argument value "projectId" for key "bqDatasetProjectId" + Then Enter runtime argument value "partitionFrom" for key "bqStartDate" + Then Enter runtime argument value "partitionTo" for key "bqEndDate" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value "dataset" for key "bqDataset" + Then Enter runtime argument value for BigQuery source table name key "bqSourceTable" + Then Enter runtime argument value "projectId" for key "gcsProjectId" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "cmekGCS" for GCS cmek property key "cmekGCS" if GCS cmek is enabled + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Verify the preview run status of pipeline in the logs is "succeeded" + Then Close the pipeline logs + Then Click on preview data for GCS sink + Then Close the preview data + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "projectId" for key "bqProjectId" + Then Enter runtime argument value "projectId" for key "bqDatasetProjectId" + Then Enter runtime argument value "partitionFrom" for key "bqStartDate" + Then Enter runtime argument value "partitionTo" for key "bqEndDate" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value "dataset" for key "bqDataset" + Then Enter runtime argument value for BigQuery source table name key "bqSourceTable" + Then Enter runtime argument value 
"projectId" for key "gcsProjectId" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "cmekGCS" for GCS cmek property key "cmekGCS" if GCS cmek is enabled + Then Run the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Verify data is transferred to target GCS bucket + Then Validate the cmek key "cmekGCS" of target GCS bucket if cmek is enabled + + @CMEK @BQ_SOURCE_TEST @GCS_SINK_TEST + Scenario:Validate successful records transfer from BigQuery to GCS with macro arguments for filter and Output Schema + Given Open Datafusion Project to configure pipeline + When Expand Plugin group in the LHS plugins list: "Source" + When Select plugin: "BigQuery" from the plugins list as: "Source" + Then Navigate to the properties page of plugin: "BigQuery" + Then Enter BigQuery property reference name + Then Enter BigQuery property "projectId" as macro argument "bqProjectId" + Then Enter BigQuery property "datasetProjectId" as macro argument "bqDatasetProjectId" + Then Enter BigQuery property "filter" as macro argument "bqFilter" + Then Enter BigQuery property "serviceAccountType" as macro argument "serviceAccountType" + Then Enter BigQuery property "serviceAccountFilePath" as macro argument "serviceAccount" + Then Enter BigQuery property "serviceAccountJSON" as macro argument "serviceAccount" + Then Enter BigQuery property "dataset" as macro argument "bqDataset" + Then Enter BigQuery property "table" as macro argument "bqSourceTable" + Then Select Macro action of output schema property: "Output Schema-macro-input" and set the value to "bqOutputSchema" + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + When Expand 
Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" + Then Navigate to the properties page of plugin: "GCS" + Then Enter GCS property reference name + Then Enter GCS property "projectId" as macro argument "gcsProjectId" + Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType" + Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount" + Then Enter GCS property "serviceAccountJSON" as macro argument "serviceAccount" + Then Enter GCS property "path" as macro argument "gcsSinkPath" + Then Enter GCS sink property "pathSuffix" as macro argument "gcsPathSuffix" + Then Enter GCS property "format" as macro argument "gcsFormat" + Then Enter GCS sink cmek property "encryptionKeyName" as macro argument "cmekGCS" if cmek is enabled + Then Validate "GCS" plugin properties + Then Close the GCS properties + Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Save the pipeline + Then Preview and run the pipeline + Then Enter runtime argument value "projectId" for key "bqProjectId" + Then Enter runtime argument value "projectId" for key "bqDatasetProjectId" + Then Enter runtime argument value "filter" for key "bqFilter" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value "dataset" for key "bqDataset" + Then Enter runtime argument value for BigQuery source table name key "bqSourceTable" + Then Enter runtime argument value "OutputSchema" for key "bqOutputSchema" + Then Enter runtime argument value "projectId" for key "gcsProjectId" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "cmekGCS" 
for GCS cmek property key "cmekGCS" if GCS cmek is enabled + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Verify the preview run status of pipeline in the logs is "succeeded" + Then Close the pipeline logs + Then Click on preview data for GCS sink + Then Close the preview data + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "projectId" for key "bqProjectId" + Then Enter runtime argument value "projectId" for key "bqDatasetProjectId" + Then Enter runtime argument value "filter" for key "bqFilter" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value "dataset" for key "bqDataset" + Then Enter runtime argument value for BigQuery source table name key "bqSourceTable" + Then Enter runtime argument value "OutputSchema" for key "bqOutputSchema" + Then Enter runtime argument value "projectId" for key "gcsProjectId" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "cmekGCS" for GCS cmek property key "cmekGCS" if GCS cmek is enabled + Then Run the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Verify data is transferred to target GCS bucket + Then Validate the cmek key "cmekGCS" of target GCS bucket if cmek is enabled diff --git a/src/e2e-test/java/io/cdap/plugin/utils/CdfPluginPropertyLocator.java b/src/e2e-test/java/io/cdap/plugin/utils/CdfPluginPropertyLocator.java index 297c623838..d54c4e176d 100644 --- 
a/src/e2e-test/java/io/cdap/plugin/utils/CdfPluginPropertyLocator.java +++ b/src/e2e-test/java/io/cdap/plugin/utils/CdfPluginPropertyLocator.java @@ -36,7 +36,10 @@ public enum CdfPluginPropertyLocator { GCS_CREATE_OBJECTS_TO_CREATE("paths"), GCS_CREATE_FAIL_IF_OBJECT_EXISTS("failIfExists"), GCS_MOVE_SOURCE_PATH("sourcePath"), - GCS_MOVE_DESTINATION_PATH("destPath"); + GCS_MOVE_DESTINATION_PATH("destPath"), + PARTITION_START_DATE("partitionFrom"), + PARTITION_END_DATE("partitionTo"), + FILTER("filter"); public String pluginProperty; CdfPluginPropertyLocator(String property) { @@ -74,6 +77,9 @@ public enum CdfPluginPropertyLocator { .put("createFailIfObjectExists", CdfPluginPropertyLocator.GCS_CREATE_FAIL_IF_OBJECT_EXISTS) .put("gcsMoveSourcePath", CdfPluginPropertyLocator.GCS_MOVE_SOURCE_PATH) .put("gcsMoveDestinationPath", CdfPluginPropertyLocator.GCS_MOVE_DESTINATION_PATH) + .put("filter", CdfPluginPropertyLocator.FILTER) + .put("partitionFrom", CdfPluginPropertyLocator.PARTITION_START_DATE) + .put("partitionTo", CdfPluginPropertyLocator.PARTITION_END_DATE) .build(); } diff --git a/src/e2e-test/resources/errorMessage.properties b/src/e2e-test/resources/errorMessage.properties index 79b67385e8..5d6d901a70 100644 --- a/src/e2e-test/resources/errorMessage.properties +++ b/src/e2e-test/resources/errorMessage.properties @@ -35,3 +35,7 @@ errorMessageInvalidSourcePath=Invalid bucket name in path 'abc@'. Bucket name sh errorMessageInvalidDestPath=Invalid bucket name in path 'abc@'. Bucket name should errorMessageInvalidEncryptionKey=CryptoKeyName.parse: formattedString not in valid format: Parameter "abc@" must be errorMessageInvalidBucketNameSink=Unable to read or access GCS bucket. +errorMessageIncorrectPartitionStartDate=21-01-2025 is not in a valid format. Enter valid date in format: yyyy-MM-dd +errorMessageIncorrectPartitionEndDate=22-01-2025 is not in a valid format. 
Enter valid date in format: yyyy-MM-dd +errorMessageIncorrectReferenceName=Invalid reference name 'invalidRef&^*&&*'. Supported characters are: letters, numbers, and '_', '-', '.', or '$'. +errorLogsMessageInvalidFilter=invalidQuery diff --git a/src/e2e-test/resources/pluginParameters.properties b/src/e2e-test/resources/pluginParameters.properties index 117594cc14..b654de942b 100644 --- a/src/e2e-test/resources/pluginParameters.properties +++ b/src/e2e-test/resources/pluginParameters.properties @@ -255,6 +255,17 @@ bqDateExpectedFile=testdata/BigQuery/BQDateFile bqDateTimeExpectedFile=testdata/BigQuery/BQDateTimeFile bqTimeStampExpectedFile=testdata/BigQuery/BQTimeStampFile bqPartitionFieldDate=transaction_date +bqStartDate=2025-01-21 +bqEndDate=2025-01-22 +partitionFrom=2025-01-21 +partitionTo=2025-01-22 +filter=Id=20 +bqInvalidReferenceName=invalidRef&^*&&* +OutputSchema={ "type": "record", "name": "text", "fields": [{ "name": "Id", "type": "long" }, { "name": "Value", "type": "long" }, \ + { "name": "UID", "type": "string" } ] } +incorrectFilter=%%%% +bqIncorrectFormatStartDate=21-01-2025 +bqIncorrectFormatEndDate=22-01-2025 ## BIGQUERY-PLUGIN-PROPERTIES-END ## PUBSUBSINK-PLUGIN-PROPERTIES-START