From 2014fdf5c546b71c030de62c22e3f9816782452a Mon Sep 17 00:00:00 2001 From: priyabhatnagar Date: Wed, 21 Jun 2023 22:25:25 +0530 Subject: [PATCH] e2e for wrangler-groupBy --- pom.xml | 4 +- .../features/Wrangler/DataTypeParsers.feature | 8 +- .../features/Wrangler/ParseAsCsv.feature | 4 +- .../Wrangler/ParseAsFixedLength.feature | 4 +- .../features/Wrangler/ParseAsHl7.feature | 4 +- .../features/Wrangler/Runtime.feature | 41 ++++ .../common/stepsdesign/TestSetupHooks.java | 66 ++++-- .../Directive_parse_DateTime | 6 +- .../Directive_wrangler_GroupBy | 5 + .../BigQuery/BigQueryCreateTableQuery.txt | 2 + .../BigQueryCreateTableQueryDatetime.txt | 2 +- .../BigQueryCreateTableQueryFxdlen.txt | 2 +- .../BigQuery/BigQueryInsertDataQuery.txt | 10 + .../BigQueryInsertDataQueryDatetime.txt | 6 +- .../BigQueryInsertDataQueryFxdlen.txt | 2 +- .../resources/pluginParameters.properties | 7 +- ...hWrnglerNGrpby-cdap-data-pipeline (1).json | 223 ++++++++++++++++++ ...ixedlength_wrangle-cdap-data-pipeline.json | 2 +- 18 files changed, 345 insertions(+), 53 deletions(-) create mode 100644 wrangler-transform/src/e2e-test/features/Wrangler/Runtime.feature create mode 100644 wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_wrangler_GroupBy create mode 100644 wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt create mode 100644 wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json diff --git a/pom.xml b/pom.xml index 75561b6e0..94284fb88 100644 --- a/pom.xml +++ b/pom.xml @@ -83,7 +83,7 @@ 1.11.133 0.10.2-hadoop2 1.56 - 6.10.0-SNAPSHOT + 6.11.0-SNAPSHOT 1.1.5 1.6 2.5 @@ -547,7 +547,7 @@ io.cdap.tests.e2e cdap-e2e-framework - 0.3.0-SNAPSHOT + 0.4.0-SNAPSHOT test diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature index dd27ebc83..e56497edf 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature @@ -15,7 +15,7 @@ @Wrangler Feature: datatype parsers - @BQ_SOURCE_TS_TEST @BQ_SINK_TEST + @BQ_SOURCE_TS_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse timestamp directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -24,8 +24,6 @@ Feature: datatype parsers Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" - Then Click on the Get Schema button - Then Click on the Validate button Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" @@ -43,7 +41,7 @@ Feature: datatype parsers Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_Timestamp" - @BQ_SOURCE_DATETIME_TEST @BQ_SINK_TEST + @BQ_SOURCE_DATETIME_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse datetime directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -52,8 +50,6 @@ Feature: datatype parsers Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" - Then Click on the Get Schema button - Then Click on the Validate button Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature index fa59cb54c..f36b9f2bd 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature @@ -15,7 +15,7 @@ @Wrangler Feature: Wrangler - Run time scenarios - @BQ_SOURCE_CSV_TEST @BQ_SINK_TEST + @BQ_SOURCE_CSV_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse csv directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -24,8 +24,6 @@ Feature: Wrangler - Run time scenarios Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" - Then Click on the Get Schema button - Then Click on the Validate button Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature index 5ac20c0b7..aab62f5fd 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature @@ -15,7 +15,7 @@ @Wrangler Feature: parse as fixed length - @BQ_SOURCE_FXDLEN_TEST @BQ_SINK_TEST + @BQ_SOURCE_FXDLEN_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse fixedlength directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -24,8 +24,6 @@ Feature: parse as fixed length Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" - Then Click on the Get Schema button - Then Click on the Validate button Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature index c6c9e00df..1e5a80a41 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature @@ -15,7 +15,7 @@ @Wrangler Feature: parse as HL7 - @BQ_SOURCE_HL7_TEST @BQ_SINK_TEST + @BQ_SOURCE_HL7_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse hl7 directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -24,8 +24,6 @@ Feature: parse as HL7 Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" - Then Click on the Get Schema button - Then Click on the Validate button Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/Runtime.feature b/wrangler-transform/src/e2e-test/features/Wrangler/Runtime.feature new file mode 100644 index 000000000..93218cd6d --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/Runtime.feature @@ -0,0 +1,41 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Wrangler - Run time scenarios + + @BQ_SOURCE_GRPBY_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using wrangler and groupBy directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_GroupBy" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_GroupBy" diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java index 0243dc4ed..c8fc73dcf 100644 --- a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java @@ -40,9 +40,10 @@ public class TestSetupHooks { @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST") public static void createTempSourceBQTable() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); } - @Before(order = 1, value = "@BQ_SINK_TEST") + + @Before(order = 2, value = "@BQ_SINK_TEST") public static void setTempTargetBQTableName() { String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_"); PluginPropertyUtils.addPluginProp("bqTargetTable", bqTargetTableName); @@ -54,7 +55,8 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce String bqTargetTableName = PluginPropertyUtils.pluginProp("bqTargetTable"); try { BigQueryClient.dropBqQuery(bqTargetTableName); - BeforeActions.scenario.write("BQ Target table - " + bqTargetTableName + " deleted successfully"); + BeforeActions.scenario.write( + "BQ Target table - " + bqTargetTableName + " deleted successfully"); PluginPropertyUtils.removePluginProp("bqTargetTable"); } catch (BigQueryException e) { if (e.getMessage().contains("Not found: Table")) { @@ -71,61 +73,77 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce @Before(order = 1, value = "@BQ_SOURCE_FXDLEN_TEST") public static void createTempSourceBQTableFxdLen() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileFxdLen"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen")); } + @Before(order = 1, value = "@BQ_SOURCE_HL7_TEST") public static void createTempSourceBQTableHl7() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileHl7"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7")); } + @Before(order = 1, value = "@BQ_SOURCE_TS_TEST") public static void createTempSourceBQTableTimestamp() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileTimestamp"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileTimestamp")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileTimestamp")); } + @Before(order = 1, value = "@BQ_SOURCE_DATETIME_TEST") public static void createTempSourceBQTableDateTime() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileDatetime"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileDatetime")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileDatetime")); } - @After(order = 1, value = "@BQ_SOURCE_TEST") + @After(order = 2, value = "@BQ_SOURCE_TEST") public static void deleteTempSourceBQTable() throws IOException, InterruptedException { String bqSourceTable = PluginPropertyUtils.pluginProp("bqSourceTable"); BigQueryClient.dropBqQuery(bqSourceTable); BeforeActions.scenario.write("BQ source Table " + bqSourceTable + " deleted successfully"); PluginPropertyUtils.removePluginProp("bqSourceTable"); + + } + + @Before(order = 1, value = "@BQ_SOURCE_GRPBY_TEST") + public static void createTempSourceBQTableGroupBy() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFile"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFile")); } - private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, String bqInsertDataQueryFile) - throws IOException, InterruptedException { - String bqSourceTable = "E2E_SOURCE_" + UUID.randomUUID().toString().substring(0, 5).replaceAll("-", - "_"); + private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, + String bqInsertDataQueryFile) + throws IOException, InterruptedException { + String bqSourceTable = + "E2E_SOURCE_" + UUID.randomUUID().toString().substring(0, 5).replaceAll("-", + "_"); String createTableQuery = StringUtils.EMPTY; try { createTableQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource - ("/" + bqCreateTableQueryFile).toURI())) - , StandardCharsets.UTF_8); - createTableQuery = createTableQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset")) - .replace("TABLE_NAME", bqSourceTable); + ("/" + bqCreateTableQueryFile).toURI())) + , StandardCharsets.UTF_8); + createTableQuery = createTableQuery.replace("DATASET", + PluginPropertyUtils.pluginProp("dataset")) + .replace("TABLE_NAME", bqSourceTable); } catch (Exception e) { - BeforeActions.scenario.write("Exception in reading " + bqCreateTableQueryFile + " - " + e.getMessage()); + BeforeActions.scenario.write( + "Exception in reading " + bqCreateTableQueryFile + " - " + e.getMessage()); Assert.fail("Exception in BigQuery testdata prerequisite setup " + - "- error in reading create table query file " + e.getMessage()); + "- error in reading create table query file " + e.getMessage()); } String insertDataQuery = StringUtils.EMPTY; try { insertDataQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource - ("/" + bqInsertDataQueryFile).toURI())) - , StandardCharsets.UTF_8); - insertDataQuery = insertDataQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset")) - .replace("TABLE_NAME", bqSourceTable); + ("/" + bqInsertDataQueryFile).toURI())) + , StandardCharsets.UTF_8); + insertDataQuery = insertDataQuery.replace("DATASET", + PluginPropertyUtils.pluginProp("dataset")) + .replace("TABLE_NAME", bqSourceTable); } catch (Exception e) { - BeforeActions.scenario.write("Exception in reading " + bqInsertDataQueryFile + " - " + e.getMessage()); + BeforeActions.scenario.write( + "Exception in reading " + bqInsertDataQueryFile + " - " + e.getMessage()); Assert.fail("Exception in BigQuery testdata prerequisite setup " + - "- error in reading insert data query file " + e.getMessage()); + "- error in reading insert data query file " + e.getMessage()); } BigQueryClient.getSoleQueryResult(createTableQuery); try { diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime index aa50c50d0..10bc48764 100644 --- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime @@ -1,3 +1,3 @@ -{"create_date":"2023","id":1,"timecolumn":"2006-03-18"} -{"create_date":"2023","id":2,"timecolumn":"2007-03-18"} -{"create_date":"2023","id":3,"timecolumn":"2008-04-19"} \ No newline at end of file +{"create_date":"2024","id":1,"timecolumn":"2006-03-18"} +{"create_date":"2024","id":2,"timecolumn":"2007-03-18"} +{"create_date":"2024","id":3,"timecolumn":"2008-04-19"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_wrangler_GroupBy b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_wrangler_GroupBy new file mode 100644 index 000000000..d02f42a93 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_wrangler_GroupBy @@ -0,0 +1,5 @@ +{"city":"San Jose","cityFirst":"San Jose","firstname":"DOUGLAS","id":"1","lastname":"Williams","state":"CA","zipcode":923564293} +{"city":"Houston","cityFirst":"Houston","firstname":"DAVID","id":"2","lastname":"Johnson","state":"TX","zipcode":1738378970} +{"city":"Manhattan","cityFirst":"Manhattan","firstname":"HUGH","id":"3","lastname":"Jackman","state":"NY","zipcode":-1863622247} +{"city":"San Diego","cityFirst":"San Diego","firstname":"FRANK","id":"5","lastname":"Underwood","state":"CA","zipcode":-1317090526} +{"city":"New York","cityFirst":"New York","firstname":"SARTHAK","id":"7","lastname":"Dash","state":"NY","zipcode":-1949601773} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt new file mode 100644 index 000000000..2ad22ced8 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt @@ -0,0 +1,2 @@ +create table `DATASET.TABLE_NAME` (id STRING, firstname STRING, lastname STRING, streetAddress STRING, +city STRING, state STRING, zipcode BIGINT, phoneNumber BIGINT) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt index 2bc7bd04b..026e4eedf 100644 --- a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt @@ -1 +1 @@ -create table `DATASET.TABLE_NAME` (id STRING, create_date STRING, timestamp STRING) \ No newline at end of file +create table `DATASET.TABLE_NAME` (id BIGINT, create_date STRING, timestamp STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt index d4a47cbb8..8e98a195c 100644 --- a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt @@ -1 +1 @@ -create table `DATASET.TABLE_NAME` (url STRING, fixedlength STRING) \ No newline at end of file +create table `DATASET.TABLE_NAME` (Url STRING, fixedlength STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt new file mode 100644 index 000000000..ba7441f97 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt @@ -0,0 +1,10 @@ +INSERT INTO DATASET.TABLE_NAME (id, firstname, lastname, streetAddress, city, state, zipcode, phoneNumber) +VALUES +('5', 'Frank', 'Underwood', '1609 Far St.', 'San Diego', 'CA', 2977876770, 19061512345), +('1', 'Douglas', 'Williams', '1 Vista Montana', 'San Jose', 'CA', 9513498885, 35834612345), +('4', 'Walter', 'White', '3828 Piermont Dr', 'Orlando', 'FL', 7349864532, 7829812345), +('3', 'Hugh', 'Jackman', '5, Cool Way', 'Manhattan', 'NY', 6726312345, 1695412345), +('7', 'Sarthak', 'Dash', '123 Far St.', 'New York', 'NY', 2345365523, 1324812345), +('6', 'Serena', 'Woods', '123 Far St.', 'Las Vegas', 'NV', 4533456734, 78919612345), +('2', 'David', 'Johnson', '3 Baypointe Parkway', 'Houston', 'TX', 1738378970, 1451412345), +('8', 'Rahul', 'Dash', '22 MG Road.', 'Bangalore', 'KA',NULL, 94864612345); \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt index 33a34920f..b3361bc86 100644 --- a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt @@ -1,5 +1,5 @@ INSERT INTO DATASET.TABLE_NAME (id,create_date,timestamp) VALUES -('1','2021-01-21','2006-02-18T05:03:42Z[UTC]'), -('2','2022-02-22','2007-01-18T04:03:22Z[UTC]'), -('3','2023-03-23','2008-07-19T08:04:22Z[UTC]'); +(1,'2021-01-21','2006-02-18T05:03:42Z[UTC]'), +(2,'2022-02-22','2007-01-18T04:03:22Z[UTC]'), +(3,'2023-03-23','2008-07-19T08:04:22Z[UTC]'); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt index 9af6a9f30..d1f9353b2 100644 --- a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt @@ -1,4 +1,4 @@ -INSERT INTO DATASET.TABLE_NAME (url,fixedlength) +INSERT INTO DATASET.TABLE_NAME (Url,fixedlength) VALUES ('http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING','21 10 ABCXYZ'), ('http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING','19 13 ABCXYZ'), diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties index 3d6473dbf..3a7ecc612 100644 --- a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -3,8 +3,8 @@ Directive_parse_Fixed_Length=testData/Wrangler/parse_fixedlength_wrangle-cdap-da Directive_parse_hl7=testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json Directive_parse_Timestamp=testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json Directive_parse_Datetime=testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json -Directive_parse_csv=testData/Wrangler\ - /parse_csv_wrangle-cdap-data-pipeline.json +Directive_parse_csv=testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json +Directive_GroupBy=testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json bqSourceTable=dummy sourcePath=example/hello.csv gcsSourceBucket=dummy @@ -20,12 +20,15 @@ CreateBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryCreateTableQueryDatetim InsertBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt CreateBQTableQueryFileCsv=BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt InsertBQDataQueryFileCsv=BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt +CreateBQTableQueryFile=BQtesdata/BigQuery/BigQueryCreateTableQuery.txt +InsertBQDataQueryFile=BQtesdata/BigQuery/BigQueryInsertDataQuery.txt #bq properties projectId=cdf-athena dataset=test_automation dataset2=Wrangler #expectedBQFiles +ExpectedDirective_GroupBy=BQValidationExpectedFiles/Directive_wrangler_GroupBy ExpectedDirective_parse_FixedLength=BQValidationExpectedFiles/Directive_parse_fixedlength ExpectedDirective_parse_hl7=BQValidationExpectedFiles/Directive_parse_hl7 diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json new file mode 100644 index 000000000..2fb99c4f1 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/BQ2BQwithWrnglerNGrpby-cdap-data-pipeline (1).json @@ -0,0 +1,223 @@ +{ + "name": "BQ2BQwithWrnglerNGrpby", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "Group By" + }, + { + "from": "Group By", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "wrangler_ankit", + "table": "joinerTest", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"long\",\"null\"]},{\"name\":\"phoneNumber\",\"type\":[\"long\",\"null\"]}]}", + "enableQueryingViews": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"long\",\"null\"]},{\"name\":\"phoneNumber\",\"type\":[\"long\",\"null\"]}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:1585", + "isPluginAvailable": true, + "_uiPosition": { + "left": "346px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "drop phonenumber\nuppercase :firstname\nset-type :zipcode integer \nfind-and-replace :streetAddress s/St./Street/Ig\nset-column :lastname_count string:length(lastname)\nfilter-rows-on regex-match lastname_count .*5.*\nfilter-rows-on condition-true zipcode == null || zipcode =~ \"^\\W*$\"", + "field": "*", + "precondition": "false", + "workspaceId": "fb521d04-7644-4ec4-b545-837980f402cf", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"lastname_count\",\"type\":[\"int\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"lastname_count\",\"type\":[\"int\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"long\",\"null\"]},{\"name\":\"phoneNumber\",\"type\":[\"long\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:1586", + "isPluginAvailable": true, + "_uiPosition": { + "left": "646px", + "top": "343px" + } + }, + { + "name": "Group By", + "plugin": { + "name": "GroupByAggregate", + "type": "batchaggregator", + "label": "Group By", + "artifact": { + "name": "core-plugins", + "version": "2.12.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "groupByFields": "city,firstname,lastname,state,zipcode,id", + "aggregates": "cityFirst:First(city)" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"streetAddress\",\"type\":[\"string\",\"null\"]},{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"lastname_count\",\"type\":[\"int\",\"null\"]}]}" + } + ], + "id": "Group-By", + "type": "batchaggregator", + "label": "Group By", + "icon": "icon-groupbyaggregate", + "$$hashKey": "object:1587", + "isPluginAvailable": true, + "_uiPosition": { + "left": "946px", + "top": "343px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "wrangler_ankit", + "table": "joinTestOutput", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Group By", + "schema": "{\"type\":\"record\",\"name\":\"record.typeagg\",\"fields\":[{\"name\":\"city\",\"type\":[\"string\",\"null\"]},{\"name\":\"firstname\",\"type\":[\"string\",\"null\"]},{\"name\":\"lastname\",\"type\":[\"string\",\"null\"]},{\"name\":\"state\",\"type\":[\"string\",\"null\"]},{\"name\":\"zipcode\",\"type\":[\"int\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"cityFirst\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:1588", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1246px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "714034ca-5154-11ee-9b22-000000505066" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json index 533727b11..f1f602d33 100644 --- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json @@ -81,7 +81,7 @@ "scope": "SYSTEM" }, "properties": { - "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url :url\nwrite-as-csv :url_protocol\nurl-encode :url\nurl-decode :url\nencode base32 :fixedlength\ndecode base32 :fixedlength_encode_base32\nsplit-to-columns :url_query '='\nrename :fixedlength_2 :id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'", + "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url :Url\nwrite-as-csv :Url_protocol\nurl-encode :Url\nurl-decode :Url\nencode base32 :fixedlength\ndecode base32 :fixedlength_encode_base32\nsplit-to-columns :Url_query '='\nrename :fixedlength_2 :id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'", "field": "*", "precondition": "false", "workspaceId": "f4d30074-2193-4690-a589-2982afc0a21a",