diff --git a/pom.xml b/pom.xml index 75561b6e0..18ea1fdfe 100644 --- a/pom.xml +++ b/pom.xml @@ -83,7 +83,7 @@ 1.11.133 0.10.2-hadoop2 1.56 - 6.10.0-SNAPSHOT + 6.10.0 1.1.5 1.6 2.5 @@ -547,7 +547,7 @@ io.cdap.tests.e2e cdap-e2e-framework - 0.3.0-SNAPSHOT + 0.4.0-SNAPSHOT test diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature index dd27ebc83..66016f7cf 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature @@ -15,7 +15,7 @@ @Wrangler Feature: datatype parsers - @BQ_SOURCE_TS_TEST @BQ_SINK_TEST + @BQ_SOURCE_TS_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse timestamp directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -25,13 +25,13 @@ Feature: datatype parsers Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" Then Click on the Get Schema button - Then Click on the Validate button + Then Validate "BigQueryTable" plugin properties Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "table" with value: "bqTargetTable" Then Replace input plugin property: "dataset" with value: "dataset" - Then Click on the Validate button + Then Validate "BigQuery2" plugin properties Then Close the Plugin Properties page Then Rename the pipeline Then Deploy the pipeline @@ -43,7 +43,7 @@ Feature: datatype parsers Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_Timestamp" - @BQ_SOURCE_DATETIME_TEST @BQ_SINK_TEST + @BQ_SOURCE_DATETIME_TEST @BQ_SOURCE_TEST 
@BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse datetime directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -53,13 +53,13 @@ Feature: datatype parsers Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" Then Click on the Get Schema button - Then Click on the Validate button + Then Validate "BigQueryTable" plugin properties Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "table" with value: "bqTargetTable" Then Replace input plugin property: "dataset" with value: "dataset" - Then Click on the Validate button + Then Validate "BigQuery2" plugin properties Then Close the Plugin Properties page Then Rename the pipeline Then Deploy the pipeline diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature index fa59cb54c..3cf923618 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature @@ -13,9 +13,9 @@ # the License. 
@Wrangler -Feature: Wrangler - Run time scenarios +Feature: Wrangler - Run time scenarios for parse csv - @BQ_SOURCE_CSV_TEST @BQ_SINK_TEST + @BQ_SOURCE_CSV_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse csv directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -25,13 +25,13 @@ Feature: Wrangler - Run time scenarios Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" Then Click on the Get Schema button - Then Click on the Validate button + Then Validate "BigQueryTable" plugin properties Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "table" with value: "bqTargetTable" Then Replace input plugin property: "dataset" with value: "dataset" - Then Click on the Validate button + Then Validate "BigQuery2" plugin properties Then Close the Plugin Properties page Then Rename the pipeline Then Deploy the pipeline diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature new file mode 100644 index 000000000..2a3d21acf --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature @@ -0,0 +1,40 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: Parse as excel + + @GCS_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse Excel directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_excel" + Then Navigate to the properties page of plugin: "GCSFile" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "path" with value: "gcsSourceBucket" + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Validate "BigQuery" plugin properties + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_excel" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature index 5ac20c0b7..cb2929120 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature @@ -15,7 +15,7 @@ @Wrangler Feature: parse as fixed length - @BQ_SOURCE_FXDLEN_TEST @BQ_SINK_TEST + @BQ_SOURCE_FXDLEN_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline 
using parse fixedlength directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -25,13 +25,13 @@ Feature: parse as fixed length Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" Then Click on the Get Schema button - Then Click on the Validate button + Then Validate "BigQueryTable" plugin properties Then Close the Plugin Properties page Then Navigate to the properties page of plugin: "BigQuery2" Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "table" with value: "bqTargetTable" Then Replace input plugin property: "dataset" with value: "dataset" - Then Click on the Validate button + Then Validate "BigQuery2" plugin properties Then Close the Plugin Properties page Then Rename the pipeline Then Deploy the pipeline diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature index c6c9e00df..15ac6000d 100644 --- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature @@ -15,7 +15,7 @@ @Wrangler Feature: parse as HL7 - @BQ_SOURCE_HL7_TEST @BQ_SINK_TEST + @BQ_SOURCE_HL7_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST Scenario: To verify User is able to run a pipeline using parse hl7 directive Given Open Datafusion Project to configure pipeline Then Click on the Plus Green Button to import the pipelines @@ -25,16 +25,15 @@ Feature: parse as HL7 Then Replace input plugin property: "dataset" with value: "dataset" Then Replace input plugin property: "table" with value: "bqSourceTable" Then Click on the Get Schema button - Then Click on the Validate button + Then Validate "BigQueryTable" plugin properties Then Close the Plugin Properties page Then Navigate to the properties page of plugin: 
"BigQuery2" Then Replace input plugin property: "project" with value: "projectId" Then Replace input plugin property: "table" with value: "bqTargetTable" Then Replace input plugin property: "dataset" with value: "dataset" - Then Click on the Validate button + Then Validate "BigQuery2" plugin properties Then Close the Plugin Properties page - Then Rename the pipeline - Then Deploy the pipeline + Then Save and Deploy Pipeline Then Run the Pipeline in Runtime Then Wait till pipeline is in running state Then Open and capture logs diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature new file mode 100644 index 000000000..a7ba8f92d --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+ +@Wrangler +Feature: parse as Json + + @BQ_SOURCE_JSON_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse Json directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_json" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Validate "BigQueryTable" plugin properties + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Validate "BigQuery2" plugin properties + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_json" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature new file mode 100644 index 000000000..a0f0b5a9a --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. 
You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: parse as XmlToJson + + @BQ_SOURCE_XML_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse XmlToJson directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_xml" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Validate "BigQueryTable" plugin properties + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Validate "BigQuery2" plugin properties + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_xml" diff --git 
a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java index 0243dc4ed..b277ef375 100644 --- a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java @@ -17,8 +17,11 @@ package io.cdap.plugin.common.stepsdesign; import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.StorageException; import io.cdap.e2e.utils.BigQueryClient; import io.cdap.e2e.utils.PluginPropertyUtils; +import io.cdap.e2e.utils.StorageClient; import io.cucumber.java.After; import io.cucumber.java.Before; import org.apache.commons.lang3.StringUtils; @@ -26,6 +29,7 @@ import stepsdesign.BeforeActions; import java.io.IOException; +import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; @@ -37,12 +41,15 @@ */ public class TestSetupHooks { + public static String gcsSourceBucketName = StringUtils.EMPTY; + @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST") public static void createTempSourceBQTable() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); } - @Before(order = 1, value = "@BQ_SINK_TEST") + + @Before(order = 2, value = "@BQ_SINK_TEST") public static void setTempTargetBQTableName() { String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_"); PluginPropertyUtils.addPluginProp("bqTargetTable", bqTargetTableName); @@ -54,7 +61,8 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce String bqTargetTableName = 
PluginPropertyUtils.pluginProp("bqTargetTable"); try { BigQueryClient.dropBqQuery(bqTargetTableName); - BeforeActions.scenario.write("BQ Target table - " + bqTargetTableName + " deleted successfully"); + BeforeActions.scenario.write( + "BQ Target table - " + bqTargetTableName + " deleted successfully"); PluginPropertyUtils.removePluginProp("bqTargetTable"); } catch (BigQueryException e) { if (e.getMessage().contains("Not found: Table")) { @@ -66,30 +74,33 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce } /** - * Create BigQuery table. + * Create BigQuery table test. */ @Before(order = 1, value = "@BQ_SOURCE_FXDLEN_TEST") public static void createTempSourceBQTableFxdLen() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileFxdLen"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen")); } + @Before(order = 1, value = "@BQ_SOURCE_HL7_TEST") public static void createTempSourceBQTableHl7() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileHl7"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7")); } + @Before(order = 1, value = "@BQ_SOURCE_TS_TEST") public static void createTempSourceBQTableTimestamp() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileTimestamp"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileTimestamp")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileTimestamp")); } + @Before(order = 1, value = "@BQ_SOURCE_DATETIME_TEST") public static void createTempSourceBQTableDateTime() throws IOException, InterruptedException { createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileDatetime"), - 
PluginPropertyUtils.pluginProp("InsertBQDataQueryFileDatetime")); + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileDatetime")); } - @After(order = 1, value = "@BQ_SOURCE_TEST") + @After(order = 2, value = "@BQ_SOURCE_TEST") public static void deleteTempSourceBQTable() throws IOException, InterruptedException { String bqSourceTable = PluginPropertyUtils.pluginProp("bqSourceTable"); BigQueryClient.dropBqQuery(bqSourceTable); @@ -97,35 +108,91 @@ public static void deleteTempSourceBQTable() throws IOException, InterruptedExce PluginPropertyUtils.removePluginProp("bqSourceTable"); } - private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, String bqInsertDataQueryFile) - throws IOException, InterruptedException { - String bqSourceTable = "E2E_SOURCE_" + UUID.randomUUID().toString().substring(0, 5).replaceAll("-", - "_"); + @Before(order = 1, value = "@BQ_SOURCE_JSON_TEST") + public static void createTempSourceBQTableJson() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileJson"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileJson")); + } + + @Before(order = 1, value = "@BQ_SOURCE_XML_TEST") + public static void createTempSourceBQTableXml() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileXml"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileXml")); + } + + @Before(order = 1, value = "@GCS_SOURCE_TEST") + public static void createBucketWithEXCELFile() throws IOException, URISyntaxException { + gcsSourceBucketName = createGCSBucketWithFile(PluginPropertyUtils.pluginProp("testFile")); + PluginPropertyUtils.addPluginProp("gcsSourceBucket", "gs://" + gcsSourceBucketName + "/" + + PluginPropertyUtils.pluginProp("testFile")); + BeforeActions.scenario.write("GCS source bucket1 name - " + gcsSourceBucketName); + } + + private static String createGCSBucketWithFile(String 
filePath) + throws IOException, URISyntaxException { + String bucketName = StorageClient.createBucket("e2e-test-" + UUID.randomUUID()).getName(); + StorageClient.uploadObject(bucketName, filePath, filePath); + return bucketName; + } + + @After(order = 1, value = "@GCS_SOURCE_TEST") + public static void deleteSourceBucketWithFile() { + deleteGCSBucket(gcsSourceBucketName); + gcsSourceBucketName = StringUtils.EMPTY; + } + + private static void deleteGCSBucket(String bucketName) { + try { + for (Blob blob : StorageClient.listObjects(bucketName).iterateAll()) { + StorageClient.deleteObject(bucketName, blob.getName()); + } + StorageClient.deleteBucket(bucketName); + BeforeActions.scenario.write("Deleted GCS Bucket " + bucketName); + } catch (StorageException | IOException e) { + if (e.getMessage().contains("The specified bucket does not exist")) { + BeforeActions.scenario.write("GCS Bucket " + bucketName + " does not exist."); + } else { + Assert.fail(e.getMessage()); + } + } + } + + + private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, + String bqInsertDataQueryFile) + throws IOException, InterruptedException { + String bqSourceTable = + "E2E_SOURCE_" + UUID.randomUUID().toString().substring(0, 5).replaceAll("-", + "_"); String createTableQuery = StringUtils.EMPTY; try { createTableQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource - ("/" + bqCreateTableQueryFile).toURI())) - , StandardCharsets.UTF_8); - createTableQuery = createTableQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset")) - .replace("TABLE_NAME", bqSourceTable); + ("/" + bqCreateTableQueryFile).toURI())) + , StandardCharsets.UTF_8); + createTableQuery = createTableQuery.replace("DATASET", + PluginPropertyUtils.pluginProp("dataset")) + .replace("TABLE_NAME", bqSourceTable); } catch (Exception e) { - BeforeActions.scenario.write("Exception in reading " + bqCreateTableQueryFile + " - " + e.getMessage()); + 
BeforeActions.scenario.write( + "Exception in reading " + bqCreateTableQueryFile + " - " + e.getMessage()); Assert.fail("Exception in BigQuery testdata prerequisite setup " + - "- error in reading create table query file " + e.getMessage()); + "- error in reading create table query file " + e.getMessage()); } String insertDataQuery = StringUtils.EMPTY; try { insertDataQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource - ("/" + bqInsertDataQueryFile).toURI())) - , StandardCharsets.UTF_8); - insertDataQuery = insertDataQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset")) - .replace("TABLE_NAME", bqSourceTable); + ("/" + bqInsertDataQueryFile).toURI())) + , StandardCharsets.UTF_8); + insertDataQuery = insertDataQuery.replace("DATASET", + PluginPropertyUtils.pluginProp("dataset")) + .replace("TABLE_NAME", bqSourceTable); } catch (Exception e) { - BeforeActions.scenario.write("Exception in reading " + bqInsertDataQueryFile + " - " + e.getMessage()); + BeforeActions.scenario.write( + "Exception in reading " + bqInsertDataQueryFile + " - " + e.getMessage()); Assert.fail("Exception in BigQuery testdata prerequisite setup " + - "- error in reading insert data query file " + e.getMessage()); + "- error in reading insert data query file " + e.getMessage()); } BigQueryClient.getSoleQueryResult(createTableQuery); try { diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime index aa50c50d0..0af0511b3 100644 --- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime @@ -1,3 +1,3 @@ -{"create_date":"2023","id":1,"timecolumn":"2006-03-18"} -{"create_date":"2023","id":2,"timecolumn":"2007-03-18"} -{"create_date":"2023","id":3,"timecolumn":"2008-04-19"} \ No 
newline at end of file +{"create_date":"2024","id":"1","timecolumn":"2006-03-18"} +{"create_date":"2024","id":"2","timecolumn":"2007-03-18"} +{"create_date":"2024","id":"3","timecolumn":"2008-04-19"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel new file mode 100644 index 000000000..3c3ae5154 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel @@ -0,0 +1,2 @@ +{"copiedname":"very","id":0,"name":"very","phone":"8838.0","rollno":"3.0","uniquenum":"very,0"} +{"copiedname":"hello","id":2,"name":"hell","phone":"12345.0","rollno":"1.0","uniquenum":"hello,2"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength index 33010a877..591e939aa 100644 --- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength @@ -1,2 +1,2 @@ -{"Url":"http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"21 10 ABCXYZ","fixedlength_1":"21","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GIYSAIBRGAQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"21 10 ABCXYZ","id":" 10","url_authority":"example.com:80","url_filename":"/docs/books/tutorial/index.html?name=networking","url_host":"example.com","url_path":"/docs/books/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} -{"Url":"http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"19 13 
ABCXYZ","fixedlength_1":"19","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GE4SAIBRGMQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"19 13 ABCXYZ","id":" 13","url_authority":"geeks.com:80","url_filename":"/docs/chair/tutorial/index.html?name=networking","url_host":"geeks.com","url_path":"/docs/chair/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} \ No newline at end of file +{"fixedlength":"21 10 ABCXYZ","fixedlength_1":"21","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GIYSAIBRGAQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"21 10 ABCXYZ","id":" 10","url":"http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING","url_authority":"example.com:80","url_filename":"/docs/books/tutorial/index.html?name=networking","url_host":"example.com","url_path":"/docs/books/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} +{"fixedlength":"19 13 ABCXYZ","fixedlength_1":"19","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GE4SAIBRGMQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"19 13 ABCXYZ","id":" 13","url":"http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING","url_authority":"geeks.com:80","url_filename":"/docs/chair/tutorial/index.html?name=networking","url_host":"geeks.com","url_path":"/docs/chair/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json new file mode 100644 index 000000000..881f21c3c --- /dev/null +++ 
b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json @@ -0,0 +1,3 @@ +{"body":"hello abc","copied":{"first":"Root","last":"joy"},"desc":"nickhello abc","id":22,"json_age":"{\"json_id\":22,\"copied\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_age\":1,\"json_name\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"22,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"joy\\\"}\",\"body\":\"hello abc\",\"desc\":\"nickhello abc\"}","json_id_json_name":"22,{\"first\":\"Root\",\"last\":\"joy\"}","json_name":{"first":"Root","last":"joy"},"json_pet":"testing"} +{"body":"hello def","copied":{"first":"dded","last":"share"},"desc":"hellohello def","id":23,"json_age":"{\"json_id\":23,\"copied\":{\"first\":\"dded\",\"last\":\"share\"},\"json_age\":2,\"json_name\":{\"first\":\"dded\",\"last\":\"share\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"23,{\\\"first\\\":\\\"dded\\\",\\\"last\\\":\\\"share\\\"}\",\"body\":\"hello def\",\"desc\":\"hellohello def\"}","json_id_json_name":"23,{\"first\":\"dded\",\"last\":\"share\"}","json_name":{"first":"dded","last":"share"},"json_pet":"testing"} +{"body":"hello ghi","copied":{"first":"Root","last":"Joltie"},"desc":"domshello ghi","id":24,"json_age":"{\"json_id\":24,\"copied\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_age\":3,\"json_name\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"24,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"Joltie\\\"}\",\"body\":\"hello ghi\",\"desc\":\"domshello ghi\"}","json_id_json_name":"24,{\"first\":\"Root\",\"last\":\"Joltie\"}","json_name":{"first":"Root","last":"Joltie"},"json_pet":"testing"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson new file mode 100644 index 000000000..4a72c0069 --- /dev/null 
+++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson @@ -0,0 +1,6 @@ +{"distance":2.0,"distance2":0.3571428656578064,"email":"abc01@mail.com","email_domain":{"distance":2.0,"email_account":"abc01"},"email_porter":["abc","mail","com"],"id":"1","xmldata_note":{"body":"Dont forget me this week!","from":"Tani","heading":"Reminder","to":"Tove"}} +{"distance":2.0,"distance2":0.3571428656578064,"email":"def02@mail.com","email_domain":{"distance":2.0,"email_account":"def02"},"email_porter":["def","mail","com"],"id":"2","xmldata_note":{"body":"Dont forget us this holiday!","from":"joy","heading":"Reminder","to":"Tove"}} +{"distance":2.0,"distance2":0.3571428656578064,"email":"abc01@mail.com","email_domain":{"distance":2.0,"email_account":"abc01"},"email_porter":["abc","mail","com"],"id":"abc","xmldata_note":{"body":"Dont forget me this week!","from":"Tani","heading":"Reminder","to":"Tove"}} +{"distance":2.0,"distance2":0.3571428656578064,"email":"ghi03@mail.com","email_domain":{"distance":2.0,"email_account":"ghi03"},"email_porter":["ghi","mail","com"],"id":"3","xmldata_note":{"body":"Dont forget him this weekend!","from":"shree","heading":"Reminder","to":"Tove"}} +{"distance":2.0,"distance2":0.3571428656578064,"email":"def02@mail.com","email_domain":{"distance":2.0,"email_account":"def02"},"email_porter":["def","mail","com"],"id":"def","xmldata_note":{"body":"Dont forget us this holiday!","from":"joy","heading":"Reminder","to":"Tove"}} +{"distance":2.0,"distance2":0.3571428656578064,"email":"ghi03@mail.com","email_domain":{"distance":2.0,"email_account":"ghi03"},"email_porter":["ghi","mail","com"],"id":"ghi","xmldata_note":{"body":"Dont forget him this weekend!","from":"shree","heading":"Reminder","to":"Tove"}} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt 
b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt new file mode 100644 index 000000000..a711921e2 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (email STRING, xmldata STRING) diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt new file mode 100644 index 000000000..0dc9608ce --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (email,xmldata) +VALUES +('abc01@mail.com',' Tove Tani Reminder Dont forget me this week! '), +('def02@mail.com',' Tove joy Reminder Dont forget us this holiday! '), +('ghi03@mail.com',' Tove shree Reminder Dont forget him this weekend! '); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt new file mode 100644 index 000000000..dc9fa7d17 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt @@ -0,0 +1,6 @@ +INSERT INTO DATASET.TABLE_NAME (body,json) +VALUES +(' hello abc', '{"id": 1, "name": {"first": "Root", "last": "joy"}, "age": 22, "pet": "nick", "height": 5.8}'), +('hello def', '{"id": 2, "name": {"first": "dded", "last": "share"}, "age": 23, "pet": "hello", "height": 6.8}'), +('hello ghi', '{"id": 3, "name": {"first": "Root", "last": "Joltie"}, "age": 24, "pet": "doms", "height": 7.8}'); + diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt new file mode 100644 
index 000000000..be6b585ea --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (body STRING, json STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx new file mode 100644 index 000000000..adaa5291b Binary files /dev/null and b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx differ diff --git a/wrangler-transform/src/e2e-test/resources/errorMessage.properties b/wrangler-transform/src/e2e-test/resources/errorMessage.properties new file mode 100644 index 000000000..87c823507 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/errorMessage.properties @@ -0,0 +1 @@ +validationSuccessMessage=No errors found. diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties index 3d6473dbf..16519804e 100644 --- a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -1,13 +1,18 @@ #json file path -Directive_parse_Fixed_Length=testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json +Directive_parse_Fixed_Length=testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json Directive_parse_hl7=testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json Directive_parse_Timestamp=testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json -Directive_parse_Datetime=testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json +Directive_parse_Datetime=testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json +Directive_parse_json=testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json +Directive_parse_xml=testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline 
(1).json +Directive_parse_excel=testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json Directive_parse_csv=testData/Wrangler\ /parse_csv_wrangle-cdap-data-pipeline.json bqSourceTable=dummy +bqTargetTable=dummy sourcePath=example/hello.csv gcsSourceBucket=dummy +testFile=BQtesdata/BigQuery/test1.xlsx #bq queries file path CreateBQDataQueryFileFxdLen=BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt @@ -18,12 +23,16 @@ CreateBQDataQueryFileTimestamp=BQtesdata/BigQuery/BigQueryCreateTableQueryTimest InsertBQDataQueryFileTimestamp=BQtesdata/BigQuery/BigQueryInsertDataQueryTimestamp.txt CreateBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt InsertBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt +CreateBQTableQueryFileJson=BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt +InsertBQDataQueryFileJson=BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt +CreateBQDataQueryFileXml=BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt +InsertBQDataQueryFileXml=BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt CreateBQTableQueryFileCsv=BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt InsertBQDataQueryFileCsv=BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt #bq properties projectId=cdf-athena -dataset=test_automation +dataset=Wrangler_Test dataset2=Wrangler #expectedBQFiles @@ -31,4 +40,7 @@ ExpectedDirective_parse_FixedLength=BQValidationExpectedFiles/Directive_parse_fi ExpectedDirective_parse_hl7=BQValidationExpectedFiles/Directive_parse_hl7 ExpectedDirective_parse_Datetime=BQValidationExpectedFiles/Directive_parse_DateTime ExpectedDirective_parse_Timestamp=BQValidationExpectedFiles/Directive_parse_Timestamp +ExpectedDirective_parse_json=BQValidationExpectedFiles/Directive_parse_json +ExpectedDirective_parse_xml=BQValidationExpectedFiles/Directive_parse_xmltojson +ExpectedDirective_parse_excel=BQValidationExpectedFiles/Directive_parse_excel 
ExpectedDirective_parse_csv=BQValidationExpectedFiles/Directive_parse_csv diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json index c90ff138a..59af75ce2 100644 --- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json @@ -3,7 +3,7 @@ "description": "Data Pipeline Application", "artifact": { "name": "cdap-data-pipeline", - "version": "6.10.0-SNAPSHOT", + "version": "[6.0.0, 7.0.0]", "scope": "SYSTEM" }, "config": { @@ -37,9 +37,7 @@ "type": "batchsource", "label": "BigQueryTable", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", @@ -76,9 +74,7 @@ "type": "transform", "label": "Wrangler", "artifact": { - "name": "wrangler-transform", - "version": "4.10.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "wrangler-transform" }, "properties": { "directives": "parse-as-hl7 :Body\nhash :Body MD5\nset-type :Body string \nkeep address,Body,Body_hl7_MSH_12,Body_hl7_MSH_9_1\nfind-and-replace address s/address1/test/g\nmask-shuffle :Body_hl7_MSH_9_1\nsend-to-error empty(address)\nrename :Body_hl7_MSH_12 :id ", @@ -119,9 +115,7 @@ "type": "batchsink", "label": "BigQuery2", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json index bcd2d8458..9ca1207c2 100644 --- 
a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_csv_wrangle-cdap-data-pipeline.json @@ -3,7 +3,7 @@ "description": "Data Pipeline Application", "artifact": { "name": "cdap-data-pipeline", - "version": "6.10.0-SNAPSHOT", + "version": "[6.0.0, 7.0.0]", "scope": "SYSTEM" }, "config": { @@ -37,9 +37,7 @@ "type": "batchsource", "label": "BigQueryTable", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", @@ -76,9 +74,7 @@ "type": "transform", "label": "Wrangler", "artifact": { - "name": "wrangler-transform", - "version": "4.10.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "wrangler-transform" }, "properties": { "directives": "parse-as-csv :body ',' false\nrename body_1 new_id\nquantize body_4 body_q 1:2=20,3:4=40\nset-type :body_4 integer \ncolumns-replace s/^new_//g\nfill-null-or-empty :body_3 'shubh'\nset-headers :abc\nchange-column-case uppercase\ncleanse-column-names\nsplit-to-rows :id '#'", @@ -119,9 +115,7 @@ "type": "batchsink", "label": "BigQuery2", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json new file mode 100644 index 000000000..352d9a339 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json @@ -0,0 +1,167 @@ +{ + "name": "parse_datetime_wrangler", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "[6.0.0, 7.0.0]", + "scope": "SYSTEM" + }, + "config": { + 
"resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler_Test", + "table": "datetimetab", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"string\",\"null\"]}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:358", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "342px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform" + }, + "properties": { + "directives": "parse-as-datetime :timestamp \"yyyy-MM-dd'T'HH:mm:ssX'['z']'\"\ncurrent-datetime :create_date\ndatetime-to-timestamp :timestamp\nformat-datetime :create_date 'y'\nformat-date :timestamp yyyy-mm-dd\nrename timestamp timecolumn", + "field": "*", + "precondition": 
"false", + "workspaceId": "b28b92f3-93bb-4a4f-8258-ef5881543ecb", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:359", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "342px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "ddtab", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "timePartitioningType": "DAY", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ 
+ { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:360", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "342px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "a397cf5a-af9f-11ee-bad0-0000007dcfa3" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json new file mode 100644 index 000000000..59142a4cf --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json @@ -0,0 +1,180 @@ +{ + "name": "parse_excel_wrangler_copy", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "[6.0.0, 7.0.0]", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "GCSFile", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "GCSFile", + "plugin": { + "name": "GCSFile", + "type": "batchsource", + "label": "GCSFile", + "artifact": { + "name": "google-cloud" + }, + "properties": { + "format": 
"blob", + "path": "gs://00000000-e2e-0014a44f-81be-4501-8360-0ddca1c39789/test1.xlsx", + "fileEncoding": "UTF-8", + "useConnection": "false", + "referenceName": "test", + "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "sampleSize": "1000", + "filenameOnly": "false", + "recursive": "false", + "ignoreNonExistingFolders": "false", + "encrypted": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}" + } + ], + "id": "GCSFile", + "type": "batchsource", + "label": "GCSFile", + "icon": "fa-plug", + "$$hashKey": "object:475", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform" + }, + "properties": { + "directives": "parse-as-excel :body '0' true\ncopy name copiedname\nmerge name bkd uniquenum ','\nrename bkd rollno\ndrop fwd\nswap id rollno\nsplit-to-rows :name 'o'\nfilter-rows-on condition-false rollno !~ '2.0'", + "field": "*", + "precondition": "false", + "workspaceId": "667f9e85-6c36-4d38-ad48-ef85db7a04a2", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "GCSFile", + "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:476", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + } + }, + { + "name": "BigQuery", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery", + "artifact": { + "name": "google-cloud" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "extab34", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery", + "type": "batchsink", + "label": "BigQuery", + "icon": "fa-plug", + "$$hashKey": "object:477", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "2dd12daa-5395-11ee-9dac-000000d0cf32" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json similarity index 81% rename from wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json rename to wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json index 533727b11..7aca89eb5 100644 --- 
a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json @@ -1,9 +1,9 @@ { - "name": "parse_as_fixedlength", + "name": "parse_fixedlength_wrangler", "description": "Data Pipeline Application", "artifact": { "name": "cdap-data-pipeline", - "version": "6.10.0-SNAPSHOT", + "version": "[6.0.0, 7.0.0]", "scope": "SYSTEM" }, "config": { @@ -37,15 +37,13 @@ "type": "batchsource", "label": "BigQueryTable", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", - "dataset": "Wrangler", - "table": "fstab", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}", + "dataset": "Wrangler_Test", + "table": "fixedlengthtab", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]}]}", "project": "auto-detect", "serviceAccountType": "filePath", "serviceFilePath": "auto-detect", @@ -55,18 +53,18 @@ "outputSchema": [ { "name": "etlSchemaBody", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}" + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]}]}" } ], "id": "BigQueryTable", "type": "batchsource", "label": "BigQueryTable", "icon": "fa-plug", - "$$hashKey": "object:503", + "$$hashKey": "object:31", "isPluginAvailable": true, "_uiPosition": { "left": "496px", - "top": "343px" + "top": "342px" } }, { @@ -76,40 +74,31 @@ "type": "transform", "label": "Wrangler", "artifact": { - 
"name": "wrangler-transform", - "version": "4.10.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "wrangler-transform" }, "properties": { - "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url :url\nwrite-as-csv :url_protocol\nurl-encode :url\nurl-decode :url\nencode base32 :fixedlength\ndecode base32 :fixedlength_encode_base32\nsplit-to-columns :url_query '='\nrename :fixedlength_2 :id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'", + "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url url\nwrite-as-csv :url_protocol\nurl-encode :url\nurl-decode :url\nencode base32 fixedlength\ndecode base32 fixedlength_encode_base32\nsplit-to-columns :url_query '='\nrename fixedlength_2 id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'", "field": "*", "precondition": "false", - "workspaceId": "f4d30074-2193-4690-a589-2982afc0a21a", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}", - "on-error": "fail-pipeline" + "workspaceId": 
"6b2760c2-e722-47d3-b5d2-ddefc5bc9ab0", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" } }, "outputSchema": [ { "name": "etlSchemaBody", - "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "inputSchema": [ - { - "name": "BigQueryTable", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}" + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" } ], "id": "Wrangler", "type": "transform", "label": "Wrangler", "icon": "icon-DataPreparation", - "$$hashKey": "object:504", + "$$hashKey": "object:32", "isPluginAvailable": true, "_uiPosition": { "left": "796px", - "top": "343px" + "top": "342px" } }, { @@ -119,48 +108,47 @@ "type": "batchsink", "label": "BigQuery2", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", "project": "auto-detect", "serviceAccountType": "filePath", "serviceFilePath": "auto-detect", - "dataset": "Wrangler", - "table": "fstabup", + "dataset": "Wrangler_Test", + "table": "fsdtable", "operation": "insert", "truncateTable": "false", "allowSchemaRelaxation": "false", "location": "US", "createPartitionedTable": "false", "partitioningType": "TIME", + "timePartitioningType": "DAY", "partitionFilterRequired": "false", - "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" } }, "outputSchema": [ { "name": "etlSchemaBody", - "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" } ], "inputSchema": [ { "name": "Wrangler", - "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" } ], "id": "BigQuery2", "type": "batchsink", "label": "BigQuery2", "icon": "fa-plug", - "$$hashKey": "object:505", + "$$hashKey": "object:33", "isPluginAvailable": true, "_uiPosition": { "left": "1096px", - "top": "343px" + "top": "342px" } } ], @@ -175,5 +163,5 @@ "pushdownEnabled": false, "transformationPushdown": {} }, - "version": "88ba63d3-4c08-11ee-81a4-0000001ad828" + "version": "7f3d3a08-af99-11ee-a55b-00000031b618" } \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json new file mode 100644 index 000000000..e1544bfe6 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json @@ -0,0 +1,467 @@ +{ + "name": "parse_json_wrangler1", + "description": "Data 
Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "[6.0.0, 7.0.0]", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler_Test", + "table": "jsontab", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json\",\"type\":[\"string\",\"null\"]}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:443", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "342px" + }, + "selected": false + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform" + }, + "properties": { + "directives": "parse-as-json :json 1\nltrim :body\nset-column :desc concat(json_pet,body)\ncopy :json_name :copied\nswap :json_id :json_age\nmerge :json_id :json_name :json_id_json_name ,\nmask-number :json_pet 'testing'\ndrop 
json_height\nwrite-as-json-map :json_age\nrename json_id id", + "field": "*", + "precondition": "false", + "workspaceId": "6e59a102-2268-4328-afce-e81e6eb9228b", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:444", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "342px" + }, + "selected": false + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": 
"batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler_Test", + "table": "jstabss", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "timePartitioningType": "DAY", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:445", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "342px" + }, + "_backendProperties": { + "schema": { + "name": "schema", + "description": "The schema of the data to write. 
If provided, must be compatible with the table schema.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionFilter": { + "name": "partitionFilter", + "description": "Partition filter that can be used for partition elimination during Update or Upsert operations.This value is ignored if operation is not UPDATE or UPSERT.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "rangeStart": { + "name": "rangeStart", + "description": "Start value for range partitioning. The start value is inclusive. Ignored when table already exists", + "type": "long", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountJSON": { + "name": "serviceAccountJSON", + "description": "Content of the service account file.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "useConnection": { + "name": "useConnection", + "description": "Whether to use an existing connection.", + "type": "boolean", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "jsonStringFields": { + "name": "jsonStringFields", + "description": "Fields in input schema that should be treated as JSON strings. The schema of these fields should be of type STRING.", + "type": "string", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "project": { + "name": "project", + "description": "Google Cloud Project ID. It can be found on the Dashboard in the Google Cloud Platform Console.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitioningType": { + "name": "partitioningType", + "description": "Specifies the partitioning type. 
Can either be Integer or Time or None. Ignored when table already exists", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "timePartitioningType": { + "name": "timePartitioningType", + "description": "Specifies the time partitioning type. Can either be Daily or Hourly or Monthly or Yearly. Ignored when table already exists", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "jobLabels": { + "name": "jobLabels", + "description": "Key value pairs to be added as labels to the BigQuery job. Keys must be unique. [job_source, type] are reserved keys and cannot be used as label keys.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "relationTableKey": { + "name": "relationTableKey", + "description": "List of fields that determines relation between tables during Update and Upsert operations.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "rangeEnd": { + "name": "rangeEnd", + "description": "End value for range partitioning. The end value is exclusive. Ignored when table already exists", + "type": "long", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "clusteringOrder": { + "name": "clusteringOrder", + "description": "List of fields that determines the sort order of the data. Fields must be of type INT, LONG, STRING, DATE, TIMESTAMP, BOOLEAN or DECIMAL. Tables cannot be clustered on more than 4 fields. 
This value is only used when the BigQuery table is automatically created and ignored if the table already exists.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionFilterRequired": { + "name": "partitionFilterRequired", + "description": "Whether to create a table that requires a partition filter. This value is ignored if the table already exists.", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceFilePath": { + "name": "serviceFilePath", + "description": "Path on the local file system of the service account key used for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. When running on other clusters, the file must be present on every node in the cluster.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "truncateTable": { + "name": "truncateTable", + "description": "Whether or not to truncate the table before writing to it. Should only be used with the Insert operation. This could overwrite the table schema", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionByField": { + "name": "partitionByField", + "description": "Partitioning column for the BigQuery table. 
This should be left empty if the BigQuery table is an ingestion-time partitioned table.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "connection": { + "name": "connection", + "description": "The existing connection to use.", + "type": "bigqueryconnectorconfig", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [ + "serviceAccountJSON", + "serviceFilePath", + "project", + "serviceAccountType", + "datasetProject" + ] + }, + "table": { + "name": "table", + "description": "The table to write to. A table contains individual records organized in rows. Each record is composed of columns (also called fields). Every table is defined by a schema that describes the column names, data types, and other information.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "referenceName": { + "name": "referenceName", + "description": "This will be used to uniquely identify this source/sink for lineage, annotating metadata, etc.", + "type": "string", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "cmekKey": { + "name": "cmekKey", + "description": "The GCP customer managed encryption key (CMEK) name used to encrypt data written to any bucket, dataset or table created by the plugin. If the bucket, dataset or table already exists, this is ignored. More information can be found at https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "createPartitionedTable": { + "name": "createPartitionedTable", + "description": "DEPRECATED!. Whether to create the BigQuery table with time partitioning. This value is ignored if the table already exists. 
When this is set to false, value of Partitioning type will be used. Use 'Partitioning type' property", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "dedupeBy": { + "name": "dedupeBy", + "description": "Column names and sort order used to choose which input record to update/upsert when there are multiple input records with the same key. For example, if this is set to 'updated_time desc', then if there are multiple input records with the same key, the one with the largest value for 'updated_time' will be applied.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "allowSchemaRelaxation": { + "name": "allowSchemaRelaxation", + "description": "Whether to modify the BigQuery table schema if it differs from the input schema.", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "bucket": { + "name": "bucket", + "description": "The Google Cloud Storage bucket to store temporary data in. Cloud Storage data will be deleted after it is loaded into BigQuery. If it is not provided, a unique bucket will be automatically created and then deleted after the run finishes. The service account must have permission to create buckets in the configured project.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "rangeInterval": { + "name": "rangeInterval", + "description": "Interval value for range partitioning. The interval value must be a positive integer.Ignored when table already exists", + "type": "long", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "gcsChunkSize": { + "name": "gcsChunkSize", + "description": "Optional property to tune chunk size in gcs upload request. The value of this property should be in number of bytes. 
By default, 8388608 bytes (8MB) will be used as upload request chunk size.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "location": { + "name": "location", + "description": "The location where the big query dataset will get created. This value is ignored if the dataset or temporary bucket already exist.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountType": { + "name": "serviceAccountType", + "description": "Service account type, file path where the service account is located or the JSON content of the service account.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "datasetProject": { + "name": "datasetProject", + "description": "The project the dataset belongs to. This is only required if the dataset is not in the same project that the BigQuery job will run in. If no value is given, it will default to the configured project ID.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "operation": { + "name": "operation", + "description": "Type of write operation to perform. This can be set to Insert, Update or Upsert.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "dataset": { + "name": "dataset", + "description": "The dataset to write to. A dataset is contained within a specific project. Datasets are top-level containers that are used to organize and control access to tables and views.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + } + }, + "description": "This sink writes to a BigQuery table. BigQuery is Google's serverless, highly scalable, enterprise data warehouse. 
Data is first written to a temporary location on Google Cloud Storage, then loaded into BigQuery from there.", + "selected": false + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "15e6341c-af95-11ee-a080-000000f3bab4" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json index c17cfff71..4bf09c0c1 100644 --- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json @@ -3,7 +3,7 @@ "description": "Data Pipeline Application", "artifact": { "name": "cdap-data-pipeline", - "version": "6.10.0-SNAPSHOT", + "version": "[6.0.0, 7.0.0]", "scope": "SYSTEM" }, "config": { @@ -37,9 +37,7 @@ "type": "batchsource", "label": "BigQueryTable", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", @@ -76,9 +74,7 @@ "type": "transform", "label": "Wrangler", "artifact": { - "name": "wrangler-transform", - "version": "4.10.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "wrangler-transform" }, "properties": { "directives": "parse-timestamp :time\nparse-as-currency :price :newprice\nformat-as-currency :newprice :format_price\nparse-as-simple-date :create_date yyyy-MM-dd\nparse-as-simple-date :update_date yyyy-MM-dd\ndiff-date :create_date :update_date :diff_date\ntimestamp-to-datetime :update_date\nrename :newprice :id", @@ -119,9 +115,7 @@ "type": "batchsink", "label": "BigQuery2", "artifact": { - "name": 
"google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline (1).json similarity index 71% rename from wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json rename to wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline (1).json index cf0973aa6..8a8cd0243 100644 --- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline (1).json @@ -1,9 +1,9 @@ { - "name": "parse_as_datetime", + "name": "parse_xmltojson_wrangler", "description": "Data Pipeline Application", "artifact": { "name": "cdap-data-pipeline", - "version": "6.10.0-SNAPSHOT", + "version": "[6.0.0, 7.0.0]", "scope": "SYSTEM" }, "config": { @@ -37,15 +37,13 @@ "type": "batchsource", "label": "BigQueryTable", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", - "dataset": "Wrangler", - "table": "datetimeupd", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}", + "dataset": "Wrangler_Test", + "table": "xmlnews", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata\",\"type\":[\"string\",\"null\"]}]}", "project": "auto-detect", "serviceAccountType": "filePath", "serviceFilePath": "auto-detect", @@ -55,18 +53,18 @@ 
"outputSchema": [ { "name": "etlSchemaBody", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}" + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata\",\"type\":[\"string\",\"null\"]}]}" } ], "id": "BigQueryTable", "type": "batchsource", "label": "BigQueryTable", "icon": "fa-plug", - "$$hashKey": "object:532", + "$$hashKey": "object:609", "isPluginAvailable": true, "_uiPosition": { "left": "496px", - "top": "343px" + "top": "342px" }, "_backendProperties": { "schema": { @@ -257,40 +255,39 @@ "type": "transform", "label": "Wrangler", "artifact": { - "name": "wrangler-transform", - "version": "4.10.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "wrangler-transform" }, "properties": { - "directives": "parse-as-datetime :timestamp \"yyyy-MM-dd'T'HH:mm:ssX'['z']'\"\ncurrent-datetime :create_date\ndatetime-to-timestamp :timestamp\nformat-datetime :create_date 'y'\nformat-date :timestamp yyyy-mm-dd\nrename timestamp timecolumn", + "directives": "parse-xml-to-json :xmldata 1\nsplit-email :email\ntext-distance block email email_account distance\ntext-metric longest-common-subsequence email email_account distance2\nwrite-as-json-object :email_domain distance,email_account\nstemming :email\nsplit-to-rows :email_account '0'\nrename :email_account id", "field": "*", "precondition": "false", - "workspaceId": "7faca220-0705-4a1c-99d6-60d7dd657a0b", - "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}", + "workspaceId": "4c1d141a-66f6-4b4c-bc5f-a92ca41bee42", + "schema": 
"{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}", + "expressionLanguage": "jexl", "on-error": "fail-pipeline" } }, "outputSchema": [ { "name": "etlSchemaBody", - "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + "schema": 
"{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}" } ], "inputSchema": [ { "name": "BigQueryTable", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}" + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata\",\"type\":[\"string\",\"null\"]}]}" } ], "id": "Wrangler", "type": "transform", "label": "Wrangler", "icon": "icon-DataPreparation", - "$$hashKey": "object:533", + "$$hashKey": "object:610", "isPluginAvailable": true, "_uiPosition": { "left": "796px", - "top": "343px" + "top": "342px" }, "selected": false }, @@ -301,48 +298,47 @@ "type": "batchsink", "label": "BigQuery2", "artifact": { - "name": "google-cloud", - "version": "0.23.0-SNAPSHOT", - "scope": "SYSTEM" + "name": "google-cloud" }, "properties": { "useConnection": "false", "project": "auto-detect", "serviceAccountType": "filePath", "serviceFilePath": "auto-detect", - 
"dataset": "Wrangler", - "table": "dateupd", + "dataset": "Wrangler_Test", + "table": "fintab", "operation": "insert", "truncateTable": "false", "allowSchemaRelaxation": "false", "location": "US", "createPartitionedTable": "false", "partitioningType": "TIME", + "timePartitioningType": "DAY", "partitionFilterRequired": "false", - "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}" } }, "outputSchema": [ { "name": "etlSchemaBody", - "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + "schema": 
"{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}" } ], "inputSchema": [ { "name": "Wrangler", - "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + "schema": 
"{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}" } ], "id": "BigQuery2", "type": "batchsink", "label": "BigQuery2", "icon": "fa-plug", - "$$hashKey": "object:534", + "$$hashKey": "object:611", "isPluginAvailable": true, "_uiPosition": { "left": "1096px", - "top": "343px" + "top": "342px" }, "selected": false } @@ -358,5 +354,5 @@ "pushdownEnabled": false, "transformationPushdown": {} }, - "version": "6ab2074d-4e26-11ee-84d2-000000ba158f" + "version": "42a96af3-af8e-11ee-8372-00000073831c" } \ No newline at end of file