diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f353f5f7c..939f9a6c2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,11 +30,11 @@ jobs: steps: # Pinned 1.0.0 version - uses: haya14busa/action-workflow_run-status@967ed83efa565c257675ed70cfe5231f062ddd94 - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v3 with: ref: ${{ github.event.workflow_run.head_branch }} - name: Cache - uses: actions/cache@v2.1.3 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ github.workflow }}-${{ hashFiles('**/pom.xml') }} @@ -43,7 +43,7 @@ jobs: - name: Build with Maven run: mvn clean test -fae -T 2 -B -V -DcloudBuild -Dmaven.wagon.http.retryHandler.count=3 -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 - name: Archive build artifacts - uses: actions/upload-artifact@v2.2.2 + uses: actions/upload-artifact@v3 if: always() with: name: Build debug files diff --git a/pom.xml b/pom.xml index b43b5c99d..172d11f52 100644 --- a/pom.xml +++ b/pom.xml @@ -492,7 +492,7 @@ integration-test - verify + diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature new file mode 100644 index 000000000..dd27ebc83 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature @@ -0,0 +1,71 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: datatype parsers + + @BQ_SOURCE_TS_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse timestamp directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_Timestamp" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_Timestamp" + + + @BQ_SOURCE_DATETIME_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse datetime directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_Datetime" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_Datetime" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature new file mode 100644 index 000000000..5ac20c0b7 --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: parse as fixed length + + @BQ_SOURCE_FXDLEN_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse fixedlength directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_Fixed_Length" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_FixedLength" diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature new file mode 100644 index 000000000..c6c9e00df --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature @@ -0,0 +1,43 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +@Wrangler +Feature: parse as HL7 + + @BQ_SOURCE_HL7_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using parse hl7 directive + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_parse_hl7" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "project" with value: "projectId" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_hl7" diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java index 40c60c665..0243dc4ed 100644 --- a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java @@ -17,34 +17,31 @@ package io.cdap.plugin.common.stepsdesign; import com.google.cloud.bigquery.BigQueryException; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.StorageException; import io.cdap.e2e.utils.BigQueryClient; import io.cdap.e2e.utils.PluginPropertyUtils; -import io.cdap.e2e.utils.StorageClient; import io.cucumber.java.After; import io.cucumber.java.Before; -import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; import org.junit.Assert; import stepsdesign.BeforeActions; import java.io.IOException; -import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; -import java.sql.SQLException; import java.util.NoSuchElementException; import java.util.UUID; -import static io.cdap.e2e.pages.locators.CdfGCSLocators.filePath; - /** * Setup BQ for Wrangler tests. */ public class TestSetupHooks { + @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST") + public static void createTempSourceBQTable() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); + } @Before(order = 1, value = "@BQ_SINK_TEST") public static void setTempTargetBQTableName() { String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_"); @@ -71,10 +68,33 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce /** * Create BigQuery table. */ - @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST") - public static void createTempSourceBQTable() throws IOException, InterruptedException { - createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"), - PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv")); + @Before(order = 1, value = "@BQ_SOURCE_FXDLEN_TEST") + public static void createTempSourceBQTableFxdLen() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileFxdLen"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen")); + } + @Before(order = 1, value = "@BQ_SOURCE_HL7_TEST") + public static void createTempSourceBQTableHl7() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileHl7"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7")); + } + @Before(order = 1, value = "@BQ_SOURCE_TS_TEST") + public static void createTempSourceBQTableTimestamp() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileTimestamp"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileTimestamp")); + } + @Before(order = 1, value = "@BQ_SOURCE_DATETIME_TEST") + public static void createTempSourceBQTableDateTime() throws IOException, InterruptedException { + createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileDatetime"), + PluginPropertyUtils.pluginProp("InsertBQDataQueryFileDatetime")); + } + + @After(order = 1, value = "@BQ_SOURCE_TEST") + public static void deleteTempSourceBQTable() throws IOException, InterruptedException { + String bqSourceTable = PluginPropertyUtils.pluginProp("bqSourceTable"); + BigQueryClient.dropBqQuery(bqSourceTable); + BeforeActions.scenario.write("BQ source Table " + bqSourceTable + " deleted successfully"); + PluginPropertyUtils.removePluginProp("bqSourceTable"); } private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, String bqInsertDataQueryFile) diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime new file mode 100644 index 000000000..aa50c50d0 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime @@ -0,0 +1,3 @@ +{"create_date":"2023","id":1,"timecolumn":"2006-03-18"} +{"create_date":"2023","id":2,"timecolumn":"2007-03-18"} +{"create_date":"2023","id":3,"timecolumn":"2008-04-19"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_Timestamp b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_Timestamp new file mode 100644 index 000000000..5e46afb52 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_Timestamp @@ -0,0 +1,3 @@ +{"create_date":"2021-01-21T00:00:00Z","diff_date":594345600000,"format_price":"$1.00","id":1.0,"price":"$1","time":"2018-09-07T14:57:51.892Z","update_date":"2002-03-23T00:00:00"} +{"create_date":"2022-01-22T00:00:00Z","diff_date":562723200000,"format_price":"$2.00","id":2.0,"price":"$2","time":"2018-09-07T14:57:51.896Z","update_date":"2004-03-24T00:00:00"} +{"create_date":"2023-01-23T00:00:00Z","diff_date":652060800000,"format_price":"$3.00","id":3.0,"price":"$3","time":"2018-09-07T14:57:51.898Z","update_date":"2002-05-26T00:00:00"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength new file mode 100644 index 000000000..33010a877 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength @@ -0,0 +1,2 @@ +{"Url":"http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"21 10 ABCXYZ","fixedlength_1":"21","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GIYSAIBRGAQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"21 10 ABCXYZ","id":" 10","url_authority":"example.com:80","url_filename":"/docs/books/tutorial/index.html?name=networking","url_host":"example.com","url_path":"/docs/books/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} +{"Url":"http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"19 13 ABCXYZ","fixedlength_1":"19","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GE4SAIBRGMQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"19 13 ABCXYZ","id":" 13","url_authority":"geeks.com:80","url_filename":"/docs/chair/tutorial/index.html?name=networking","url_host":"geeks.com","url_path":"/docs/chair/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7 b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7 new file mode 100644 index 000000000..3d1429529 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_hl7 @@ -0,0 +1,2 @@ +{"Body":"s��\u0011y�X��\u0006�H���","Body_hl7_MSH_9_1":"ALM","address":"test","id":"3"} +{"Body":"F<��\u001c����#J��^�:","Body_hl7_MSH_9_1":"BLM","address":"address2","id":"4"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt new file mode 100644 index 000000000..2bc7bd04b --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (id STRING, create_date STRING, timestamp STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt new file mode 100644 index 000000000..d4a47cbb8 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (url STRING, fixedlength STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryTimestamp.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryTimestamp.txt new file mode 100644 index 000000000..852efe2c5 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryTimestamp.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (create_date STRING, update_date STRING, time BIGINT, price STRING) \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt new file mode 100644 index 000000000..6af1bd254 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt @@ -0,0 +1 @@ +create table `DATASET.TABLE_NAME` (address STRING, Body STRING) diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt new file mode 100644 index 000000000..33a34920f --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (id,create_date,timestamp) +VALUES +('1','2021-01-21','2006-02-18T05:03:42Z[UTC]'), +('2','2022-02-22','2007-01-18T04:03:22Z[UTC]'), +('3','2023-03-23','2008-07-19T08:04:22Z[UTC]'); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt new file mode 100644 index 000000000..9af6a9f30 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (url,fixedlength) +VALUES +('http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING','21 10 ABCXYZ'), +('http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING','19 13 ABCXYZ'), +('http://amazing.com:80/docs/tables/tutorial/index.html?name=networking#DOWNLOADING','18 14 CDEFGH'); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt new file mode 100644 index 000000000..9afa292e3 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (address,Body) +VALUES +('address1','MSH|^~?2||.|||199908180016||ADT^A04|ADT.1.1698593|P|3'), +('address2','MSH|^~?2||.|||199908180016||BSC^A04|ADT.1.1698593|P|4'), +('','MSH|^~?2||.|||199908180016||JKL^A04|ADT.1.1698593|P|5'); diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryTimestamp.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryTimestamp.txt new file mode 100644 index 000000000..d1626f14c --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryTimestamp.txt @@ -0,0 +1,5 @@ +INSERT INTO DATASET.TABLE_NAME (create_date,update_date,time,price) +VALUES +('2021-01-21','2002-03-23',1536332271892,'$1'), +('2022-01-22','2004-03-24',1536332271896,'$2'), +('2023-01-23','2002-05-26',1536332271898,'$3'); diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties index 8ea4bf26e..3d6473dbf 100644 --- a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -1,10 +1,23 @@ #json file path +Directive_parse_Fixed_Length=testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json +Directive_parse_hl7=testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json +Directive_parse_Timestamp=testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json +Directive_parse_Datetime=testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json Directive_parse_csv=testData/Wrangler\ /parse_csv_wrangle-cdap-data-pipeline.json bqSourceTable=dummy sourcePath=example/hello.csv gcsSourceBucket=dummy #bq queries file path + +CreateBQDataQueryFileFxdLen=BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt +InsertBQDataQueryFileFxdLen=BQtesdata/BigQuery/BigQueryInsertDataQueryFxdlen.txt +CreateBQDataQueryFileHl7=BQtesdata/BigQuery/BigQueryCreateTableQueryhl7.txt +InsertBQDataQueryFileHl7=BQtesdata/BigQuery/BigQueryInsertDataQueryHl7.txt +CreateBQDataQueryFileTimestamp=BQtesdata/BigQuery/BigQueryCreateTableQueryTimestamp.txt +InsertBQDataQueryFileTimestamp=BQtesdata/BigQuery/BigQueryInsertDataQueryTimestamp.txt +CreateBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt +InsertBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt CreateBQTableQueryFileCsv=BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt InsertBQDataQueryFileCsv=BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt @@ -13,4 +26,9 @@ projectId=cdf-athena dataset=test_automation dataset2=Wrangler #expectedBQFiles + +ExpectedDirective_parse_FixedLength=BQValidationExpectedFiles/Directive_parse_fixedlength +ExpectedDirective_parse_hl7=BQValidationExpectedFiles/Directive_parse_hl7 +ExpectedDirective_parse_Datetime=BQValidationExpectedFiles/Directive_parse_DateTime +ExpectedDirective_parse_Timestamp=BQValidationExpectedFiles/Directive_parse_Timestamp ExpectedDirective_parse_csv=BQValidationExpectedFiles/Directive_parse_csv diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json new file mode 100644 index 000000000..c90ff138a --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json @@ -0,0 +1,180 @@ +{ + "name": "parse_as_HL7", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "hl7finalt", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":\"string\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":\"string\"}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:495", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-hl7 :Body\nhash :Body MD5\nset-type :Body string \nkeep address,Body,Body_hl7_MSH_12,Body_hl7_MSH_9_1\nfind-and-replace address s/address1/test/g\nmask-shuffle :Body_hl7_MSH_9_1\nsend-to-error empty(address)\nrename :Body_hl7_MSH_12 :id ", + "field": "*", + "precondition": "false", + "workspaceId": "8317e17e-30ca-491a-8a07-56e124d53603", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":\"string\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:496", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "hlupde", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body\",\"type\":[\"string\",\"null\"]},{\"name\":\"Body_hl7_MSH_9_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:497", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "5cd279f1-4ca0-11ee-b9e7-0000007a8317" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json new file mode 100644 index 000000000..cf0973aa6 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json @@ -0,0 +1,362 @@ +{ + "name": "parse_as_datetime", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "datetimeupd", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:532", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + }, + "_backendProperties": { + "schema": { + "name": "schema", + "description": "The schema of the table to read.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "viewMaterializationDataset": { + "name": "viewMaterializationDataset", + "description": "The dataset in the specified project where the temporary table should be created. Defaults to the same dataset in which the table is located.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "enableQueryingViews": { + "name": "enableQueryingViews", + "description": "Whether to allow querying views. Since BigQuery views are not materialized by default, querying them may have a performance overhead.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountJSON": { + "name": "serviceAccountJSON", + "description": "Content of the service account file.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionTo": { + "name": "partitionTo", + "description": "It's inclusive partition end date. It should be a String with format \"yyyy-MM-dd\". This value is ignored if the table does not support partitioning.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "cmekKey": { + "name": "cmekKey", + "description": "The GCP customer managed encryption key (CMEK) name used to encrypt data written to any bucket, dataset or table created by the plugin. If the bucket, dataset or table already exists, this is ignored. More information can be found at https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "useConnection": { + "name": "useConnection", + "description": "Whether to use an existing connection.", + "type": "boolean", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "project": { + "name": "project", + "description": "Google Cloud Project ID. It can be found on the Dashboard in the Google Cloud Platform Console.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "viewMaterializationProject": { + "name": "viewMaterializationProject", + "description": "The project name where the temporary table should be created. Defaults to the same project in which the table is located.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "filter": { + "name": "filter", + "description": "The WHERE clause filters out rows by evaluating each row against boolean expression, and discards all rows that do not return TRUE (that is, rows that return FALSE or NULL).", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "bucket": { + "name": "bucket", + "description": "The Google Cloud Storage bucket to store temporary data in. Cloud Storage data will be deleted after it is loaded into BigQuery. If it is not provided, a unique bucket will be automatically created and then deleted after the run finishes. The service account must have permission to create buckets in the configured project.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionFrom": { + "name": "partitionFrom", + "description": "It's inclusive partition start date. It should be a String with format \"yyyy-MM-dd\". This value is ignored if the table does not support partitioning.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceFilePath": { + "name": "serviceFilePath", + "description": "Path on the local file system of the service account key used for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. When running on other clusters, the file must be present on every node in the cluster.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountType": { + "name": "serviceAccountType", + "description": "Service account type, file path where the service account is located or the JSON content of the service account.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "connection": { + "name": "connection", + "description": "The existing connection to use.", + "type": "bigqueryconnectorconfig", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [ + "serviceAccountJSON", + "serviceFilePath", + "project", + "serviceAccountType", + "datasetProject" + ] + }, + "datasetProject": { + "name": "datasetProject", + "description": "The project the dataset belongs to. This is only required if the dataset is not in the same project that the BigQuery job will run in. If no value is given, it will default to the configured project ID.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "dataset": { + "name": "dataset", + "description": "The dataset to write to. A dataset is contained within a specific project. Datasets are top-level containers that are used to organize and control access to tables and views.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "table": { + "name": "table", + "description": "The table to read from. A table contains individual records organized in rows. Each record is composed of columns (also called fields). Every table is defined by a schema that describes the column names, data types, and other information.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "referenceName": { + "name": "referenceName", + "description": "This will be used to uniquely identify this source for lineage, annotating metadata, etc.", + "type": "string", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + } + }, + "description": "This source reads the entire contents of a BigQuery table. BigQuery is Google's serverless, highly scalable, enterprise data warehouse.Data is first written to a temporary location on Google Cloud Storage, then read into the pipeline from there.", + "selected": false + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-datetime :timestamp \"yyyy-MM-dd'T'HH:mm:ssX'['z']'\"\ncurrent-datetime :create_date\ndatetime-to-timestamp :timestamp\nformat-datetime :create_date 'y'\nformat-date :timestamp yyyy-mm-dd\nrename timestamp timecolumn", + "field": "*", + "precondition": "false", + "workspaceId": "7faca220-0705-4a1c-99d6-60d7dd657a0b", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:533", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + }, + "selected": false + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "dateupd", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:534", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + }, + "selected": false + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "6ab2074d-4e26-11ee-84d2-000000ba158f" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json new file mode 100644 index 000000000..533727b11 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json @@ -0,0 +1,179 @@ +{ + "name": "parse_as_fixedlength", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "fstab", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:503", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url :url\nwrite-as-csv :url_protocol\nurl-encode :url\nurl-decode :url\nencode base32 :fixedlength\ndecode base32 :fixedlength_encode_base32\nsplit-to-columns :url_query '='\nrename :fixedlength_2 :id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'", + "field": "*", + "precondition": "false", + "workspaceId": "f4d30074-2193-4690-a589-2982afc0a21a", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:504", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "fstabup", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:505", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "88ba63d3-4c08-11ee-81a4-0000001ad828" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json new file mode 100644 index 000000000..c17cfff71 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json @@ -0,0 +1,179 @@ +{ + "name": "parse_as_timestamp", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "timestampupd", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"update_date\",\"type\":\"string\"},{\"name\":\"time\",\"type\":\"long\"},{\"name\":\"price\",\"type\":\"string\"}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"update_date\",\"type\":\"string\"},{\"name\":\"time\",\"type\":\"long\"},{\"name\":\"price\",\"type\":\"string\"}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:509", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "343px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "parse-timestamp :time\nparse-as-currency :price :newprice\nformat-as-currency :newprice :format_price\nparse-as-simple-date :create_date yyyy-MM-dd\nparse-as-simple-date :update_date yyyy-MM-dd\ndiff-date :create_date :update_date :diff_date\ntimestamp-to-datetime :update_date\nrename :newprice :id", + "field": "*", + "precondition": "false", + "workspaceId": "dbb637f2-8334-4d73-b35c-894732e01e49", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"create_date\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"update_date\",\"type\":[{\"type\":\"string\",\"logicalType\":\"datetime\"},\"null\"]},{\"name\":\"time\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"price\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"double\",\"null\"]},{\"name\":\"format_price\",\"type\":[\"string\",\"null\"]},{\"name\":\"diff_date\",\"type\":[\"long\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"create_date\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"update_date\",\"type\":[{\"type\":\"string\",\"logicalType\":\"datetime\"},\"null\"]},{\"name\":\"time\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"price\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"double\",\"null\"]},{\"name\":\"format_price\",\"type\":[\"string\",\"null\"]},{\"name\":\"diff_date\",\"type\":[\"long\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"update_date\",\"type\":\"string\"},{\"name\":\"time\",\"type\":\"long\"},{\"name\":\"price\",\"type\":\"string\"}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:510", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "343px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "timeupd", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"create_date\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"update_date\",\"type\":[{\"type\":\"string\",\"logicalType\":\"datetime\"},\"null\"]},{\"name\":\"time\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"price\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"double\",\"null\"]},{\"name\":\"format_price\",\"type\":[\"string\",\"null\"]},{\"name\":\"diff_date\",\"type\":[\"long\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"create_date\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"update_date\",\"type\":[{\"type\":\"string\",\"logicalType\":\"datetime\"},\"null\"]},{\"name\":\"time\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"price\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"double\",\"null\"]},{\"name\":\"format_price\",\"type\":[\"string\",\"null\"]},{\"name\":\"diff_date\",\"type\":[\"long\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"create_date\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"update_date\",\"type\":[{\"type\":\"string\",\"logicalType\":\"datetime\"},\"null\"]},{\"name\":\"time\",\"type\":[{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"},\"null\"]},{\"name\":\"price\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"double\",\"null\"]},{\"name\":\"format_price\",\"type\":[\"string\",\"null\"]},{\"name\":\"diff_date\",\"type\":[\"long\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:511", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "343px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "60373737-4e22-11ee-9c01-00000016111e" +} \ No newline at end of file