diff --git a/pom.xml b/pom.xml
index 75561b6e0..94284fb88 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,7 +83,7 @@
1.11.133
0.10.2-hadoop2
1.56
- 6.10.0-SNAPSHOT
+ 6.11.0-SNAPSHOT
1.1.5
1.6
2.5
@@ -547,7 +547,7 @@
io.cdap.tests.e2e
cdap-e2e-framework
- 0.3.0-SNAPSHOT
+ 0.4.0-SNAPSHOT
test
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature
index dd27ebc83..e56497edf 100644
--- a/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature
+++ b/wrangler-transform/src/e2e-test/features/Wrangler/DataTypeParsers.feature
@@ -15,7 +15,7 @@
@Wrangler
Feature: datatype parsers
- @BQ_SOURCE_TS_TEST @BQ_SINK_TEST
+ @BQ_SOURCE_TS_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST
Scenario: To verify User is able to run a pipeline using parse timestamp directive
Given Open Datafusion Project to configure pipeline
Then Click on the Plus Green Button to import the pipelines
@@ -24,8 +24,6 @@ Feature: datatype parsers
Then Replace input plugin property: "project" with value: "projectId"
Then Replace input plugin property: "dataset" with value: "dataset"
Then Replace input plugin property: "table" with value: "bqSourceTable"
- Then Click on the Get Schema button
- Then Click on the Validate button
Then Close the Plugin Properties page
Then Navigate to the properties page of plugin: "BigQuery2"
Then Replace input plugin property: "project" with value: "projectId"
@@ -43,7 +41,7 @@ Feature: datatype parsers
Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_Timestamp"
- @BQ_SOURCE_DATETIME_TEST @BQ_SINK_TEST
+ @BQ_SOURCE_DATETIME_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST
Scenario: To verify User is able to run a pipeline using parse datetime directive
Given Open Datafusion Project to configure pipeline
Then Click on the Plus Green Button to import the pipelines
@@ -52,8 +50,6 @@ Feature: datatype parsers
Then Replace input plugin property: "project" with value: "projectId"
Then Replace input plugin property: "dataset" with value: "dataset"
Then Replace input plugin property: "table" with value: "bqSourceTable"
- Then Click on the Get Schema button
- Then Click on the Validate button
Then Close the Plugin Properties page
Then Navigate to the properties page of plugin: "BigQuery2"
Then Replace input plugin property: "project" with value: "projectId"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature
index fa59cb54c..fc6d55040 100644
--- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature
+++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature
@@ -13,9 +13,9 @@
# the License.
@Wrangler
-Feature: Wrangler - Run time scenarios
+Feature: Wrangler - Run time scenarios for parse csv
- @BQ_SOURCE_CSV_TEST @BQ_SINK_TEST
+ @BQ_SOURCE_CSV_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST
Scenario: To verify User is able to run a pipeline using parse csv directive
Given Open Datafusion Project to configure pipeline
Then Click on the Plus Green Button to import the pipelines
@@ -24,8 +24,6 @@ Feature: Wrangler - Run time scenarios
Then Replace input plugin property: "project" with value: "projectId"
Then Replace input plugin property: "dataset" with value: "dataset"
Then Replace input plugin property: "table" with value: "bqSourceTable"
- Then Click on the Get Schema button
- Then Click on the Validate button
Then Close the Plugin Properties page
Then Navigate to the properties page of plugin: "BigQuery2"
Then Replace input plugin property: "project" with value: "projectId"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature
new file mode 100644
index 000000000..0573ef93c
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature
@@ -0,0 +1,40 @@
+# Copyright © 2023 Cask Data, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+@Wrangler
+Feature: Parse as excel
+
+ @GCS_SOURCE_TEST @BQ_SINK_TEST
+ Scenario: To verify User is able to run a pipeline using parse Excel directive
+ Given Open Datafusion Project to configure pipeline
+ Then Click on the Plus Green Button to import the pipelines
+ Then Select the file for importing the pipeline for the plugin "Directive_parse_excel"
+ Then Navigate to the properties page of plugin: "GCSFile"
+ Then Replace input plugin property: "project" with value: "projectId"
+ Then Replace input plugin property: "path" with value: "gcsSourceBucket"
+ Then Close the Plugin Properties page
+ Then Navigate to the properties page of plugin: "BigQuery"
+ Then Replace input plugin property: "project" with value: "projectId"
+ Then Replace input plugin property: "table" with value: "bqTargetTable"
+ Then Replace input plugin property: "dataset" with value: "dataset"
+ Then Click on the Validate button
+ Then Close the Plugin Properties page
+ Then Rename the pipeline
+ Then Deploy the pipeline
+ Then Run the Pipeline in Runtime
+ Then Wait till pipeline is in running state
+ Then Open and capture logs
+ Then Verify the pipeline status is "Succeeded"
+ Then Close the pipeline logs
+ Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_excel"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature
index 5ac20c0b7..aab62f5fd 100644
--- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature
+++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsFixedLength.feature
@@ -15,7 +15,7 @@
@Wrangler
Feature: parse as fixed length
- @BQ_SOURCE_FXDLEN_TEST @BQ_SINK_TEST
+ @BQ_SOURCE_FXDLEN_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST
Scenario: To verify User is able to run a pipeline using parse fixedlength directive
Given Open Datafusion Project to configure pipeline
Then Click on the Plus Green Button to import the pipelines
@@ -24,8 +24,6 @@ Feature: parse as fixed length
Then Replace input plugin property: "project" with value: "projectId"
Then Replace input plugin property: "dataset" with value: "dataset"
Then Replace input plugin property: "table" with value: "bqSourceTable"
- Then Click on the Get Schema button
- Then Click on the Validate button
Then Close the Plugin Properties page
Then Navigate to the properties page of plugin: "BigQuery2"
Then Replace input plugin property: "project" with value: "projectId"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature
index c6c9e00df..1e5a80a41 100644
--- a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature
+++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsHl7.feature
@@ -15,7 +15,7 @@
@Wrangler
Feature: parse as HL7
- @BQ_SOURCE_HL7_TEST @BQ_SINK_TEST
+ @BQ_SOURCE_HL7_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST
Scenario: To verify User is able to run a pipeline using parse hl7 directive
Given Open Datafusion Project to configure pipeline
Then Click on the Plus Green Button to import the pipelines
@@ -24,8 +24,6 @@ Feature: parse as HL7
Then Replace input plugin property: "project" with value: "projectId"
Then Replace input plugin property: "dataset" with value: "dataset"
Then Replace input plugin property: "table" with value: "bqSourceTable"
- Then Click on the Get Schema button
- Then Click on the Validate button
Then Close the Plugin Properties page
Then Navigate to the properties page of plugin: "BigQuery2"
Then Replace input plugin property: "project" with value: "projectId"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature
new file mode 100644
index 000000000..7bc833f6b
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature
@@ -0,0 +1,41 @@
+# Copyright © 2023 Cask Data, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+@Wrangler
+Feature: parse as Json
+
+ @BQ_SOURCE_JSON_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST
+ Scenario: To verify User is able to run a pipeline using parse Json directive
+ Given Open Datafusion Project to configure pipeline
+ Then Click on the Plus Green Button to import the pipelines
+ Then Select the file for importing the pipeline for the plugin "Directive_parse_json"
+ Then Navigate to the properties page of plugin: "BigQueryTable"
+ Then Replace input plugin property: "project" with value: "projectId"
+ Then Replace input plugin property: "dataset" with value: "dataset"
+ Then Replace input plugin property: "table" with value: "bqSourceTable"
+ Then Close the Plugin Properties page
+ Then Navigate to the properties page of plugin: "BigQuery2"
+ Then Replace input plugin property: "project" with value: "projectId"
+ Then Replace input plugin property: "table" with value: "bqTargetTable"
+ Then Replace input plugin property: "dataset" with value: "dataset"
+ Then Click on the Validate button
+ Then Close the Plugin Properties page
+ Then Rename the pipeline
+ Then Deploy the pipeline
+ Then Run the Pipeline in Runtime
+ Then Wait till pipeline is in running state
+ Then Open and capture logs
+ Then Verify the pipeline status is "Succeeded"
+ Then Close the pipeline logs
+ Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_json"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature
new file mode 100644
index 000000000..f8c919e77
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature
@@ -0,0 +1,41 @@
+# Copyright © 2023 Cask Data, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+@Wrangler
+Feature: parse as XmlToJson
+
+ @BQ_SOURCE_XML_TEST @BQ_SOURCE_TEST @BQ_SINK_TEST
+ Scenario: To verify User is able to run a pipeline using parse XmlToJson directive
+ Given Open Datafusion Project to configure pipeline
+ Then Click on the Plus Green Button to import the pipelines
+ Then Select the file for importing the pipeline for the plugin "Directive_parse_xml"
+ Then Navigate to the properties page of plugin: "BigQueryTable"
+ Then Replace input plugin property: "project" with value: "projectId"
+ Then Replace input plugin property: "dataset" with value: "dataset"
+ Then Replace input plugin property: "table" with value: "bqSourceTable"
+ Then Close the Plugin Properties page
+ Then Navigate to the properties page of plugin: "BigQuery2"
+ Then Replace input plugin property: "project" with value: "projectId"
+ Then Replace input plugin property: "table" with value: "bqTargetTable"
+ Then Replace input plugin property: "dataset" with value: "dataset"
+ Then Click on the Validate button
+ Then Close the Plugin Properties page
+ Then Rename the pipeline
+ Then Deploy the pipeline
+ Then Run the Pipeline in Runtime
+ Then Wait till pipeline is in running state
+ Then Open and capture logs
+ Then Verify the pipeline status is "Succeeded"
+ Then Close the pipeline logs
+ Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_xml"
diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java
index 0243dc4ed..b277ef375 100644
--- a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java
+++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java
@@ -17,8 +17,11 @@
package io.cdap.plugin.common.stepsdesign;
import com.google.cloud.bigquery.BigQueryException;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.StorageException;
import io.cdap.e2e.utils.BigQueryClient;
import io.cdap.e2e.utils.PluginPropertyUtils;
+import io.cdap.e2e.utils.StorageClient;
import io.cucumber.java.After;
import io.cucumber.java.Before;
import org.apache.commons.lang3.StringUtils;
@@ -26,6 +29,7 @@
import stepsdesign.BeforeActions;
import java.io.IOException;
+import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
@@ -37,12 +41,15 @@
*/
public class TestSetupHooks {
+ public static String gcsSourceBucketName = StringUtils.EMPTY;
+
@Before(order = 1, value = "@BQ_SOURCE_CSV_TEST")
public static void createTempSourceBQTable() throws IOException, InterruptedException {
createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"),
- PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv"));
+ PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv"));
}
- @Before(order = 1, value = "@BQ_SINK_TEST")
+
+ @Before(order = 2, value = "@BQ_SINK_TEST")
public static void setTempTargetBQTableName() {
String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_");
PluginPropertyUtils.addPluginProp("bqTargetTable", bqTargetTableName);
@@ -54,7 +61,8 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce
String bqTargetTableName = PluginPropertyUtils.pluginProp("bqTargetTable");
try {
BigQueryClient.dropBqQuery(bqTargetTableName);
- BeforeActions.scenario.write("BQ Target table - " + bqTargetTableName + " deleted successfully");
+ BeforeActions.scenario.write(
+ "BQ Target table - " + bqTargetTableName + " deleted successfully");
PluginPropertyUtils.removePluginProp("bqTargetTable");
} catch (BigQueryException e) {
if (e.getMessage().contains("Not found: Table")) {
@@ -66,30 +74,33 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce
}
/**
- * Create BigQuery table.
+ * Creates the temporary BigQuery source table for the fixed-length parse test.
*/
@Before(order = 1, value = "@BQ_SOURCE_FXDLEN_TEST")
public static void createTempSourceBQTableFxdLen() throws IOException, InterruptedException {
createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileFxdLen"),
- PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen"));
+ PluginPropertyUtils.pluginProp("InsertBQDataQueryFileFxdLen"));
}
+
@Before(order = 1, value = "@BQ_SOURCE_HL7_TEST")
public static void createTempSourceBQTableHl7() throws IOException, InterruptedException {
createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileHl7"),
- PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7"));
+ PluginPropertyUtils.pluginProp("InsertBQDataQueryFileHl7"));
}
+
@Before(order = 1, value = "@BQ_SOURCE_TS_TEST")
public static void createTempSourceBQTableTimestamp() throws IOException, InterruptedException {
createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileTimestamp"),
- PluginPropertyUtils.pluginProp("InsertBQDataQueryFileTimestamp"));
+ PluginPropertyUtils.pluginProp("InsertBQDataQueryFileTimestamp"));
}
+
@Before(order = 1, value = "@BQ_SOURCE_DATETIME_TEST")
public static void createTempSourceBQTableDateTime() throws IOException, InterruptedException {
createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileDatetime"),
- PluginPropertyUtils.pluginProp("InsertBQDataQueryFileDatetime"));
+ PluginPropertyUtils.pluginProp("InsertBQDataQueryFileDatetime"));
}
- @After(order = 1, value = "@BQ_SOURCE_TEST")
+ @After(order = 2, value = "@BQ_SOURCE_TEST")
public static void deleteTempSourceBQTable() throws IOException, InterruptedException {
String bqSourceTable = PluginPropertyUtils.pluginProp("bqSourceTable");
BigQueryClient.dropBqQuery(bqSourceTable);
@@ -97,35 +108,91 @@ public static void deleteTempSourceBQTable() throws IOException, InterruptedExce
PluginPropertyUtils.removePluginProp("bqSourceTable");
}
- private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, String bqInsertDataQueryFile)
- throws IOException, InterruptedException {
- String bqSourceTable = "E2E_SOURCE_" + UUID.randomUUID().toString().substring(0, 5).replaceAll("-",
- "_");
+ @Before(order = 1, value = "@BQ_SOURCE_JSON_TEST")
+ public static void createTempSourceBQTableJson() throws IOException, InterruptedException {
+ createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileJson"),
+ PluginPropertyUtils.pluginProp("InsertBQDataQueryFileJson"));
+ }
+
+ @Before(order = 1, value = "@BQ_SOURCE_XML_TEST")
+ public static void createTempSourceBQTableXml() throws IOException, InterruptedException {
+ createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileXml"),
+ PluginPropertyUtils.pluginProp("InsertBQDataQueryFileXml"));
+ }
+
+ @Before(order = 1, value = "@GCS_SOURCE_TEST")
+ public static void createBucketWithEXCELFile() throws IOException, URISyntaxException {
+ gcsSourceBucketName = createGCSBucketWithFile(PluginPropertyUtils.pluginProp("testFile"));
+ PluginPropertyUtils.addPluginProp("gcsSourceBucket", "gs://" + gcsSourceBucketName + "/" +
+ PluginPropertyUtils.pluginProp("testFile"));
+ BeforeActions.scenario.write("GCS source bucket1 name - " + gcsSourceBucketName);
+ }
+
+ /**
+  * Creates a bucket whose requested name is "e2e-test-" followed by a random UUID, then uploads
+  * {@code filePath} into it, passing the same value for both path arguments of the upload call.
+  *
+  * @param filePath path handed to the upload call as both of its path arguments
+  * @return the name reported by the created bucket
+  */
+ private static String createGCSBucketWithFile(String filePath)
+   throws IOException, URISyntaxException {
+   String requestedName = "e2e-test-" + UUID.randomUUID();
+   String createdName = StorageClient.createBucket(requestedName).getName();
+   StorageClient.uploadObject(createdName, filePath, filePath);
+   return createdName;
+ }
+
+ @After(order = 1, value = "@GCS_SOURCE_TEST")
+ public static void deleteSourceBucketWithFile() {
+ deleteGCSBucket(gcsSourceBucketName);
+ gcsSourceBucketName = StringUtils.EMPTY;
+ }
+
+ /**
+  * Deletes every object in the given GCS bucket and then the bucket itself.
+  *
+  * <p>An empty or null bucket name is treated as "nothing to delete". A failure whose message
+  * says the bucket does not exist is only written to the scenario log; any other storage or I/O
+  * failure fails the test.
+  *
+  * @param bucketName name of the bucket to remove; may be empty when no bucket was recorded
+  */
+ private static void deleteGCSBucket(String bucketName) {
+   if (StringUtils.isEmpty(bucketName)) {
+     // The bucket-name field defaults to the empty string, so an empty value means setup never
+     // recorded a bucket and there is nothing to clean up.
+     return;
+   }
+   try {
+     for (Blob blob : StorageClient.listObjects(bucketName).iterateAll()) {
+       StorageClient.deleteObject(bucketName, blob.getName());
+     }
+     StorageClient.deleteBucket(bucketName);
+     BeforeActions.scenario.write("Deleted GCS Bucket " + bucketName);
+   } catch (StorageException | IOException e) {
+     // getMessage() may be null; guard it so a NullPointerException does not hide the failure.
+     String message = e.getMessage();
+     if (message != null && message.contains("The specified bucket does not exist")) {
+       BeforeActions.scenario.write("GCS Bucket " + bucketName + " does not exist.");
+     } else {
+       Assert.fail(message != null ? message : e.toString());
+     }
+   }
+ }
+
+
+ private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile,
+ String bqInsertDataQueryFile)
+ throws IOException, InterruptedException {
+ String bqSourceTable =
+ "E2E_SOURCE_" + UUID.randomUUID().toString().substring(0, 5).replaceAll("-",
+ "_");
String createTableQuery = StringUtils.EMPTY;
try {
createTableQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource
- ("/" + bqCreateTableQueryFile).toURI()))
- , StandardCharsets.UTF_8);
- createTableQuery = createTableQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset"))
- .replace("TABLE_NAME", bqSourceTable);
+ ("/" + bqCreateTableQueryFile).toURI()))
+ , StandardCharsets.UTF_8);
+ createTableQuery = createTableQuery.replace("DATASET",
+ PluginPropertyUtils.pluginProp("dataset"))
+ .replace("TABLE_NAME", bqSourceTable);
} catch (Exception e) {
- BeforeActions.scenario.write("Exception in reading " + bqCreateTableQueryFile + " - " + e.getMessage());
+ BeforeActions.scenario.write(
+ "Exception in reading " + bqCreateTableQueryFile + " - " + e.getMessage());
Assert.fail("Exception in BigQuery testdata prerequisite setup " +
- "- error in reading create table query file " + e.getMessage());
+ "- error in reading create table query file " + e.getMessage());
}
String insertDataQuery = StringUtils.EMPTY;
try {
insertDataQuery = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource
- ("/" + bqInsertDataQueryFile).toURI()))
- , StandardCharsets.UTF_8);
- insertDataQuery = insertDataQuery.replace("DATASET", PluginPropertyUtils.pluginProp("dataset"))
- .replace("TABLE_NAME", bqSourceTable);
+ ("/" + bqInsertDataQueryFile).toURI()))
+ , StandardCharsets.UTF_8);
+ insertDataQuery = insertDataQuery.replace("DATASET",
+ PluginPropertyUtils.pluginProp("dataset"))
+ .replace("TABLE_NAME", bqSourceTable);
} catch (Exception e) {
- BeforeActions.scenario.write("Exception in reading " + bqInsertDataQueryFile + " - " + e.getMessage());
+ BeforeActions.scenario.write(
+ "Exception in reading " + bqInsertDataQueryFile + " - " + e.getMessage());
Assert.fail("Exception in BigQuery testdata prerequisite setup " +
- "- error in reading insert data query file " + e.getMessage());
+ "- error in reading insert data query file " + e.getMessage());
}
BigQueryClient.getSoleQueryResult(createTableQuery);
try {
diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime
index aa50c50d0..0af0511b3 100644
--- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime
+++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_DateTime
@@ -1,3 +1,3 @@
-{"create_date":"2023","id":1,"timecolumn":"2006-03-18"}
-{"create_date":"2023","id":2,"timecolumn":"2007-03-18"}
-{"create_date":"2023","id":3,"timecolumn":"2008-04-19"}
\ No newline at end of file
+{"create_date":"2024","id":"1","timecolumn":"2006-03-18"}
+{"create_date":"2024","id":"2","timecolumn":"2007-03-18"}
+{"create_date":"2024","id":"3","timecolumn":"2008-04-19"}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel
new file mode 100644
index 000000000..3c3ae5154
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel
@@ -0,0 +1,2 @@
+{"copiedname":"very","id":0,"name":"very","phone":"8838.0","rollno":"3.0","uniquenum":"very,0"}
+{"copiedname":"hello","id":2,"name":"hell","phone":"12345.0","rollno":"1.0","uniquenum":"hello,2"}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength
index 33010a877..591e939aa 100644
--- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength
+++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_fixedlength
@@ -1,2 +1,2 @@
-{"Url":"http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"21 10 ABCXYZ","fixedlength_1":"21","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GIYSAIBRGAQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"21 10 ABCXYZ","id":" 10","url_authority":"example.com:80","url_filename":"/docs/books/tutorial/index.html?name=networking","url_host":"example.com","url_path":"/docs/books/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"}
-{"Url":"http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING","fixedlength":"19 13 ABCXYZ","fixedlength_1":"19","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GE4SAIBRGMQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"19 13 ABCXYZ","id":" 13","url_authority":"geeks.com:80","url_filename":"/docs/chair/tutorial/index.html?name=networking","url_host":"geeks.com","url_path":"/docs/chair/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"}
\ No newline at end of file
+{"fixedlength":"21 10 ABCXYZ","fixedlength_1":"21","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GIYSAIBRGAQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"21 10 ABCXYZ","id":" 10","url":"http://example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING","url_authority":"example.com:80","url_filename":"/docs/books/tutorial/index.html?name=networking","url_host":"example.com","url_path":"/docs/books/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"}
+{"fixedlength":"19 13 ABCXYZ","fixedlength_1":"19","fixedlength_3":" ABC","fixedlength_4":"XYZ","fixedlength_encode_base32":"GE4SAIBRGMQCAQKCINMFSWQ=","fixedlength_encode_base32_decode_base32":"19 13 ABCXYZ","id":" 13","url":"http://geeks.com:80/docs/chair/tutorial/index.html?name=networking#DOWNLOADING","url_authority":"geeks.com:80","url_filename":"/docs/chair/tutorial/index.html?name=networking","url_host":"geeks.com","url_path":"/docs/chair/tutorial/index.html","url_port":80,"url_protocol":"http","url_query":"name=networking","url_query_1":"name","url_query_2":"networking"}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json
new file mode 100644
index 000000000..881f21c3c
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json
@@ -0,0 +1,3 @@
+{"body":"hello abc","copied":{"first":"Root","last":"joy"},"desc":"nickhello abc","id":22,"json_age":"{\"json_id\":22,\"copied\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_age\":1,\"json_name\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"22,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"joy\\\"}\",\"body\":\"hello abc\",\"desc\":\"nickhello abc\"}","json_id_json_name":"22,{\"first\":\"Root\",\"last\":\"joy\"}","json_name":{"first":"Root","last":"joy"},"json_pet":"testing"}
+{"body":"hello def","copied":{"first":"dded","last":"share"},"desc":"hellohello def","id":23,"json_age":"{\"json_id\":23,\"copied\":{\"first\":\"dded\",\"last\":\"share\"},\"json_age\":2,\"json_name\":{\"first\":\"dded\",\"last\":\"share\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"23,{\\\"first\\\":\\\"dded\\\",\\\"last\\\":\\\"share\\\"}\",\"body\":\"hello def\",\"desc\":\"hellohello def\"}","json_id_json_name":"23,{\"first\":\"dded\",\"last\":\"share\"}","json_name":{"first":"dded","last":"share"},"json_pet":"testing"}
+{"body":"hello ghi","copied":{"first":"Root","last":"Joltie"},"desc":"domshello ghi","id":24,"json_age":"{\"json_id\":24,\"copied\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_age\":3,\"json_name\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"24,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"Joltie\\\"}\",\"body\":\"hello ghi\",\"desc\":\"domshello ghi\"}","json_id_json_name":"24,{\"first\":\"Root\",\"last\":\"Joltie\"}","json_name":{"first":"Root","last":"Joltie"},"json_pet":"testing"}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson
new file mode 100644
index 000000000..4a72c0069
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson
@@ -0,0 +1,6 @@
+{"distance":2.0,"distance2":0.3571428656578064,"email":"abc01@mail.com","email_domain":{"distance":2.0,"email_account":"abc01"},"email_porter":["abc","mail","com"],"id":"1","xmldata_note":{"body":"Dont forget me this week!","from":"Tani","heading":"Reminder","to":"Tove"}}
+{"distance":2.0,"distance2":0.3571428656578064,"email":"def02@mail.com","email_domain":{"distance":2.0,"email_account":"def02"},"email_porter":["def","mail","com"],"id":"2","xmldata_note":{"body":"Dont forget us this holiday!","from":"joy","heading":"Reminder","to":"Tove"}}
+{"distance":2.0,"distance2":0.3571428656578064,"email":"abc01@mail.com","email_domain":{"distance":2.0,"email_account":"abc01"},"email_porter":["abc","mail","com"],"id":"abc","xmldata_note":{"body":"Dont forget me this week!","from":"Tani","heading":"Reminder","to":"Tove"}}
+{"distance":2.0,"distance2":0.3571428656578064,"email":"ghi03@mail.com","email_domain":{"distance":2.0,"email_account":"ghi03"},"email_porter":["ghi","mail","com"],"id":"3","xmldata_note":{"body":"Dont forget him this weekend!","from":"shree","heading":"Reminder","to":"Tove"}}
+{"distance":2.0,"distance2":0.3571428656578064,"email":"def02@mail.com","email_domain":{"distance":2.0,"email_account":"def02"},"email_porter":["def","mail","com"],"id":"def","xmldata_note":{"body":"Dont forget us this holiday!","from":"joy","heading":"Reminder","to":"Tove"}}
+{"distance":2.0,"distance2":0.3571428656578064,"email":"ghi03@mail.com","email_domain":{"distance":2.0,"email_account":"ghi03"},"email_porter":["ghi","mail","com"],"id":"ghi","xmldata_note":{"body":"Dont forget him this weekend!","from":"shree","heading":"Reminder","to":"Tove"}}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt
new file mode 100644
index 000000000..a711921e2
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt
@@ -0,0 +1 @@
+create table `DATASET.TABLE_NAME` (email STRING, xmldata STRING)
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt
new file mode 100644
index 000000000..0dc9608ce
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt
@@ -0,0 +1,5 @@
+INSERT INTO DATASET.TABLE_NAME (email,xmldata)
+VALUES
+('abc01@mail.com',' Tove Tani Reminder Dont forget me this week! '),
+('def02@mail.com',' Tove joy Reminder Dont forget us this holiday! '),
+('ghi03@mail.com',' Tove shree Reminder Dont forget him this weekend! ');
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt
new file mode 100644
index 000000000..dc9fa7d17
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt
@@ -0,0 +1,6 @@
+INSERT INTO DATASET.TABLE_NAME (body,json)
+VALUES
+(' hello abc', '{"id": 1, "name": {"first": "Root", "last": "joy"}, "age": 22, "pet": "nick", "height": 5.8}'),
+('hello def', '{"id": 2, "name": {"first": "dded", "last": "share"}, "age": 23, "pet": "hello", "height": 6.8}'),
+('hello ghi', '{"id": 3, "name": {"first": "Root", "last": "Joltie"}, "age": 24, "pet": "doms", "height": 7.8}');
+
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt
new file mode 100644
index 000000000..be6b585ea
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt
@@ -0,0 +1 @@
+create table `DATASET.TABLE_NAME` (body STRING, json STRING)
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx
new file mode 100644
index 000000000..adaa5291b
Binary files /dev/null and b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx differ
diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties
index 3d6473dbf..810ea4d95 100644
--- a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties
+++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties
@@ -1,13 +1,17 @@
#json file path
-Directive_parse_Fixed_Length=testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json
+Directive_parse_Fixed_Length=testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json
Directive_parse_hl7=testData/Wrangler/parse_HL7_Wrangler-cdap-data-pipeline (1).json
Directive_parse_Timestamp=testData/Wrangler/parse_timestamp_wrangle-cdap-data-pipeline.json
-Directive_parse_Datetime=testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json
+Directive_parse_Datetime=testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json
+Directive_parse_json=testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json
+Directive_parse_xml=testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline (1).json
+Directive_parse_excel=testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json
Directive_parse_csv=testData/Wrangler\
/parse_csv_wrangle-cdap-data-pipeline.json
bqSourceTable=dummy
sourcePath=example/hello.csv
gcsSourceBucket=dummy
+testFile=BQtesdata/BigQuery/test1.xlsx
#bq queries file path
CreateBQDataQueryFileFxdLen=BQtesdata/BigQuery/BigQueryCreateTableQueryFxdlen.txt
@@ -18,12 +22,16 @@ CreateBQDataQueryFileTimestamp=BQtesdata/BigQuery/BigQueryCreateTableQueryTimest
InsertBQDataQueryFileTimestamp=BQtesdata/BigQuery/BigQueryInsertDataQueryTimestamp.txt
CreateBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryCreateTableQueryDatetime.txt
InsertBQDataQueryFileDatetime=BQtesdata/BigQuery/BigQueryInsertDataQueryDatetime.txt
+CreateBQTableQueryFileJson=BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt
+InsertBQDataQueryFileJson=BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt
+CreateBQDataQueryFileXml=BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt
+InsertBQDataQueryFileXml=BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt
CreateBQTableQueryFileCsv=BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt
InsertBQDataQueryFileCsv=BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt
#bq properties
projectId=cdf-athena
-dataset=test_automation
+dataset=Wrangler_Test
dataset2=Wrangler
#expectedBQFiles
@@ -31,4 +39,7 @@ ExpectedDirective_parse_FixedLength=BQValidationExpectedFiles/Directive_parse_fi
ExpectedDirective_parse_hl7=BQValidationExpectedFiles/Directive_parse_hl7
ExpectedDirective_parse_Datetime=BQValidationExpectedFiles/Directive_parse_DateTime
ExpectedDirective_parse_Timestamp=BQValidationExpectedFiles/Directive_parse_Timestamp
+ExpectedDirective_parse_json=BQValidationExpectedFiles/Directive_parse_json
+ExpectedDirective_parse_xml=BQValidationExpectedFiles/Directive_parse_xmltojson
+ExpectedDirective_parse_excel=BQValidationExpectedFiles/Directive_parse_excel
ExpectedDirective_parse_csv=BQValidationExpectedFiles/Directive_parse_csv
diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json
new file mode 100644
index 000000000..a0c7a8caf
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangler-cdap-data-pipeline.json
@@ -0,0 +1,173 @@
+{
+ "name": "parse_datetime_wrangler",
+ "description": "Data Pipeline Application",
+ "artifact": {
+ "name": "cdap-data-pipeline",
+ "version": "6.11.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "config": {
+ "resources": {
+ "memoryMB": 2048,
+ "virtualCores": 1
+ },
+ "driverResources": {
+ "memoryMB": 2048,
+ "virtualCores": 1
+ },
+ "connections": [
+ {
+ "from": "BigQueryTable",
+ "to": "Wrangler"
+ },
+ {
+ "from": "Wrangler",
+ "to": "BigQuery2"
+ }
+ ],
+ "postActions": [],
+ "properties": {},
+ "processTimingEnabled": true,
+ "stageLoggingEnabled": true,
+ "stages": [
+ {
+ "name": "BigQueryTable",
+ "plugin": {
+ "name": "BigQueryTable",
+ "type": "batchsource",
+ "label": "BigQueryTable",
+ "artifact": {
+ "name": "google-cloud",
+ "version": "0.24.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "useConnection": "false",
+ "dataset": "Wrangler_Test",
+ "table": "datetimetab",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"string\",\"null\"]}]}",
+ "project": "auto-detect",
+ "serviceAccountType": "filePath",
+ "serviceFilePath": "auto-detect",
+ "enableQueryingViews": "false"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timestamp\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "id": "BigQueryTable",
+ "type": "batchsource",
+ "label": "BigQueryTable",
+ "icon": "fa-plug",
+ "$$hashKey": "object:358",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "496px",
+ "top": "342px"
+ }
+ },
+ {
+ "name": "Wrangler",
+ "plugin": {
+ "name": "Wrangler",
+ "type": "transform",
+ "label": "Wrangler",
+ "artifact": {
+ "name": "wrangler-transform",
+ "version": "4.11.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "directives": "parse-as-datetime :timestamp \"yyyy-MM-dd'T'HH:mm:ssX'['z']'\"\ncurrent-datetime :create_date\ndatetime-to-timestamp :timestamp\nformat-datetime :create_date 'y'\nformat-date :timestamp yyyy-mm-dd\nrename timestamp timecolumn",
+ "field": "*",
+ "precondition": "false",
+ "workspaceId": "b28b92f3-93bb-4a4f-8258-ef5881543ecb",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "id": "Wrangler",
+ "type": "transform",
+ "label": "Wrangler",
+ "icon": "icon-DataPreparation",
+ "$$hashKey": "object:359",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "796px",
+ "top": "342px"
+ }
+ },
+ {
+ "name": "BigQuery2",
+ "plugin": {
+ "name": "BigQueryTable",
+ "type": "batchsink",
+ "label": "BigQuery2",
+ "artifact": {
+ "name": "google-cloud",
+ "version": "0.24.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "useConnection": "false",
+ "project": "auto-detect",
+ "serviceAccountType": "filePath",
+ "serviceFilePath": "auto-detect",
+ "dataset": "Wrangler",
+ "table": "ddtab",
+ "operation": "insert",
+ "truncateTable": "false",
+ "allowSchemaRelaxation": "false",
+ "location": "US",
+ "createPartitionedTable": "false",
+ "partitioningType": "TIME",
+ "timePartitioningType": "DAY",
+ "partitionFilterRequired": "false",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "inputSchema": [
+ {
+ "name": "Wrangler",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "id": "BigQuery2",
+ "type": "batchsink",
+ "label": "BigQuery2",
+ "icon": "fa-plug",
+ "$$hashKey": "object:360",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "1096px",
+ "top": "342px"
+ }
+ }
+ ],
+ "schedule": "0 1 */1 * *",
+ "engine": "spark",
+ "numOfRecordsPreview": 100,
+ "rangeRecordsPreview": {
+ "min": 1,
+ "max": "5000"
+ },
+ "maxConcurrentRuns": 1,
+ "pushdownEnabled": false,
+ "transformationPushdown": {}
+ },
+ "version": "a397cf5a-af9f-11ee-bad0-0000007dcfa3"
+}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json
new file mode 100644
index 000000000..614f18fdf
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json
@@ -0,0 +1,186 @@
+{
+ "name": "parse_excel_wrangler_copy",
+ "description": "Data Pipeline Application",
+ "artifact": {
+ "name": "cdap-data-pipeline",
+ "version": "6.11.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "config": {
+ "resources": {
+ "memoryMB": 2048,
+ "virtualCores": 1
+ },
+ "driverResources": {
+ "memoryMB": 2048,
+ "virtualCores": 1
+ },
+ "connections": [
+ {
+ "from": "GCSFile",
+ "to": "Wrangler"
+ },
+ {
+ "from": "Wrangler",
+ "to": "BigQuery"
+ }
+ ],
+ "postActions": [],
+ "properties": {},
+ "processTimingEnabled": true,
+ "stageLoggingEnabled": true,
+ "stages": [
+ {
+ "name": "GCSFile",
+ "plugin": {
+ "name": "GCSFile",
+ "type": "batchsource",
+ "label": "GCSFile",
+ "artifact": {
+ "name": "google-cloud",
+ "version": "0.24.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "format": "blob",
+ "path": "gs://00000000-e2e-0014a44f-81be-4501-8360-0ddca1c39789/test1.xlsx",
+ "fileEncoding": "UTF-8",
+ "useConnection": "false",
+ "referenceName": "test",
+ "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}",
+ "project": "auto-detect",
+ "serviceAccountType": "filePath",
+ "serviceFilePath": "auto-detect",
+ "sampleSize": "1000",
+ "filenameOnly": "false",
+ "recursive": "false",
+ "ignoreNonExistingFolders": "false",
+ "encrypted": "false"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}"
+ }
+ ],
+ "id": "GCSFile",
+ "type": "batchsource",
+ "label": "GCSFile",
+ "icon": "fa-plug",
+ "$$hashKey": "object:475",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "496px",
+ "top": "343px"
+ }
+ },
+ {
+ "name": "Wrangler",
+ "plugin": {
+ "name": "Wrangler",
+ "type": "transform",
+ "label": "Wrangler",
+ "artifact": {
+ "name": "wrangler-transform",
+ "version": "4.11.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "directives": "parse-as-excel :body '0' true\ncopy name copiedname\nmerge name bkd uniquenum ','\nrename bkd rollno\ndrop fwd\nswap id rollno\nsplit-to-rows :name 'o'\nfilter-rows-on condition-false rollno !~ '2.0'",
+ "field": "*",
+ "precondition": "false",
+ "workspaceId": "667f9e85-6c36-4d38-ad48-ef85db7a04a2",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}",
+ "on-error": "fail-pipeline"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "inputSchema": [
+ {
+ "name": "GCSFile",
+ "schema": "{\"type\":\"record\",\"name\":\"blob\",\"fields\":[{\"name\":\"body\",\"type\":\"bytes\"}]}"
+ }
+ ],
+ "id": "Wrangler",
+ "type": "transform",
+ "label": "Wrangler",
+ "icon": "icon-DataPreparation",
+ "$$hashKey": "object:476",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "796px",
+ "top": "343px"
+ }
+ },
+ {
+ "name": "BigQuery",
+ "plugin": {
+ "name": "BigQueryTable",
+ "type": "batchsink",
+ "label": "BigQuery",
+ "artifact": {
+ "name": "google-cloud",
+ "version": "0.24.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "useConnection": "false",
+ "project": "auto-detect",
+ "serviceAccountType": "filePath",
+ "serviceFilePath": "auto-detect",
+ "dataset": "Wrangler",
+ "table": "extab34",
+ "operation": "insert",
+ "truncateTable": "false",
+ "allowSchemaRelaxation": "false",
+ "location": "US",
+ "createPartitionedTable": "false",
+ "partitioningType": "TIME",
+ "partitionFilterRequired": "false",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "inputSchema": [
+ {
+ "name": "Wrangler",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":[\"int\",\"null\"]},{\"name\":\"rollno\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"phone\",\"type\":[\"string\",\"null\"]},{\"name\":\"copiedname\",\"type\":[\"string\",\"null\"]},{\"name\":\"uniquenum\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "id": "BigQuery",
+ "type": "batchsink",
+ "label": "BigQuery",
+ "icon": "fa-plug",
+ "$$hashKey": "object:477",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "1096px",
+ "top": "343px"
+ }
+ }
+ ],
+ "schedule": "0 1 */1 * *",
+ "engine": "spark",
+ "numOfRecordsPreview": 100,
+ "rangeRecordsPreview": {
+ "min": 1,
+ "max": "5000"
+ },
+ "description": "Data Pipeline Application",
+ "maxConcurrentRuns": 1,
+ "pushdownEnabled": false,
+ "transformationPushdown": {}
+ },
+ "version": "2dd12daa-5395-11ee-9dac-000000d0cf32"
+}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json
similarity index 83%
rename from wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json
rename to wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json
index 533727b11..78c875305 100644
--- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangle-cdap-data-pipeline.json
+++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_fixedlength_wrangler-cdap-data-pipeline.json
@@ -1,9 +1,9 @@
{
- "name": "parse_as_fixedlength",
+ "name": "parse_fixedlength_wrangler",
"description": "Data Pipeline Application",
"artifact": {
"name": "cdap-data-pipeline",
- "version": "6.10.0-SNAPSHOT",
+ "version": "6.11.0-SNAPSHOT",
"scope": "SYSTEM"
},
"config": {
@@ -38,14 +38,14 @@
"label": "BigQueryTable",
"artifact": {
"name": "google-cloud",
- "version": "0.23.0-SNAPSHOT",
+ "version": "0.24.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
"useConnection": "false",
- "dataset": "Wrangler",
- "table": "fstab",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}",
+ "dataset": "Wrangler_Test",
+ "table": "fixedlengthtab",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]}]}",
"project": "auto-detect",
"serviceAccountType": "filePath",
"serviceFilePath": "auto-detect",
@@ -55,18 +55,18 @@
"outputSchema": [
{
"name": "etlSchemaBody",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]}]}"
}
],
"id": "BigQueryTable",
"type": "batchsource",
"label": "BigQueryTable",
"icon": "fa-plug",
- "$$hashKey": "object:503",
+ "$$hashKey": "object:31",
"isPluginAvailable": true,
"_uiPosition": {
"left": "496px",
- "top": "343px"
+ "top": "342px"
}
},
{
@@ -77,39 +77,32 @@
"label": "Wrangler",
"artifact": {
"name": "wrangler-transform",
- "version": "4.10.0-SNAPSHOT",
+ "version": "4.11.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
- "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url :url\nwrite-as-csv :url_protocol\nurl-encode :url\nurl-decode :url\nencode base32 :fixedlength\ndecode base32 :fixedlength_encode_base32\nsplit-to-columns :url_query '='\nrename :fixedlength_2 :id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'",
+ "directives": "parse-as-fixed-length :fixedlength 2,4,5,3\nsplit-url url\nwrite-as-csv :url_protocol\nurl-encode :url\nurl-decode :url\nencode base32 fixedlength\ndecode base32 fixedlength_encode_base32\nsplit-to-columns :url_query '='\nrename fixedlength_2 id\nfilter-rows-on condition-true fixedlength_4 !~ 'XYZ'",
"field": "*",
"precondition": "false",
- "workspaceId": "f4d30074-2193-4690-a589-2982afc0a21a",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}",
- "on-error": "fail-pipeline"
+ "workspaceId": "6b2760c2-e722-47d3-b5d2-ddefc5bc9ab0",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
- }
- ],
- "inputSchema": [
- {
- "name": "BigQueryTable",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":\"string\"},{\"name\":\"fixedlength\",\"type\":\"string\"}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
}
],
"id": "Wrangler",
"type": "transform",
"label": "Wrangler",
"icon": "icon-DataPreparation",
- "$$hashKey": "object:504",
+ "$$hashKey": "object:32",
"isPluginAvailable": true,
"_uiPosition": {
"left": "796px",
- "top": "343px"
+ "top": "342px"
}
},
{
@@ -120,7 +113,7 @@
"label": "BigQuery2",
"artifact": {
"name": "google-cloud",
- "version": "0.23.0-SNAPSHOT",
+ "version": "0.24.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
@@ -128,39 +121,40 @@
"project": "auto-detect",
"serviceAccountType": "filePath",
"serviceFilePath": "auto-detect",
- "dataset": "Wrangler",
- "table": "fstabup",
+ "dataset": "Wrangler_Test",
+ "table": "fsdtable",
"operation": "insert",
"truncateTable": "false",
"allowSchemaRelaxation": "false",
"location": "US",
"createPartitionedTable": "false",
"partitioningType": "TIME",
+ "timePartitioningType": "DAY",
"partitionFilterRequired": "false",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
}
],
"inputSchema": [
{
"name": "Wrangler",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"Url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"url\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_3\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_4\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_protocol\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_authority\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_host\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_port\",\"type\":[\"int\",\"null\"]},{\"name\":\"url_path\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_filename\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"fixedlength_encode_base32_decode_base32\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"url_query_2\",\"type\":[\"string\",\"null\"]}]}"
}
],
"id": "BigQuery2",
"type": "batchsink",
"label": "BigQuery2",
"icon": "fa-plug",
- "$$hashKey": "object:505",
+ "$$hashKey": "object:33",
"isPluginAvailable": true,
"_uiPosition": {
"left": "1096px",
- "top": "343px"
+ "top": "342px"
}
}
],
@@ -175,5 +169,5 @@
"pushdownEnabled": false,
"transformationPushdown": {}
},
- "version": "88ba63d3-4c08-11ee-81a4-0000001ad828"
+ "version": "7f3d3a08-af99-11ee-a55b-00000031b618"
}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json
new file mode 100644
index 000000000..a2430c249
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_json_wrangler1-cdap-data-pipeline.json
@@ -0,0 +1,473 @@
+{
+ "name": "parse_json_wrangler1",
+ "description": "Data Pipeline Application",
+ "artifact": {
+ "name": "cdap-data-pipeline",
+ "version": "6.11.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "config": {
+ "resources": {
+ "memoryMB": 2048,
+ "virtualCores": 1
+ },
+ "driverResources": {
+ "memoryMB": 2048,
+ "virtualCores": 1
+ },
+ "connections": [
+ {
+ "from": "BigQueryTable",
+ "to": "Wrangler"
+ },
+ {
+ "from": "Wrangler",
+ "to": "BigQuery2"
+ }
+ ],
+ "postActions": [],
+ "properties": {},
+ "processTimingEnabled": true,
+ "stageLoggingEnabled": true,
+ "stages": [
+ {
+ "name": "BigQueryTable",
+ "plugin": {
+ "name": "BigQueryTable",
+ "type": "batchsource",
+ "label": "BigQueryTable",
+ "artifact": {
+ "name": "google-cloud",
+ "version": "0.24.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "useConnection": "false",
+ "dataset": "Wrangler_Test",
+ "table": "jsontab",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json\",\"type\":[\"string\",\"null\"]}]}",
+ "project": "auto-detect",
+ "serviceAccountType": "filePath",
+ "serviceFilePath": "auto-detect",
+ "enableQueryingViews": "false"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "id": "BigQueryTable",
+ "type": "batchsource",
+ "label": "BigQueryTable",
+ "icon": "fa-plug",
+ "$$hashKey": "object:443",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "496px",
+ "top": "342px"
+ },
+ "selected": false
+ },
+ {
+ "name": "Wrangler",
+ "plugin": {
+ "name": "Wrangler",
+ "type": "transform",
+ "label": "Wrangler",
+ "artifact": {
+ "name": "wrangler-transform",
+ "version": "4.11.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "directives": "parse-as-json :json 1\nltrim :body\nset-column :desc concat(json_pet,body)\ncopy :json_name :copied\nswap :json_id :json_age\nmerge :json_id :json_name :json_id_json_name ,\nmask-number :json_pet 'testing'\ndrop json_height\nwrite-as-json-map :json_age\nrename json_id id",
+ "field": "*",
+ "precondition": "false",
+ "workspaceId": "6e59a102-2268-4328-afce-e81e6eb9228b",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "id": "Wrangler",
+ "type": "transform",
+ "label": "Wrangler",
+ "icon": "icon-DataPreparation",
+ "$$hashKey": "object:444",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "796px",
+ "top": "342px"
+ },
+ "selected": false
+ },
+ {
+ "name": "BigQuery2",
+ "plugin": {
+ "name": "BigQueryTable",
+ "type": "batchsink",
+ "label": "BigQuery2",
+ "artifact": {
+ "name": "google-cloud",
+ "version": "0.24.0-SNAPSHOT",
+ "scope": "SYSTEM"
+ },
+ "properties": {
+ "useConnection": "false",
+ "project": "auto-detect",
+ "serviceAccountType": "filePath",
+ "serviceFilePath": "auto-detect",
+ "dataset": "Wrangler_Test",
+ "table": "jstabss",
+ "operation": "insert",
+ "truncateTable": "false",
+ "allowSchemaRelaxation": "false",
+ "location": "US",
+ "createPartitionedTable": "false",
+ "partitioningType": "TIME",
+ "timePartitioningType": "DAY",
+ "partitionFilterRequired": "false",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ },
+ "outputSchema": [
+ {
+ "name": "etlSchemaBody",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "inputSchema": [
+ {
+ "name": "Wrangler",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_age\",\"type\":[\"string\",\"null\"]},{\"name\":\"json_name\",\"type\":[{\"type\":\"record\",\"name\":\"json_name05F0DF247CD8481657781C26E1595028\",\"fields\":[{\"name\":\"first\",\"type\":[\"string\",\"null\"]},{\"name\":\"last\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"json_pet\",\"type\":[\"string\",\"null\"]},{\"name\":\"desc\",\"type\":[\"string\",\"null\"]},{\"name\":\"copied\",\"type\":[\"json_name05F0DF247CD8481657781C26E1595028\",\"null\"]},{\"name\":\"json_id_json_name\",\"type\":[\"string\",\"null\"]}]}"
+ }
+ ],
+ "id": "BigQuery2",
+ "type": "batchsink",
+ "label": "BigQuery2",
+ "icon": "fa-plug",
+ "$$hashKey": "object:445",
+ "isPluginAvailable": true,
+ "_uiPosition": {
+ "left": "1096px",
+ "top": "342px"
+ },
+ "_backendProperties": {
+ "schema": {
+ "name": "schema",
+ "description": "The schema of the data to write. If provided, must be compatible with the table schema.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "partitionFilter": {
+ "name": "partitionFilter",
+ "description": "Partition filter that can be used for partition elimination during Update or Upsert operations. This value is ignored if operation is not UPDATE or UPSERT.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "rangeStart": {
+ "name": "rangeStart",
+ "description": "Start value for range partitioning. The start value is inclusive. Ignored when table already exists",
+ "type": "long",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "serviceAccountJSON": {
+ "name": "serviceAccountJSON",
+ "description": "Content of the service account file.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "useConnection": {
+ "name": "useConnection",
+ "description": "Whether to use an existing connection.",
+ "type": "boolean",
+ "required": false,
+ "macroSupported": false,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "jsonStringFields": {
+ "name": "jsonStringFields",
+ "description": "Fields in input schema that should be treated as JSON strings. The schema of these fields should be of type STRING.",
+ "type": "string",
+ "required": false,
+ "macroSupported": false,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "project": {
+ "name": "project",
+ "description": "Google Cloud Project ID. It can be found on the Dashboard in the Google Cloud Platform Console.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "partitioningType": {
+ "name": "partitioningType",
+ "description": "Specifies the partitioning type. Can either be Integer or Time or None. Ignored when table already exists",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "timePartitioningType": {
+ "name": "timePartitioningType",
+ "description": "Specifies the time partitioning type. Can either be Daily or Hourly or Monthly or Yearly. Ignored when table already exists",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "jobLabels": {
+ "name": "jobLabels",
+ "description": "Key value pairs to be added as labels to the BigQuery job. Keys must be unique. [job_source, type] are reserved keys and cannot be used as label keys.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "relationTableKey": {
+ "name": "relationTableKey",
+ "description": "List of fields that determines relation between tables during Update and Upsert operations.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "rangeEnd": {
+ "name": "rangeEnd",
+ "description": "End value for range partitioning. The end value is exclusive. Ignored when table already exists",
+ "type": "long",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "clusteringOrder": {
+ "name": "clusteringOrder",
+ "description": "List of fields that determines the sort order of the data. Fields must be of type INT, LONG, STRING, DATE, TIMESTAMP, BOOLEAN or DECIMAL. Tables cannot be clustered on more than 4 fields. This value is only used when the BigQuery table is automatically created and ignored if the table already exists.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "partitionFilterRequired": {
+ "name": "partitionFilterRequired",
+ "description": "Whether to create a table that requires a partition filter. This value is ignored if the table already exists.",
+ "type": "boolean",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "serviceFilePath": {
+ "name": "serviceFilePath",
+ "description": "Path on the local file system of the service account key used for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. When running on other clusters, the file must be present on every node in the cluster.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "truncateTable": {
+ "name": "truncateTable",
+ "description": "Whether or not to truncate the table before writing to it. Should only be used with the Insert operation. This could overwrite the table schema",
+ "type": "boolean",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "partitionByField": {
+ "name": "partitionByField",
+ "description": "Partitioning column for the BigQuery table. This should be left empty if the BigQuery table is an ingestion-time partitioned table.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "connection": {
+ "name": "connection",
+ "description": "The existing connection to use.",
+ "type": "bigqueryconnectorconfig",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": [
+ "serviceAccountJSON",
+ "serviceFilePath",
+ "project",
+ "serviceAccountType",
+ "datasetProject"
+ ]
+ },
+ "table": {
+ "name": "table",
+ "description": "The table to write to. A table contains individual records organized in rows. Each record is composed of columns (also called fields). Every table is defined by a schema that describes the column names, data types, and other information.",
+ "type": "string",
+ "required": true,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "referenceName": {
+ "name": "referenceName",
+ "description": "This will be used to uniquely identify this source/sink for lineage, annotating metadata, etc.",
+ "type": "string",
+ "required": false,
+ "macroSupported": false,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "cmekKey": {
+ "name": "cmekKey",
+ "description": "The GCP customer managed encryption key (CMEK) name used to encrypt data written to any bucket, dataset or table created by the plugin. If the bucket, dataset or table already exists, this is ignored. More information can be found at https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "createPartitionedTable": {
+ "name": "createPartitionedTable",
+ "description": "DEPRECATED! Whether to create the BigQuery table with time partitioning. This value is ignored if the table already exists. When this is set to false, value of Partitioning type will be used. Use 'Partitioning type' property.",
+ "type": "boolean",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "dedupeBy": {
+ "name": "dedupeBy",
+ "description": "Column names and sort order used to choose which input record to update/upsert when there are multiple input records with the same key. For example, if this is set to 'updated_time desc', then if there are multiple input records with the same key, the one with the largest value for 'updated_time' will be applied.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "allowSchemaRelaxation": {
+ "name": "allowSchemaRelaxation",
+ "description": "Whether to modify the BigQuery table schema if it differs from the input schema.",
+ "type": "boolean",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "bucket": {
+ "name": "bucket",
+ "description": "The Google Cloud Storage bucket to store temporary data in. Cloud Storage data will be deleted after it is loaded into BigQuery. If it is not provided, a unique bucket will be automatically created and then deleted after the run finishes. The service account must have permission to create buckets in the configured project.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "rangeInterval": {
+ "name": "rangeInterval",
+ "description": "Interval value for range partitioning. The interval value must be a positive integer. Ignored when table already exists.",
+ "type": "long",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "gcsChunkSize": {
+ "name": "gcsChunkSize",
+ "description": "Optional property to tune chunk size in gcs upload request. The value of this property should be in number of bytes. By default, 8388608 bytes (8MB) will be used as upload request chunk size.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "location": {
+ "name": "location",
+ "description": "The location where the big query dataset will get created. This value is ignored if the dataset or temporary bucket already exist.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "serviceAccountType": {
+ "name": "serviceAccountType",
+ "description": "Service account type, file path where the service account is located or the JSON content of the service account.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "datasetProject": {
+ "name": "datasetProject",
+ "description": "The project the dataset belongs to. This is only required if the dataset is not in the same project that the BigQuery job will run in. If no value is given, it will default to the configured project ID.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "operation": {
+ "name": "operation",
+ "description": "Type of write operation to perform. This can be set to Insert, Update or Upsert.",
+ "type": "string",
+ "required": false,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ },
+ "dataset": {
+ "name": "dataset",
+ "description": "The dataset to write to. A dataset is contained within a specific project. Datasets are top-level containers that are used to organize and control access to tables and views.",
+ "type": "string",
+ "required": true,
+ "macroSupported": true,
+ "macroEscapingEnabled": false,
+ "children": []
+ }
+ },
+ "description": "This sink writes to a BigQuery table. BigQuery is Google's serverless, highly scalable, enterprise data warehouse. Data is first written to a temporary location on Google Cloud Storage, then loaded into BigQuery from there.",
+ "selected": false
+ }
+ ],
+ "schedule": "0 1 */1 * *",
+ "engine": "spark",
+ "numOfRecordsPreview": 100,
+ "rangeRecordsPreview": {
+ "min": 1,
+ "max": "5000"
+ },
+ "maxConcurrentRuns": 1,
+ "pushdownEnabled": false,
+ "transformationPushdown": {}
+ },
+ "version": "15e6341c-af95-11ee-a080-000000f3bab4"
+}
\ No newline at end of file
diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline (1).json
similarity index 71%
rename from wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json
rename to wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline (1).json
index cf0973aa6..13febd85e 100644
--- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_datetime_wrangle-cdap-data-pipeline.json
+++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/parse_xmltojson_wrangler-cdap-data-pipeline (1).json
@@ -1,9 +1,9 @@
{
- "name": "parse_as_datetime",
+ "name": "parse_xmltojson_wrangler",
"description": "Data Pipeline Application",
"artifact": {
"name": "cdap-data-pipeline",
- "version": "6.10.0-SNAPSHOT",
+ "version": "6.11.0-SNAPSHOT",
"scope": "SYSTEM"
},
"config": {
@@ -38,14 +38,14 @@
"label": "BigQueryTable",
"artifact": {
"name": "google-cloud",
- "version": "0.23.0-SNAPSHOT",
+ "version": "0.24.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
"useConnection": "false",
- "dataset": "Wrangler",
- "table": "datetimeupd",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}",
+ "dataset": "Wrangler_Test",
+ "table": "xmlnews",
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata\",\"type\":[\"string\",\"null\"]}]}",
"project": "auto-detect",
"serviceAccountType": "filePath",
"serviceFilePath": "auto-detect",
@@ -55,18 +55,18 @@
"outputSchema": [
{
"name": "etlSchemaBody",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata\",\"type\":[\"string\",\"null\"]}]}"
}
],
"id": "BigQueryTable",
"type": "batchsource",
"label": "BigQueryTable",
"icon": "fa-plug",
- "$$hashKey": "object:532",
+ "$$hashKey": "object:609",
"isPluginAvailable": true,
"_uiPosition": {
"left": "496px",
- "top": "343px"
+ "top": "342px"
},
"_backendProperties": {
"schema": {
@@ -258,39 +258,40 @@
"label": "Wrangler",
"artifact": {
"name": "wrangler-transform",
- "version": "4.10.0-SNAPSHOT",
+ "version": "4.11.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
- "directives": "parse-as-datetime :timestamp \"yyyy-MM-dd'T'HH:mm:ssX'['z']'\"\ncurrent-datetime :create_date\ndatetime-to-timestamp :timestamp\nformat-datetime :create_date 'y'\nformat-date :timestamp yyyy-mm-dd\nrename timestamp timecolumn",
+ "directives": "parse-xml-to-json :xmldata 1\nsplit-email :email\ntext-distance block email email_account distance\ntext-metric longest-common-subsequence email email_account distance2\nwrite-as-json-object :email_domain distance,email_account\nstemming :email\nsplit-to-rows :email_account '0'\nrename :email_account id",
"field": "*",
"precondition": "false",
- "workspaceId": "7faca220-0705-4a1c-99d6-60d7dd657a0b",
- "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}",
+ "workspaceId": "4c1d141a-66f6-4b4c-bc5f-a92ca41bee42",
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}",
+ "expressionLanguage": "jexl",
"on-error": "fail-pipeline"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
- "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}"
}
],
"inputSchema": [
{
"name": "BigQueryTable",
- "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"create_date\",\"type\":\"string\"},{\"name\":\"timestamp\",\"type\":\"string\"}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata\",\"type\":[\"string\",\"null\"]}]}"
}
],
"id": "Wrangler",
"type": "transform",
"label": "Wrangler",
"icon": "icon-DataPreparation",
- "$$hashKey": "object:533",
+ "$$hashKey": "object:610",
"isPluginAvailable": true,
"_uiPosition": {
"left": "796px",
- "top": "343px"
+ "top": "342px"
},
"selected": false
},
@@ -302,7 +303,7 @@
"label": "BigQuery2",
"artifact": {
"name": "google-cloud",
- "version": "0.23.0-SNAPSHOT",
+ "version": "0.24.0-SNAPSHOT",
"scope": "SYSTEM"
},
"properties": {
@@ -310,39 +311,40 @@
"project": "auto-detect",
"serviceAccountType": "filePath",
"serviceFilePath": "auto-detect",
- "dataset": "Wrangler",
- "table": "dateupd",
+ "dataset": "Wrangler_Test",
+ "table": "fintab",
"operation": "insert",
"truncateTable": "false",
"allowSchemaRelaxation": "false",
"location": "US",
"createPartitionedTable": "false",
"partitioningType": "TIME",
+ "timePartitioningType": "DAY",
"partitionFilterRequired": "false",
- "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
- "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}"
}
],
"inputSchema": [
{
"name": "Wrangler",
- "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"id\",\"type\":[\"long\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"timecolumn\",\"type\":[\"string\",\"null\"]}]}"
+ "schema": "{\"type\":\"record\",\"name\":\"outputSchema\",\"fields\":[{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"xmldata_note\",\"type\":[{\"type\":\"record\",\"name\":\"xmldata_note69A9BFB19CE40D9BB21E66FF1DCB2823\",\"fields\":[{\"name\":\"heading\",\"type\":[\"string\",\"null\"]},{\"name\":\"from\",\"type\":[\"string\",\"null\"]},{\"name\":\"to\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"id\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_domain\",\"type\":[{\"type\":\"record\",\"name\":\"email_domain53E9571E3B0C6D8ACD29805625EDE284\",\"fields\":[{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_account\",\"type\":[\"string\",\"null\"]}]},\"null\"]},{\"name\":\"distance\",\"type\":[\"float\",\"null\"]},{\"name\":\"distance2\",\"type\":[\"float\",\"null\"]},{\"name\":\"email_porter\",\"type\":[{\"type\":\"array\",\"items\":[\"string\",\"null\"]},\"null\"]}]}"
}
],
"id": "BigQuery2",
"type": "batchsink",
"label": "BigQuery2",
"icon": "fa-plug",
- "$$hashKey": "object:534",
+ "$$hashKey": "object:611",
"isPluginAvailable": true,
"_uiPosition": {
"left": "1096px",
- "top": "343px"
+ "top": "342px"
},
"selected": false
}
@@ -358,5 +360,5 @@
"pushdownEnabled": false,
"transformationPushdown": {}
},
- "version": "6ab2074d-4e26-11ee-84d2-000000ba158f"
+ "version": "42a96af3-af8e-11ee-8372-00000073831c"
}
\ No newline at end of file