From 5bbdd61c772f8889e6bc25c7011a443210ffbe3d Mon Sep 17 00:00:00 2001 From: priyabhatnagar Date: Wed, 21 Jun 2023 22:25:25 +0530 Subject: [PATCH 1/2] Wrangler plugin e2e tests --- .github/workflows/e2e.yml | 105 +++++++++++++ pom.xml | 138 +++++++++++++++- wrangler-transform/pom.xml | 1 - .../features/Wrangler/RunTime.feature | 91 +++++++++++ .../common/stepsdesign/TestSetupHooks.java | 127 +++++++++++++++ .../common/stepsdesign/package-info.java | 20 +++ .../wrangler/actions/ValidationHelper.java | 122 +++++++++++++++ .../plugin/wrangler/actions/package-info.java | 20 +++ .../plugin/wrangler/runners/TestRunner.java | 36 +++++ .../wrangler/runners/TestRunnerRequired.java | 35 +++++ .../plugin/wrangler/runners/package-info.java | 20 +++ .../plugin/wrangler/stepsdesign/Wrangler.java | 41 +++++ .../wrangler/stepsdesign/package-info.java | 20 +++ .../Directive_Concatenate_titlecase | 5 + .../Directive_Fillempty_sendtoerror | 5 + .../Directive_copy_drop_count_setcolmn | 5 + .../BigQuery/BigQueryCreateTableQuery.txt | 2 + .../BigQuery/BigQueryInsertDataQuery.txt | 9 ++ .../resources/pluginParameters.properties | 17 ++ ...lumnTestPipeline-1-cdap-data-pipeline.json | 146 +++++++++++++++++ ...tive_Fill_empty_v2-cdap-data-pipeline.json | 147 ++++++++++++++++++ ...umn_concatenate_v1-cdap-data-pipeline.json | 147 ++++++++++++++++++ 22 files changed, 1254 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/e2e.yml create mode 100644 wrangler-transform/src/e2e-test/features/Wrangler/RunTime.feature create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/package-info.java create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/ValidationHelper.java create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/package-info.java create mode 100644 
wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunnerRequired.java create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/package-info.java create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/Wrangler.java create mode 100644 wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/package-info.java create mode 100644 wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase create mode 100644 wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror create mode 100644 wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn create mode 100644 wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt create mode 100644 wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt create mode 100644 wrangler-transform/src/e2e-test/resources/pluginParameters.properties create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 000000000..a8497c94d --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,105 @@ +# Copyright © 2023 Cask Data, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. 
You may obtain a copy of +# the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +# This workflow will build a Java project with Maven +# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven +# Note: Any changes to this workflow would be used only after merging into develop +name: Build e2e tests + +on: + push: + branches: [ develop ] + pull_request: + branches: [ develop ] + types: [ opened, synchronize, reopened, labeled ] + workflow_dispatch: + +jobs: + build: + runs-on: k8s-runner-e2e + # We allow builds: + # 1) When triggered manually + # 2) When it's a merge into a branch + # 3) For PRs that are labeled as build and + # - It's a code change + # - A build label was just added + # A bit complex, but prevents builds when other labels are manipulated + if: > + github.event_name == 'workflow_dispatch' + || github.event_name == 'push' + || (contains(github.event.pull_request.labels.*.name, 'build') + && (github.event.action != 'labeled' || github.event.label.name == 'build') + ) + strategy: + matrix: + module: [wrangler-transform] + fail-fast: false + + steps: + # Pinned 1.0.0 version + - uses: actions/checkout@v3 + with: + path: plugin + submodules: 'recursive' + ref: ${{ github.event.workflow_run.head_sha }} + + - uses: dorny/paths-filter@b2feaf19c27470162a626bd6fa8438ae5b263721 + if: github.event_name != 'workflow_dispatch' && github.event_name != 'push' + id: filter + with: + working-directory: plugin + filters: | + e2e-test: + - '${{ matrix.module }}/**/e2e-test/**' + + - name: Checkout e2e test repo + uses: actions/checkout@v3 + with: + repository: 
cdapio/cdap-e2e-tests + path: e2e + + - name: Cache + uses: actions/cache@v3 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ github.workflow }}-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven-${{ github.workflow }} + + - name: Run required e2e tests + if: github.event_name != 'workflow_dispatch' && github.event_name != 'push' && steps.filter.outputs.e2e-test == 'false' + run: python3 e2e/src/main/scripts/run_e2e_test.py --module ${{ matrix.module }} --testRunner TestRunnerRequired.java + + - name: Run all e2e tests + if: github.event_name == 'workflow_dispatch' || github.event_name == 'push' || steps.filter.outputs.e2e-test == 'true' + run: python3 e2e/src/main/scripts/run_e2e_test.py --module ${{ matrix.module }} + + - name: Upload report + uses: actions/upload-artifact@v3 + if: always() + with: + name: Cucumber report - ${{ matrix.module }} + path: ./**/target/cucumber-reports + + - name: Upload debug files + uses: actions/upload-artifact@v3 + if: always() + with: + name: Debug files - ${{ matrix.module }} + path: ./**/target/e2e-debug + + - name: Upload files to GCS + uses: google-github-actions/upload-cloud-storage@v0 + if: always() + with: + path: ./plugin + destination: e2e-tests-cucumber-reports/${{ github.event.repository.name }}/${{ github.ref }} + glob: '**/target/cucumber-reports/**' diff --git a/pom.xml b/pom.xml index 0907f6871..b43b5c99d 100644 --- a/pom.xml +++ b/pom.xml @@ -100,7 +100,7 @@ 1.106.0 2.6.2 2.0.0 - 20.0 + 31.0.1-jre 2.4.0 2.2 2.2.4 @@ -122,6 +122,7 @@ 1.11 1.7.15 0.4 + ${project.basedir}/src/test/java/ @@ -172,6 +173,7 @@ + ${testSourceLocation} @@ -186,7 +188,7 @@ org.apache.felix maven-bundle-plugin - 3.3.0 + 3.5.0 true @@ -397,7 +399,6 @@ releases - org.sonatype.plugins nexus-staging-maven-plugin @@ -429,6 +430,135 @@ + + e2e-tests + + src/e2e-test/java + TestRunner.java + + + + + src/e2e-test/resources + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.18.1 + + true + + + + + 
org.apache.maven.plugins + maven-failsafe-plugin + 3.0.0 + + + org.apache.maven.surefire + surefire-junit47 + 3.0.0 + + + + + ${TEST_RUNNER} + + + classes + 2 + 2 + true + + + + ${GOOGLE_APPLICATION_CREDENTIALS} + + + ${SERVICE_ACCOUNT_TYPE} + + + ${SERVICE_ACCOUNT_FILE_PATH} + + + ${SERVICE_ACCOUNT_JSON} + + + + + + + integration-test + verify + + + + + + + net.masterthought + maven-cucumber-reporting + 5.5.0 + + + + execution + verify + + generate + + + Cucumber Reports + target/cucumber-reports/advanced-reports + 1 + false + ${project.build.directory}/cucumber-reports + + **/*.json + + ${project.build.directory}/cucumber-reports + true + + + + + + + + + + com.google.guava + guava + ${guava.version} + + + + + + + org.slf4j + slf4j-api + 1.7.15 + + + + io.cdap.tests.e2e + cdap-e2e-framework + 0.3.0-SNAPSHOT + test + + + + ch.qos.logback + logback-classic + 1.2.8 + runtime + + + + - diff --git a/wrangler-transform/pom.xml b/wrangler-transform/pom.xml index c99f8e3d8..b09712c0d 100644 --- a/wrangler-transform/pom.xml +++ b/wrangler-transform/pom.xml @@ -215,5 +215,4 @@ - diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/RunTime.feature b/wrangler-transform/src/e2e-test/features/Wrangler/RunTime.feature new file mode 100644 index 000000000..ef5e2ba1e --- /dev/null +++ b/wrangler-transform/src/e2e-test/features/Wrangler/RunTime.feature @@ -0,0 +1,91 @@ +# Copyright © 2023 Cask Data, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+ +@Wrangler +Feature: Wrangler - Run time scenarios + + @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using the copy count and delete directives in the wrangler plugin + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_copy_drop_count_setcolmn" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_copy_drop_count_setcolmn" + + @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using the fill null and send to error directives in the wrangler plugin + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_Fillempty_sendtoerror" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the 
Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_Fillempty_sendtoerror" + + @BQ_SOURCE_TEST @BQ_SINK_TEST + Scenario: To verify User is able to run a pipeline using the Format,concatenate,title case and copy column directives in the wrangler plugin + Given Open Datafusion Project to configure pipeline + Then Click on the Plus Green Button to import the pipelines + Then Select the file for importing the pipeline for the plugin "Directive_Concatenate_titlecase" + Then Navigate to the properties page of plugin: "BigQueryTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Replace input plugin property: "table" with value: "bqSourceTable" + Then Click on the Get Schema button + Then Click on the Validate button + Then Close the Plugin Properties page + Then Navigate to the properties page of plugin: "BigQuery2" + Then Replace input plugin property: "table" with value: "bqTargetTable" + Then Replace input plugin property: "dataset" with value: "dataset" + Then Click on the Validate button + Then Close the Plugin Properties page + Then Rename the pipeline + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Close the pipeline logs + Then Validate The 
Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_Concatenate_titlecase" diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java new file mode 100644 index 000000000..624331825 --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java @@ -0,0 +1,127 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package io.cdap.plugin.common.stepsdesign; + +import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.StorageException; +import io.cdap.e2e.utils.BigQueryClient; +import io.cdap.e2e.utils.PluginPropertyUtils; +import io.cdap.e2e.utils.StorageClient; +import io.cucumber.java.After; +import io.cucumber.java.Before; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.Assert; +import stepsdesign.BeforeActions; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.sql.SQLException; +import java.util.NoSuchElementException; +import java.util.UUID; + +import static io.cdap.e2e.pages.locators.CdfGCSLocators.filePath; + +/** + * BQ test hooks. 
+ */
+public class TestSetupHooks {
+
+  @Before(order = 1, value = "@BQ_SINK_TEST")
+  public static void setTempTargetBQTableName() {
+    String bqTargetTableName = "E2E_TARGET_" + UUID.randomUUID().toString().replaceAll("-", "_");
+    PluginPropertyUtils.addPluginProp("bqTargetTable", bqTargetTableName);
+    BeforeActions.scenario.write("BQ Target table name - " + bqTargetTableName);
+  }
+
+  @After(order = 1, value = "@BQ_SINK_TEST")
+  public static void deleteTempTargetBQTable() throws IOException, InterruptedException {
+    String bqTargetTableName = PluginPropertyUtils.pluginProp("bqTargetTable");
+    try {
+      BigQueryClient.dropBqQuery(bqTargetTableName);
+      BeforeActions.scenario.write("BQ Target table - " + bqTargetTableName + " deleted successfully");
+      PluginPropertyUtils.removePluginProp("bqTargetTable");
+    } catch (BigQueryException e) {
+      if (e.getMessage().contains("Not found: Table")) { // an absent table is not a cleanup failure
+        BeforeActions.scenario.write("BQ Target Table " + bqTargetTableName + " does not exist");
+      } else {
+        Assert.fail(e.getMessage());
+      }
+    }
+  }
+
+  /**
+   * Create BigQuery table.
+   */
+  @Before(order = 1, value = "@BQ_SOURCE_TEST")
+  public static void createTempSourceBQTable() throws IOException, InterruptedException {
+    createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFile"),
+                                   PluginPropertyUtils.pluginProp("InsertBQDataQueryFile"));
+  }
+
+  @After(order = 1, value = "@BQ_SOURCE_TEST")
+  public static void deleteTempSourceBQTable() throws IOException, InterruptedException {
+    String bqSourceTable = PluginPropertyUtils.pluginProp("bqSourceTable");
+    if (StringUtils.isBlank(bqSourceTable)) {
+      return; // setup never registered a source table, so there is nothing to drop
+    }
+    try {
+      BigQueryClient.dropBqQuery(bqSourceTable);
+      BeforeActions.scenario.write("BQ source Table " + bqSourceTable + " deleted successfully");
+      PluginPropertyUtils.removePluginProp("bqSourceTable");
+    } catch (BigQueryException e) {
+      if (e.getMessage().contains("Not found: Table")) { // same tolerance as the sink-table cleanup
+        BeforeActions.scenario.write("BQ source Table " + bqSourceTable + " does not exist");
+      } else {
+        Assert.fail(e.getMessage());
+      }
+    }
+  }
+
+  private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, String bqInsertDataQueryFile)
+    throws IOException, InterruptedException {
+    String bqSourceTable = "E2E_SOURCE_" + UUID.randomUUID().toString().replaceAll("-", "_");
+    String createTableQuery = loadQuery(bqCreateTableQueryFile, "create table", bqSourceTable);
+    String insertDataQuery = loadQuery(bqInsertDataQueryFile, "insert data", bqSourceTable);
+    BigQueryClient.getSoleQueryResult(createTableQuery);
+    // Register the name as soon as the table exists so the @After hook drops it even if the insert fails.
+    PluginPropertyUtils.addPluginProp("bqSourceTable", bqSourceTable);
+    try {
+      BigQueryClient.getSoleQueryResult(insertDataQuery);
+    } catch (NoSuchElementException e) {
+      // Insert query does not return any record.
+      // Iterator on TableResult values in getSoleQueryResult method throws NoSuchElementException
+    }
+    BeforeActions.scenario.write("BQ Source Table " + bqSourceTable + " created successfully");
+  }
+
+  /** Reads classpath resource {@code queryFile} and substitutes the DATASET and TABLE_NAME placeholders. */
+  private static String loadQuery(String queryFile, String description, String bqSourceTable) {
+    try {
+      String query = new String(Files.readAllBytes(Paths.get(TestSetupHooks.class.getResource
+        ("/" + queryFile).toURI())), StandardCharsets.UTF_8);
+      return query.replace("DATASET", PluginPropertyUtils.pluginProp("dataset")).replace("TABLE_NAME", bqSourceTable);
+    } catch (Exception e) {
+      BeforeActions.scenario.write("Exception in reading " + queryFile + " - " + e.getMessage());
+      throw new AssertionError("Exception in BigQuery testdata prerequisite setup " +
+        "- error in reading " + description + " query file " + e.getMessage(), e);
+    }
+  }
+}
diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/package-info.java
new file mode 100644
index 000000000..63f8efabc
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+/**
+ * Package contains the stepDesign for common features.
+ */ +package io.cdap.plugin.common.stepsdesign; diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/ValidationHelper.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/ValidationHelper.java new file mode 100644 index 000000000..56908ffaf --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/ValidationHelper.java @@ -0,0 +1,122 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package io.cdap.plugin.wrangler.actions; + +import com.esotericsoftware.minlog.Log; +import com.google.cloud.bigquery.FieldValueList; +import com.google.cloud.bigquery.TableResult; +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import io.cdap.e2e.utils.BigQueryClient; +import io.cdap.e2e.utils.PluginPropertyUtils; +import io.cucumber.core.logging.Logger; +import io.cucumber.core.logging.LoggerFactory; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; + +/** + * Validation Helper. 
+ */
+public class ValidationHelper {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ValidationHelper.class);
+  static Gson gson = new Gson();
+
+  /**
+   * Compares the rows of BigQuery {@code table} with the JSON lines of classpath resource {@code fileName}.
+   */
+  public static boolean validateActualDataToExpectedData(String table, String fileName) throws IOException,
+    InterruptedException, URISyntaxException {
+    Map<String, JsonObject> bigQueryMap = new HashMap<>();
+    Map<String, JsonObject> fileMap = new HashMap<>();
+    Path importExpectedFile = Paths.get(ValidationHelper.class.getResource("/" + fileName).toURI());
+    getBigQueryTableData(table, bigQueryMap);
+    getFileData(importExpectedFile.toString(), fileMap);
+    return bigQueryMap.equals(fileMap);
+  }
+
+  /** Loads the JSON lines of {@code fileName} into {@code fileMap}, keyed by each row's ID value. */
+  public static void getFileData(String fileName, Map<String, JsonObject> fileMap) {
+    try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
+      String line;
+      while ((line = br.readLine()) != null) {
+        JsonObject json = gson.fromJson(line, JsonObject.class);
+        String idKey = getIdKey(json);
+        JsonElement idElement = idKey == null ? null : json.get(idKey);
+        // A row without a primitive ID value cannot be keyed; report it and leave it out.
+        if (idElement != null && idElement.isJsonPrimitive()) {
+          fileMap.put(idElement.getAsString(), json);
+        } else {
+          Log.error("Row skipped: no primitive ID value found");
+        }
+      }
+    } catch (IOException e) {
+      // Fail loudly: swallowing the error would leave fileMap incomplete and skew the comparison.
+      throw new java.io.UncheckedIOException("Error reading the file: " + fileName, e);
+    }
+  }
+
+  /**
+   * Loads every row of BigQuery table {@code targetTable} into {@code bigQueryMap}, keyed by its ID value.
+   */
+  private static void getBigQueryTableData(String targetTable, Map<String, JsonObject> bigQueryMap)
+    throws IOException, InterruptedException {
+    String dataset = PluginPropertyUtils.pluginProp("dataset");
+    String projectId = PluginPropertyUtils.pluginProp("projectId");
+    String selectQuery = "SELECT TO_JSON(t) FROM `" + projectId + "." + dataset + "." + targetTable + "` AS t";
+    TableResult result = BigQueryClient.getQueryResult(selectQuery);
+
+    for (FieldValueList row : result.iterateAll()) {
+      JsonObject json = gson.fromJson(row.get(0).getStringValue(), JsonObject.class);
+      String idKey = getIdKey(json);
+      JsonElement idElement = idKey == null ? null : json.get(idKey);
+      if (idElement != null && idElement.isJsonPrimitive()) {
+        bigQueryMap.put(idElement.getAsString(), json);
+      } else {
+        LOG.error("Row skipped: no primitive ID value found");
+      }
+    }
+  }
+
+  /**
+   * Retrieves the key for the ID element in the provided JSON object.
+   * Candidate keys are probed in a fixed order and the first one present wins.
+   *
+   * @param json The JSON object to search for the ID key; may be null.
+   * @return the first of "id", "ID", "Age", "Customer_id", "body" found in {@code json}, or null if none
+   */
+  private static String getIdKey(JsonObject json) {
+    if (json == null) {
+      // Gson returns null for an empty line; report it as "no ID key" instead of throwing.
+      return null;
+    }
+    // Same precedence as the former if/else chain.
+    for (String candidate : new String[] {"id", "ID", "Age", "Customer_id", "body"}) {
+      if (json.has(candidate)) {
+        return candidate;
+      }
+    }
+    return null;
+  }
+}
diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/package-info.java
new file mode 100644
index 000000000..4d0f2be85
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/actions/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */ + +/** + * Package contains the actions for Wrangler features. + */ +package io.cdap.plugin.wrangler.actions; diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java new file mode 100644 index 000000000..87b0d1aec --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunner.java @@ -0,0 +1,36 @@ +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package io.cdap.plugin.wrangler.runners; + +import io.cucumber.junit.Cucumber; +import io.cucumber.junit.CucumberOptions; +import org.junit.runner.RunWith; + +/** + * Test Runner to execute Wrangler plugin test cases. 
+ */
+@RunWith(Cucumber.class)
+@CucumberOptions(
+  features = {"src/e2e-test/features"},
+  glue = {"stepsdesign", "io.cdap.plugin.common.stepsdesign", "io.cdap.plugin.wrangler.stepsdesign",
+    "io.cdap.plugin.wrangler.locators"}, // NOTE(review): package is not added by this patch - confirm
+  tags = {"@Wrangler"}, // the tag placed on the Wrangler feature itself
+  plugin = {"pretty", "html:target/cucumber-html-report/wrangler", // names distinct from TestRunnerRequired
+    "json:target/cucumber-reports/cucumber-wrangler.json",
+    "junit:target/cucumber-reports/cucumber-wrangler.xml"}
+)
+public class TestRunner { // intentionally empty: configured entirely by the annotations above
+}
diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunnerRequired.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunnerRequired.java
new file mode 100644
index 000000000..868e067ff
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/TestRunnerRequired.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package io.cdap.plugin.wrangler.runners;
+
+import io.cucumber.junit.Cucumber;
+import io.cucumber.junit.CucumberOptions;
+import org.junit.runner.RunWith;
+
+/**
+ * Test Runner to execute Wrangler plugin test cases.
+ */
+@RunWith(Cucumber.class)
+@CucumberOptions(
+  features = {"src/e2e-test/features"},
+  glue = {"stepsdesign", "io.cdap.plugin.common.stepsdesign", "io.cdap.plugin.wrangler.stepsdesign"},
+  tags = {"@Wrangler_Required"}, // NOTE(review): RunTime.feature is tagged @Wrangler only - confirm
+  plugin = {"pretty", "html:target/cucumber-html-report/wrangler-required", // HTML report directory
+    "json:target/cucumber-reports/cucumber-wrangler-required.json", // JSON report
+    "junit:target/cucumber-reports/cucumber-wrangler-required.xml"} // JUnit XML report
+)
+public class TestRunnerRequired { // chosen by e2e.yml via --testRunner TestRunnerRequired.java
+}
diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/package-info.java
new file mode 100644
index 000000000..b90a7504c
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/runners/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+/**
+ * Package contains the runners for Wrangler features.
+ */ +package io.cdap.plugin.wrangler.runners; diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/Wrangler.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/Wrangler.java new file mode 100644 index 000000000..9d51ea34c --- /dev/null +++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/Wrangler.java @@ -0,0 +1,41 @@ +package io.cdap.plugin.wrangler.stepsdesign; +/* + * Copyright © 2023 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import io.cdap.e2e.utils.CdfHelper; +import io.cdap.e2e.utils.PluginPropertyUtils; +import io.cdap.plugin.wrangler.actions.ValidationHelper; +import io.cucumber.java.en.Then; +import org.junit.Assert; + +import java.io.IOException; +import java.net.URISyntaxException; + +/** + * Step Design to execute Wrangler plugin test cases. 
+ */
+public class Wrangler implements CdfHelper {
+  // Passes the "bqTargetTable" plugin property and the plugin property keyed by expectedFile to ValidationHelper
+  // and fails the step when it returns false; the assertion message is only shown by JUnit on that failure.
+  @Then("Validate The Data From BQ To BQ With Actual And Expected File for: {string}")
+  public void validateTheDataFromBQToBQWithActualAndExpectedFileFor(String expectedFile) throws IOException,
+    InterruptedException, URISyntaxException {
+    boolean recordsMatched = ValidationHelper.validateActualDataToExpectedData(
+      PluginPropertyUtils.pluginProp("bqTargetTable"),
+      PluginPropertyUtils.pluginProp(expectedFile));
+    Assert.assertTrue("Value of records in actual and expected file is not equal", recordsMatched);
+  }
+}
diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/package-info.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/package-info.java
new file mode 100644
index 000000000..3e212c76c
--- /dev/null
+++ b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/wrangler/stepsdesign/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+/**
+ * Package contains the stepDesign for Wrangler features.
+ */ +package io.cdap.plugin.wrangler.stepsdesign; diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase new file mode 100644 index 000000000..89d12e7da --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase @@ -0,0 +1,5 @@ +{"Address":"Address1","Age":"20","Col1":"testdata","Customer_id":"1","First_name":"Shelby","ID":"0","Last_name":"Shelbylastname","body":"Test","create_date":"2021-01-28","email":"Test@gmail.com","email_copy":"WrangleTest@gmail.com","name":"Surya","update_date":"2021-01-30"} +{"Address":"Address2","Age":"21","Col1":"testdata","Customer_id":"2","First_name":"Shelby","ID":"1","Last_name":"Shelbylastname","body":"User","create_date":"2021-01-27","email":"Test1@gmail.com","email_copy":"WrangleTest1@gmail.com","name":"Ragini","update_date":"2021-01-30"} +{"Address":"Address3","Age":"22","Col1":"testdata","Customer_id":"3","First_name":"Shelby","ID":"2","Last_name":"Shelbylastname","body":"Data","create_date":"2021-01-26","email":"Test2@gmail.com","email_copy":"WrangleTest2@gmail.com","name":"Sahil","update_date":"2021-01-30"} +{"Address":"Address4","Age":"23","Col1":"testdata","Customer_id":"4","First_name":"Shelby","ID":"3","Last_name":"Shelbylastname","body":"Content","create_date":"2021-01-25","email":"Test3@gmail.com","email_copy":"WrangleTest3@gmail.com","name":"","update_date":"2021-01-26"} +{"Address":"Address5","Age":"24","Col1":"testdata","Customer_id":"5","First_name":"Shelby","ID":"4","Last_name":"Shelbylastname","body":"Files","create_date":"2021-01-23","email":"Test4@gmail.com","email_copy":"WrangleTest4@gmail.com","name":"Ashish","update_date":"2021-01-24"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror 
b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror new file mode 100644 index 000000000..4c81bd8ea --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror @@ -0,0 +1,5 @@ +{"Address":"Address1","Age":"20","Col1":"testdata","Customer_id":"1","First_name":"Shelby","ID":"0","Last_name":"Shelbylastname","body":"Test","create_date":"2021-01-28","email":"Test@gmail.com","name":"Surya","update_date":"2021-01-30"} +{"Address":"Address2","Age":"21","Col1":"testdata","Customer_id":"2","First_name":"Shelby","ID":"1","Last_name":"Shelbylastname","body":"User","create_date":"2021-01-27","email":"Test1@gmail.com","name":"Ragini","update_date":"2021-01-30"} +{"Address":"Address3","Age":"22","Col1":"testdata","Customer_id":"3","First_name":"Shelby","ID":"2","Last_name":"Shelbylastname","body":"Data","create_date":"2021-01-26","email":"Test2@gmail.com","name":"Sahil","update_date":"2021-01-30"} +{"Address":"Address4","Age":"23","Col1":"testdata","Customer_id":"4","First_name":"Shelby","ID":"3","Last_name":"Shelbylastname","body":"Content","create_date":"2021-01-25","email":"Test3@gmail.com","name":"Shubhangi","update_date":"2021-01-26"} +{"Address":"Address5","Age":"24","Col1":"testdata","Customer_id":"5","First_name":"Shelby","ID":"4","Last_name":"Shelbylastname","body":"Files","create_date":"2021-01-23","email":"Test4@gmail.com","name":"Ashish","update_date":"2021-01-24"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn new file mode 100644 index 000000000..ada363557 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn @@ -0,0 +1,5 @@ 
+{"Age":"20","Col1":"testdata","Customer_id":"1","Customer_id_copy":"1","First_name":"Shelby","ID":"0","Last_name":"Shelbylastname","body":"Test","create_date":"2021-01-28","email":"Test@gmail.com","name":"Surya","name_count":5,"update_date":"2021-01-30"} +{"Age":"21","Col1":"testdata","Customer_id":"2","Customer_id_copy":"2","First_name":"Shelby","ID":"1","Last_name":"Shelbylastname","body":"User","create_date":"2021-01-27","email":"Test1@gmail.com","name":"Ragini","name_count":6,"update_date":"2021-01-30"} +{"Age":"22","Col1":"testdata","Customer_id":"3","Customer_id_copy":"3","First_name":"Shelby","ID":"2","Last_name":"Shelbylastname","body":"Data","create_date":"2021-01-26","email":"Test2@gmail.com","name":"Sahil","name_count":5,"update_date":"2021-01-30"} +{"Age":"23","Col1":"testdata","Customer_id":"4","Customer_id_copy":"4","First_name":"Shelby","ID":"3","Last_name":"Shelbylastname","body":"Content","create_date":"2021-01-25","email":"Test3@gmail.com","name":"","name_count":0,"update_date":"2021-01-26"} +{"Age":"24","Col1":"testdata","Customer_id":"5","Customer_id_copy":"5","First_name":"Shelby","ID":"4","Last_name":"Shelbylastname","body":"Files","create_date":"2021-01-23","email":"Test4@gmail.com","name":"Ashish","name_count":6,"update_date":"2021-01-24"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt new file mode 100644 index 000000000..54cf3655d --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQuery.txt @@ -0,0 +1,2 @@ +create table `DATASET.TABLE_NAME` (ID STRING, name STRING, email STRING, Customer_id STRING, First_name STRING, +Last_name STRING, Age STRING, Address STRING, Col1 STRING, create_date STRING, update_date STRING, body STRING) diff --git 
a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt new file mode 100644 index 000000000..747e20095 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt @@ -0,0 +1,9 @@ +insert into `DATASET.TABLE_NAME` (ID, name, email, Customer_id, First_name, Last_name, Age, Address, Col1, +create_date, update_date, body) values +('0','Surya','Test@gmail.com','1','Shelby','Shelbylastname', '20', 'Address1','testdata', '2021-01-28', '2021-01-30', 'Test'), +('1','Ragini','Test1@gmail.com','2','Shelby','Shelbylastname', '21', 'Address2','testdata', '2021-01-27', '2021-01-30', 'User'), +('2','Sahil','Test2@gmail.com','3','Shelby','Shelbylastname', '22', 'Address3','testdata', '2021-01-26', '2021-01-30', 'Data'), +('3','','Test3@gmail.com','4','Shelby','Shelbylastname', '23', 'Address4','testdata', '2021-01-25', '2021-01-26', 'Content'), +('4','Ashish','Test4@gmail.com','5','Shelby','Shelbylastname', '24', 'Address5','testdata', '2021-01-23', '2021-01-24', 'Files'), +( '5' ,' Jojo','Test4@gmail.com ','6','Shelby','Shelbylastname', '25', 'Address6','testdata', '2021-01-22', '2021-01-23', 'Testuser'); + diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties new file mode 100644 index 000000000..f1311c9b4 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -0,0 +1,17 @@ +#json file path +Directive_copy_drop_count_setcolmn=testData/Wrangler\ + /Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json +Directive_Fillempty_sendtoerror=testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json +Directive_Concatenate_titlecase=testData/Wrangler\ + /Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json 
+bqSourceTable=dummy +#bq queries file path +CreateBQTableQueryFile=BQtesdata/BigQuery/BigQueryCreateTableQuery.txt +InsertBQDataQueryFile=BQtesdata/BigQuery/BigQueryInsertDataQuery.txt +#bq properties +projectId=cdf-athena +dataset=test_automation +#expectedBQFiles +ExpectedDirective_copy_drop_count_setcolmn=BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn +ExpectedDirective_Concatenate_titlecase=BQValidationExpectedFiles/Directive_Concatenate_titlecase +ExpectedDirective_Fillempty_sendtoerror=BQValidationExpectedFiles/Directive_Fillempty_sendtoerror diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json new file mode 100644 index 000000000..33285fb48 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json @@ -0,0 +1,146 @@ +{ + "name": "Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.9.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.22.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": 
"DATASET", + "table": "wranglersampletable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "enableQueryingViews": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "datasetProject": "cdf-athena" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug" + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.9.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + 
"field": "*", + "precondition": "false", + "directives": "parse-as-csv :body ',' true\ncopy :Customer_id :Customer_id_copy true\ndrop :Address\nset-column :name_count string:length(name)", + "on-error": "fail-pipeline", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}", + "workspaceId": "8a6750d6-f2ee-4fd8-b78d-5f1b11e3301d" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}", + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation" + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.22.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "cdf-athena", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "referenceName": "test", + "dataset": "Wrangler", + "table": "bqTargetTable1", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false" + } + }, + "outputSchema": "", + "inputSchema": [ + { + "name": "Wrangler", + "schema": 
"{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug" + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + }, + "version": "-SNAPSHOT" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json new file mode 100644 index 000000000..f797e1962 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json @@ -0,0 +1,147 @@ +{ + "name": "Wrangler_Directive_Fill_empty_v2", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.9.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": 
"Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.22.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "test_automation", + "table": "E2E_SOURCE_da362", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "enableQueryingViews": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "datasetProject": "cdf-athena" + } + }, + "outputSchema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug" + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.9.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "field": "*", + "precondition": "false", + "directives": "parse-as-csv :body ',' true\nfill-null-or-empty :name 'Shubhangi'\nsend-to-error empty(Address)", + "on-error": "fail-pipeline", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}", + "workspaceId": "8a6750d6-f2ee-4fd8-b78d-5f1b11e3301d" + } 
+ }, + "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}", + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation" + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.22.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "cdf-athena", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "referenceName": "test", + 
"dataset": "test_automation", + "table": "E2E_TARGET_25", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}", + "inputSchema": [ + { + "name": "Wrangler", + "schema": 
"{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug" + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + }, + "version": "-SNAPSHOT" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json new file mode 100644 index 000000000..47f401659 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json @@ -0,0 +1,147 @@ +{ + "name": "Wrangler_Directive_Format_copycolumn_concatenate_v1", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.9.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": 
"BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.22.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "test_automation", + "table": "E2E_SOURCE_da362", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "enableQueryingViews": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "datasetProject": "cdf-athena" + } + }, + "outputSchema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug" + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.9.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "field": "*", + "precondition": "false", + "directives": "parse-as-csv :body ',' true\ntitlecase :name\nset-column :email_copy 'Wrangle' + email", + "on-error": "fail-pipeline", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}", + 
"workspaceId": "8a6750d6-f2ee-4fd8-b78d-5f1b11e3301d" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}", + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation" + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.22.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": 
"cdf-athena", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "referenceName": "test", + "dataset": "test_automation", + "table": "E2E_TARGET_07", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}", + "inputSchema": [ + { + "name": "Wrangler", + 
"schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug" + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "description": "Data Pipeline Application", + "maxConcurrentRuns": 1 + }, + "version": "-SNAPSHOT" +} \ No newline at end of file From bb39338cc7ab27a6f6d4d2e00b7b84614282af01 Mon Sep 17 00:00:00 2001 From: AnkitCLI Date: Tue, 22 Aug 2023 15:39:14 +0530 Subject: [PATCH 2/2] Addressed comments --- .../Directive_Concatenate_titlecase | 10 +- .../Directive_Fillempty_sendtoerror | 9 +- .../Directive_copy_drop_count_setcolmn | 10 +- .../BigQuery/BigQueryInsertDataQuery.txt | 11 +- .../resources/pluginParameters.properties | 6 +- ...lumnTestPipeline-1-cdap-data-pipeline.json | 146 ----- ...columnTestPipeline-cdap-data-pipeline.json | 436 +++++++++++++++ ...rective_Fill_empty-cdap-data-pipeline.json | 179 ++++++ ...tive_Fill_empty_v2-cdap-data-pipeline.json | 147 ----- ...umn_concatenate_v1-cdap-data-pipeline.json | 147 ----- ...etcolumn_titlecase-cdap-data-pipeline.json | 527 ++++++++++++++++++ 11 files changed, 1164 insertions(+), 464 deletions(-) delete mode 100644 
wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-cdap-data-pipeline.json create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty-cdap-data-pipeline.json delete mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json delete mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json create mode 100644 wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_setcolumn_titlecase-cdap-data-pipeline.json diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase index 89d12e7da..27bea5daa 100644 --- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Concatenate_titlecase @@ -1,5 +1,5 @@ -{"Address":"Address1","Age":"20","Col1":"testdata","Customer_id":"1","First_name":"Shelby","ID":"0","Last_name":"Shelbylastname","body":"Test","create_date":"2021-01-28","email":"Test@gmail.com","email_copy":"WrangleTest@gmail.com","name":"Surya","update_date":"2021-01-30"} -{"Address":"Address2","Age":"21","Col1":"testdata","Customer_id":"2","First_name":"Shelby","ID":"1","Last_name":"Shelbylastname","body":"User","create_date":"2021-01-27","email":"Test1@gmail.com","email_copy":"WrangleTest1@gmail.com","name":"Ragini","update_date":"2021-01-30"} 
-{"Address":"Address3","Age":"22","Col1":"testdata","Customer_id":"3","First_name":"Shelby","ID":"2","Last_name":"Shelbylastname","body":"Data","create_date":"2021-01-26","email":"Test2@gmail.com","email_copy":"WrangleTest2@gmail.com","name":"Sahil","update_date":"2021-01-30"} -{"Address":"Address4","Age":"23","Col1":"testdata","Customer_id":"4","First_name":"Shelby","ID":"3","Last_name":"Shelbylastname","body":"Content","create_date":"2021-01-25","email":"Test3@gmail.com","email_copy":"WrangleTest3@gmail.com","name":"","update_date":"2021-01-26"} -{"Address":"Address5","Age":"24","Col1":"testdata","Customer_id":"5","First_name":"Shelby","ID":"4","Last_name":"Shelbylastname","body":"Files","create_date":"2021-01-23","email":"Test4@gmail.com","email_copy":"WrangleTest4@gmail.com","name":"Ashish","update_date":"2021-01-24"} \ No newline at end of file +{"Address":"Address1","Age":20,"Col1":"Testing","Customer_id":1,"First_name":"Shelby","ID":0,"Last_name":"Shelbylastname","body":"Test abc","create_date":"2021-01-28","email":"Test@gmail.com","email_copy":"WrangleTest@gmail.com","name":"Surya","update_date":"2021-01-30"} +{"Address":"Address2","Age":21,"Col1":"Debugging","Customer_id":2,"First_name":"Shelby","ID":1,"Last_name":"Shelbylastname","body":"User def","create_date":"2021-01-27","email":"Test1@gmail.com","email_copy":"WrangleTest1@gmail.com","name":"Ragini","update_date":"2021-01-30"} +{"Address":"Address3","Age":22,"Col1":"Manual","Customer_id":3,"First_name":"Shelby","ID":2,"Last_name":"Shelbylastname","body":"Data ghi","create_date":"2021-01-26","email":"Test2@gmail.com","email_copy":"WrangleTest2@gmail.com","name":"Sahil","update_date":"2021-01-30"} +{"Address":"Address4","Age":23,"Col1":"Hello","Customer_id":4,"First_name":"Shelby","ID":3,"Last_name":"Shelbylastname","body":"Content jkl","create_date":"2021-01-25","email":"Test3@gmail.com","email_copy":"WrangleTest3@gmail.com","name":"gaurav","update_date":"2021-01-26"} 
+{"Address":"","Age":24,"Col1":"Ronie","Customer_id":5,"First_name":"Shelby","ID":4,"Last_name":"Shelbylastname","body":"Files mno","create_date":"2021-01-23","email":"Test4@gmail.com","email_copy":"WrangleTest4@gmail.com","name":"","update_date":"2021-01-24"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror index 4c81bd8ea..88f841cf4 100644 --- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_Fillempty_sendtoerror @@ -1,5 +1,4 @@ -{"Address":"Address1","Age":"20","Col1":"testdata","Customer_id":"1","First_name":"Shelby","ID":"0","Last_name":"Shelbylastname","body":"Test","create_date":"2021-01-28","email":"Test@gmail.com","name":"Surya","update_date":"2021-01-30"} -{"Address":"Address2","Age":"21","Col1":"testdata","Customer_id":"2","First_name":"Shelby","ID":"1","Last_name":"Shelbylastname","body":"User","create_date":"2021-01-27","email":"Test1@gmail.com","name":"Ragini","update_date":"2021-01-30"} -{"Address":"Address3","Age":"22","Col1":"testdata","Customer_id":"3","First_name":"Shelby","ID":"2","Last_name":"Shelbylastname","body":"Data","create_date":"2021-01-26","email":"Test2@gmail.com","name":"Sahil","update_date":"2021-01-30"} -{"Address":"Address4","Age":"23","Col1":"testdata","Customer_id":"4","First_name":"Shelby","ID":"3","Last_name":"Shelbylastname","body":"Content","create_date":"2021-01-25","email":"Test3@gmail.com","name":"Shubhangi","update_date":"2021-01-26"} -{"Address":"Address5","Age":"24","Col1":"testdata","Customer_id":"5","First_name":"Shelby","ID":"4","Last_name":"Shelbylastname","body":"Files","create_date":"2021-01-23","email":"Test4@gmail.com","name":"Ashish","update_date":"2021-01-24"} \ No newline at end of 
file +{"Address":"Address1","Age":20,"Col1":"testing","Customer_id":1,"First_name":"Shelby","ID":0,"Last_name":"Shelbylastname","body":"Test abc","create_date":"2021-01-28","email":"Test@gmail.com","name":"Surya","update_date":"2021-01-30"} +{"Address":"Address2","Age":21,"Col1":"debugging","Customer_id":2,"First_name":"Shelby","ID":1,"Last_name":"Shelbylastname","body":"User def","create_date":"2021-01-27","email":"Test1@gmail.com","name":"Ragini","update_date":"2021-01-30"} +{"Address":"Address3","Age":22,"Col1":"manual","Customer_id":3,"First_name":"Shelby","ID":2,"Last_name":"Shelbylastname","body":"Data ghi","create_date":"2021-01-26","email":"Test2@gmail.com","name":"Sahil","update_date":"2021-01-30"} +{"Address":"Address4","Age":23,"Col1":"hello","Customer_id":4,"First_name":"Shelby","ID":3,"Last_name":"Shelbylastname","body":"Content jkl","create_date":"2021-01-25","email":"Test3@gmail.com","name":"gaurav","update_date":"2021-01-26"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn index ada363557..56272231d 100644 --- a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn +++ b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_copy_drop_count_setcolmn @@ -1,5 +1,5 @@ -{"Age":"20","Col1":"testdata","Customer_id":"1","Customer_id_copy":"1","First_name":"Shelby","ID":"0","Last_name":"Shelbylastname","body":"Test","create_date":"2021-01-28","email":"Test@gmail.com","name":"Surya","name_count":5,"update_date":"2021-01-30"} -{"Age":"21","Col1":"testdata","Customer_id":"2","Customer_id_copy":"2","First_name":"Shelby","ID":"1","Last_name":"Shelbylastname","body":"User","create_date":"2021-01-27","email":"Test1@gmail.com","name":"Ragini","name_count":6,"update_date":"2021-01-30"} 
-{"Age":"22","Col1":"testdata","Customer_id":"3","Customer_id_copy":"3","First_name":"Shelby","ID":"2","Last_name":"Shelbylastname","body":"Data","create_date":"2021-01-26","email":"Test2@gmail.com","name":"Sahil","name_count":5,"update_date":"2021-01-30"} -{"Age":"23","Col1":"testdata","Customer_id":"4","Customer_id_copy":"4","First_name":"Shelby","ID":"3","Last_name":"Shelbylastname","body":"Content","create_date":"2021-01-25","email":"Test3@gmail.com","name":"","name_count":0,"update_date":"2021-01-26"} -{"Age":"24","Col1":"testdata","Customer_id":"5","Customer_id_copy":"5","First_name":"Shelby","ID":"4","Last_name":"Shelbylastname","body":"Files","create_date":"2021-01-23","email":"Test4@gmail.com","name":"Ashish","name_count":6,"update_date":"2021-01-24"} \ No newline at end of file +{"Age":20,"Col1":"testing","Customer_id":1,"First_name":"Shelby","ID":0,"Last_name":"Shelbylastname","body":"Test abc","body_copy":"Test abc","body_copy_1":"Test","body_copy_2":"abc","create_date":"2021-01-28","email":"Test@gmail.com","name":"Surya","name_count":5,"update_date":"2021-01-30"} +{"Age":21,"Col1":"debugging","Customer_id":2,"First_name":"Shelby","ID":1,"Last_name":"Shelbylastname","body":"User def","body_copy":"User def","body_copy_1":"User","body_copy_2":"def","create_date":"2021-01-27","email":"Test1@gmail.com","name":"Ragini","name_count":6,"update_date":"2021-01-30"} +{"Age":22,"Col1":"manual","Customer_id":3,"First_name":"Shelby","ID":2,"Last_name":"Shelbylastname","body":"Data ghi","body_copy":"Data ghi","body_copy_1":"Data","body_copy_2":"ghi","create_date":"2021-01-26","email":"Test2@gmail.com","name":"Sahil","name_count":5,"update_date":"2021-01-30"} +{"Age":23,"Col1":"hello","Customer_id":4,"First_name":"Shelby","ID":3,"Last_name":"Shelbylastname","body":"Content jkl","body_copy":"Content jkl","body_copy_1":"Content","body_copy_2":"jkl","create_date":"2021-01-25","email":"Test3@gmail.com","name":"gaurav","name_count":6,"update_date":"2021-01-26"} 
+{"Age":24,"Col1":"ronie","Customer_id":5,"First_name":"Shelby","ID":4,"Last_name":"Shelbylastname","body":"Files mno","body_copy":"Files mno","body_copy_1":"Files","body_copy_2":"mno","create_date":"2021-01-23","email":"Test4@gmail.com","name":"","name_count":0,"update_date":"2021-01-24"} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt index 747e20095..b51c4e460 100644 --- a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt +++ b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQuery.txt @@ -1,9 +1,8 @@ insert into `DATASET.TABLE_NAME` (ID, name, email, Customer_id, First_name, Last_name, Age, Address, Col1, create_date, update_date, body) values -('0','Surya','Test@gmail.com','1','Shelby','Shelbylastname', '20', 'Address1','testdata', '2021-01-28', '2021-01-30', 'Test'), -('1','Ragini','Test1@gmail.com','2','Shelby','Shelbylastname', '21', 'Address2','testdata', '2021-01-27', '2021-01-30', 'User'), -('2','Sahil','Test2@gmail.com','3','Shelby','Shelbylastname', '22', 'Address3','testdata', '2021-01-26', '2021-01-30', 'Data'), -('3','','Test3@gmail.com','4','Shelby','Shelbylastname', '23', 'Address4','testdata', '2021-01-25', '2021-01-26', 'Content'), -('4','Ashish','Test4@gmail.com','5','Shelby','Shelbylastname', '24', 'Address5','testdata', '2021-01-23', '2021-01-24', 'Files'), -( '5' ,' Jojo','Test4@gmail.com ','6','Shelby','Shelbylastname', '25', 'Address6','testdata', '2021-01-22', '2021-01-23', 'Testuser'); +('0','Surya','Test@gmail.com','1','Shelby','Shelbylastname', '20', 'Address1','testing', '2021-01-28', '2021-01-30', 'Test abc'), +('1','Ragini','Test1@gmail.com','2','Shelby','Shelbylastname', '21', 'Address2','debugging', '2021-01-27', '2021-01-30', 'User def'), 
+('2','Sahil','Test2@gmail.com','3','Shelby','Shelbylastname', '22', 'Address3','manual', '2021-01-26', '2021-01-30', 'Data ghi'), +('3','gaurav','Test3@gmail.com','4','Shelby','Shelbylastname', '23', 'Address4','hello', '2021-01-25', '2021-01-26', 'Content jkl'), +('4','','Test4@gmail.com','5','Shelby','Shelbylastname', '24', '','ronie', '2021-01-23', '2021-01-24', 'Files mno'); diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties index f1311c9b4..5ac7d6fac 100644 --- a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties +++ b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties @@ -1,9 +1,9 @@ #json file path Directive_copy_drop_count_setcolmn=testData/Wrangler\ - /Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json -Directive_Fillempty_sendtoerror=testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json + /Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-cdap-data-pipeline.json +Directive_Fillempty_sendtoerror=testData/Wrangler/Wrangler_Directive_Fill_empty-cdap-data-pipeline.json Directive_Concatenate_titlecase=testData/Wrangler\ - /Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json + /Wrangler_Directive_Format_setcolumn_titlecase-cdap-data-pipeline.json bqSourceTable=dummy #bq queries file path CreateBQTableQueryFile=BQtesdata/BigQuery/BigQueryCreateTableQuery.txt diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json deleted file mode 100644 index 33285fb48..000000000 --- 
a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1-cdap-data-pipeline.json +++ /dev/null @@ -1,146 +0,0 @@ -{ - "name": "Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-1", - "description": "Data Pipeline Application", - "artifact": { - "name": "cdap-data-pipeline", - "version": "6.9.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "config": { - "resources": { - "memoryMB": 2048, - "virtualCores": 1 - }, - "driverResources": { - "memoryMB": 2048, - "virtualCores": 1 - }, - "connections": [ - { - "from": "BigQueryTable", - "to": "Wrangler" - }, - { - "from": "Wrangler", - "to": "BigQuery2" - } - ], - "postActions": [], - "properties": {}, - "processTimingEnabled": true, - "stageLoggingEnabled": true, - "stages": [ - { - "name": "BigQueryTable", - "plugin": { - "name": "BigQueryTable", - "type": "batchsource", - "label": "BigQueryTable", - "artifact": { - "name": "google-cloud", - "version": "0.22.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "useConnection": "false", - "dataset": "DATASET", - "table": "wranglersampletable", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", - "enableQueryingViews": "false", - "project": "auto-detect", - "serviceAccountType": "filePath", - "serviceFilePath": "auto-detect", - "datasetProject": "cdf-athena" 
- } - }, - "outputSchema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", - "id": "BigQueryTable", - "type": "batchsource", - "label": "BigQueryTable", - "icon": "fa-plug" - }, - { - "name": "Wrangler", - "plugin": { - "name": "Wrangler", - "type": "transform", - "label": "Wrangler", - "artifact": { - "name": "wrangler-transform", - "version": "4.9.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "field": "*", - "precondition": "false", - "directives": "parse-as-csv :body ',' true\ncopy :Customer_id :Customer_id_copy true\ndrop :Address\nset-column :name_count string:length(name)", - "on-error": "fail-pipeline", - "schema": 
"{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}", - "workspaceId": "8a6750d6-f2ee-4fd8-b78d-5f1b11e3301d" - } - }, - "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}", - "inputSchema": [ - { - "name": "BigQueryTable", - "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "id": "Wrangler", - "type": "transform", - "label": "Wrangler", - "icon": "icon-DataPreparation" - }, - { - "name": "BigQuery2", - "plugin": { - "name": "BigQueryTable", - "type": "batchsink", - "label": "BigQuery2", - "artifact": { - "name": "google-cloud", - "version": "0.22.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "useConnection": "false", - "project": "cdf-athena", - "serviceAccountType": "filePath", - "serviceFilePath": "auto-detect", - "referenceName": "test", - "dataset": "Wrangler", - "table": "bqTargetTable1", - "operation": "insert", - "truncateTable": "false", - "allowSchemaRelaxation": "false", - "location": "US", - "createPartitionedTable": "false", - "partitioningType": "TIME", - "partitionFilterRequired": "false" - } - }, - "outputSchema": "", - "inputSchema": [ - { - "name": "Wrangler", - "schema": 
"{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}" - } - ], - "id": "BigQuery2", - "type": "batchsink", - "label": "BigQuery2", - "icon": "fa-plug" - } - ], - "schedule": "0 1 */1 * *", - "engine": "spark", - "numOfRecordsPreview": 100, - "rangeRecordsPreview": { - "min": 1, - "max": "5000" - }, - "description": "Data Pipeline Application", - "maxConcurrentRuns": 1 - }, - "version": "-SNAPSHOT" -} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-cdap-data-pipeline.json new file mode 100644 index 000000000..655da72c7 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline-cdap-data-pipeline.json @@ -0,0 +1,436 @@ +{ + "name": "Wrangler_Directive_Copy_Count_DeletecolumnTestPipeline", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 
2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "newupdatedtable", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:352", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "327.5px" + }, + "_backendProperties": { + "schema": { + "name": "schema", + "description": "The schema of the table to read.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "viewMaterializationDataset": { + "name": "viewMaterializationDataset", + "description": "The dataset in the specified project where the temporary table should be created. Defaults to the same dataset in which the table is located.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "enableQueryingViews": { + "name": "enableQueryingViews", + "description": "Whether to allow querying views. 
Since BigQuery views are not materialized by default, querying them may have a performance overhead.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountJSON": { + "name": "serviceAccountJSON", + "description": "Content of the service account file.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionTo": { + "name": "partitionTo", + "description": "It's inclusive partition end date. It should be a String with format \"yyyy-MM-dd\". This value is ignored if the table does not support partitioning.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "cmekKey": { + "name": "cmekKey", + "description": "The GCP customer managed encryption key (CMEK) name used to encrypt data written to any bucket, dataset or table created by the plugin. If the bucket, dataset or table already exists, this is ignored. More information can be found at https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "useConnection": { + "name": "useConnection", + "description": "Whether to use an existing connection.", + "type": "boolean", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "project": { + "name": "project", + "description": "Google Cloud Project ID. It can be found on the Dashboard in the Google Cloud Platform Console.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "viewMaterializationProject": { + "name": "viewMaterializationProject", + "description": "The project name where the temporary table should be created. 
Defaults to the same project in which the table is located.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "filter": { + "name": "filter", + "description": "The WHERE clause filters out rows by evaluating each row against boolean expression, and discards all rows that do not return TRUE (that is, rows that return FALSE or NULL).", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "bucket": { + "name": "bucket", + "description": "The Google Cloud Storage bucket to store temporary data in. Cloud Storage data will be deleted after it is loaded into BigQuery. If it is not provided, a unique bucket will be automatically created and then deleted after the run finishes. The service account must have permission to create buckets in the configured project.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionFrom": { + "name": "partitionFrom", + "description": "It's inclusive partition start date. It should be a String with format \"yyyy-MM-dd\". This value is ignored if the table does not support partitioning.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceFilePath": { + "name": "serviceFilePath", + "description": "Path on the local file system of the service account key used for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. 
When running on other clusters, the file must be present on every node in the cluster.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountType": { + "name": "serviceAccountType", + "description": "Service account type, file path where the service account is located or the JSON content of the service account.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "connection": { + "name": "connection", + "description": "The existing connection to use.", + "type": "bigqueryconnectorconfig", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [ + "serviceAccountJSON", + "serviceFilePath", + "project", + "serviceAccountType", + "datasetProject" + ] + }, + "datasetProject": { + "name": "datasetProject", + "description": "The project the dataset belongs to. This is only required if the dataset is not in the same project that the BigQuery job will run in. If no value is given, it will default to the configured project ID.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "dataset": { + "name": "dataset", + "description": "The dataset to write to. A dataset is contained within a specific project. Datasets are top-level containers that are used to organize and control access to tables and views.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "table": { + "name": "table", + "description": "The table to read from. A table contains individual records organized in rows. Each record is composed of columns (also called fields). 
Every table is defined by a schema that describes the column names, data types, and other information.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "referenceName": { + "name": "referenceName", + "description": "This will be used to uniquely identify this source for lineage, annotating metadata, etc.", + "type": "string", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + } + }, + "description": "This source reads the entire contents of a BigQuery table. BigQuery is Google's serverless, highly scalable, enterprise data warehouse.Data is first written to a temporary location on Google Cloud Storage, then read into the pipeline from there.", + "selected": false + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "copy :body :body_copy true\nparse-as-csv :body_copy ' ' false\ndrop :Address\nset-column :name_count string:length(name)", + "field": "*", + "precondition": "false", + "workspaceId": "06222040-4a4b-4c0c-9325-944e9f0aaf4a", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:353", + "isPluginAvailable": true, + "_uiPosition": { + 
"left": "796px", + "top": "327.5px" + }, + "selected": false, + "_backendProperties": { + "schema": { + "name": "schema", + "description": "Specifies the schema that has to be output.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "preconditionSQL": { + "name": "preconditionSQL", + "description": "SQL Precondition expression specifying filtering before applying directives (false to filter)", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "udd": { + "name": "udd", + "description": "List of User Defined Directives (UDD) that have to be loaded.", + "type": "string", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "field": { + "name": "field", + "description": "Name of the input field to be wrangled or '*' to wrangle all the fields.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "on-error": { + "name": "on-error", + "description": "How to handle error in record processing", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "directives": { + "name": "directives", + "description": "Recipe for wrangling the input records", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "expressionLanguage": { + "name": "expressionLanguage", + "description": "Toggle to configure precondition language between JEXL and SQL", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "precondition": { + "name": "precondition", + "description": "JEXL Precondition expression specifying filtering before applying directives (true to filter)", + "type": "string", + "required": false, + 
"macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + } + }, + "description": "Wrangler - A interactive tool for data cleansing and transformation.", + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ] + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "directivecount", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_1\",\"type\":[\"string\",\"null\"]},{\"name\":\"body_copy_2\",\"type\":[\"string\",\"null\"]},{\"name\":\"name_count\",\"type\":[\"int\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:354", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "327.5px" + }, + "selected": false + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "90f47236-40c8-11ee-80b7-000000d8e91a" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty-cdap-data-pipeline.json new file mode 100644 index 000000000..63dd47537 --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty-cdap-data-pipeline.json @@ -0,0 +1,179 @@ +{ + "name": "Wrangler_Directive_Fill_empty", + "description": "Data Pipeline Application", + "artifact": { + "name": 
"cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "newupdatedtable", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:444", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "327.5px" + } + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "fill-null-or-empty :name 'Shubhangi'\nsend-to-error empty(Address)", + "field": "*", + "precondition": "false", + "workspaceId": "06222040-4a4b-4c0c-9325-944e9f0aaf4a", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:445", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "327.5px" + } + }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "senderrortable", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:446", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "327.5px" + } + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "6386d949-40c9-11ee-bdf6-0000009b7ad5" +} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json deleted file mode 100644 index f797e1962..000000000 --- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Fill_empty_v2-cdap-data-pipeline.json +++ /dev/null @@ -1,147 +0,0 @@ -{ - "name": "Wrangler_Directive_Fill_empty_v2", - "description": "Data Pipeline Application", - "artifact": { - "name": "cdap-data-pipeline", - "version": "6.9.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "config": { - "resources": { - "memoryMB": 2048, - "virtualCores": 1 - }, - "driverResources": { - 
"memoryMB": 2048, - "virtualCores": 1 - }, - "connections": [ - { - "from": "BigQueryTable", - "to": "Wrangler" - }, - { - "from": "Wrangler", - "to": "BigQuery2" - } - ], - "postActions": [], - "properties": {}, - "processTimingEnabled": true, - "stageLoggingEnabled": true, - "stages": [ - { - "name": "BigQueryTable", - "plugin": { - "name": "BigQueryTable", - "type": "batchsource", - "label": "BigQueryTable", - "artifact": { - "name": "google-cloud", - "version": "0.22.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "useConnection": "false", - "dataset": "test_automation", - "table": "E2E_SOURCE_da362", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", - "enableQueryingViews": "false", - "project": "auto-detect", - "serviceAccountType": "filePath", - "serviceFilePath": "auto-detect", - "datasetProject": "cdf-athena" - } - }, - "outputSchema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", - "id": "BigQueryTable", - "type": "batchsource", - "label": "BigQueryTable", - "icon": "fa-plug" - }, - { - "name": "Wrangler", - "plugin": { - "name": "Wrangler", - "type": "transform", - "label": "Wrangler", - "artifact": { - "name": "wrangler-transform", - "version": "4.9.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "field": "*", - "precondition": "false", - "directives": "parse-as-csv :body ',' true\nfill-null-or-empty :name 'Shubhangi'\nsend-to-error empty(Address)", - "on-error": "fail-pipeline", - "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}", - "workspaceId": "8a6750d6-f2ee-4fd8-b78d-5f1b11e3301d" - } 
- }, - "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}", - "inputSchema": [ - { - "name": "BigQueryTable", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "id": "Wrangler", - "type": "transform", - "label": "Wrangler", - "icon": "icon-DataPreparation" - }, - { - "name": "BigQuery2", - "plugin": { - "name": "BigQueryTable", - "type": "batchsink", - "label": "BigQuery2", - "artifact": { - "name": "google-cloud", - "version": "0.22.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "useConnection": "false", - "project": "cdf-athena", - "serviceAccountType": "filePath", - "serviceFilePath": "auto-detect", - "referenceName": "test", - 
"dataset": "test_automation", - "table": "E2E_TARGET_25", - "operation": "insert", - "truncateTable": "false", - "allowSchemaRelaxation": "false", - "location": "US", - "createPartitionedTable": "false", - "partitioningType": "TIME", - "partitionFilterRequired": "false", - "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}" - } - }, - "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}", - "inputSchema": [ - { - "name": "Wrangler", - "schema": 
"{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "id": "BigQuery2", - "type": "batchsink", - "label": "BigQuery2", - "icon": "fa-plug" - } - ], - "schedule": "0 1 */1 * *", - "engine": "spark", - "numOfRecordsPreview": 100, - "rangeRecordsPreview": { - "min": 1, - "max": "5000" - }, - "description": "Data Pipeline Application", - "maxConcurrentRuns": 1 - }, - "version": "-SNAPSHOT" -} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json deleted file mode 100644 index 47f401659..000000000 --- a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_copycolumn_concatenate_v1-cdap-data-pipeline.json +++ /dev/null @@ -1,147 +0,0 @@ -{ - "name": "Wrangler_Directive_Format_copycolumn_concatenate_v1", - "description": "Data Pipeline Application", - "artifact": { - "name": "cdap-data-pipeline", - "version": "6.9.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "config": { - "resources": { - "memoryMB": 2048, - "virtualCores": 1 - }, - "driverResources": { - "memoryMB": 2048, - "virtualCores": 1 - }, - "connections": [ - { - "from": 
"BigQueryTable", - "to": "Wrangler" - }, - { - "from": "Wrangler", - "to": "BigQuery2" - } - ], - "postActions": [], - "properties": {}, - "processTimingEnabled": true, - "stageLoggingEnabled": true, - "stages": [ - { - "name": "BigQueryTable", - "plugin": { - "name": "BigQueryTable", - "type": "batchsource", - "label": "BigQueryTable", - "artifact": { - "name": "google-cloud", - "version": "0.22.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "useConnection": "false", - "dataset": "test_automation", - "table": "E2E_SOURCE_da362", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", - "enableQueryingViews": "false", - "project": "auto-detect", - "serviceAccountType": "filePath", - "serviceFilePath": "auto-detect", - "datasetProject": "cdf-athena" - } - }, - "outputSchema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", - "id": "BigQueryTable", - "type": "batchsource", - "label": "BigQueryTable", - "icon": "fa-plug" - }, - { - "name": "Wrangler", - "plugin": { - "name": "Wrangler", - "type": "transform", - "label": "Wrangler", - "artifact": { - "name": "wrangler-transform", - "version": "4.9.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "field": "*", - "precondition": "false", - "directives": "parse-as-csv :body ',' true\ntitlecase :name\nset-column :email_copy 'Wrangle' + email", - "on-error": "fail-pipeline", - "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}", - 
"workspaceId": "8a6750d6-f2ee-4fd8-b78d-5f1b11e3301d" - } - }, - "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}", - "inputSchema": [ - { - "name": "BigQueryTable", - "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "id": "Wrangler", - "type": "transform", - "label": "Wrangler", - "icon": "icon-DataPreparation" - }, - { - "name": "BigQuery2", - "plugin": { - "name": "BigQueryTable", - "type": "batchsink", - "label": "BigQuery2", - "artifact": { - "name": "google-cloud", - "version": "0.22.0-SNAPSHOT", - "scope": "SYSTEM" - }, - "properties": { - "useConnection": "false", - "project": 
"cdf-athena", - "serviceAccountType": "filePath", - "serviceFilePath": "auto-detect", - "referenceName": "test", - "dataset": "test_automation", - "table": "E2E_TARGET_07", - "operation": "insert", - "truncateTable": "false", - "allowSchemaRelaxation": "false", - "location": "US", - "createPartitionedTable": "false", - "partitioningType": "TIME", - "partitionFilterRequired": "false", - "schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" - } - }, - "outputSchema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}", - "inputSchema": [ - { - "name": "Wrangler", - 
"schema": "{\"type\":\"record\",\"name\":\"record\",\"fields\":[{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"ID\",\"type\":[\"string\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"string\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"string\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" - } - ], - "id": "BigQuery2", - "type": "batchsink", - "label": "BigQuery2", - "icon": "fa-plug" - } - ], - "schedule": "0 1 */1 * *", - "engine": "spark", - "numOfRecordsPreview": 100, - "rangeRecordsPreview": { - "min": 1, - "max": "5000" - }, - "description": "Data Pipeline Application", - "maxConcurrentRuns": 1 - }, - "version": "-SNAPSHOT" -} \ No newline at end of file diff --git a/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_setcolumn_titlecase-cdap-data-pipeline.json b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_setcolumn_titlecase-cdap-data-pipeline.json new file mode 100644 index 000000000..f4cfe65fc --- /dev/null +++ b/wrangler-transform/src/e2e-test/resources/testData/Wrangler/Wrangler_Directive_Format_setcolumn_titlecase-cdap-data-pipeline.json @@ -0,0 +1,527 @@ +{ + "name": "Wrangler_Directive_Format_setcolumn_titlecase", + "description": "Data Pipeline Application", + "artifact": { + "name": "cdap-data-pipeline", + "version": "6.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "config": { + "resources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + "driverResources": { + "memoryMB": 2048, + "virtualCores": 1 + }, + 
"connections": [ + { + "from": "BigQueryTable", + "to": "Wrangler" + }, + { + "from": "Wrangler", + "to": "BigQuery2" + } + ], + "postActions": [], + "properties": {}, + "processTimingEnabled": true, + "stageLoggingEnabled": true, + "stages": [ + { + "name": "BigQueryTable", + "plugin": { + "name": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "dataset": "Wrangler", + "table": "newupdatedtable", + "schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "enableQueryingViews": "false" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQueryTable", + "type": "batchsource", + "label": "BigQueryTable", + "icon": "fa-plug", + "$$hashKey": "object:435", + "isPluginAvailable": true, + "_uiPosition": { + "left": "496px", + "top": "327.5px" + }, + "selected": false + }, + { + "name": "Wrangler", + "plugin": { + "name": "Wrangler", + "type": "transform", + "label": "Wrangler", + "artifact": { + "name": "wrangler-transform", + "version": "4.10.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "directives": "set-column :email_copy 'Wrangle' + email\ntitlecase :Col1", + "field": "*", + "precondition": "false", + "workspaceId": "06222040-4a4b-4c0c-9325-944e9f0aaf4a", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}", + "on-error": "fail-pipeline" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "BigQueryTable", + "schema": 
"{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "Wrangler", + "type": "transform", + "label": "Wrangler", + "icon": "icon-DataPreparation", + "$$hashKey": "object:436", + "isPluginAvailable": true, + "_uiPosition": { + "left": "796px", + "top": "327.5px" + }, + "selected": false, + "_backendProperties": { + "schema": { + "name": "schema", + "description": "Specifies the schema that has to be output.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "preconditionSQL": { + "name": "preconditionSQL", + "description": "SQL Precondition expression specifying filtering before applying directives (false to filter)", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "udd": { + "name": "udd", + "description": "List of User Defined Directives (UDD) that have to be loaded.", + "type": "string", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "field": { + "name": "field", + "description": "Name of the input field to be wrangled or '*' to wrangle all the fields.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "on-error": { + "name": 
"on-error", + "description": "How to handle error in record processing", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "directives": { + "name": "directives", + "description": "Recipe for wrangling the input records", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "expressionLanguage": { + "name": "expressionLanguage", + "description": "Toggle to configure precondition language between JEXL and SQL", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "precondition": { + "name": "precondition", + "description": "JEXL Precondition expression specifying filtering before applying directives (true to filter)", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + } + }, + "description": "Wrangler - A interactive tool for data cleansing and transformation." 
+ }, + { + "name": "BigQuery2", + "plugin": { + "name": "BigQueryTable", + "type": "batchsink", + "label": "BigQuery2", + "artifact": { + "name": "google-cloud", + "version": "0.23.0-SNAPSHOT", + "scope": "SYSTEM" + }, + "properties": { + "useConnection": "false", + "project": "auto-detect", + "serviceAccountType": "filePath", + "serviceFilePath": "auto-detect", + "dataset": "Wrangler", + "table": "titlecasetable", + "operation": "insert", + "truncateTable": "false", + "allowSchemaRelaxation": "false", + "location": "US", + "createPartitionedTable": "false", + "partitioningType": "TIME", + "partitionFilterRequired": "false", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" + } + }, + "outputSchema": [ + { + "name": "etlSchemaBody", + "schema": 
"{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "inputSchema": [ + { + "name": "Wrangler", + "schema": "{\"type\":\"record\",\"name\":\"output\",\"fields\":[{\"name\":\"ID\",\"type\":[\"long\",\"null\"]},{\"name\":\"name\",\"type\":[\"string\",\"null\"]},{\"name\":\"email\",\"type\":[\"string\",\"null\"]},{\"name\":\"Customer_id\",\"type\":[\"long\",\"null\"]},{\"name\":\"First_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Last_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"Age\",\"type\":[\"long\",\"null\"]},{\"name\":\"Address\",\"type\":[\"string\",\"null\"]},{\"name\":\"Col1\",\"type\":[\"string\",\"null\"]},{\"name\":\"create_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"update_date\",\"type\":[\"string\",\"null\"]},{\"name\":\"body\",\"type\":[\"string\",\"null\"]},{\"name\":\"email_copy\",\"type\":[\"string\",\"null\"]}]}" + } + ], + "id": "BigQuery2", + "type": "batchsink", + "label": "BigQuery2", + "icon": "fa-plug", + "$$hashKey": "object:437", + "isPluginAvailable": true, + "_uiPosition": { + "left": "1096px", + "top": "327.5px" + }, + "_backendProperties": { + "schema": { + "name": "schema", + "description": "The schema of the data to write. 
If provided, must be compatible with the table schema.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionFilter": { + "name": "partitionFilter", + "description": "Partition filter that can be used for partition elimination during Update or Upsert operations.This value is ignored if operation is not UPDATE or UPSERT.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "rangeStart": { + "name": "rangeStart", + "description": "Start value for range partitioning. The start value is inclusive. Ignored when table already exists", + "type": "long", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountJSON": { + "name": "serviceAccountJSON", + "description": "Content of the service account file.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "useConnection": { + "name": "useConnection", + "description": "Whether to use an existing connection.", + "type": "boolean", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "project": { + "name": "project", + "description": "Google Cloud Project ID. It can be found on the Dashboard in the Google Cloud Platform Console.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitioningType": { + "name": "partitioningType", + "description": "Specifies the partitioning type. Can either be Integer or Time or None. 
Ignored when table already exists", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "relationTableKey": { + "name": "relationTableKey", + "description": "List of fields that determines relation between tables during Update and Upsert operations.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "rangeEnd": { + "name": "rangeEnd", + "description": "End value for range partitioning. The end value is exclusive. Ignored when table already exists", + "type": "long", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "clusteringOrder": { + "name": "clusteringOrder", + "description": "List of fields that determines the sort order of the data. Fields must be of type INT, LONG, STRING, DATE, TIMESTAMP, BOOLEAN or DECIMAL. Tables cannot be clustered on more than 4 fields. This value is only used when the BigQuery table is automatically created and ignored if the table already exists.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionFilterRequired": { + "name": "partitionFilterRequired", + "description": "Whether to create a table that requires a partition filter. This value is ignored if the table already exists.", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceFilePath": { + "name": "serviceFilePath", + "description": "Path on the local file system of the service account key used for authorization. Can be set to 'auto-detect' when running on a Dataproc cluster. 
When running on other clusters, the file must be present on every node in the cluster.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "truncateTable": { + "name": "truncateTable", + "description": "Whether or not to truncate the table before writing to it. Should only be used with the Insert operation. This could overwrite the table schema", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "partitionByField": { + "name": "partitionByField", + "description": "Partitioning column for the BigQuery table. This should be left empty if the BigQuery table is an ingestion-time partitioned table.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "connection": { + "name": "connection", + "description": "The existing connection to use.", + "type": "bigqueryconnectorconfig", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [ + "serviceAccountJSON", + "serviceFilePath", + "project", + "serviceAccountType", + "datasetProject" + ] + }, + "table": { + "name": "table", + "description": "The table to write to. A table contains individual records organized in rows. Each record is composed of columns (also called fields). 
Every table is defined by a schema that describes the column names, data types, and other information.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "referenceName": { + "name": "referenceName", + "description": "This will be used to uniquely identify this source/sink for lineage, annotating metadata, etc.", + "type": "string", + "required": false, + "macroSupported": false, + "macroEscapingEnabled": false, + "children": [] + }, + "cmekKey": { + "name": "cmekKey", + "description": "The GCP customer managed encryption key (CMEK) name used to encrypt data written to any bucket, dataset or table created by the plugin. If the bucket, dataset or table already exists, this is ignored. More information can be found at https://cloud.google.com/data-fusion/docs/how-to/customer-managed-encryption-keys", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "createPartitionedTable": { + "name": "createPartitionedTable", + "description": "DEPRECATED!. Whether to create the BigQuery table with time partitioning. This value is ignored if the table already exists. When this is set to false, value of Partitioning type will be used. Use 'Partitioning type' property", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "dedupeBy": { + "name": "dedupeBy", + "description": "Column names and sort order used to choose which input record to update/upsert when there are multiple input records with the same key. 
For example, if this is set to 'updated_time desc', then if there are multiple input records with the same key, the one with the largest value for 'updated_time' will be applied.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "allowSchemaRelaxation": { + "name": "allowSchemaRelaxation", + "description": "Whether to modify the BigQuery table schema if it differs from the input schema.", + "type": "boolean", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "bucket": { + "name": "bucket", + "description": "The Google Cloud Storage bucket to store temporary data in. Cloud Storage data will be deleted after it is loaded into BigQuery. If it is not provided, a unique bucket will be automatically created and then deleted after the run finishes. The service account must have permission to create buckets in the configured project.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "rangeInterval": { + "name": "rangeInterval", + "description": "Interval value for range partitioning. The interval value must be a positive integer.Ignored when table already exists", + "type": "long", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "gcsChunkSize": { + "name": "gcsChunkSize", + "description": "Optional property to tune chunk size in gcs upload request. The value of this property should be in number of bytes. By default, 8388608 bytes (8MB) will be used as upload request chunk size.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "location": { + "name": "location", + "description": "The location where the big query dataset will get created. 
This value is ignored if the dataset or temporary bucket already exist.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "serviceAccountType": { + "name": "serviceAccountType", + "description": "Service account type, file path where the service account is located or the JSON content of the service account.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "datasetProject": { + "name": "datasetProject", + "description": "The project the dataset belongs to. This is only required if the dataset is not in the same project that the BigQuery job will run in. If no value is given, it will default to the configured project ID.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "operation": { + "name": "operation", + "description": "Type of write operation to perform. This can be set to Insert, Update or Upsert.", + "type": "string", + "required": false, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + }, + "dataset": { + "name": "dataset", + "description": "The dataset to write to. A dataset is contained within a specific project. Datasets are top-level containers that are used to organize and control access to tables and views.", + "type": "string", + "required": true, + "macroSupported": true, + "macroEscapingEnabled": false, + "children": [] + } + }, + "description": "This sink writes to a BigQuery table. BigQuery is Google's serverless, highly scalable, enterprise data warehouse. 
Data is first written to a temporary location on Google Cloud Storage, then loaded into BigQuery from there.", + "selected": false + } + ], + "schedule": "0 1 */1 * *", + "engine": "spark", + "numOfRecordsPreview": 100, + "rangeRecordsPreview": { + "min": 1, + "max": "5000" + }, + "maxConcurrentRuns": 1, + "pushdownEnabled": false, + "transformationPushdown": {} + }, + "version": "404b446c-40ca-11ee-8c7e-0000001618d3" +} \ No newline at end of file