wrangler e2e tests

data-integrations · Sep 15, 2023 · 747a1f0 · 747a1f0
1 parent ae63527
commit 747a1f0
Show file tree

Hide file tree

Showing 18 changed files with 945 additions and 7 deletions.
diff --git a/pom.xml b/pom.xml
@@ -492,7 +492,7 @@
               <execution>
                 <goals>
                   <goal>integration-test</goal>
-                  <goal>verify</goal>
+                  <goal>verify</goal>
                 </goals>
               </execution>
             </executions>

diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsCsv.feature
@@ -13,7 +13,7 @@
 # the License.
 
 @Wrangler
-Feature:  Wrangler - Run time scenarios
+Feature:  Wrangler - Run time scenarios for parse csv
 
   @BQ_SOURCE_CSV_TEST @BQ_SINK_TEST
   Scenario: To verify User is able to run a pipeline using parse csv directive

diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsExcel.feature
@@ -0,0 +1,42 @@
+# Copyright © 2023 Cask Data, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+@Wrangler
+Feature:  Wrangler - Run time scenarios for parse as excel
+
+  @GCS_SOURCE_TEST @BQ_SINK_TEST
+  Scenario: To verify User is able to run a pipeline using parse excel directive
+    Given Open Datafusion Project to configure pipeline
+    Then Click on the Plus Green Button to import the pipelines
+    Then Select the file for importing the pipeline for the plugin "Directive_parse_excel"
+    Then Navigate to the properties page of plugin: "GCSFile"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Replace input plugin property: "path" with value: "gcsSourceBucket"
+    Then Click on the Get Schema button
+    Then Click on the Validate button
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "BigQuery"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Replace input plugin property: "table" with value: "bqTargetTable"
+    Then Replace input plugin property: "dataset" with value: "dataset"
+    Then Click on the Validate button
+    Then Close the Plugin Properties page
+    Then Rename the pipeline
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Close the pipeline logs
+    Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_excel"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsJson.feature
@@ -0,0 +1,43 @@
+# Copyright © 2023 Cask Data, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+@Wrangler
+Feature:  Wrangler - Run time scenarios for parse as json
+
+  @BQ_SOURCE_JSON_TEST @BQ_SINK_TEST
+  Scenario: To verify User is able to run a pipeline using parse json directive
+    Given Open Datafusion Project to configure pipeline
+    Then Click on the Plus Green Button to import the pipelines
+    Then Select the file for importing the pipeline for the plugin "Directive_parse_json"
+    Then Navigate to the properties page of plugin: "BigQueryTable"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Replace input plugin property: "dataset" with value: "dataset"
+    Then Replace input plugin property: "table" with value: "bqSourceTable"
+    Then Click on the Get Schema button
+    Then Click on the Validate button
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "BigQuery2"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Replace input plugin property: "table" with value: "bqTargetTable"
+    Then Replace input plugin property: "dataset" with value: "dataset"
+    Then Click on the Validate button
+    Then Close the Plugin Properties page
+    Then Rename the pipeline
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Close the pipeline logs
+    Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_json"
diff --git a/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature b/wrangler-transform/src/e2e-test/features/Wrangler/ParseAsXmlToJson.feature
@@ -0,0 +1,43 @@
+# Copyright © 2023 Cask Data, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+@Wrangler
+Feature:  Wrangler - Run time scenarios for parse as XmlToJson
+
+  @BQ_SOURCE_XML_TEST @BQ_SINK_TEST
+  Scenario: To verify User is able to run a pipeline using parse XmlToJson directive
+    Given Open Datafusion Project to configure pipeline
+    Then Click on the Plus Green Button to import the pipelines
+    Then Select the file for importing the pipeline for the plugin "Directive_parse_xml"
+    Then Navigate to the properties page of plugin: "BigQueryTable"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Replace input plugin property: "dataset" with value: "dataset"
+    Then Replace input plugin property: "table" with value: "bqSourceTable"
+    Then Click on the Get Schema button
+    Then Click on the Validate button
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "BigQuery2"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Replace input plugin property: "table" with value: "bqTargetTable"
+    Then Replace input plugin property: "dataset" with value: "dataset"
+    Then Click on the Validate button
+    Then Close the Plugin Properties page
+    Then Rename the pipeline
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Close the pipeline logs
+    Then Validate The Data From BQ To BQ With Actual And Expected File for: "ExpectedDirective_parse_xml"
diff --git a/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java b/wrangler-transform/src/e2e-test/java/io/cdap/plugin/common/stepsdesign/TestSetupHooks.java
@@ -24,7 +24,6 @@
 import io.cdap.e2e.utils.StorageClient;
 import io.cucumber.java.After;
 import io.cucumber.java.Before;
-import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.junit.Assert;
 import stepsdesign.BeforeActions;
@@ -34,16 +33,14 @@
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Paths;
-import java.sql.SQLException;
 import java.util.NoSuchElementException;
 import java.util.UUID;
 
-import static io.cdap.e2e.pages.locators.CdfGCSLocators.filePath;
-
 /**
  * Setup BQ for Wrangler tests.
  */
 public class TestSetupHooks {
+  public static String gcsSourceBucketName = StringUtils.EMPTY;
 
   @Before(order = 1, value = "@BQ_SINK_TEST")
   public static void setTempTargetBQTableName() {
@@ -74,8 +71,58 @@ public static void deleteTempTargetBQTable() throws IOException, InterruptedExce
   @Before(order = 1, value = "@BQ_SOURCE_CSV_TEST")
   public static void createTempSourceBQTable() throws IOException, InterruptedException {
     createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileCsv"),
-                                   PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv"));
+            PluginPropertyUtils.pluginProp("InsertBQDataQueryFileCsv"));
+  }
+  @Before(order = 1, value = "@BQ_SOURCE_JSON_TEST")
+  public static void createTempSourceBQTableJson() throws IOException, InterruptedException {
+    createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQTableQueryFileJson"),
+                                   PluginPropertyUtils.pluginProp("InsertBQDataQueryFileJson"));
+  }
+  @Before(order = 1, value = "@BQ_SOURCE_XML_TEST")
+  public static void createTempSourceBQTableXml() throws IOException, InterruptedException {
+    createSourceBQTableWithQueries(PluginPropertyUtils.pluginProp("CreateBQDataQueryFileXml"),
+            PluginPropertyUtils.pluginProp("InsertBQDataQueryFileXml"));
+  }
+  /**
+   * Cucumber hook that drops the temporary BigQuery source table after a scenario.
+   * The tag expression lists every source-table tag used by the {@code @Before} hooks
+   * (CSV, JSON, XML) so the cleanup actually runs for those scenarios; the original
+   * {@code @BQ_SOURCE_TEST} tag is kept for backward compatibility.
+   * Reads the table name from the {@code bqSourceTable} plugin property, drops it,
+   * logs the deletion to the scenario output and removes the property.
+   *
+   * @throws IOException propagated from {@code BigQueryClient.dropBqQuery}
+   * @throws InterruptedException propagated from {@code BigQueryClient.dropBqQuery}
+   */
+  @After(order = 1, value = "@BQ_SOURCE_TEST or @BQ_SOURCE_CSV_TEST or @BQ_SOURCE_JSON_TEST or @BQ_SOURCE_XML_TEST")
+  public static void deleteTempSourceBQTable() throws IOException, InterruptedException {
+    String bqSourceTable = PluginPropertyUtils.pluginProp("bqSourceTable");
+    BigQueryClient.dropBqQuery(bqSourceTable);
+    BeforeActions.scenario.write("BQ source Table " + bqSourceTable + " deleted successfully");
+    PluginPropertyUtils.removePluginProp("bqSourceTable");
   }
+  @Before(order = 1, value = "@GCS_SOURCE_TEST")
+  public static void createBucketWithEXCELFile() throws IOException, URISyntaxException {
+    gcsSourceBucketName = createGCSBucketWithFile(PluginPropertyUtils.pluginProp("testFile"));
+    PluginPropertyUtils.addPluginProp("gcsSourceBucket", "gs://" + gcsSourceBucketName + "/"  +
+            PluginPropertyUtils.pluginProp("testFile"));
+    BeforeActions.scenario.write("GCS source bucket1 name - " + gcsSourceBucketName);
+  }
+  /**
+   * Creates a GCS bucket named {@code e2e-test-<random UUID>} and uploads one object into it.
+   *
+   * @param filePath value passed to {@code StorageClient.uploadObject} as both its second and
+   *                 third argument
+   * @return the name reported by the newly created bucket
+   * @throws IOException declared for the {@code StorageClient} calls
+   * @throws URISyntaxException declared for the {@code StorageClient} calls
+   */
+  private static String createGCSBucketWithFile(String filePath) throws IOException, URISyntaxException {
+    String requestedName = "e2e-test-" + UUID.randomUUID();
+    String createdName = StorageClient.createBucket(requestedName).getName();
+    StorageClient.uploadObject(createdName, filePath, filePath);
+    return createdName;
+  }
+
+  /**
+   * Cucumber hook that runs after scenarios tagged {@code @GCS_SOURCE_TEST}.
+   * Deletes the bucket whose name is held in {@code gcsSourceBucketName} and then
+   * resets that field to the empty string.
+   */
+  @After(order = 1, value = "@GCS_SOURCE_TEST")
+  public static void deleteSourceBucketWithFile() {
+    final String bucketToDelete = gcsSourceBucketName;
+    deleteGCSBucket(bucketToDelete);
+    gcsSourceBucketName = StringUtils.EMPTY;
+  }
+
+  /**
+   * Deletes every object listed in the given bucket and then the bucket itself,
+   * logging the outcome to the scenario output.
+   * A failure whose message says the bucket does not exist is logged and tolerated;
+   * any other {@code StorageException} or {@code IOException} fails the test.
+   *
+   * @param bucketName name of the GCS bucket to remove
+   */
+  private static void deleteGCSBucket(String bucketName) {
+    try {
+      for (Blob blob : StorageClient.listObjects(bucketName).iterateAll()) {
+        StorageClient.deleteObject(bucketName, blob.getName());
+      }
+      StorageClient.deleteBucket(bucketName);
+      BeforeActions.scenario.write("Deleted GCS Bucket " + bucketName);
+    } catch (StorageException | IOException e) {
+      // getMessage() may be null; guard so the real failure is reported instead of an NPE.
+      String message = e.getMessage();
+      if (message != null && message.contains("The specified bucket does not exist")) {
+        BeforeActions.scenario.write("GCS Bucket " + bucketName + " does not exist.");
+      } else {
+        Assert.fail("Failed to delete GCS Bucket " + bucketName + ": " + message);
+      }
+    }
+  }
+
 
   private static void createSourceBQTableWithQueries(String bqCreateTableQueryFile, String bqInsertDataQueryFile)
     throws IOException, InterruptedException {

diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_excel
@@ -0,0 +1,2 @@
+{"copiedname":"very","id":0,"name":"very","phone":"8838.0","rollno":"3.0","uniquenum":"very,0"}
+{"copiedname":"hello","id":2,"name":"hell","phone":"12345.0","rollno":"1.0","uniquenum":"hello,2"}
diff --git a/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json b/wrangler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_json
@@ -0,0 +1,3 @@
+{"Body":"hello abc","copied":{"first":"Root","last":"joy"},"desc":"nick, hello abc","id":22,"json_age":"{\"json_id\":22,\"copied\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_age\":1,\"json_name\":{\"first\":\"Root\",\"last\":\"joy\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"22,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"joy\\\"}\",\"body\":\"hello abc\",\"desc\":\"nick, hello abc\"}","json_id_json_name":"22,{\"first\":\"Root\",\"last\":\"joy\"}","json_name":{"first":"Root","last":"joy"},"json_pet":"testing"}
+{"Body":"hello def","copied":{"first":"dded","last":"share"},"desc":"hello, hello def","id":23,"json_age":"{\"json_id\":23,\"copied\":{\"first\":\"dded\",\"last\":\"share\"},\"json_age\":2,\"json_name\":{\"first\":\"dded\",\"last\":\"share\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"23,{\\\"first\\\":\\\"dded\\\",\\\"last\\\":\\\"share\\\"}\",\"body\":\"hello def\",\"desc\":\"hello, hello def\"}","json_id_json_name":"23,{\"first\":\"dded\",\"last\":\"share\"}","json_name":{"first":"dded","last":"share"},"json_pet":"testing"}
+{"Body":"hello ghi","copied":{"first":"Root","last":"Joltie"},"desc":"doms, hello ghi","id":24,"json_age":"{\"json_id\":24,\"copied\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_age\":3,\"json_name\":{\"first\":\"Root\",\"last\":\"Joltie\"},\"json_pet\":\"testing\",\"json_id_json_name\":\"24,{\\\"first\\\":\\\"Root\\\",\\\"last\\\":\\\"Joltie\\\"}\",\"body\":\"hello ghi\",\"desc\":\"doms, hello ghi\"}","json_id_json_name":"24,{\"first\":\"Root\",\"last\":\"Joltie\"}","json_name":{"first":"Root","last":"Joltie"},"json_pet":"testing"}
diff --git a/...gler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson b/...gler-transform/src/e2e-test/resources/BQValidationExpectedFiles/Directive_parse_xmltojson
@@ -0,0 +1,6 @@
+{"Email":"abc01@mail.com","distance":2.0,"distance2":0.3571428656578064,"email_domain":{"distance":2.0,"email_account":"abc01"},"email_porter":["abc","mail","com"],"id":"1","xmldata_note":{"body":"Dont forget me this week!","from":"Tani","heading":"Reminder","to":"Tove"}}
+{"Email":"def02@mail.com","distance":2.0,"distance2":0.3571428656578064,"email_domain":{"distance":2.0,"email_account":"def02"},"email_porter":["def","mail","com"],"id":"2","xmldata_note":{"body":"Dont forget us this holiday!","from":"joy","heading":"Reminder","to":"Tove"}}
+{"Email":"ghi03@mail.com","distance":2.0,"distance2":0.3571428656578064,"email_domain":{"distance":2.0,"email_account":"ghi03"},"email_porter":["ghi","mail","com"],"id":"3","xmldata_note":{"body":"Dont forget him this weekend!","from":"shree","heading":"Reminder","to":"Tove"}}
+{"Email":"abc01@mail.com","distance":2.0,"distance2":0.3571428656578064,"email_domain":{"distance":2.0,"email_account":"abc01"},"email_porter":["abc","mail","com"],"id":"abc","xmldata_note":{"body":"Dont forget me this week!","from":"Tani","heading":"Reminder","to":"Tove"}}
+{"Email":"def02@mail.com","distance":2.0,"distance2":0.3571428656578064,"email_domain":{"distance":2.0,"email_account":"def02"},"email_porter":["def","mail","com"],"id":"def","xmldata_note":{"body":"Dont forget us this holiday!","from":"joy","heading":"Reminder","to":"Tove"}}
+{"Email":"ghi03@mail.com","distance":2.0,"distance2":0.3571428656578064,"email_domain":{"distance":2.0,"email_account":"ghi03"},"email_porter":["ghi","mail","com"],"id":"ghi","xmldata_note":{"body":"Dont forget him this weekend!","from":"shree","heading":"Reminder","to":"Tove"}}
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt
@@ -0,0 +1 @@
+create table `DATASET.TABLE_NAME` (email STRING, xmldata STRING)
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt
@@ -0,0 +1,5 @@
+INSERT INTO DATASET.TABLE_NAME (email,xmldata)
+VALUES
+('abc01@mail.com','<?xml version=1.0 encoding=UTF-8?> <note> <to>Tove</to> <from>Tani</from> <heading>Reminder</heading> <body>Dont forget me this week!</body> </note>'),
+('def02@mail.com','<?xml version=1.0 encoding=UTF-8?> <note> <to>Tove</to> <from>joy</from> <heading>Reminder</heading> <body>Dont forget us this holiday!</body> </note>'),
+('ghi03@mail.com','<?xml version=1.0 encoding=UTF-8?> <note> <to>Tove</to> <from>shree</from> <heading>Reminder</heading> <body>Dont forget him this weekend!</body> </note>');
diff --git a/...-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt b/...-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt
@@ -0,0 +1,6 @@
+INSERT INTO DATASET.TABLE_NAME (body,json)
+VALUES
+(' hello abc', '{"id": 1, "name": {"first": "Root", "last": "joy"}, "age": 22, "pet": "nick", "height": 5.8}'),
+('hello def', '{"id": 2, "name": {"first": "dded", "last": "share"}, "age": 23, "pet": "hello", "height": 6.8}'),
+('hello ghi', '{"id": 3, "name": {"first": "Root", "last": "Joltie"}, "age": 24, "pet": "doms", "height": 7.8}');
+
diff --git a/...gler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt b/...gler-transform/src/e2e-test/resources/BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt
@@ -0,0 +1 @@
+create table `DATASET.TABLE_NAME` (body STRING, json STRING)
diff --git a/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx b/wrangler-transform/src/e2e-test/resources/BQtesdata/BigQuery/test1.xlsx
diff --git a/wrangler-transform/src/e2e-test/resources/pluginParameters.properties b/wrangler-transform/src/e2e-test/resources/pluginParameters.properties
@@ -1,10 +1,18 @@
 #json file path
+Directive_parse_json=testData/Wrangler/parse_json_Wrangle-cdap-data-pipeline (1).json
+Directive_parse_xml=testData/Wrangler/parse_xmltojson_wrangle-cdap-data-pipeline.json
+Directive_parse_excel=testData/Wrangler/parse_excel_wrangler_copy-cdap-data-pipeline.json
 Directive_parse_csv=testData/Wrangler\
   /parse_csv_wrangle-cdap-data-pipeline.json
 bqSourceTable=dummy
 sourcePath=example/hello.csv
 gcsSourceBucket=dummy
+testFile=BQtesdata/BigQuery/test1.xlsx
 #bq queries file path
+CreateBQTableQueryFileJson=BQtesdata/BigQuery/BigQuerycreateTableQueryjson.txt
+InsertBQDataQueryFileJson=BQtesdata/BigQuery/BigQueryInsertDataQueryparsejson.txt
+CreateBQDataQueryFileXml=BQtesdata/BigQuery/BigQueryCreateTableQueryXml.txt
+InsertBQDataQueryFileXml=BQtesdata/BigQuery/BigQueryInsertDataQueryXml.txt
 CreateBQTableQueryFileCsv=BQtesdata/BigQuery/BigQueryCreateTableQueryCsv.txt
 InsertBQDataQueryFileCsv=BQtesdata/BigQuery/BigQueryInsertDataQueryCsv.txt
 
@@ -13,4 +21,7 @@ projectId=cdf-athena
 dataset=test_automation
 dataset2=Wrangler
 #expectedBQFiles
+ExpectedDirective_parse_json=BQValidationExpectedFiles/Directive_parse_json
+ExpectedDirective_parse_xml=BQValidationExpectedFiles/Directive_parse_xmltojson
+ExpectedDirective_parse_excel=BQValidationExpectedFiles/Directive_parse_excel
 ExpectedDirective_parse_csv=BQValidationExpectedFiles/Directive_parse_csv