From db83ff384ff1edf69f50d57a874a4c8ede7f3a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Wed, 12 Feb 2020 22:09:58 -0300 Subject: [PATCH 01/30] =?UTF-8?q?#91=20Actualizaci=C3=B3n=20dependencias?= =?UTF-8?q?=20por=20seguridad=20en=20HIVE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index b9ce54a..a75ef58 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.huemulsolutions.bigdata huemul-bigdatagovernance - 2.3.1 + 2.4-SNAPSHOT HuemulSolutions - BigDataGovernance Enable full data quality and data lineage for BigData Projects. Huemul BigDataGovernance, es una librería que trabaja sobre Spark, Hive y HDFS. Permite la implementación de una **estrategia corporativa de dato único**, basada en buenas prácticas de Gobierno de Datos. @@ -105,7 +105,7 @@ Finalmente, también automatiza la generación de código a partir de las defini org.apache.hive hive-jdbc - 1.1.0 + 2.3.4 @@ -158,9 +158,8 @@ Finalmente, también automatiza la generación de código a partir de las defini hbase-hadoop2-compat 1.1.2 - - + + + org.apache.tika + tika-core + 1.23 + + + + + org.apache.tika + tika-parsers + 1.23 + + + + + org.apache.tika + tika-xmp + 1.23 + add custom columns at the end + val localCustomColumn = SchemaConf.getCustomColumn() + if ( localCustomColumn != null) { + fieldsDetail.append(new StructField(localCustomColumn.getcolumnName_Business, if (allColumnsAsString) StringType else localCustomColumn.getDataType, nullable = true)) + } + return StructType(fieldsDetail) } From d78b389d94bacd5d864f6582cbeefc7c14277e82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Mon, 24 Feb 2020 11:49:15 -0300 Subject: [PATCH 11/30] Mejora rendimiento drop table en HIVE --- .../huemulsolutions/bigdata/tables/huemul_Table.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala index 82d48ca..8d98de7 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala @@ -3539,8 +3539,9 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C //FOR SPARK--> NOT SUPPORTED FOR SPARK if (huemulBigDataGov.GlobalSettings.externalBBDD_conf.Using_SPARK.getActiveForHBASE()){ - val TablesListFromHive = huemulBigDataGov.spark.catalog.listTables(databaseName).collect() - if (TablesListFromHive.filter { x => x.name.toUpperCase() == tableName.toUpperCase() }.length > 0) + //comment two next lines: get error when doesn't have serDe + //val TablesListFromHive = huemulBigDataGov.spark.catalog.listTables(databaseName).collect() + // if (TablesListFromHive.filter { x => x.name.toUpperCase() == tableName.toUpperCase() }.length > 0) huemulBigDataGov.spark.sql(sqlDrop01) } @@ -3552,8 +3553,9 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C //FOR SPARK if (huemulBigDataGov.GlobalSettings.externalBBDD_conf.Using_SPARK.getActive()) { - val TablesListFromHive = huemulBigDataGov.spark.catalog.listTables(databaseName).collect() - if (TablesListFromHive.filter { x => x.name.toUpperCase() == tableName.toUpperCase() }.length > 0) + //comment two next lines: get error when doesn't have serDe + //val 
TablesListFromHive = huemulBigDataGov.spark.catalog.listTables(databaseName).collect() + // if (TablesListFromHive.filter { x => x.name.toUpperCase() == tableName.toUpperCase() }.length > 0) huemulBigDataGov.spark.sql(sqlDrop01) } From d81166316f8426c26b594ac89c2052ec11505e6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Mon, 24 Feb 2020 11:52:10 -0300 Subject: [PATCH 12/30] =?UTF-8?q?#93=20repara=20enlaces=20de=20documentaci?= =?UTF-8?q?=C3=B3n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6eedef5..e29a33b 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ Utilizando una metodología sólida que permite a los ingenieros de tu equipo ce ### ¿Cómo se genera el código? Hay dos formas de generar el código de tu solución -1. La primera forma es generar el código desde cero utilizando los [template que están disponibles acá](https://HuemulSolutions.github.io/template). -2. En la segunda forma solo debes crear la definición de tu interfaz de entrada utilizando este [template sencillo](https://HuemulSolutions.github.io/template_raw), y luego puedes generar el código de tu tabla y el código de masterización en forma automática!. En el código de la tabla se implementa en forma automática validaciones de calidad de datos, y te permite agregar fácilmente otras validaciones más complejas. +1. La primera forma es generar el código desde cero utilizando los [template que están disponibles acá](https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase). +2. En la segunda forma solo debes crear la definición de tu interfaz de entrada utilizando el código de ejemplo "raw_entidad_mes.scala" (https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase/blob/master/src/main/scala/com/yourcompany/yourapplication/datalake/raw_entidad_mes.scala), y luego puedes generar el código de tu tabla y el código de masterización en forma automática!. En el código de la tabla se implementa en forma automática validaciones de calidad de datos, y te permite agregar fácilmente otras validaciones más complejas. ![Branching](https://HuemulSolutions.github.io/huemul_flujo_genera_codigo.png) From 1e8ef79b28ea8e50ce914f358550d04dd1f132fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Sun, 8 Mar 2020 17:21:17 -0300 Subject: [PATCH 13/30] #93 ajustes en caracteres especiales --- README.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index e29a33b..afaceae 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,51 @@ -**Huemul BigDataGovernance**, es una librería que trabaja sobre Spark, Hive y HDFS. Permite la implementación de una **estrategia corporativa de dato único**, basada en buenas prácticas de Gobierno de Datos. +**Huemul BigDataGovernance**, es una librería que trabaja sobre Spark, Hive y HDFS. Permite la implementación de una **estrategia corporativa de dato único**, basada en buenas prácticas de Gobierno de Datos. -Permite implementar tablas con control de Primary Key y Foreing Key al insertar y actualizar datos utilizando la librería, Validación de nulos, largos de textos, máximos/mínimos de números y fechas, valores únicos y valores por default. 
También permite clasificar los campos en aplicabilidad de derechos ARCO para facilitar la implementación de leyes de protección de datos tipo GDPR, identificar los niveles de seguridad y si se está aplicando algún tipo de encriptación. Adicionalmente permite agregar reglas de validación más complejas sobre la misma tabla. +Permite implementar tablas con control de Primary Key y Foreing Key al insertar y actualizar datos utilizando la librería, Validación de nulos, largos de textos, máximos/mínimos de números y fechas, valores únicos y valores por default. También permite clasificar los campos en aplicabilidad de derechos ARCO para facilitar la implementación de leyes de protección de datos tipo GDPR, identificar los niveles de seguridad y si se está aplicando algún tipo de encriptación. Adicionalmente permite agregar reglas de validación más complejas sobre la misma tabla. -Facilita la configuración y lectura de las interfaces de entrada, permitiendo ajustar los parámetros de lectura en esquemas altamente cambientes, crea trazabilidad de las interfaces con las tablas en forma automática, y almacena los diccionarios de datos en un repositorio central. +Facilita la configuración y lectura de las interfaces de entrada, permitiendo ajustar los parámetros de lectura en esquemas altamente cambientes, crea trazabilidad de las interfaces con las tablas en forma automática, y almacena los diccionarios de datos en un repositorio central. -Finalmente, también automatiza la generación de código a partir de las definiciones de las interfaces de entrada, y la creación del código inicial de lógica de negocio. +Finalmente, también automatiza la generación de código a partir de las definiciones de las interfaces de entrada, y la creación del código inicial de lógica de negocio. -### ¿Cómo Funciona? -El diseño de Huemul BigDataGovernance está pensado en optimizar el tiempo de desarrollo de los analistas de datos, y al mismo tiempo aumentar la calidad y gobierno de los datos. +### ¿Cómo Funciona? +El diseño de Huemul BigDataGovernance está pensado en optimizar el tiempo de desarrollo de los analistas de datos, y al mismo tiempo aumentar la calidad y gobierno de los datos. -Utilizando una metodología sólida que permite a los ingenieros de tu equipo centrar sus esfuerzos en la definición de las interfaces de entrada, la definición de las tablas y la construcción de los procesos de masterización robustos. +Utilizando una metodología sólida que permite a los ingenieros de tu equipo centrar sus esfuerzos en la definición de las interfaces de entrada, la definición de las tablas y la construcción de los procesos de masterización robustos. ![Branching](https://huemulsolutions.github.io/huemul_pasos.png) -### ¿Cómo se genera el código? -Hay dos formas de generar el código de tu solución +### ¿Cómo se genera el código? +Hay dos formas de generar el código de tu solución -1. La primera forma es generar el código desde cero utilizando los [template que están disponibles acá](https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase). -2. En la segunda forma solo debes crear la definición de tu interfaz de entrada utilizando el código de ejemplo "raw_entidad_mes.scala" (https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase/blob/master/src/main/scala/com/yourcompany/yourapplication/datalake/raw_entidad_mes.scala), y luego puedes generar el código de tu tabla y el código de masterización en forma automática!. 
En el código de la tabla se implementa en forma automática validaciones de calidad de datos, y te permite agregar fácilmente otras validaciones más complejas. +1. La primera forma es generar el código desde cero utilizando los [template que están disponibles acá](https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase). +2. En la segunda forma solo debes crear la definición de tu interfaz de entrada utilizando el código de ejemplo "raw_entidad_mes.scala" (https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase/blob/master/src/main/scala/com/yourcompany/yourapplication/datalake/raw_entidad_mes.scala), y luego puedes generar el código de tu tabla y el código de masterización en forma automática!. En el código de la tabla se implementa en forma automática validaciones de calidad de datos, y te permite agregar fácilmente otras validaciones más complejas. ![Branching](https://HuemulSolutions.github.io/huemul_flujo_genera_codigo.png) ### Acelera los desarrollos en 5X y mejora la calidad de datos! -¿Sabías que, en promedio, deberías aplicar como mínimo 3 reglas de calidad de datos por cada columna?, es decir, en una tabla con 10 columnas deberías programar más de 30 reglas de validación (son más de 300 líneas de código si programas cada regla en 10 líneas). y esto es solo para asegurar la validez de tus datos, sin contar reglas de integridad, completitud y precisión.... **y aún no has aplicado ninguna regla de transformación de negocio** +¿Sabías que, en promedio, deberías aplicar como mínimo 3 reglas de calidad de datos por cada columna?, es decir, en una tabla con 10 columnas deberías programar más de 30 reglas de validación (son más de 300 líneas de código si programas cada regla en 10 líneas). y esto es solo para asegurar la validez de tus datos, sin contar reglas de integridad, completitud y precisión.... **y aún no has aplicado ninguna regla de transformación de negocio** -Con Huemul BigDataGovernance, esas 300 líneas de código se reducen a 30 (1 línea por cada validación)... y además te entrega de forma automática documentación de tu proyecto. +Con Huemul BigDataGovernance, esas 300 líneas de código se reducen a 30 (1 línea por cada validación)... y además te entrega de forma automática documentación de tu proyecto. ### Simplicidad y Eficiencia -Huemul BigDataGovernance permite reducir en forma importante el tiempo de desarrollo de tus proyectos BigData, aumentando la calidad de los datos, **en el mismo código se definen las estructuras de datos, se crea automáticamente el diccionarios de datos, trazabilidad de la información, reglas de data quality, planes de pruebas y criterios de negocio, TODO AL MISMO TIEMPO!** +Huemul BigDataGovernance permite reducir en forma importante el tiempo de desarrollo de tus proyectos BigData, aumentando la calidad de los datos, **en el mismo código se definen las estructuras de datos, se crea automáticamente el diccionarios de datos, trazabilidad de la información, reglas de data quality, planes de pruebas y criterios de negocio, TODO AL MISMO TIEMPO!** -Toda la **documentación del proyecto siempre estará actualizada**, cada vez que se ejecuta el código en producción se actualizan los diccionarios y respositorios de trazabilidad, nunca más tendrás que preocuparte por actualizar manualmente la documentación. 
+Toda la **documentación del proyecto siempre estará actualizada**, cada vez que se ejecuta el código en producción se actualizan los diccionarios y respositorios de trazabilidad, nunca más tendrás que preocuparte por actualizar manualmente la documentación. -### Modelo de Operación Basado en Buenas Prácticas de Gobierno de Datos -La implementación de todas estas etapas puede tardar más de una semana, con Huemul BigDataGovernance lo puedes hacer en unas pocas horas. +### Modelo de Operación Basado en Buenas Prácticas de Gobierno de Datos +La implementación de todas estas etapas puede tardar más de una semana, con Huemul BigDataGovernance lo puedes hacer en unas pocas horas. ![Branching](https://HuemulSolutions.github.io/huemul_ciclocompleto.png) -Debido al tiempo que demora implementar todas estas estapas, en la práctica solo se logra trabajar en la lógica de negocio sin DataQuality, los planes de pruebas y documentación de los procesos nunca se complentan adecuadamente, esto poniendo en riesgo el éxito de las soluciones analíticas. +Debido al tiempo que demora implementar todas estas estapas, en la práctica solo se logra trabajar en la lógica de negocio sin DataQuality, los planes de pruebas y documentación de los procesos nunca se complentan adecuadamente, esto poniendo en riesgo el éxito de las soluciones analíticas. -La construcción de Huemul BigDataGovernance está basada en las buenas prácticas descritas en el DAMA-DMBOK2 ([Data Management Body Of Knowledge](www.dama.org)), y permite agilizar el desarrollo de proyectos de BigData a nivel corporativo. +La construcción de Huemul BigDataGovernance está basada en las buenas prácticas descritas en el DAMA-DMBOK2 ([Data Management Body Of Knowledge](www.dama.org)), y permite agilizar el desarrollo de proyectos de BigData a nivel corporativo. -### Metodología Flexible -El uso de la librería permite desarrollar en forma flexible tus proyectos de BigData. Trabajar directamente sobre los datos en bruto es una buena opción si tienes un proyecto puntual sobre una interfaz en particular, las transformaciones y validaciones que hagan no serán utilizadas por el resto de la organización (desde "A" hasta "D"). Si quieres juntar muchos datos desde distintas fuentes, la mejor estrategia será generar una base consolidada, donde el DataQuality sea implementado una sola vez, y toda la organización pueda acceder a los datos a validados. +### Metodología Flexible +El uso de la librería permite desarrollar en forma flexible tus proyectos de BigData. Trabajar directamente sobre los datos en bruto es una buena opción si tienes un proyecto puntual sobre una interfaz en particular, las transformaciones y validaciones que hagan no serán utilizadas por el resto de la organización (desde "A" hasta "D"). Si quieres juntar muchos datos desde distintas fuentes, la mejor estrategia será generar una base consolidada, donde el DataQuality sea implementado una sola vez, y toda la organización pueda acceder a los datos a validados. 
-Nuestra metodología permite implementar ambas estrategias a la vez +Nuestra metodología permite implementar ambas estrategias a la vez ![Branching](https://HuemulSolutions.github.io/huemul_metodologia.png) From 099165e3636b3f745c652ae3c25c256377cbd5fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Sun, 8 Mar 2020 17:25:34 -0300 Subject: [PATCH 14/30] =?UTF-8?q?#93=20correcci=C3=B3n=20link=20a=20dama?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index afaceae..3a35bb4 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ Utilizando una metodolog ### ¿Cómo se genera el código? Hay dos formas de generar el código de tu solución -1. La primera forma es generar el código desde cero utilizando los [template que están disponibles acá](https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase). -2. En la segunda forma solo debes crear la definición de tu interfaz de entrada utilizando el código de ejemplo "raw_entidad_mes.scala" (https://github.com/HuemulSolutions/BigDataGovernance_2.3_TemplateBase/blob/master/src/main/scala/com/yourcompany/yourapplication/datalake/raw_entidad_mes.scala), y luego puedes generar el código de tu tabla y el código de masterización en forma automática!. En el código de la tabla se implementa en forma automática validaciones de calidad de datos, y te permite agregar fácilmente otras validaciones más complejas. +1. La primera forma es generar el código desde cero utilizando los [template que están disponibles acá](https://github.com/HuemulSolutions/BigDataGovernance_2.4_TemplateBase). +2. En la segunda forma solo debes crear la definición de tu interfaz de entrada utilizando el código de ejemplo "raw_entidad_mes.scala" (https://github.com/HuemulSolutions/BigDataGovernance_2.4_TemplateBase/blob/master/src/main/scala/com/yourcompany/yourapplication/datalake/raw_entidad_mes.scala), y luego puedes generar el código de tu tabla y el código de masterización en forma automática!. En el código de la tabla se implementa en forma automática validaciones de calidad de datos, y te permite agregar fácilmente otras validaciones más complejas. ![Branching](https://HuemulSolutions.github.io/huemul_flujo_genera_codigo.png) @@ -40,7 +40,7 @@ La implementaci Debido al tiempo que demora implementar todas estas estapas, en la práctica solo se logra trabajar en la lógica de negocio sin DataQuality, los planes de pruebas y documentación de los procesos nunca se complentan adecuadamente, esto poniendo en riesgo el éxito de las soluciones analíticas. -La construcción de Huemul BigDataGovernance está basada en las buenas prácticas descritas en el DAMA-DMBOK2 ([Data Management Body Of Knowledge](www.dama.org)), y permite agilizar el desarrollo de proyectos de BigData a nivel corporativo. +La construcción de Huemul BigDataGovernance está basada en las buenas prácticas descritas en el DAMA-DMBOK2 ([Data Management Body Of Knowledge](http://www.dama.org)), y permite agilizar el desarrollo de proyectos de BigData a nivel corporativo. ### Metodología Flexible El uso de la librería permite desarrollar en forma flexible tus proyectos de BigData. Trabajar directamente sobre los datos en bruto es una buena opción si tienes un proyecto puntual sobre una interfaz en particular, las transformaciones y validaciones que hagan no serán utilizadas por el resto de la organización (desde "A" hasta "D"). 
Si quieres juntar muchos datos desde distintas fuentes, la mejor estrategia será generar una base consolidada, donde el DataQuality sea implementado una sola vez, y toda la organización pueda acceder a los datos a validados. From e38e4772c9f1ba0937acd3ad345268362ba2a7f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Sun, 8 Mar 2020 18:09:50 -0300 Subject: [PATCH 15/30] =?UTF-8?q?Actualiza=20versi=C3=B3n=20JDK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 0d2ed0b..759db41 100644 --- a/pom.xml +++ b/pom.xml @@ -118,11 +118,11 @@ Finalmente, también automatiza la generación de código a partir de las defini jdk.tools jdk.tools - 1.7.0_05 + 1.7 system ${JAVA_HOME}/lib/tools.jar - + org.apache.hbase hbase-client From 8522fd1633b6975a75bbce0030efa871de4e34ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Sun, 8 Mar 2020 22:46:19 -0300 Subject: [PATCH 16/30] Agrega clase huemulType_cloudProvider con opciones de nube --- .../bigdata/common/huemulType_cloudProvider.scala | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala new file mode 100644 index 0000000..3321604 --- /dev/null +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala @@ -0,0 +1,7 @@ +package com.huemulsolutions.bigdata.common + +object huemulType_cloudProvider extends Enumeration { + type huemulType_cloudProvider = Value + val None, azure_databricks = Value +} + From e65f9cb5a7baf19e0ace4d371532d5f6c148fc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Sun, 8 Mar 2020 22:46:56 -0300 Subject: [PATCH 17/30] =?UTF-8?q?m=C3=A9todo=20setCloudProvider=20y=20getC?= =?UTF-8?q?loudProvider?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bigdata/common/huemul_GlobalPath.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala index b908ac4..8ce0648 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala @@ -1,6 +1,7 @@ package com.huemulsolutions.bigdata.common import scala.collection.mutable.ArrayBuffer +import com.huemulsolutions.bigdata.common.huemulType_cloudProvider._ class huemul_KeyValuePath(Environment: String, PathOrDataBase: String) extends Serializable { /**example: "prod, desa, qa" @@ -11,6 +12,7 @@ class huemul_KeyValuePath(Environment: String, PathOrDataBase: String) extends S val Value: String = PathOrDataBase } + class huemul_GlobalPath() extends Serializable { /**example: "prod, desa, qa" **/ @@ -78,6 +80,13 @@ class huemul_GlobalPath() extends Serializable { //set > 1 to cache hive metadata var HIVE_HourToUpdateMetadata: Integer = 0 + //from 2.4 --> cloud provider for technical configuration + private var _cloudProvider: huemulType_cloudProvider = huemulType_cloudProvider.None + def getCloudProvider(): huemulType_cloudProvider = 
{return _cloudProvider} + def setCloudProvider(value: huemulType_cloudProvider) { + _cloudProvider = value + } + //FROM 2.2 //Add Hbase available private var _HBase_available: Boolean = false From d44549e8cc8d5520b922c6188a86b299fa15144b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Mon, 9 Mar 2020 01:56:56 -0300 Subject: [PATCH 18/30] =?UTF-8?q?Crea=20enumeraci=C3=B3n=20para=20identifi?= =?UTF-8?q?car=20el=20proveedor=20de=20ecosistema=20bigData?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bigdata/common/huemulType_bigDataProvider.scala | 7 +++++++ .../bigdata/common/huemulType_cloudProvider.scala | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 src/main/scala/com/huemulsolutions/bigdata/common/huemulType_bigDataProvider.scala delete mode 100644 src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_bigDataProvider.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_bigDataProvider.scala new file mode 100644 index 0000000..d731f94 --- /dev/null +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_bigDataProvider.scala @@ -0,0 +1,7 @@ +package com.huemulsolutions.bigdata.common + +object huemulType_bigDataProvider extends Enumeration { + type huemulType_bigDataProvider = Value + val None, databricks = Value +} + diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala deleted file mode 100644 index 3321604..0000000 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemulType_cloudProvider.scala +++ /dev/null @@ -1,7 +0,0 @@ -package com.huemulsolutions.bigdata.common - -object huemulType_cloudProvider extends Enumeration { - type huemulType_cloudProvider = Value - val None, azure_databricks = Value -} - From a01459d53cbb06ea7b8b9a728deb281649c2db88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Mon, 9 Mar 2020 01:57:15 -0300 Subject: [PATCH 19/30] =?UTF-8?q?crea=20m=C3=A9todos=20para=20configurar?= =?UTF-8?q?=20proveedor=20de=20ecosistema=20bigData?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bigdata/common/huemul_GlobalPath.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala index 8ce0648..1a69ab5 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_GlobalPath.scala @@ -1,7 +1,7 @@ package com.huemulsolutions.bigdata.common import scala.collection.mutable.ArrayBuffer -import com.huemulsolutions.bigdata.common.huemulType_cloudProvider._ +import com.huemulsolutions.bigdata.common.huemulType_bigDataProvider._ class huemul_KeyValuePath(Environment: String, PathOrDataBase: String) extends Serializable { /**example: "prod, desa, qa" @@ -80,11 +80,11 @@ class huemul_GlobalPath() extends Serializable { //set > 1 to cache hive metadata var HIVE_HourToUpdateMetadata: Integer = 0 - //from 2.4 --> cloud provider for technical configuration - private var _cloudProvider: huemulType_cloudProvider = huemulType_cloudProvider.None - def 
getCloudProvider(): huemulType_cloudProvider = {return _cloudProvider} - def setCloudProvider(value: huemulType_cloudProvider) { - _cloudProvider = value + //from 2.4 --> bigData provider for technical configuration + private var _bigDataProvider: huemulType_bigDataProvider = huemulType_bigDataProvider.None + def getBigDataProvider(): huemulType_bigDataProvider = {return _bigDataProvider} + def setBigDataProvider(value: huemulType_bigDataProvider) { + _bigDataProvider = value } //FROM 2.2 From d795cb619f38562eb4cc42b93f32c2b136a71597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Mon, 9 Mar 2020 01:58:17 -0300 Subject: [PATCH 20/30] Implementa ajustes para introducir especificaciones de databricks --- .../common/huemul_BigDataGovernance.scala | 11 +- .../bigdata/tables/huemul_Table.scala | 159 ++++++++++++------ 2 files changed, 118 insertions(+), 52 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala index d6fcd30..04206d0 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala @@ -519,10 +519,13 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett return GlobalSettings.GetDataBase(this, dataBaseFromGlobal) } - def close() { + def close(stopSpark: Boolean) { application_closeAll(this.IdApplication) this.spark.catalog.clearCache() - this.spark.close() + if (stopSpark) { + this.spark.close() + this.spark.stop() + } if (RegisterInControl) this.CONTROL_connection.connection.close() if (ImpalaEnabled) this.impala_connection.connection.close() @@ -530,6 +533,10 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett val connHIVE = GlobalSettings.externalBBDD_conf.Using_HIVE.getJDBC_connection(this) connHIVE.connection.close() } + } + + def close() { + close(if (GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) false else true) } diff --git a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala index 8d98de7..1a39634 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala @@ -1109,6 +1109,10 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C if (_PartitionField != null && _PartitionField.toUpperCase() != x.getName.toUpperCase()) { ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" coma = "," + } else if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + //from 2.4 --> add partitioned field + ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" + coma = "," } } @@ -1860,26 +1864,48 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C var Field = x.get(this).asInstanceOf[huemul_Columns] if (_PartitionField.toUpperCase() == x.getName().toUpperCase() ) { + if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + PartitionForCreateTable += s"${coma_partition}${_PartitionField}" //without datatype + } else { PartitionForCreateTable += s"${coma_partition}${_PartitionField} ${Field.DataType.sql}" + } coma_partition = "," } } var lCreateTableScript: 
String = "" - if (getStorageType == huemulType_StorageType.PARQUET || getStorageType == huemulType_StorageType.ORC) { - //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) - lCreateTableScript = s""" - CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) - ${if (_PartitionField.length() > 0) s"PARTITIONED BY (${PartitionForCreateTable})" else "" } - STORED AS ${getStorageType.toString()} - LOCATION '${getFullNameWithPath()}'""" - } else if (getStorageType == huemulType_StorageType.HBASE) { - lCreateTableScript = s""" - CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) - ROW FORMAT SERDE 'org.apache.hadoop.hive.hbase.HBaseSerDe' - STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' - WITH SERDEPROPERTIES ("hbase.columns.mapping"="${getHBaseCatalogForHIVE(huemulType_InternalTableType.Normal)}") - TBLPROPERTIES ("hbase.table.name"="${getHBaseNamespace(huemulType_InternalTableType.Normal)}:${getHBaseTableName(huemulType_InternalTableType.Normal)}")""" + //FROM 2.4 --> INCLUDE SPECIAL OPTIONS FOR DATABRICKS + if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + if (getStorageType == huemulType_StorageType.PARQUET || getStorageType == huemulType_StorageType.ORC) { + //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) + lCreateTableScript = s""" + CREATE TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) + USING ${getStorageType.toString()} + ${if (_PartitionField.length() > 0) s"PARTITIONED BY (${PartitionForCreateTable})" else "" } + LOCATION '${getFullNameWithPath()}'""" + } else if (getStorageType == huemulType_StorageType.HBASE) { + lCreateTableScript = s""" + CREATE TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) + USING 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ("hbase.columns.mapping"="${getHBaseCatalogForHIVE(huemulType_InternalTableType.Normal)}") + TBLPROPERTIES ("hbase.table.name"="${getHBaseNamespace(huemulType_InternalTableType.Normal)}:${getHBaseTableName(huemulType_InternalTableType.Normal)}")""" + } + } else { + if (getStorageType == huemulType_StorageType.PARQUET || getStorageType == huemulType_StorageType.ORC) { + //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) + lCreateTableScript = s""" + CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) + ${if (_PartitionField.length() > 0) s"PARTITIONED BY (${PartitionForCreateTable})" else "" } + STORED AS ${getStorageType.toString()} + LOCATION '${getFullNameWithPath()}'""" + } else if (getStorageType == huemulType_StorageType.HBASE) { + lCreateTableScript = s""" + CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) + ROW FORMAT SERDE 'org.apache.hadoop.hive.hbase.HBaseSerDe' + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ("hbase.columns.mapping"="${getHBaseCatalogForHIVE(huemulType_InternalTableType.Normal)}") + TBLPROPERTIES 
("hbase.table.name"="${getHBaseNamespace(huemulType_InternalTableType.Normal)}:${getHBaseTableName(huemulType_InternalTableType.Normal)}")""" + } } if (huemulBigDataGov.DebugMode) @@ -1894,18 +1920,32 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C private def DF_CreateTable_DQ_Script(): String = { var coma_partition = "" - var PartitionForCreateTable = s"dq_control_id STRING" + var PartitionForCreateTable = if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) s"dq_control_id" else s"dq_control_id STRING" var lCreateTableScript: String = "" - if (getStorageType_DQResult == huemulType_StorageType.PARQUET || getStorageType_DQResult == huemulType_StorageType.ORC) { - //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) - lCreateTableScript = s""" - CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.DQ)} (${getColumns_CreateTable(true, huemulType_InternalTableType.DQ) }) - PARTITIONED BY (${PartitionForCreateTable}) - STORED AS ${getStorageType_DQResult.toString()} - LOCATION '${getFullNameWithPath_DQ()}'""" - } else if (getStorageType_DQResult == huemulType_StorageType.HBASE) { - raiseError("huemul_Table Error: HBase is not available for DQ Table", 1061) + //FROM 2.4 --> INCLUDE SPECIAL OPTIONS FOR DATABRICKS + if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + if (getStorageType_DQResult == huemulType_StorageType.PARQUET || getStorageType_DQResult == huemulType_StorageType.ORC) { + //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) + lCreateTableScript = s""" + CREATE TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.DQ)} (${getColumns_CreateTable(true, huemulType_InternalTableType.DQ) }) + USING ${getStorageType_DQResult.toString()} + PARTITIONED BY (${PartitionForCreateTable}) + LOCATION '${getFullNameWithPath_DQ()}'""" + } else if (getStorageType_DQResult == huemulType_StorageType.HBASE) { + raiseError("huemul_Table Error: HBase is not available for DQ Table", 1061) + } + } else { + if (getStorageType_DQResult == huemulType_StorageType.PARQUET || getStorageType_DQResult == huemulType_StorageType.ORC) { + //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) + lCreateTableScript = s""" + CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.DQ)} (${getColumns_CreateTable(true, huemulType_InternalTableType.DQ) }) + PARTITIONED BY (${PartitionForCreateTable}) + STORED AS ${getStorageType_DQResult.toString()} + LOCATION '${getFullNameWithPath_DQ()}'""" + } else if (getStorageType_DQResult == huemulType_StorageType.HBASE) { + raiseError("huemul_Table Error: HBase is not available for DQ Table", 1061) + } } if (huemulBigDataGov.DebugMode) @@ -1922,32 +1962,51 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C private def DF_CreateTable_OldValueTrace_Script(): String = { var coma_partition = "" - //STORED AS ${_StorageType_OldValueTrace} - //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) - val lCreateTableScript = s""" - CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.OldValueTrace)} (${getColumns_CreateTable(true, huemulType_InternalTableType.OldValueTrace) }) - ${if (getStorageType_OldValueTrace == "csv") {s""" - ROW 
FORMAT DELIMITED - FIELDS TERMINATED BY '\t' - STORED AS TEXTFILE """} - else if (getStorageType_OldValueTrace == "json") { - s""" - ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' - """ - } - else if (getStorageType_OldValueTrace == huemulType_StorageType.PARQUET) { - """PARTITIONED BY (MDM_columnName STRING) - STORED AS PARQUET""" - } - else if (getStorageType_OldValueTrace == huemulType_StorageType.ORC) { - """PARTITIONED BY (MDM_columnName STRING) - STORED AS ORC""" - } - } - LOCATION '${getFullNameWithPath_OldValueTrace()}'""" - //${if (_StorageType_OldValueTrace == "csv") {s""" - //TBLPROPERTIES("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss.SSSZ")"""}}""" - + var lCreateTableScript: String = "" + //FROM 2.4 --> INCLUDE SPECIAL OPTIONS FOR DATABRICKS + if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + lCreateTableScript = s""" + CREATE TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.OldValueTrace)} (${getColumns_CreateTable(true, huemulType_InternalTableType.OldValueTrace) }) + ${if (getStorageType_OldValueTrace == huemulType_StorageType.PARQUET) { + """USING PARQUET + PARTITIONED BY (MDM_columnName)""" + } + else if (getStorageType_OldValueTrace == huemulType_StorageType.ORC) { + """USING ORC + PARTITIONED BY (MDM_columnName)""" + } + } + LOCATION '${getFullNameWithPath_OldValueTrace()}'""" + //${if (_StorageType_OldValueTrace == "csv") {s""" + //TBLPROPERTIES("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss.SSSZ")"""}}""" + } else { + //STORED AS ${_StorageType_OldValueTrace} + //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) + lCreateTableScript = s""" + CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.OldValueTrace)} (${getColumns_CreateTable(true, huemulType_InternalTableType.OldValueTrace) }) + ${if (getStorageType_OldValueTrace == "csv") {s""" + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE """} + else if (getStorageType_OldValueTrace == "json") { + s""" + ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' + """ + } + else if (getStorageType_OldValueTrace == huemulType_StorageType.PARQUET) { + """PARTITIONED BY (MDM_columnName STRING) + STORED AS PARQUET""" + } + else if (getStorageType_OldValueTrace == huemulType_StorageType.ORC) { + """PARTITIONED BY (MDM_columnName STRING) + STORED AS ORC""" + } + } + LOCATION '${getFullNameWithPath_OldValueTrace()}'""" + //${if (_StorageType_OldValueTrace == "csv") {s""" + //TBLPROPERTIES("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss.SSSZ")"""}}""" + } + if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(s"Create Table sentence: ${lCreateTableScript} ") From 98ca5cf35e42fc2eabc2994f57c65c42e95e2c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Mon, 9 Mar 2020 21:49:42 -0300 Subject: [PATCH 21/30] Compatibilidad databriks --- .../bigdata/tables/huemul_Table.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala index 1a39634..ed1eb9f 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala @@ -1092,14 +1092,22 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C if (tableType == 
huemulType_InternalTableType.DQ) { //create StructType - if ("dq_control_id".toUpperCase() != x.getName.toUpperCase()) { + //FROM 2.4 --> INCLUDE PARTITIONED COLUMN IN CREATE TABLE ONLY FOR DATABRIKS COMPATIBILITY + if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" + coma = "," + } else if ("dq_control_id".toUpperCase() != x.getName.toUpperCase()) { ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" coma = "," } } else if (tableType == huemulType_InternalTableType.OldValueTrace) { //create StructType MDM_columnName - if ("MDM_columnName".toUpperCase() != x.getName.toUpperCase()) { + //FROM 2.4 --> INCLUDE PARTITIONED COLUMN IN CREATE TABLE ONLY FOR DATABRIKS COMPATIBILITY + if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" + coma = "," + } else if ("MDM_columnName".toUpperCase() != x.getName.toUpperCase()) { ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" coma = "," } From 92aca40481a0109c8c3580e198884b24558a0c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Mon, 9 Mar 2020 21:49:53 -0300 Subject: [PATCH 22/30] =?UTF-8?q?Mejora=20al=20cerrar=20conexi=C3=B3n=20JD?= =?UTF-8?q?BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bigdata/common/huemul_BigDataGovernance.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala index 04206d0..1798904 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala @@ -520,7 +520,8 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett } def close(stopSpark: Boolean) { - application_closeAll(this.IdApplication) + println(s"this.IdApplication: ${this.IdApplication}, IdApplication: ${IdApplication}") + application_closeAll(IdApplication) this.spark.catalog.clearCache() if (stopSpark) { this.spark.close() @@ -531,7 +532,8 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett if (GlobalSettings.externalBBDD_conf.Using_HIVE.getActive() == true || GlobalSettings.externalBBDD_conf.Using_HIVE.getActiveForHBASE() == true ) { val connHIVE = GlobalSettings.externalBBDD_conf.Using_HIVE.getJDBC_connection(this) - connHIVE.connection.close() + if (connHIVE.connection != null) + connHIVE.connection.close() } } @@ -544,6 +546,7 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett if (RegisterInControl) { val ExecResult1 = CONTROL_connection.ExecuteJDBC_NoResulSet(s"""DELETE FROM control_singleton WHERE application_id = ${ReplaceSQLStringNulls(ApplicationInUse)}""") val ExecResult2 = CONTROL_connection.ExecuteJDBC_NoResulSet(s"""DELETE FROM control_executors WHERE application_id = ${ReplaceSQLStringNulls(ApplicationInUse)}""") + println(s"""DELETE FROM control_executors WHERE application_id = ${ReplaceSQLStringNulls(ApplicationInUse)}""") } } From e12b8212adc557be2db2f3f52cc6f80c1627288b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Tue, 10 Mar 2020 01:18:08 -0300 Subject: [PATCH 23/30] 
=?UTF-8?q?Validaci=C3=B3n=20de=20conexi=C3=B3n=20JD?= =?UTF-8?q?BC=20al=20cerrar.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bigdata/common/huemul_BigDataGovernance.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala index 1798904..8ed310f 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala @@ -532,8 +532,10 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett if (GlobalSettings.externalBBDD_conf.Using_HIVE.getActive() == true || GlobalSettings.externalBBDD_conf.Using_HIVE.getActiveForHBASE() == true ) { val connHIVE = GlobalSettings.externalBBDD_conf.Using_HIVE.getJDBC_connection(this) - if (connHIVE.connection != null) - connHIVE.connection.close() + if (connHIVE != null) { + if (connHIVE.connection != null) + connHIVE.connection.close() + } } } From 610801b032e368ef34e818680109dcdff9113395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Tue, 10 Mar 2020 01:18:26 -0300 Subject: [PATCH 24/30] #96 incorpora nuevo tipo de almacenamieto DELTA --- .../huemulsolutions/bigdata/tables/huemulType_StorageType.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/tables/huemulType_StorageType.scala b/src/main/scala/com/huemulsolutions/bigdata/tables/huemulType_StorageType.scala index fdea001..67f7af9 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/tables/huemulType_StorageType.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/tables/huemulType_StorageType.scala @@ -4,5 +4,5 @@ package com.huemulsolutions.bigdata.tables object huemulType_StorageType extends Enumeration { type huemulType_StorageType = Value - val PARQUET, ORC, AVRO, HBASE = Value + val PARQUET, ORC, AVRO, HBASE, DELTA = Value } From 27c140115872efe83535552c7aa77a3ac938dcc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Tue, 10 Mar 2020 01:18:43 -0300 Subject: [PATCH 25/30] #96 incorpora DELTA como almacenamiento. 
--- .../bigdata/tables/huemul_Table.scala | 112 +++++++++++++----- 1 file changed, 80 insertions(+), 32 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala index ed1eb9f..e56a110 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala @@ -709,8 +709,8 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C raiseError(s"huemul_Table Error: PartitionField shouldn't be defined if TableType is ${this._TableType}",1036) //from 2.2 --> validate tableType with Format - if (this._TableType == huemulType_Tables.Transaction && !(this.getStorageType == huemulType_StorageType.PARQUET || this.getStorageType == huemulType_StorageType.ORC)) - raiseError(s"huemul_Table Error: Transaction Tables only available with PARQUET or ORC StorageType ",1057) + if (this._TableType == huemulType_Tables.Transaction && !(this.getStorageType == huemulType_StorageType.PARQUET || this.getStorageType == huemulType_StorageType.ORC || this.getStorageType == huemulType_StorageType.DELTA)) + raiseError(s"huemul_Table Error: Transaction Tables only available with PARQUET, DELTA or ORC StorageType ",1057) //Fron 2.2 --> validate tableType HBASE and turn on globalSettings if (this.getStorageType == huemulType_StorageType.HBASE && !huemulBigDataGov.GlobalSettings.getHBase_available) @@ -1092,7 +1092,7 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C if (tableType == huemulType_InternalTableType.DQ) { //create StructType - //FROM 2.4 --> INCLUDE PARTITIONED COLUMN IN CREATE TABLE ONLY FOR DATABRIKS COMPATIBILITY + //FROM 2.4 --> INCLUDE PARTITIONED COLUMN IN CREATE TABLE ONLY FOR databricks COMPATIBILITY if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" coma = "," @@ -1103,7 +1103,7 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C } else if (tableType == huemulType_InternalTableType.OldValueTrace) { //create StructType MDM_columnName - //FROM 2.4 --> INCLUDE PARTITIONED COLUMN IN CREATE TABLE ONLY FOR DATABRIKS COMPATIBILITY + //FROM 2.4 --> INCLUDE PARTITIONED COLUMN IN CREATE TABLE ONLY FOR databricks COMPATIBILITY if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { ColumnsCreateTable += s"$coma${x.getName} ${DataTypeLocal} \n" coma = "," @@ -1884,7 +1884,7 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C var lCreateTableScript: String = "" //FROM 2.4 --> INCLUDE SPECIAL OPTIONS FOR DATABRICKS if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { - if (getStorageType == huemulType_StorageType.PARQUET || getStorageType == huemulType_StorageType.ORC) { + if (getStorageType == huemulType_StorageType.PARQUET || getStorageType == huemulType_StorageType.ORC || getStorageType == huemulType_StorageType.DELTA) { //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) lCreateTableScript = s""" CREATE TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) @@ -1899,7 +1899,7 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C TBLPROPERTIES 
("hbase.table.name"="${getHBaseNamespace(huemulType_InternalTableType.Normal)}:${getHBaseTableName(huemulType_InternalTableType.Normal)}")""" } } else { - if (getStorageType == huemulType_StorageType.PARQUET || getStorageType == huemulType_StorageType.ORC) { + if (getStorageType == huemulType_StorageType.PARQUET || getStorageType == huemulType_StorageType.ORC || getStorageType == huemulType_StorageType.DELTA) { //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) lCreateTableScript = s""" CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.Normal)} (${getColumns_CreateTable(true) }) @@ -1933,7 +1933,7 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C var lCreateTableScript: String = "" //FROM 2.4 --> INCLUDE SPECIAL OPTIONS FOR DATABRICKS if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { - if (getStorageType_DQResult == huemulType_StorageType.PARQUET || getStorageType_DQResult == huemulType_StorageType.ORC) { + if (getStorageType_DQResult == huemulType_StorageType.PARQUET || getStorageType_DQResult == huemulType_StorageType.ORC ) { //get from: https://docs.databricks.com/user-guide/tables.html (see Create Partitioned Table section) lCreateTableScript = s""" CREATE TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.DQ)} (${getColumns_CreateTable(true, huemulType_InternalTableType.DQ) }) @@ -1942,6 +1942,13 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C LOCATION '${getFullNameWithPath_DQ()}'""" } else if (getStorageType_DQResult == huemulType_StorageType.HBASE) { raiseError("huemul_Table Error: HBase is not available for DQ Table", 1061) + } else if (getStorageType_DQResult == huemulType_StorageType.DELTA) { + //for delta, databricks get all columns and partition columns + //see https://docs.databricks.com/spark/latest/spark-sql/language-manual/create-table.html + lCreateTableScript = s""" + CREATE TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.DQ)} + USING ${getStorageType_DQResult.toString()} + LOCATION '${getFullNameWithPath_DQ()}'""" } } else { if (getStorageType_DQResult == huemulType_StorageType.PARQUET || getStorageType_DQResult == huemulType_StorageType.ORC) { @@ -1953,6 +1960,11 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C LOCATION '${getFullNameWithPath_DQ()}'""" } else if (getStorageType_DQResult == huemulType_StorageType.HBASE) { raiseError("huemul_Table Error: HBase is not available for DQ Table", 1061) + } else if (getStorageType_DQResult == huemulType_StorageType.DELTA) { + lCreateTableScript = s""" + CREATE EXTERNAL TABLE IF NOT EXISTS ${internalGetTable(huemulType_InternalTableType.DQ)} + STORED AS ${getStorageType_DQResult.toString()} + LOCATION '${getFullNameWithPath_DQ()}'""" } } @@ -1983,6 +1995,10 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C """USING ORC PARTITIONED BY (MDM_columnName)""" } + else if (getStorageType_OldValueTrace == huemulType_StorageType.DELTA) { + """USING DELTA + PARTITIONED BY (MDM_columnName)""" + } } LOCATION '${getFullNameWithPath_OldValueTrace()}'""" //${if (_StorageType_OldValueTrace == "csv") {s""" @@ -2009,6 +2025,10 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C """PARTITIONED BY (MDM_columnName STRING) STORED AS ORC""" } + else if (getStorageType_OldValueTrace == 
huemulType_StorageType.DELTA) { + """PARTITIONED BY (MDM_columnName STRING) + STORED AS DELTA""" + } } LOCATION '${getFullNameWithPath_OldValueTrace()}'""" //${if (_StorageType_OldValueTrace == "csv") {s""" @@ -2674,6 +2694,13 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(s"copy to temp dir: $tempPath ") } + if (this.getStorageType == huemulType_StorageType.DELTA) { + val fsPath = new org.apache.hadoop.fs.Path(tempPath) + if (fs.exists(fsPath)){ + fs.delete(fsPath, true) + } + } + if (this.getNumPartitions == null || this.getNumPartitions <= 0) DFTempCopy.write.mode(SaveMode.Overwrite).format(this.getStorageType.toString()).save(tempPath) //2.2 -> this._StorageType.toString() instead of "parquet" else @@ -3075,9 +3102,10 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C org.apache.hadoop.fs.FileUtil.copy(fs, ProdFullPath, fs, ManualFullPath, false, true, huemulBigDataGov.spark.sparkContext.hadoopConfiguration) val DestTableName: String = InternalGetTable(DestEnvironment) - huemulBigDataGov.logMessageInfo(s"MSCK REPAIR TABLE ${DestTableName}") - huemulBigDataGov.spark.sql(s"MSCK REPAIR TABLE ${DestTableName}") - + if (this.getStorageType != huemulType_StorageType.DELTA) { + huemulBigDataGov.logMessageInfo(s"MSCK REPAIR TABLE ${DestTableName}") + huemulBigDataGov.spark.sql(s"MSCK REPAIR TABLE ${DestTableName}") + } } else { val ProdFullPath = new org.apache.hadoop.fs.Path(s"${getFullNameWithPath()}/${_PartitionField.toLowerCase()}") val ManualFullPath = new org.apache.hadoop.fs.Path(s"${getFullNameWithPath2(DestEnvironment)}/${_PartitionField.toLowerCase()}") @@ -3282,10 +3310,23 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C } else { this.PartitionValue = DFDistinct(0).getAs[String](_PartitionField) val FullPath = new org.apache.hadoop.fs.Path(s"${getFullNameWithPath()}/${_PartitionField.toLowerCase()}=${this.PartitionValue}") + val fs = FileSystem.get(huemulBigDataGov.spark.sparkContext.hadoopConfiguration) - LocalControl.NewStep("Save: Drop old partition") - val fs = FileSystem.get(huemulBigDataGov.spark.sparkContext.hadoopConfiguration) - fs.delete(FullPath, true) + //if (huemulBigDataGov.GlobalSettings.getBigDataProvider() == huemulType_bigDataProvider.databricks) { + if (this.getStorageType == huemulType_StorageType.DELTA) { + //FROM 2.4 --> NEW DELETE FOR DATABRICKS + LocalControl.NewStep("Save: Drop old partition") + + val FullPath = new org.apache.hadoop.fs.Path(getFullNameWithPath()) + if (fs.exists(FullPath)) { + val strSQL_delete: String = s"DELETE FROM delta.`${getFullNameWithPath}` WHERE ${_PartitionField} = '${this.PartitionValue}' " + if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(strSQL_delete) + val dfDelete = huemulBigDataGov.spark.sql(strSQL_delete) + } + } else { + LocalControl.NewStep("Save: Drop old partition") + fs.delete(FullPath, true) + } if (this.getNumPartitions > 0) { LocalControl.NewStep("Save: Set num FileParts") @@ -3342,14 +3383,16 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C //Hive read partitioning metadata, see https://docs.databricks.com/user-guide/tables.html val _tableName : String = internalGetTable(huemulType_InternalTableType.Normal) if (_PartitionField != null && _PartitionField != "") { - LocalControl.NewStep("Save: Repair Hive Metadata") - val _refreshTable: String = s"MSCK REPAIR TABLE ${_tableName}" - if 
(huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(_refreshTable) - //new from 2.3 - if (getStorageType == huemulType_StorageType.HBASE) { - runSQLexternalTable(_refreshTable, true) - } else { - runSQLexternalTable(_refreshTable, false) + if (this.getStorageType != huemulType_StorageType.DELTA) { + LocalControl.NewStep("Save: Repair Hive Metadata") + val _refreshTable: String = s"MSCK REPAIR TABLE ${_tableName}" + if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(_refreshTable) + //new from 2.3 + if (getStorageType == huemulType_StorageType.HBASE) { + runSQLexternalTable(_refreshTable, true) + } else { + runSQLexternalTable(_refreshTable, false) + } } } @@ -3394,7 +3437,7 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C try { LocalControl.NewStep("Save: OldVT Result: Saving Old Value Trace result") if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(s"saving path: ${getFullNameWithPath_OldValueTrace()} ") - if (getStorageType_OldValueTrace == huemulType_StorageType.PARQUET || getStorageType_OldValueTrace == huemulType_StorageType.ORC){ + if (getStorageType_OldValueTrace == huemulType_StorageType.PARQUET || getStorageType_OldValueTrace == huemulType_StorageType.ORC || getStorageType_OldValueTrace == huemulType_StorageType.DELTA){ DF_Final.write.mode(SaveMode.Append).partitionBy("MDM_columnName").format(getStorageType_OldValueTrace.toString()).save(getFullNameWithPath_OldValueTrace()) //DF_Final.coalesce(numPartitionsForDQFiles).write.mode(SaveMode.Append).partitionBy("MDM_columnName").format(getStorageType_OldValueTrace.toString()).save(getFullNameWithPath_OldValueTrace()) //DF_Final.coalesce(numPartitionsForDQFiles).write.mode(SaveMode.Append).format(_StorageType_OldValueTrace).save(GetFullNameWithPath_OldValueTrace()) @@ -3434,11 +3477,13 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C //Hive read partitioning metadata, see https://docs.databricks.com/user-guide/tables.html val _tableNameOldValueTrace: String = internalGetTable(huemulType_InternalTableType.OldValueTrace) - LocalControl.NewStep("Save: OldVT Result: Repair Hive Metadata") - val _refreshTable: String = s"MSCK REPAIR TABLE ${_tableNameOldValueTrace}" - if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(s"REFRESH TABLE ${_tableNameOldValueTrace}") - //new from 2.3 - runSQLexternalTable(_refreshTable, false) + if (this.getStorageType_OldValueTrace != huemulType_StorageType.DELTA) { + LocalControl.NewStep("Save: OldVT Result: Repair Hive Metadata") + val _refreshTable: String = s"MSCK REPAIR TABLE ${_tableNameOldValueTrace}" + if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(s"REFRESH TABLE ${_tableNameOldValueTrace}") + //new from 2.3 + runSQLexternalTable(_refreshTable, false) + } if (huemulBigDataGov.ImpalaEnabled) { LocalControl.NewStep("Save: OldVT Result: refresh Impala Metadata") @@ -3520,11 +3565,14 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C //Hive read partitioning metadata, see https://docs.databricks.com/user-guide/tables.html val _tableNameDQ: String = internalGetTable(huemulType_InternalTableType.DQ) - val _refreshTable: String = s"MSCK REPAIR TABLE ${_tableNameDQ}" - LocalControl.NewStep("Save: Repair Hive Metadata") - if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(_refreshTable) - //new from 2.3 - runSQLexternalTable(_refreshTable, false) + + if (this.getStorageType_DQResult != huemulType_StorageType.DELTA) { + val 
_refreshTable: String = s"MSCK REPAIR TABLE ${_tableNameDQ}" + LocalControl.NewStep("Save: Repair Hive Metadata") + if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(_refreshTable) + //new from 2.3 + runSQLexternalTable(_refreshTable, false) + } if (huemulBigDataGov.ImpalaEnabled) { LocalControl.NewStep("Save: refresh Impala Metadata") From 11f2bbf3d26cb08ba15c1f763eb44c24e5cb78bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Tue, 10 Mar 2020 01:39:18 -0300 Subject: [PATCH 26/30] =?UTF-8?q?#96=20eliminaci=C3=B3n=20de=20tabla=20tem?= =?UTF-8?q?poral=20con=20formatos=20distintos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/huemulsolutions/bigdata/tables/huemul_Table.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala index e56a110..ba7fe1d 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala @@ -2694,11 +2694,9 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C if (huemulBigDataGov.DebugMode) huemulBigDataGov.logMessageDebug(s"copy to temp dir: $tempPath ") } - if (this.getStorageType == huemulType_StorageType.DELTA) { - val fsPath = new org.apache.hadoop.fs.Path(tempPath) - if (fs.exists(fsPath)){ - fs.delete(fsPath, true) - } + val fsPath = new org.apache.hadoop.fs.Path(tempPath) + if (fs.exists(fsPath)){ + fs.delete(fsPath, true) } if (this.getNumPartitions == null || this.getNumPartitions <= 0) From 20d7608b5f68962dca7617398db982a62c279c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Wed, 11 Mar 2020 18:44:47 -0300 Subject: [PATCH 27/30] =?UTF-8?q?Configuraci=C3=B3n=20release=202.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Instalacion/huemul_bdg_2.4_minor.sql | 473 ++++++++++++++++++ .../Instalacion/huemul_cluster_setting_2.4.sh | 44 ++ .../resources/Instalacion/queries pruebas.sql | 39 -- 3 files changed, 517 insertions(+), 39 deletions(-) create mode 100644 src/main/resources/Instalacion/huemul_bdg_2.4_minor.sql create mode 100644 src/main/resources/Instalacion/huemul_cluster_setting_2.4.sh delete mode 100644 src/main/resources/Instalacion/queries pruebas.sql diff --git a/src/main/resources/Instalacion/huemul_bdg_2.4_minor.sql b/src/main/resources/Instalacion/huemul_bdg_2.4_minor.sql new file mode 100644 index 0000000..d316c57 --- /dev/null +++ b/src/main/resources/Instalacion/huemul_bdg_2.4_minor.sql @@ -0,0 +1,473 @@ + +CREATE TABLE control_config ( config_id int + ,version_mayor int + ,version_minor int + ,version_patch int + ,config_dtlog varchar(50) + ,primary key (config_id)); + +create table control_executors ( + application_id varchar(100) + ,idsparkport varchar(50) + ,idportmonitoring varchar(500) + ,executor_dtstart varchar(30) + ,executor_name varchar(100) + ,primary key (application_id) + ); + +create table control_singleton ( + singleton_id varchar(100) + ,application_id varchar(100) + ,singleton_name varchar(100) + ,mdm_fhcreate varchar(30) + ,primary key (singleton_id) + ); + +create table control_area ( + area_id varchar(50) + ,area_idpadre varchar(50) + ,area_name varchar(100) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary 
key (area_id) + ); + +create table control_process ( + process_id varchar(200) + ,area_id varchar(50) + ,process_name varchar(200) + ,process_filename varchar(200) + ,process_description varchar(1000) + ,process_owner varchar(200) + ,process_frequency varchar(50) + ,mdm_manualchange int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (process_id) + ); + +create table control_processexec ( + processexec_id varchar(50) + ,processexec_idparent varchar(50) + ,process_id varchar(200) + ,malla_id varchar(50) + ,application_id varchar(100) + ,processexec_isstart int + ,processexec_iscancelled int + ,processexec_isenderror int + ,processexec_isendok int + ,processexec_dtstart varchar(30) + ,processexec_dtend varchar(30) + ,processexec_durhour int + ,processexec_durmin int + ,processexec_dursec int + ,processexec_whosrun varchar(200) + ,processexec_debugmode int + ,processexec_environment varchar(200) + ,processexec_param_year int + ,processexec_param_month int + ,processexec_param_day int + ,processexec_param_hour int + ,processexec_param_min int + ,processexec_param_sec int + ,processexec_param_others varchar(1000) + ,processexec_huemulversion varchar(50) + ,processexec_controlversion varchar(50) + ,error_id varchar(50) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (processexec_id) + ); + +create table control_processexecparams ( + processexec_id varchar(50) + ,processexecparams_name varchar(500) + ,processexecparams_value varchar(4000) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (processexec_id, processexecparams_name) + ); + +create table control_processexecstep ( + processexecstep_id varchar(50) + ,processexec_id varchar(50) + ,processexecstep_name varchar(200) + ,processexecstep_status varchar(20) + ,processexecstep_dtstart varchar(30) + ,processexecstep_dtend varchar(30) + ,processexecstep_durhour int + ,processexecstep_durmin int + ,processexecstep_dursec int + ,error_id varchar(50) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (processexecstep_id) + ); + + +create table control_query (query_id varchar(50) + ,processexecstep_id varchar(50) + ,processexec_id varchar(50) + ,rawfiles_id varchar(50) + ,rawfilesdet_id varchar(50) + ,table_id varchar(50) + ,query_alias varchar(200) + ,query_sql_from varchar(4000) + ,query_sql_where varchar(4000) + ,query_numerrors int + ,query_autoinc int + ,query_israw int + ,query_isfinaltable int + ,query_isquery int + ,query_isreferenced int + ,query_numrows_real int + ,query_numrows_expected int + ,query_duration_hour int + ,query_duration_min int + ,query_duration_sec int + ,error_id varchar(50) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (query_id) + ); + +create table control_querycolumn (querycol_id varchar(50) + ,query_id varchar(50) + ,rawfilesdet_id varchar(50) + ,column_id varchar(50) + ,querycol_pos int + ,querycol_name varchar(200) + ,querycol_sql varchar(4000) + ,querycol_posstart int + ,querycol_posend int + ,querycol_line int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (querycol_id) + ); + +create index idx_control_querycolumn_i01 on control_querycolumn (query_id, querycol_name); + +create table control_querycolumnori (querycolori_id varchar(50) + ,querycol_id varchar(50) + ,table_idori varchar(50) + ,column_idori varchar(50) + ,rawfilesdet_idori varchar(50) + ,rawfilesdetfields_idori varchar(50) + ,query_idori varchar(50) + ,querycol_idori varchar(50) + 
,querycolori_dbname varchar(200) + ,querycolori_tabname varchar(200) + ,querycolori_tabalias varchar(200) + ,querycolori_colname varchar(200) + ,querycolori_isselect int + ,querycolori_iswhere int + ,querycolori_ishaving int + ,querycolori_isorder int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (querycolori_id) + ); + + +create table control_rawfiles ( + rawfiles_id varchar(50) + ,area_id varchar(50) + ,rawfiles_logicalname varchar(500) + ,rawfiles_groupname varchar(500) + ,rawfiles_description varchar(1000) + ,rawfiles_owner varchar(200) + ,rawfiles_frequency varchar(200) + ,mdm_manualchange int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (rawfiles_id) + ); + +create index idx_control_rawfiles_i01 on control_rawfiles (rawfiles_logicalname, rawfiles_groupname); +create unique index idx_control_rawfiles_i02 on control_rawfiles (rawfiles_logicalname); + +create table control_rawfilesdet ( + rawfilesdet_id varchar(50) + ,rawfiles_id varchar(50) + ,rawfilesdet_startdate varchar(30) + ,rawfilesdet_enddate varchar(30) + ,rawfilesdet_filename varchar(1000) + ,rawfilesdet_localpath varchar(1000) + ,rawfilesdet_globalpath varchar(1000) + ,rawfilesdet_data_colseptype varchar(50) + ,rawfilesdet_data_colsep varchar(50) + ,rawfilesdet_data_headcolstring varchar(4000) + ,rawfilesdet_log_colseptype varchar(50) + ,rawfilesdet_log_colsep varchar(50) + ,rawfilesdet_log_headcolstring varchar(4000) + ,rawfilesdet_log_numrowsfield varchar(200) + ,rawfilesdet_contactname varchar(200) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (rawfilesdet_id) + ); + +create index idx_control_rawfilesdet_i01 on control_rawfilesdet (rawfiles_id, rawfilesdet_startdate); + +create table control_rawfilesdetfields ( + rawfilesdet_id varchar(50) + ,rawfilesdetfields_logicalname varchar(200) + ,rawfilesdetfields_itname varchar(200) + ,rawfilesdetfields_description varchar(1000) + ,rawfilesdetfields_datatype varchar(50) + ,rawfilesdetfields_position int + ,rawfilesdetfields_posini int + ,rawfilesdetfields_posfin int + ,rawfilesdetfields_applytrim int + ,rawfilesdetfields_convertnull int + ,mdm_active int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (rawfilesdet_id, rawfilesdetfields_logicalname) + ); + + +create table control_rawfilesuse ( + rawfilesuse_id varchar(50) + ,rawfiles_id varchar(50) + ,process_id varchar(200) + ,processexec_id varchar(50) + ,rawfilesuse_year int + ,rawfilesuse_month int + ,rawfilesuse_day int + ,rawfilesuse_hour int + ,rawfilesuse_minute int + ,rawfilesuse_second int + ,rawfilesuse_params varchar(1000) + ,rawfiles_fullname varchar(1000) + ,rawfiles_fullpath varchar(1000) + ,rawfiles_numrows varchar(50) + ,rawfiles_headerline varchar(4000) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (rawfilesuse_id) + ); + +create table control_tables ( + table_id varchar(50) + ,area_id varchar(50) + ,table_bbddname varchar(200) + ,table_name varchar(200) + ,table_description varchar(1000) + ,table_businessowner varchar(200) + ,table_itowner varchar(200) + ,table_partitionfield varchar(200) + ,table_tabletype varchar(50) + ,table_compressiontype varchar(50) + ,table_storagetype varchar(50) + ,table_localpath varchar(1000) + ,table_globalpath varchar(1000) + ,table_fullname_dq varchar(1200) + ,table_dq_isused int + ,table_fullname_ovt varchar(1200) + ,table_ovt_isused int + ,table_sqlcreate varchar(4000) + ,table_frequency varchar(200) + ,table_autoincupdate int + 
,table_backup int + ,mdm_manualchange int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (table_id) + ); + +create unique index idx_control_tables_i01 on control_tables (table_bbddname, table_name ); + +create table control_tablesrel ( + tablerel_id varchar(50) + ,table_idpk varchar(50) + ,table_idfk varchar(50) + ,tablefk_namerelationship varchar(100) + ,tablerel_validnull int + ,mdm_manualchange int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (tablerel_id) + ); + +create index idx_control_tablesrel_i01 on control_tablesrel (table_idpk, table_idfk, tablefk_namerelationship); + +create table control_tablesrelcol ( + tablerel_id varchar(50) + ,column_idfk varchar(50) + ,column_idpk varchar(50) + ,mdm_manualchange int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (tablerel_id, column_idfk) + ); + + +create table control_columns ( + column_id varchar(50) + ,table_id varchar(50) + ,column_position int + ,column_name varchar(200) + ,column_description varchar(1000) + ,column_formula varchar(1000) + ,column_datatype varchar(50) + ,column_sensibledata int + ,column_enabledtlog int + ,column_enableoldvalue int + ,column_enableprocesslog int + ,column_enableoldvaluetrace int + ,column_defaultvalue varchar(1000) + ,column_securitylevel varchar(200) + ,column_encrypted varchar(200) + ,column_arco varchar(200) + ,column_nullable int + ,column_ispk int + ,column_isunique int + ,column_dq_minlen int + ,column_dq_maxlen int + ,column_dq_mindecimalvalue decimal(30,10) + ,column_dq_maxdecimalvalue decimal(30,10) + ,column_dq_mindatetimevalue varchar(50) + ,column_dq_maxdatetimevalue varchar(50) + ,column_dq_regexp varchar(1000) + ,column_businessglossary varchar(100) + ,column_responsible varchar(100) + ,mdm_active int + ,mdm_manualchange int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (column_id) + ); + +create index idx_control_columns_i01 on control_columns (table_id, column_name); + + +create table control_tablesuse ( tablesuse_id varchar(50) + ,table_id varchar(50) + ,process_id varchar(200) + ,processexec_id varchar(50) + ,processexecstep_id varchar(50) + ,tableuse_year int + ,tableuse_month int + ,tableuse_day int + ,tableuse_hour int + ,tableuse_minute int + ,tableuse_second int + ,tableuse_params varchar(1000) + ,tableuse_read int + ,tableuse_write int + ,tableuse_numrowsnew int + ,tableuse_numrowsupdate int + ,tableuse_numrowsupdatable int + ,tableuse_numrowsnochange int + ,tableuse_numrowsmarkdelete int + ,tableuse_numrowstotal int + ,tableuse_numrowsexcluded int + ,tableuse_partitionvalue varchar(200) + ,tableuse_pathbackup varchar(1000) + ,tableuse_backupstatus int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (tablesuse_id) + ); + + + CREATE INDEX IDX_control_tablesuse_I01 ON control_tablesuse (tableuse_backupstatus); + + +create table control_dq ( + dq_id varchar(50) + ,table_id varchar(50) + ,process_id varchar(200) + ,processexec_id varchar(50) + ,column_id varchar(50) + ,column_name varchar(200) + ,dq_aliasdf varchar(200) + ,dq_name varchar(200) + ,dq_description varchar(1000) + ,dq_querylevel varchar(50) + ,dq_notification varchar(50) + ,dq_sqlformula varchar(4000) + ,dq_dq_toleranceerror_rows int + ,dq_dq_toleranceerror_percent decimal(30,10) + ,dq_resultdq varchar(4000) + ,dq_errorcode int + ,dq_externalcode varchar(200) + ,dq_numrowsok int + ,dq_numrowserror int + ,dq_numrowstotal int + ,dq_iserror int + ,dq_iswarning int + 
,dq_duration_hour int + ,dq_duration_minute int + ,dq_duration_second int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (dq_id) + ); + + +create table control_error ( + error_id varchar(50) + ,error_message varchar(4000) + ,error_code int + ,error_trace varchar(4000) + ,error_classname varchar(100) + ,error_filename varchar(500) + ,error_linenumber varchar(100) + ,error_methodname varchar(100) + ,error_detail varchar(500) + ,mdm_fhcrea varchar(30) + ,mdm_processname varchar(1000) + ,primary key (error_id) + ); + + +create table control_date ( + date_id varchar(10) + ,date_year int + ,date_month int + ,date_day int + ,date_dayofweek int + ,date_dayname varchar(20) + ,date_monthname varchar(20) + ,date_quarter int + ,date_week int + ,date_isweekend int + ,date_isworkday int + ,date_isbankworkday int + ,date_numworkday int + ,date_numworkdayrev int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (date_id) + ); + + +create table control_testplan ( + testplan_id varchar(200) + ,testplangroup_id varchar(200) + ,processexec_id varchar(200) + ,process_id varchar(200) + ,testplan_name varchar(200) + ,testplan_description varchar(1000) + ,testplan_resultexpected varchar(1000) + ,testplan_resultreal varchar(1000) + ,testplan_isok int + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (testplan_id) + ); + +create index idx_control_testplan_i01 on control_testplan (testplangroup_id, testplan_name); + + +create table control_testplanfeature ( + feature_id varchar(200) + ,testplan_id varchar(200) + ,mdm_fhcreate varchar(30) + ,mdm_processname varchar(200) + ,primary key (feature_id, testplan_id) + ); + diff --git a/src/main/resources/Instalacion/huemul_cluster_setting_2.4.sh b/src/main/resources/Instalacion/huemul_cluster_setting_2.4.sh new file mode 100644 index 0000000..647a02a --- /dev/null +++ b/src/main/resources/Instalacion/huemul_cluster_setting_2.4.sh @@ -0,0 +1,44 @@ +#!/bin/bash +clear +echo "Creating HDFS Paths: START" +hdfs dfs -mkdir /user +hdfs dfs -mkdir /user/data +hdfs dfs -mkdir /user/data/production +hdfs dfs -mkdir /user/data/production/temp +hdfs dfs -mkdir /user/data/production/raw +hdfs dfs -mkdir /user/data/production/master +hdfs dfs -mkdir /user/data/production/dim +hdfs dfs -mkdir /user/data/production/analytics +hdfs dfs -mkdir /user/data/production/reporting +hdfs dfs -mkdir /user/data/production/sandbox +hdfs dfs -mkdir /user/data/production/dqerror +hdfs dfs -mkdir /user/data/production/mdm_oldvalue +hdfs dfs -mkdir /user/data/production/backup +hdfs dfs -mkdir /user/data/experimental +hdfs dfs -mkdir /user/data/experimental/temp +hdfs dfs -mkdir /user/data/experimental/raw +hdfs dfs -mkdir /user/data/experimental/master +hdfs dfs -mkdir /user/data/experimental/dim +hdfs dfs -mkdir /user/data/experimental/analytics +hdfs dfs -mkdir /user/data/experimental/reporting +hdfs dfs -mkdir /user/data/experimental/sandbox +hdfs dfs -mkdir /user/data/experimental/dqerror +hdfs dfs -mkdir /user/data/experimental/mdm_oldvalue +hdfs dfs -mkdir /user/data/experimental/backup +echo "Creating HDFS Paths: FINISH" +echo "STARTING HIVE SETUP" +hive -e "CREATE DATABASE production_master" +hive -e "CREATE DATABASE experimental_master" +hive -e "CREATE DATABASE production_dim" +hive -e "CREATE DATABASE experimental_dim" +hive -e "CREATE DATABASE production_analytics" +hive -e "CREATE DATABASE experimental_analytics" +hive -e "CREATE DATABASE production_reporting" +hive -e "CREATE DATABASE experimental_reporting" 
+hive -e "CREATE DATABASE production_sandbox" +hive -e "CREATE DATABASE experimental_sandbox" +hive -e "CREATE DATABASE production_DQError" +hive -e "CREATE DATABASE experimental_DQError" +hive -e "CREATE DATABASE production_mdm_oldvalue" +hive -e "CREATE DATABASE experimental_mdm_oldvalue" +echo "STARTING HIVE SETUP" \ No newline at end of file diff --git a/src/main/resources/Instalacion/queries pruebas.sql b/src/main/resources/Instalacion/queries pruebas.sql deleted file mode 100644 index 8603032..0000000 --- a/src/main/resources/Instalacion/queries pruebas.sql +++ /dev/null @@ -1,39 +0,0 @@ -SELECT * -FROM control_process - -SELECT * -FROM control_error - -SELECT * -FROM control_processexec -ORDER BY 1 DESC - -SELECT * -FROM control_processexecstep -WHERE processexec_id = '201906021838500087922858746189' -ORDER BY 1 desc - -SELECT * -FROM control_dq - -SELECT * -FROM control_testplan -WHERE testplangroup_id = 'pp041' -AND testplan_name = 'TestPlan_IsOK' - -select cast(count(1) as Integer) as cantidad, cast(sum(testplan_isok) as Integer) as total_ok - from control_testplan - -SELECT 'application_1559496407342_0002' - , '39637' - , 'http://172.17.0.2:4041' - , '2019-06-02 17:38:08:074' - , '01 - Plan pruebas Proc_PlanPruebas_CargaMaster' -FROM dual - -SELECT * FROM dual - - -SELECT mdm_manualchange - FROM control_process - WHERE process_id = 'algo' \ No newline at end of file From 956bcfe5e25987bf47e9447d728117a51f027ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Wed, 11 Mar 2020 23:14:55 -0300 Subject: [PATCH 28/30] #102 aplica mejoras de memoria y rendimiento --- .../bigdata/tables/huemul_Table.scala | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala index ba7fe1d..775f445 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/tables/huemul_Table.scala @@ -1379,26 +1379,21 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C /** - CREATE SQL SCRIPT FOR OLD VALUE TRACE INSERT + SCRIPT FOR OLD VALUE TRACE INSERT */ - private def SQL_Step_OldValueTrace(Alias: String, ProcessName: String): String = { + private def OldValueTrace_save(Alias: String, ProcessName: String, LocalControl: huemul_Control) = { //Get PK - var StringSQl_PK: String = "SELECT " + var StringSQl_PK_base: String = "SELECT " var coma: String = "" getALLDeclaredFields().filter { x => x.setAccessible(true) x.get(this).isInstanceOf[huemul_Columns] && x.get(this).asInstanceOf[huemul_Columns].getIsPK } .foreach { x => - StringSQl_PK += s" ${coma}${x.getName}" + StringSQl_PK_base += s" ${coma}${x.getName}" coma = "," } - //Get SQL for get columns value change - var StringSQl: String = "" - var StringUnion: String = "" - var count_fulltrace = 0 - getALLDeclaredFields().filter { x => x.setAccessible(true) x.get(this).isInstanceOf[huemul_Columns] && x.get(this).asInstanceOf[huemul_Columns].getMDM_EnableOldValue_FullTrace } @@ -1406,16 +1401,20 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C //Get field var Field = x.get(this).asInstanceOf[huemul_Columns] - StringSQl += s" ${StringUnion} ${StringSQl_PK}, CAST(new_${x.getName} as string) AS MDM_newValue, CAST(old_${x.getName} as string) AS MDM_oldValue, CAST(${_MDM_AutoInc} AS BIGINT) as MDM_AutoInc, 
'${Control.Control_Id}' as processExec_id, now() as MDM_fhChange, cast('$ProcessName' as string) as MDM_ProcessChange, cast('${x.getName.toLowerCase()}' as string) as MDM_columnName FROM $Alias WHERE ___ActionType__ = 'UPDATE' and __Change_${x.getName} = 1 " - StringUnion = " \n UNION ALL " - count_fulltrace += 1 + val StringSQL = s"${StringSQl_PK_base}, CAST(new_${x.getName} as string) AS MDM_newValue, CAST(old_${x.getName} as string) AS MDM_oldValue, CAST(${_MDM_AutoInc} AS BIGINT) as MDM_AutoInc, '${Control.Control_Id}' as processExec_id, now() as MDM_fhChange, cast('$ProcessName' as string) as MDM_ProcessChange, cast('${x.getName.toLowerCase()}' as string) as MDM_columnName FROM $Alias WHERE ___ActionType__ = 'UPDATE' and __Change_${x.getName} = 1 " + val aliasFullTrace: String = s"__SQL_ovt_full_${x.getName}" + + val tempSQL_OldValueFullTrace_DF = huemulBigDataGov.DF_ExecuteQuery(aliasFullTrace,StringSQL) + + val numRowsAffected = tempSQL_OldValueFullTrace_DF.count() + if (numRowsAffected > 0) { + val Result = savePersist_OldValueTrace(LocalControl,tempSQL_OldValueFullTrace_DF) + if (!Result) + huemulBigDataGov.logMessageWarn(s"Old value trace full trace can't save to disk, column ${x.getName}") + } + + LocalControl.NewStep(s"Ref & Master: ovt full trace finished, ${numRowsAffected} rows changed for ${x.getName} column ") } - - if (count_fulltrace == 0) - StringSQl = null - - - return StringSQl } @@ -3181,8 +3180,9 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C //CREATE NEW DATAFRAME WITH MDM OLD VALUE FULL TRACE if (huemulBigDataGov.GlobalSettings.MDM_SaveOldValueTrace) { LocalControl.NewStep("Ref & Master: MDM Old Value Full Trace") - val SQL_FullTrace = SQL_Step_OldValueTrace("__FullJoin", huemulBigDataGov.ProcessNameCall) + OldValueTrace_save("__FullJoin", huemulBigDataGov.ProcessNameCall, LocalControl) + /* var tempSQL_OldValueFullTrace_DF : DataFrame = null if (SQL_FullTrace != null){ //if null, doesn't have the mdm old "value full trace" to get tempSQL_OldValueFullTrace_DF = huemulBigDataGov.DF_ExecuteQuery("__SQL_OldValueFullTrace_DF",SQL_FullTrace) @@ -3194,6 +3194,8 @@ class huemul_Table(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_C return Result } } + * + */ } From 77aa42abd88b23449f1be3ed4627fe4d45d032b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Wed, 11 Mar 2020 23:16:19 -0300 Subject: [PATCH 29/30] =?UTF-8?q?Comentar=20traza=20de=20cierre=20de=20ses?= =?UTF-8?q?i=C3=B3n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bigdata/common/huemul_BigDataGovernance.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala index 8ed310f..123b307 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala @@ -520,7 +520,7 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett } def close(stopSpark: Boolean) { - println(s"this.IdApplication: ${this.IdApplication}, IdApplication: ${IdApplication}") + //println(s"this.IdApplication: ${this.IdApplication}, IdApplication: ${IdApplication}") application_closeAll(IdApplication) this.spark.catalog.clearCache() if (stopSpark) { @@ -548,7 +548,7 @@ class 
huemul_BigDataGovernance (appName: String, args: Array[String], globalSett if (RegisterInControl) { val ExecResult1 = CONTROL_connection.ExecuteJDBC_NoResulSet(s"""DELETE FROM control_singleton WHERE application_id = ${ReplaceSQLStringNulls(ApplicationInUse)}""") val ExecResult2 = CONTROL_connection.ExecuteJDBC_NoResulSet(s"""DELETE FROM control_executors WHERE application_id = ${ReplaceSQLStringNulls(ApplicationInUse)}""") - println(s"""DELETE FROM control_executors WHERE application_id = ${ReplaceSQLStringNulls(ApplicationInUse)}""") + //println(s"""DELETE FROM control_executors WHERE application_id = ${ReplaceSQLStringNulls(ApplicationInUse)}""") } } From 6cfc53aa4387a0088d99bf454c11d1a96b3c1602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Rodr=C3=ADguez=20Robotham?= Date: Wed, 11 Mar 2020 23:20:49 -0300 Subject: [PATCH 30/30] =?UTF-8?q?Ajustes=20publicaci=C3=B3n=20release=202.?= =?UTF-8?q?4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 2 +- .../bigdata/common/huemul_BigDataGovernance.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 759db41..bb2ea31 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.huemulsolutions.bigdata huemul-bigdatagovernance - 2.4-SNAPSHOT + 2.4 HuemulSolutions - BigDataGovernance Enable full data quality and data lineage for BigData Projects. Huemul BigDataGovernance, es una librería que trabaja sobre Spark, Hive y HDFS. Permite la implementación de una **estrategia corporativa de dato único**, basada en buenas prácticas de Gobierno de Datos. diff --git a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala index 123b307..308d868 100644 --- a/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala +++ b/src/main/scala/com/huemulsolutions/bigdata/common/huemul_BigDataGovernance.scala @@ -52,7 +52,7 @@ import org.apache.log4j.Level * @param LocalSparkSession(opcional) permite enviar una sesión de Spark ya iniciada. */ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSettings: huemul_GlobalPath, LocalSparkSession: SparkSession = null) extends Serializable { - val currentVersion: String = "2.4-SNAPSHOT" + val currentVersion: String = "2.4" val GlobalSettings = globalSettings val warehouseLocation = new File("spark-warehouse").getAbsolutePath //@transient lazy val log_info = org.apache.log4j.LogManager.getLogger(s"$appName [with huemul]")
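With the release version set to 2.4 (pom.xml and currentVersion above), a downstream job consumes the published artifact and drives the library through the huemul_BigDataGovernance entry point. The sketch below is illustrative only: the sbt-style coordinate assumes the artifact is published to a resolvable repository, the huemul_GlobalPath import and construction are assumptions, and only the constructor signature and close(stopSpark: Boolean) come from the class shown in the patches above.

// build.sbt (assumed coordinate, matching the pom.xml groupId/artifactId/version):
//   libraryDependencies += "com.huemulsolutions.bigdata" % "huemul-bigdatagovernance" % "2.4"

import com.huemulsolutions.bigdata.common.huemul_BigDataGovernance
import com.huemulsolutions.bigdata.common.huemul_GlobalPath   // assumption: huemul_GlobalPath lives in the common package

object ExampleProcess {
  def main(args: Array[String]): Unit = {
    // project-specific path/database settings; how this object is populated is outside this sketch
    val globalSettings: huemul_GlobalPath = new huemul_GlobalPath()   // assumption: default constructor plus setters

    // appName, args and settings as in the constructor signature above;
    // an already-started Spark session may optionally be passed as the fourth argument (LocalSparkSession)
    val huemulBigDataGov = new huemul_BigDataGovernance("example_process", args, globalSettings)

    // ... table definitions and masterization logic would run here ...

    // close() deregisters the application from control_singleton/control_executors and,
    // when called with true, also stops the Spark session
    huemulBigDataGov.close(true)
  }
}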