Skip to content

Commit

Permalink
Merge pull request #133 from HuemulSolutions/develop_2.6.2
Browse files Browse the repository at this point in the history
Develop 2.6.2
  • Loading branch information
huemulDeveloper authored Nov 9, 2020
2 parents 396be0a + 9246e4c commit f173997
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 32 deletions.
13 changes: 0 additions & 13 deletions .idea/libraries/Maven__org_glassfish_javax_el_3_0_1_b11.xml

This file was deleted.

87 changes: 84 additions & 3 deletions huemul-bigdatagovernance.iml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,79 @@
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-core_2.11:2.3.3" level="project" />
<orderEntry type="library" name="Maven: com.twitter:chill_2.11:0.8.4" level="project" />
<orderEntry type="library" name="Maven: com.twitter:chill-java:0.8.4" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-launcher_2.11:2.3.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-kvstore_2.11:2.3.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-network-common_2.11:2.3.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-network-shuffle_2.11:2.3.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.spark:spark-unsafe_2.11:2.3.3" level="project" />
<orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.9.4" level="project" />
<orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:1.1" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.lz4:lz4-java:1.4.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: com.github.luben:zstd-jni:1.3.2-2" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-all:4.1.17.Final" level="project" />
<orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-core:3.1.5" level="project" />
<orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-jvm:3.1.5" level="project" />
<orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-json:3.1.5" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: io.dropwizard.metrics:metrics-graphite:3.1.5" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.6.7.1" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: com.fasterxml.jackson.module:jackson-module-scala_2.11:2.6.7.1" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: com.fasterxml.jackson.module:jackson-module-paranamer:2.7.9" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.py4j:py4j:0.10.7" level="project" />
<orderEntry type="library" name="Maven: org.apache.spark:spark-tags_2.11:2.3.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-sql_2.11:2.3.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: com.univocity:univocity-parsers:2.5.9" level="project" />
<orderEntry type="library" name="Maven: org.apache.spark:spark-sketch_2.11:2.3.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.spark:spark-catalyst_2.11:2.3.3" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.janino:janino:3.0.8" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.janino:commons-compiler:3.0.8" level="project" />
<orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.7" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.orc:orc-core:nohive:1.4.4" level="project" />
<orderEntry type="library" name="Maven: io.airlift:aircompressor:0.8" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.orc:orc-mapreduce:nohive:1.4.4" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.parquet:parquet-column:1.8.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.parquet:parquet-common:1.8.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.parquet:parquet-encoding:1.8.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.parquet:parquet-hadoop:1.8.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.parquet:parquet-jackson:1.8.3" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.arrow:arrow-vector:0.8.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.arrow:arrow-format:0.8.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.arrow:arrow-memory:0.8.0" level="project" />
<orderEntry type="library" name="Maven: joda-time:joda-time:2.9.9" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: com.carrotsearch:hppc:0.7.2" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: com.vlkan:flatbuffers:1.2.0-3f79e055" level="project" />
<orderEntry type="library" name="Maven: org.apache.spark:spark-streaming_2.11:2.3.3" level="project" />
<orderEntry type="library" name="Maven: javax.mail:mail:1.4.1" level="project" />
<orderEntry type="library" name="Maven: jline:jline:2.12.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.tez:tez-runtime-internals:0.9.1.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.tez:tez-api:0.9.1.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-hdfs-client:3.1.1.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp:okhttp:2.7.5" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:1.6.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.tez:tez-common:0.9.1.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.tez:hadoop-shim:0.9.1.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.hive:hive-llap-ext-client:3.1.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.hive:hive-exec:3.1.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.hive:hive-vector-code-gen:3.1.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.velocity:velocity:1.5" level="project" />
<orderEntry type="library" name="Maven: org.antlr:ST4:4.0.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-registry:3.1.1.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: commons-daemon:commons-daemon:1.0.13" level="project" />
<orderEntry type="library" name="Maven: dnsjava:dnsjava:2.1.7" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.groovy:groovy-all:2.4.11" level="project" />
<orderEntry type="library" name="Maven: org.apache.calcite:calcite-core:1.16.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.calcite:calcite-linq4j:1.16.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: com.esri.geometry:esri-geometry-api:2.0.0" level="project" />
<orderEntry type="library" name="Maven: com.yahoo.datasketches:sketches-core:0.9.0" level="project" />
<orderEntry type="library" name="Maven: com.yahoo.datasketches:memory:0.9.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.calcite:calcite-druid:1.16.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.calcite.avatica:avatica:1.10.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.hive:hive-llap-client:3.1.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.hive:hive-streaming:3.1.0.3.1.0.0-78" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-dbcp2:2.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.3" level="project" />
<orderEntry type="library" name="Maven: com.huemulsolutions.bigdata:huemul-sql-decode:1.0" level="project" />
<orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.8" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-core_2.11:2.3.3" level="project" />
Expand Down Expand Up @@ -130,7 +203,6 @@
<orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.orc:orc-core:1.3.3" level="project" />
<orderEntry type="library" name="Maven: org.eclipse.jetty.aggregate:jetty-all:7.6.0.v20120127" level="project" />
<orderEntry type="library" name="Maven: org.apache.geronimo.specs:geronimo-jta_1.1_spec:1.1.1" level="project" />
<orderEntry type="library" name="Maven: javax.mail:mail:1.4.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.geronimo.specs:geronimo-jaspic_1.0_spec:1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.geronimo.specs:geronimo-annotation_1.0_spec:1.1.1" level="project" />
Expand Down Expand Up @@ -231,7 +303,7 @@
<orderEntry type="library" name="Maven: org.eclipse.jetty:jetty-webapp:9.3.19.v20170502" level="project" />
<orderEntry type="library" name="Maven: org.eclipse.jetty:jetty-xml:9.3.19.v20170502" level="project" />
<orderEntry type="library" name="Maven: org.glassfish.web:javax.servlet.jsp:2.3.2" level="project" />
<orderEntry type="library" name="Maven: org.glassfish:javax.el:3.0.1-b11" level="project" />
<orderEntry type="library" name="Maven: org.glassfish:javax.el:3.0.1-b12" level="project" />
<orderEntry type="library" name="Maven: javax.servlet.jsp:javax.servlet.jsp-api:2.3.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.htrace:htrace-core4:4.2.0-incubating" level="project" />
<orderEntry type="library" name="Maven: com.lmax:disruptor:3.3.6" level="project" />
Expand Down Expand Up @@ -268,7 +340,7 @@
<orderEntry type="module-library">
<library name="Maven: jdk.tools:jdk.tools:1.8">
<CLASSES>
<root url="jar://C:/Program Files/Java/jdk1.8.0_181/lib/tools.jar!/" />
<root url="jar://C:/Program Files/Java/jdk1.8.0_261/lib/tools.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
Expand Down Expand Up @@ -384,6 +456,15 @@
<orderEntry type="library" name="Maven: org.apache.cxf:cxf-rt-frontend-jaxrs:3.3.4" level="project" />
<orderEntry type="library" name="Maven: jakarta.ws.rs:jakarta.ws.rs-api:2.1.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.cxf:cxf-rt-security:3.3.4" level="project" />
<orderEntry type="library" name="Maven: javax.xml.ws:jaxws-api:2.3.0" level="project" />
<orderEntry type="library" name="Maven: javax.xml.soap:javax.xml.soap-api:1.4.0" level="project" />
<orderEntry type="library" name="Maven: com.sun.activation:javax.activation:1.2.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.geronimo.specs:geronimo-ws-metadata_2.0_spec:1.1.3" level="project" />
<orderEntry type="library" name="Maven: com.sun.xml.messaging.saaj:saaj-impl:1.4.0-b03" level="project" />
<orderEntry type="library" name="Maven: org.jvnet.mimepull:mimepull:1.9.7" level="project" />
<orderEntry type="library" name="Maven: org.jacorb:jacorb-omgapi:3.7" level="project" />
<orderEntry type="library" name="Maven: org.apache.geronimo.specs:geronimo-jta_1.1_spec:1.1.1" level="project" />
<orderEntry type="library" name="Maven: org.jboss.spec.javax.rmi:jboss-rmi-api_1.0_spec:1.0.6.Final" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-exec:1.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.opennlp:opennlp-tools:1.9.1" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" />
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.huemulsolutions.bigdata</groupId>
<artifactId>huemul-bigdatagovernance</artifactId>
<version>2.6.1</version>
<version>2.6.2</version>
<name>HuemulSolutions - BigDataGovernance</name>
<description>Enable full data quality and data lineage for BigData Projects.
Huemul BigDataGovernance, es una librería que trabaja sobre Spark, Hive y HDFS. Permite la implementación de una **estrategia corporativa de dato único**, basada en buenas prácticas de Gobierno de Datos.
Expand Down Expand Up @@ -122,7 +122,7 @@ Finalmente, también automatiza la generación de código a partir de las defini
<artifactId>jdk.tools</artifactId>
<version>1.8</version>
<scope>system</scope>
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
<systemPath>c:/Program Files/Java/jdk1.8.0_261/lib/tools.jar</systemPath>
</dependency>

<dependency>
Expand Down
42 changes: 42 additions & 0 deletions src/main/scala/com/huemulsolutions/bigdata/common/Init.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,47 @@ object Init {

new huemul_Control(huemulBigDataGov,null, huemulType_Frequency.ANY_MOMENT, false, false)
//Control.Init_CreateTables()

huemulBigDataGov.close()
}
}

/**
 * Manual smoke test for reading the application id from the monitoring URL.
 *
 * Builds a minimal "production" [[huemul_GlobalPath]] configuration, creates a
 * [[huemul_BigDataGovernance]] instance and then calls `getIdFromExecution`
 * five times, sleeping 10 seconds after each call, registering every result as
 * a control step.
 */
object testReadUrlMonitoring {
  /**
   * Entry point.
   *
   * @param args command line arguments, forwarded unchanged to huemul_BigDataGovernance.
   */
  def main(args: Array[String]): Unit = {
    val Global: huemul_GlobalPath = new huemul_GlobalPath()
    Global.GlobalEnvironments = "production, experimental"
    Global.CONTROL_Setting.append(new huemul_KeyValuePath("production", "file.txt"))
    Global.IMPALA_Setting.append(new huemul_KeyValuePath("production", "file.txt"))
    Global.TEMPORAL_Path.append(new huemul_KeyValuePath("production", "/usr/production/temp/"))
    Global.DQError_Path.append(new huemul_KeyValuePath("production", "/usr/production/temp/"))
    Global.DQError_DataBase.append(new huemul_KeyValuePath("production", "dqerror_database"))
    Global.setValidationLight()

    val huemulBigDataGov = new huemul_BigDataGovernance("BigData API test URL Monitoring", args, Global)

    // try/finally guarantees close() runs even when a step outside the polling
    // loop (creating the control instance, NewStep) throws.
    try {
      val control_test = new huemul_Control(huemulBigDataGov, null, huemulType_Frequency.ANY_MOMENT, false, false)

      control_test.NewStep("start test")
      // base URL comes from the governance instance (IdPortMonitoring)
      val URLMonitor = s"${huemulBigDataGov.IdPortMonitoring}/api/v1/applications/"
      control_test.NewStep(s"url monitoring: $URLMonitor")

      // Poll the monitoring URL; a failure while polling is logged, not rethrown.
      try {
        for (i <- 1 to 5) {
          val (idAppFromAPI, status) = huemulBigDataGov.getIdFromExecution(URLMonitor)
          control_test.NewStep(s"wait for 10 seconds, cycle $i/5, read from port: $idAppFromAPI, $status ")
          Thread.sleep(10000)
        }
      } catch {
        case e: Exception =>
          huemulBigDataGov.logMessageError(e)
      }
    } finally {
      huemulBigDataGov.close()
    }
    //Control.Init_CreateTables()
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import java.util.TimeZone
import java.util.concurrent.ThreadLocalRandom
import java.text.SimpleDateFormat

import scala.io.Source
import scala.io.{BufferedSource, Source}
import scala.collection.mutable.ArrayBuffer
import org.apache.spark.sql.types.DecimalType
import com.huemulsolutions.bigdata.control.huemul_JDBCProperties
Expand Down Expand Up @@ -41,7 +41,7 @@ import org.apache.log4j.{Level, Logger}
* @param LocalSparkSession(opcional) permite enviar una sesión de Spark ya iniciada.
*/
class huemul_BigDataGovernance (appName: String, args: Array[String], globalSettings: huemul_GlobalPath, LocalSparkSession: SparkSession = null) extends Serializable {
val currentVersion: String = "2.6.1"
val currentVersion: String = "2.6.2"
val GlobalSettings: huemul_GlobalPath = globalSettings
val warehouseLocation: String = new File("spark-warehouse").getAbsolutePath
//@transient lazy val log_info = org.apache.log4j.LogManager.getLogger(s"$appName [with huemul]")
Expand Down Expand Up @@ -404,7 +404,7 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett
/*********************
* START SPARK AND CONTROL MODEL CONNECTION
*************************/
//from 2.6.1 add userName and password withou using connectionString
//from 2.6.2 add userName and password without using connectionString
val controlUserName: String = GlobalSettings.getUserName(this, GlobalSettings.CONTROL_Setting)
val controlPassword: String = GlobalSettings.getPassword(this, GlobalSettings.CONTROL_Setting)
@transient val CONTROL_connection: huemul_JDBCProperties = new huemul_JDBCProperties(this
Expand Down Expand Up @@ -584,7 +584,15 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett

//Get Id App from Spark URL Monitoring
try {
IdAppFromAPI = this.getIdFromExecution(URLMonitor)
val (idFromURL2, result2) = this.getIdFromExecution(URLMonitor)

//get OK
if (result2 == 0)
IdAppFromAPI = idFromURL2
else
logMessageWarn("")


} catch {
case _: Exception =>
StillAlive = false
Expand Down Expand Up @@ -860,19 +868,74 @@ class huemul_BigDataGovernance (appName: String, args: Array[String], globalSett
def getCaseType(tableStorage: com.huemulsolutions.bigdata.tables.huemulType_StorageType.huemulType_StorageType, value: String): String = {
if (tableStorage == com.huemulsolutions.bigdata.tables.huemulType_StorageType.AVRO) value.toLowerCase() else value
}

/**
 * Extracts the target of the first `<a href="...">` anchor found in an HTML text.
 *
 * The result is the text between the first occurrence of `<a href="` and the
 * first `">` that follows it.
 *
 * @param html HTML text to scan.
 * @return the href value, or null when the opening marker or the closing
 *         `">` cannot be found (callers test the result against null).
 */
def getMovedHRef(html: String): String = {
  val openMarker: String = """<a href=""""
  val closeMarker: String = """">"""

  html.indexOf(openMarker) match {
    case start if start >= 0 =>
      // everything after the opening quote of the href attribute
      val remainder: String = html.substring(start + openMarker.length)

      remainder.indexOf(closeMarker) match {
        case end if end >= 0 => remainder.substring(0, end)
        case _ => null
      }
    case _ =>
      null
  }
}

/**
 * Get execution Id from spark monitoring url.
 *
 * Opens `url`, reads the whole response once and tries to parse it as JSON to
 * take the `id` field of the first row. When the response cannot be parsed,
 * the same text is scanned for an `<a href="...">` link (see getMovedHRef) and
 * the lookup is retried against that link.
 *
 * @param url      monitoring URL to read.
 * @param iterator number of links already followed; callers normally leave the default.
 * @return a pair (id, status):
 *         - (id, 1)     id successfully read from the response
 *         - ("", 0)     the URL could not be opened for a reason other than a refused connection
 *         - ("", -1)    connection could not be established (java.net.ConnectException)
 *         - ("", -3)    no id in the response and no link to follow
 *         - (null, -10) three links were already followed without finding an id
 */
def getIdFromExecution(url: String, iterator: Int = 0): (String, Int) = {
  if (iterator >= 3) {
    //too many redirects, give up
    (null, -10)
  } else {
    //try to open api
    val opened: Either[(String, Int), Source] =
      try {
        Right(Source.fromURL(url))
      } catch {
        case _: java.net.ConnectException =>
          //no connection found, stillAlive = false
          Left(("", -1))
        case e: Exception =>
          //other error, return 0
          logMessageInfo(s"getIdFromExecution error: ${e.getMessage}")
          Left(("", 0))
      }

    opened match {
      case Left(failure) =>
        failure

      case Right(source) =>
        // Read the body exactly once: a second mkString on the same source
        // returns an empty string, which would hide any redirect link.
        // The source is always closed, whatever happens while reading.
        val content: String =
          try {
            source.mkString
          } catch {
            case e: Exception =>
              logMessageInfo(s"getIdFromExecution error: ${e.getMessage}")
              ""
          } finally {
            source.close()
          }

        //if connection, get Id
        val idFromURL: Option[String] =
          try {
            import spark.implicits._
            val vals = spark.sparkContext.parallelize(content :: Nil)
            Some(spark.read.json(vals).select($"id").first().getString(0))
          } catch {
            case _: Exception => None
          }

        idFromURL match {
          case Some(id) =>
            (id, 1)
          case None =>
            //error to get Id, try to read from html redirect location
            val newURL = getMovedHRef(content)

            if (newURL != null) {
              //get new url, try to get Id
              getIdFromExecution(newURL, iterator + 1)
            } else {
              //not redirect url
              ("", -3)
            }
        }
    }
  }
}


Expand Down
Loading

0 comments on commit f173997

Please sign in to comment.