Commit a8d3da9

Merge branch 'dev-rpackage' into 'master'

v0.0.0.9003

See merge request eoc_foundation_wip/analysis-pipelines!9

2 parents: c86d3b6 + 18a16a5

53 files changed: +1612 −268 lines

.gitignore

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@
 metastore_db/
 .DS_Store
 vignettes/metastore_db/
-vignettes/pipeline.RDS
+vignettes/*.RDS
 vignettes/*.out
 vignettes/*.R
 vignettes/*.html

DESCRIPTION

Lines changed: 8 additions & 5 deletions

@@ -1,16 +1,18 @@
 Package: analysisPipelines
 Type: Package
 Title: Compose interoperable analysis pipelines, and put them into production
-Version: 0.0.0.9002
+Version: 0.0.0.9003
 Authors@R: c(
-    person("Naren","Srinivasan", email = "Naren.Srinivasan@mu-sigma.com", role = c("cre","aut")),
+    person("Naren","Srinivasan", email = "Naren.Srinivasan@mu-sigma.com", role = c("aut")),
+    person("Zubin Dowlaty","", email = "Zubin.Dowlaty@mu-sigma.com", role = c("ctb")),
     person("Sanjay","", email = "Sanjay@mu-sigma.com", role = c("ctb")),
     person("Neeratyoy","Mallik", email = "Neeratyoy.Mallik@mu-sigma.com", role = c("ctb")),
-    person("Anoop S","", email = "Anoop.S@mu-sigma.com", role = c("ctb"))
+    person("Anoop S","", email = "Anoop.S@mu-sigma.com", role = c("ctb")),
+    person("Mu Sigma, Inc.", email = "ird.experiencelab@mu-sigma.com", role = c("cre"))
     )
-Description: The package aims at enabling data scientists to compose pipelines of analysis which consist of data manipulation, exploratory analysis & reporting, as well as modeling steps. It also aims to enable data scientists to use tools of their choice through an R interface, and compose interoperable pipelines between R, Spark, and Python.
+Description: The package aims at enabling data scientists to compose pipelines of analysis which consist of data manipulation, exploratory analysis & reporting, as well as modeling steps. It also aims to enable data scientists to use tools of their choice through an R interface, and compose interoperable pipelines between R, Spark, and Python. Credits to Mu Sigma for supporting the development of the package.
 Depends: R (>= 3.4.0), tibble, magrittr, data.table, pipeR, devtools
-Imports: ggplot2, dplyr, futile.logger, RCurl
+Imports: ggplot2, dplyr, futile.logger, RCurl, proto
 Suggests: plotly, knitr, rmarkdown, SparkR, parallel, visNetwork, rjson, DT, shiny
 Remotes: github::cran/SparkR
 Encoding: UTF-8

@@ -23,6 +25,7 @@ Collate:
     'analysisPipelines_package.R'
     'core-functions.R'
     'core-functions-batch.R'
+    'core-functions-meta-pipelines.R'
     'core-streaming-functions.R'
     'r-batch-eda-utilities.R'
     'spark-structured-streaming-utilities.R'

NAMESPACE

Lines changed: 8 additions & 0 deletions

@@ -3,28 +3,35 @@
 export(AnalysisPipeline)
 export(BaseAnalysisPipeline)
 export(CheckColumnType)
+export(MetaAnalysisPipeline)
 export(StreamingAnalysisPipeline)
 export(assessEngineSetUp)
 export(bivarPlots)
 export(castKafkaStreamAsString)
 export(convertKafkaValueFromJson)
 export(correlationMatPlot)
+export(createPipelineInstance)
+export(exportAsMetaPipeline)
 export(generateReport)
 export(genericPipelineException)
 export(getDatatype)
 export(getInput)
 export(getLoggerDetails)
 export(getOutputById)
 export(getPipeline)
+export(getPipelinePrototype)
 export(getRegistry)
 export(ignoreCols)
+export(loadMetaPipeline)
 export(loadPipeline)
 export(loadPredefinedFunctionRegistry)
+export(loadRegistry)
 export(multiVarOutlierPlot)
 export(outlierPlot)
 export(prepExecution)
 export(registerFunction)
 export(savePipeline)
+export(saveRegistry)
 export(setInput)
 export(setLoggerDetails)
 export(sparkRSessionCreateIfNotPresent)

@@ -33,6 +40,7 @@ export(updateObject)
 export(visualizePipeline)
 exportClasses(AnalysisPipeline)
 exportClasses(BaseAnalysisPipeline)
+exportClasses(MetaAnalysisPipeline)
 exportClasses(StreamingAnalysisPipeline)
 exportMethods(checkSchemaMatch)
 exportMethods(generateOutput)
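
Taken together, the new exports outline a meta-pipeline workflow: a pipeline's structure can be exported without its data, inspected through a prototype, and re-instantiated, and function registries can now be saved and reloaded. A minimal usage sketch, assuming signatures along these lines (only the export names appear in this diff; the arguments, the iris input, and the file paths are illustrative assumptions):

library(analysisPipelines)

# Assumed starting point: a pipeline object built on some data
pipelineObj <- AnalysisPipeline(input = iris)

# Export the pipeline's structure, without data, as a meta-pipeline
metaPipelineObj <- pipelineObj %>>% exportAsMetaPipeline()

# Inspect the parameter prototype, then instantiate a concrete pipeline
pipelineProto <- getPipelinePrototype(metaPipelineObj)
newPipelineObj <- createPipelineInstance(metaPipelineObj, pipelineProto)

# Meta-pipelines and registries can be persisted and restored
savePipeline(metaPipelineObj, path = "metaPipeline.RDS")   # hypothetical path
metaPipelineObj <- loadMetaPipeline(path = "metaPipeline.RDS")
saveRegistry(path = "registry.RDS")                        # hypothetical path
loadRegistry(path = "registry.RDS")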

R/core-functions-batch.R

Lines changed: 32 additions & 17 deletions

@@ -54,14 +54,6 @@ setMethod(
     ## Calling the parent constructor
     .Object <- methods::callNextMethod(.Object, ...)

-    # for(rowNo in 1:nrow(batchPredefFunctions)){
-    #   .Object %>>% registerFunction(functionName = batchPredefFunctions[['functionName']][[rowNo]],
-    #                                 heading = batchPredefFunctions[['heading']][[rowNo]],
-    #                                 # batchPredefFunctions[['outAsIn']][[rowNo]],
-    #                                 engine = batchPredefFunctions[['engine']][[rowNo]],
-    #                                 exceptionFunction = batchPredefFunctions[['exceptionHandlingFunction']][[rowNo]],
-    #                                 userDefined = F, loadPipeline = F ) -> .Object
-    # }
     return(.Object)

   },error = function(e){
@@ -242,6 +234,7 @@ checkSchema <- function(dfOld, dfNew){
   startPipelineExecution <- Sys.time()
   futile.logger::flog.info("|| Pipeline Execution STARTED ||" , name='logger.execution')

+  maxEngineName <- "r"
   outputCache <- .getCache()

   topOrder <- object@pipelineExecutor$topologicalOrdering
@@ -255,9 +248,10 @@ checkSchema <- function(dfOld, dfNew){
   engineCount %>>% dplyr::filter(numOp == max(numOp)) -> maxEngine


-  maxEngineName <- "r"
   if(nrow(maxEngine) == 1){
     maxEngineName <- maxEngine$engine
+  }else{
+    maxEngineName <- maxEngine$engine[1]
   }

   inputToExecute <- object@input
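
The revised default-engine selection now handles ties explicitly: when more than one engine accounts for the maximum number of operations, the first one is chosen deterministically instead of assigning a multi-element vector to maxEngineName. A standalone illustration of the tie case, with made-up engine counts (not code from the package):

library(pipeR)

# Hypothetical per-engine operation counts for one pipeline
engineCount <- data.frame(engine = c("r", "spark"), numOp = c(3, 3),
                          stringsAsFactors = FALSE)

# Both engines tie for max(numOp), so the filter keeps both rows
engineCount %>>% dplyr::filter(numOp == max(numOp)) -> maxEngine
nrow(maxEngine)  # 2

# The new else branch picks the first engine rather than a length-2 vector
if(nrow(maxEngine) == 1){
  maxEngineName <- maxEngine$engine
}else{
  maxEngineName <- maxEngine$engine[1]
}
maxEngineName  # "r"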
@@ -400,17 +394,21 @@ checkSchema <- function(dfOld, dfNew){
   # Set parameters

   params <- unlist(funcDetails$parameters, recursive = F)
-  dep <- unlist(funcDetails$dependencies, recursive = F)
+  dep <- unique(unlist(funcDetails$dependencies, recursive = F))
   depTerms <- paste0("f", dep)

   params <- lapply(params, function(p, depTerms, outputCache){
     if(class(p) == "formula"){
-      formulaTerm <- attr(terms(p), "term.label")
-      if(length(formulaTerm) == 1 && formulaTerm %in% depTerms){
-
-        ## Formula of previous function in pipeline
-        actualParamObjectName <- paste0(formulaTerm, ".out")
-        p <- get(actualParamObjectName, envir = outputCache)
+      isDepParam <- analysisPipelines:::isDependencyParam(p)
+      if(isDepParam){
+        formulaTerm <- analysisPipelines:::getTerm(p)
+        argName <- analysisPipelines:::getResponse(p)
+        if(formulaTerm %in% depTerms){
+
+          ## Formula of previous function in pipeline
+          actualParamObjectName <- paste0(formulaTerm, ".out")
+          p <- get(actualParamObjectName, envir = outputCache)
+        }
       }
     }

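The rewritten parameter resolution delegates formula parsing to internal helpers (isDependencyParam, getTerm, getResponse) instead of reading terms() attributes inline. The idea: a formula parameter such as y ~ f1 marks argument y as depending on the output of pipeline step 1, which is then fetched from the output cache. A standalone sketch of that resolution, using stand-in helpers whose behavior is an assumption (the package's internal versions are not shown in this diff):

# Stand-in helpers; assumed to mirror the internals referenced above
getTerm     <- function(f) all.vars(f)[2]   # dependency term, e.g. "f1"
getResponse <- function(f) all.vars(f)[1]   # argument name, e.g. "y"

p <- y ~ f1                                 # "bind argument 'y' to step 1's output"

outputCache <- new.env()
assign("f1.out", head(iris), envir = outputCache)   # cached output of step 1

# Resolution as in the hunk: look up "<term>.out" in the cache
depTerms <- paste0("f", 1)
formulaTerm <- getTerm(p)
argName <- getResponse(p)                   # extracted as in the hunk
if(formulaTerm %in% depTerms){
  p <- get(paste0(formulaTerm, ".out"), envir = outputCache)
}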
@@ -421,10 +419,27 @@ checkSchema <- function(dfOld, dfNew){
   #Call

   #Assign as named parameters
+  #Get names of params
+  # paramNames <- lapply(params, function(p){
+  #   return(names(p))
+  # }) %>>% unlist
+  # params <-lapply(params, function(p){
+  #   names(p) <- NULL
+  #   return(p)
+  # })
+  # names(params) <- paramNames
   args <- params
   if(funcDetails$isDataFunction){
-    args <- append(list(inputToExecute), params)
+    formals(funcDetails$operation) %>>% as.list %>>% names %>>% dplyr::first() -> firstArgName
+    firstArg <- list(inputToExecute)
+    names(firstArg) <- firstArgName
+    args <- append(firstArg, params)
   }
+  # }else{
+  #   firstParam <- params[1]
+  #   names(firstParam) <- "object"
+  #   args <- append(firstParam, params[-1])
+  # }
   output <- tryCatch({do.call(what = funcDetails$operation,
                               args = args)},
                      error = function(e){
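
The change above stops relying on positional matching for a data function's first argument: the pipeline input is now passed under the function's actual first formal name, so do.call() binds it by name. A standalone sketch of the same technique (the operation here is made up, not a registered function from the package):

library(pipeR)

# A hypothetical registered data function
operation <- function(data, n) head(data, n)

# Discover the first formal argument's name, as the hunk does
formals(operation) %>>% as.list %>>% names %>>% dplyr::first() -> firstArgName

firstArg <- list(iris)
names(firstArg) <- firstArgName           # becomes list(data = iris)
args <- append(firstArg, list(n = 3))

do.call(what = operation, args = args)    # same as operation(data = iris, n = 3)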
