Skip to content

Commit

Permalink
fix #384
Browse files Browse the repository at this point in the history
  • Loading branch information
eblondel committed May 2, 2024
1 parent f9134cb commit faca923
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 19 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: geoflow
Version: 0.20240419
Date: 2024-04-19
Version: 0.20240502
Date: 2024-05-02
Title: Tools to Orchestrate Geospatial (Meta)Data Management Workflows and Manage FAIR Services
Description: An engine to facilitate the orchestration and execution of metadata-driven data management workflows, in compliance with FAIR
(Findable, Accessible, Interoperable and Reusable) data management principles. By means of a pivot metadata model, relying on the DublinCore standard (<https://dublincore.org/>),
Expand Down
2 changes: 2 additions & 0 deletions R/executeWorkflowJob.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ executeWorkflowJob <- function(config, jobdir = NULL, queue = NULL, monitor = NU
config$logger.info("SkipDataDownload is false: copying and fetching data...")
#we copy data to job data dir (for data files)
entity$copyDataToJobDir(config, jobdir)
#enrich with data types
entity$enrichWithDatatypes(config, jobdir)
#vector data: we enrich entity with features
#control is added in case of entity already enriched with features/coverages (when loaded from custom R entity handlers)
if(!skipEnrichWithData) if(is.null(entity$data$features) && is.null(entity$data$coverages)){
Expand Down
101 changes: 84 additions & 17 deletions R/geoflow_entity.R
Original file line number Diff line number Diff line change
Expand Up @@ -574,37 +574,28 @@ geoflow_entity <- R6Class("geoflow_entity",

},

#'@description This function will enrich the entity data objects with data features (vector data) or coverages (grid data). This method will overwrite
#' spatial metadata such as the bounding box (unless global option \code{skipDynamicBbox} is enabled). Note that the user spatial extent is not overwritten
#' since it may contain finer geometries than a bounding box.
#'@description Function that will scan zip data files and resolve data objects sourceType and uploadType
#'@param config geoflow config object
#'@param jobdir relative path of the job directory
enrichWithData = function(config, jobdir = NULL){
enrichWithDatatypes = function(config, jobdir = NULL){

if(is.null(jobdir)) jobdir <- config$job
wd <- getwd()
setwd("./data")

skipDynamicBbox <- if(!is.null(config$profile$options$skipDynamicBbox)) config$profile$options$skipDynamicBbox else FALSE
enrichDataStrategy <- if(!is.null(config$profile$options$enrichDataStrategy)) config$profile$options$enrichDataStrategy else "first"
#TODO enrichDataSourceStrategy <- if(!is.null(config$profile$options$enrichDataSourceStrategy)) config$profile$options$enrichDataSourceStrategy else "first"

data_objects <- list()
if(is.null(self$data$dir)){
data_objects <- list(self$data)
}else{
data_objects <- self$data$getData()
}

srid <- if(!is.null(self$srid)) self$srid else ""
data_srids <- c()

if(length(data_objects)>0){

data_objects <- lapply(1:length(data_objects), function(k){

data_object = data_objects[[k]]

datasource <- data_object$source[[1]] #TODO we still look at first source
datasource_name = NULL
datasource_ext = NULL
Expand All @@ -623,7 +614,7 @@ geoflow_entity <- R6Class("geoflow_entity",
#setwd(wd)
#return(NULL)
}

#in case of a datasource type requiring a file we check its presence
#if absent we abort the function enrich With features
types_without_file <- c("dbtable","dbview","dbquery")
Expand All @@ -637,7 +628,7 @@ geoflow_entity <- R6Class("geoflow_entity",

#basefilename
basefilename <- datasource_name

#inherit sourceType for source
if(datasource_file_needed){
data_object$sourceType = switch(datasource_ext,
Expand All @@ -646,7 +637,7 @@ geoflow_entity <- R6Class("geoflow_entity",
basefilepath = file.path(getwd(), paste0(basefilename,".zip"))
if(file.exists(basefilepath)){
#for srcType != "other"
#(re-zipped files on 'basefinename' with 'other' sourceType do not exist,
#(re-zipped files on 'basefilename' with 'other' sourceType do not exist,
#but are just copied, not unzipped/rezipped with different name)
zip_files = zip::zip_list(basefilepath)
if(any(endsWith(zip_files$filename, ".gpkg"))){
Expand All @@ -670,7 +661,8 @@ geoflow_entity <- R6Class("geoflow_entity",
)
#additional rule for uploadType
if(datasource_ext == "zip") if(!is.null(data_object$uploadType)) if(data_object$uploadType == "other"){
data_object$uploadType = data_object$sourceType
config$logger.info(sprintf("Zip data archived scanned, setting uploadType based on sourceType '%s'", data_object$sourceType))
data_object$setUploadType(data_object$sourceType)
if(data_object$uploadType == "geotiff") data_object$setSpatialRepresentationType("grid")
}
#overwrite top sourceType
Expand All @@ -684,6 +676,81 @@ geoflow_entity <- R6Class("geoflow_entity",
self$data$data[[k]]$setSpatialRepresentationType(data_object$spatialRepresentationType)
}
}
return(data_object)
})

if(is.null(self$data$dir)){
self$data <- data_objects[[1]]
}else{
self$data$data <- data_objects
}
}
setwd(self$getEntityJobDirPath(config, jobdir))
},

#'@description This function will enrich the entity data objects with data features (vector data) or coverages (grid data). This method will overwrite
#' spatial metadata such as the bounding box (unless global option \code{skipDynamicBbox} is enabled). Note that the user spatial extent is not overwritten
#' since it may contain finer geometries than a bounding box.
#'@param config geoflow config object
#'@param jobdir relative path of the job directory
enrichWithData = function(config, jobdir = NULL){

if(is.null(jobdir)) jobdir <- config$job
wd <- getwd()
setwd("./data")

skipDynamicBbox <- if(!is.null(config$profile$options$skipDynamicBbox)) config$profile$options$skipDynamicBbox else FALSE
enrichDataStrategy <- if(!is.null(config$profile$options$enrichDataStrategy)) config$profile$options$enrichDataStrategy else "first"
#TODO enrichDataSourceStrategy <- if(!is.null(config$profile$options$enrichDataSourceStrategy)) config$profile$options$enrichDataSourceStrategy else "first"

data_objects <- list()
if(is.null(self$data$dir)){
data_objects <- list(self$data)
}else{
data_objects <- self$data$getData()
}

srid <- if(!is.null(self$srid)) self$srid else ""
data_srids <- c()

if(length(data_objects)>0){

data_objects <- lapply(1:length(data_objects), function(k){

data_object = data_objects[[k]]

datasource <- data_object$source[[1]] #TODO we still look at first source
datasource_name = NULL
datasource_ext = NULL
datasource_file = NULL
if(!is.null(datasource)){
datasource_parts <- unlist(strsplit(datasource, "\\.(?=[^\\.]+$)", perl=TRUE))
datasource_name <- datasource_parts[1]
datasource_ext <- datasource_parts[2]
datasource_file <- attr(datasource, "uri")
attributes(datasource) <- NULL
if(is.null(datasource_file)) datasource_file <- datasource
}

if(data_object$sourceType == "other"){
config$logger.warn("Metadata dynamic handling based on 'data' not implemented for source type 'other'")
#setwd(wd)
#return(NULL)
}

#in case of a datasource type requiring a file we check its presence
#if absent we abort the function enrich With features
types_without_file <- c("dbtable","dbview","dbquery")
datasource_file_needed <- !(data_object$sourceType %in% types_without_file)
if(datasource_file_needed && is.null(datasource_file)){
warnMsg <- sprintf("No source file/URL for datasource '%s'. Data source copying aborted!", datasource_name)
config$logger.warn(warnMsg)
#setwd(wd)
#return(NULL)
}

#basefilename
basefilename <- datasource_name

#encoding mappings
st_encoding <- switch(options("encoding")[[1]],
Expand Down

0 comments on commit faca923

Please sign in to comment.