diff --git a/R/map_neon.ecocomdp.10003.001.001.R b/R/map_neon.ecocomdp.10003.001.001.R index 250c431..71e4483 100644 --- a/R/map_neon.ecocomdp.10003.001.001.R +++ b/R/map_neon.ecocomdp.10003.001.001.R @@ -10,6 +10,9 @@ map_neon.ecocomdp.10003.001.001 <- function( neon.data.product.id = "DP1.10003.001", ...){ + # devtools::install_github('NEONScience/NEON-geolocation/geoNEON', force = T) + library(geoNEON) + #NEON target taxon group is BIRD neon_method_id <- "neon.ecocomdp.10003.001.001" @@ -23,71 +26,102 @@ map_neon.ecocomdp.10003.001.001 <- function( " and cannot be mapped using method ", neon_method_id) - allTabs_bird <- neon.data.list - # extract NEON data tables from list object ---- - allTabs_bird$brd_countdata <- tidyr::as_tibble(allTabs_bird$brd_countdata) + brd_countdata <- tidyr::as_tibble(allTabs_bird$brd_countdata) - allTabs_bird$brd_perpoint <- tidyr::as_tibble(allTabs_bird$brd_perpoint) + brd_perpoint <- tidyr::as_tibble(allTabs_bird$brd_perpoint) - data_bird <- dplyr::left_join( - allTabs_bird$brd_countdata, - dplyr::select(allTabs_bird$brd_perpoint, - -uid, - -startDate)) + my_token = Sys.getenv('NEON_PAT') + if(my_token == ""){my_token <- NA} + + # get pointID-specific location information + brd_perpoint_locs <- geoNEON::getLocTOS(data = brd_perpoint, + dataProd = "brd_perpoint", token = my_token) + + # Create the event table (location for specific pointIDs) + brd_perpoint_mod <- brd_perpoint_locs %>% select(c(-decimalLatitude, -decimalLongitude)) %>% + rename( + locality = points, + eventID_raw = eventID, +# eventRemarks = remarks, + eventDate = startDate, + decimalLatitude = adjDecimalLatitude, + decimalLongitude = adjDecimalLongitude, + coordinateUncertaintyInMeters = adjCoordinateUncertainty) + + brd_perpoint_mod <- brd_perpoint_mod %>% + mutate(eventID = paste(eventID_raw, plotID, pointID, sep = "."), + minimumElevationInMeters = adjElevation, + maximumElevationInMeters = adjElevation, + country = "United States", + countryCode = "US", + continent = "North America", + habitat = paste("NLCD:", nlcdClass, sep = " "), + samplingProtocol = paste0("https://data.neonscience.org/api/v0/documents/", + samplingProtocolVersion), + samplingEffort = "6 minutes", + sampleSizeValue = 49087.38, # sampled area actually varies based on visibility and acoustics, but this value is the area of 125 m radius circle around observer at a single sampling pointID + sampleSizeUnit = "square metre", + datasetID = substr(neon.data.list[grep("citation_", names(neon.data.list), ignore.case = TRUE)], 7, 40) + # if the name of citation item, the location of the DOI url or the length of the url changes the datasetID extraction will produce unexpected results + ) + + brd_perpoint_mod$reportedWeather <- paste0("startCloudCoverInPercent:", brd_perpoint_mod$startCloudCoverPercentage,"; ", "endCloudCoverInPercent:", brd_perpoint_mod$endCloudCoverPercentage,"; ", + "startRelativeHumidityInPercent:", brd_perpoint_mod$startRH,"; ", "endRelativeHumidityInPercent:", brd_perpoint_mod$endRH,"; ", + "observedAirTemperatureInDegreesCelsius:", brd_perpoint_mod$observedAirTemp,"; ", "ObservedWindSpeedinKmPerHour:", brd_perpoint_mod$kmPerHourObservedWindSpeed) + # reportedWeather should be a supported core field (from Humboldt extension) in event table + brd_perpoint_mod$eventID <- ifelse(brd_perpoint_mod$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "UNDE_009.B2.2021-06-10", brd_perpoint_mod$eventID) # change pointID from C3 to B2 to resolve duplicate and allow merge + brd_perpoint_mod$pointID <- ifelse(brd_perpoint_mod$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "B2", brd_perpoint_mod$pointID) # change pointID from C3 to B2 to resolve duplicate and allow merge + + brd_countdata$eventID <- ifelse(brd_countdata$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "UNDE_009.B2.2021-06-10", brd_countdata$eventID) # change pointID from C3 to B2 to resolve duplicate and allow merge + brd_countdata$pointID <- ifelse(brd_countdata$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "B2", brd_countdata$pointID) # change pointID from C3 to B2 to resolve duplicate and allow merge + + brd_countdata_mod <- brd_countdata %>% rename( + eventID_raw = eventID, eventDate = startDate) %>% + mutate(eventID = paste(eventID_raw, plotID, pointID, sep = ".")) + + data_bird <- dplyr::left_join( + brd_countdata_mod, + dplyr::select(brd_perpoint_mod, + -uid)) + # table(data_bird$samplingImpractical) # all NA # table(data_bird$samplingImpracticalRemarks) data_bird <- dplyr::select( data_bird, - # -uid, -identifiedBy, - # -eventID, # it is just plotID, pointID, startDate -measuredBy, - -samplingImpractical, -samplingImpracticalRemarks) - - # remove invalde records - data_bird <- data_bird %>% - dplyr::filter( - is.finite(clusterSize), - clusterSize >= 0, - !is.na(clusterSize)) - - + -samplingImpractical, + -samplingImpracticalRemarks) + #location ---- - table_location_raw <- data_bird %>% - dplyr::select(domainID, siteID, plotID, namedLocation, - decimalLatitude, decimalLongitude, elevation, - nlcdClass, plotType, geodeticDatum) %>% + dplyr::select(domainID, siteID, plotID, namedLocation, locality, + decimalLatitude, decimalLongitude, elevation, habitat, plotType, geodeticDatum, samplingProtocol, datasetID, + country, countryCode, continent, samplingEffort, sampleSizeValue, sampleSizeUnit) %>% dplyr::distinct() + table_location_raw$sampleSizeValue <- as.character(table_location_raw$sampleSizeValue) # ecocomDP ancillary location table fields can't be numeric table_location <- make_neon_location_table( loc_info = table_location_raw, - loc_col_names = c("domainID", "siteID", "plotID", "namedLocation")) + loc_col_names = c("domainID", "siteID", "namedLocation","locality")) table_location_ancillary <- make_neon_ancillary_location_table( loc_info = table_location_raw, loc_col_names = c("domainID", "siteID", "plotID", "namedLocation"), - ancillary_var_names = c("namedLocation", "nlcdClass", - "plotType","geodeticDatum")) - - - + ancillary_var_names = c("namedLocation", "habitat", "plotType","geodeticDatum","samplingProtocol","samplingEffort", "datasetID", + "country","countryCode","continent","samplingEffort","sampleSizeValue","sampleSizeUnit") ) + table_location_ancillary$unit <- ifelse(table_location_ancillary$variable_name == "samplingEffort", "minute", NA) + table_location_ancillary$unit <- ifelse(table_location_ancillary$variable_name == "sampleSizeValue", "square metre", table_location_ancillary$unit) + # taxon ---- - my_dots <- list(...) - - if("token" %in% names(my_dots)){ - my_token <- my_dots$token - }else{ - my_token <- NA - } # get bird taxon table from NEON neon_bird_taxon_table <- neonOS::getTaxonList( @@ -96,7 +130,7 @@ map_neon.ecocomdp.10003.001.001 <- function( dplyr::filter(taxonID %in% data_bird$taxonID) table_taxon <- neon_bird_taxon_table %>% - dplyr::select(taxonID, taxonRank, scientificName, nameAccordingToID) %>% + dplyr::select(taxonID, taxonRank, scientificName, vernacularName, family, kingdom, nameAccordingToID) %>% dplyr::distinct() %>% dplyr::rename(taxon_id = taxonID, taxon_rank = taxonRank, @@ -107,7 +141,7 @@ map_neon.ecocomdp.10003.001.001 <- function( taxon_name, authority_system) - + # Put vernacularName, family, and kingdom in taxon_ancillary table? @@ -117,19 +151,21 @@ map_neon.ecocomdp.10003.001.001 <- function( table_observation_wide_all <- data_bird %>% # dplyr::rename(location_id, plotID, trapID) %>% - dplyr::rename(location_id = namedLocation) %>% + dplyr::rename(location_id = locality) %>% # package id dplyr::mutate(package_id = my_package_id) %>% dplyr:: rename( observation_id = uid, event_id = eventID, - datetime = startDate, + datetime = eventDate, taxon_id = taxonID, - value = clusterSize) %>% + individualCount = clusterSize) %>% dplyr::mutate( - variable_name = "cluster size", + variable_name = "individual count", unit = "count of individuals") %>% dplyr::filter(!is.na(taxon_id)) + table_observation_wide_all$value <- ifelse(is.na(table_observation_wide_all$individualCount) & table_observation_wide_all$targetTaxaPresent == "Y", 1, table_observation_wide_all$individualCount) + table_observation <- table_observation_wide_all %>% dplyr::select( @@ -156,19 +192,21 @@ map_neon.ecocomdp.10003.001.001 <- function( "sexOrAge", "clusterCode", "nativeStatusCode", - "endCloudCoverPercentage", "observedHabitat", - "observedAirTemp", - "startCloudCoverPercentage", - "endCloudCoverPercentage", - "startRH", - "endRH", - "kmPerHourObservedWindSpeed", + # "startCloudCoverPercentage", + # "endCloudCoverPercentage", + # "startRH", + # "endRH", + # "observedAirTemp", + # "kmPerHourObservedWindSpeed", + "reportedWeather", "laboratoryName", "samplingProtocolVersion", "remarks", "release", "publicationDate")) + table_observation_ancillary$unit <- ifelse(table_observation_ancillary$variable_name == "pointCountMinute", "minute", table_observation_ancillary$unit) + table_observation_ancillary$unit <- ifelse(table_observation_ancillary$variable_name == "observerDistance", "meter", table_observation_ancillary$unit) # data summary ---- # make dataset_summary -- required table