From fb5fdda14ba6cc07fbab1dbe90a8104bc7b38503 Mon Sep 17 00:00:00 2001 From: Sam Simkin Date: Wed, 14 May 2025 17:48:05 -0600 Subject: [PATCH 1/2] NEON bird mapping update Includes adding pointID specific coordinates and mapping some additional fields --- R/map_neon.ecocomdp.10003.001.001.R | 84 ++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/R/map_neon.ecocomdp.10003.001.001.R b/R/map_neon.ecocomdp.10003.001.001.R index 250c431..56cf19b 100644 --- a/R/map_neon.ecocomdp.10003.001.001.R +++ b/R/map_neon.ecocomdp.10003.001.001.R @@ -10,6 +10,10 @@ map_neon.ecocomdp.10003.001.001 <- function( neon.data.product.id = "DP1.10003.001", ...){ + # devtools::install_github('NEONScience/NEON-geolocation/geoNEON', force = T) + library(geoNEON) + + #NEON target taxon group is BIRD neon_method_id <- "neon.ecocomdp.10003.001.001" @@ -28,44 +32,69 @@ map_neon.ecocomdp.10003.001.001 <- function( # extract NEON data tables from list object ---- - allTabs_bird$brd_countdata <- tidyr::as_tibble(allTabs_bird$brd_countdata) + brd_countdata <- tidyr::as_tibble(allTabs_bird$brd_countdata) + + brd_perpoint <- tidyr::as_tibble(allTabs_bird$brd_perpoint) - allTabs_bird$brd_perpoint <- tidyr::as_tibble(allTabs_bird$brd_perpoint) + # get pointID-specific location information + brd_perpoint_locs <- geoNEON::getLocTOS(data = brd_perpoint, + dataProd = "brd_perpoint", + token = neon_token) + + # Create the event table (location for specific pointIDs) + brd_perpoint_mod <- brd_perpoint_locs %>% select(c(-decimalLatitude, -decimalLongitude)) %>% + rename( + locality = points, + eventRemarks = remarks, + eventDate = startDate, + decimalLatitude = adjDecimalLatitude, + decimalLongitude = adjDecimalLongitude, + coordinateUncertaintyInMeters = adjCoordinateUncertainty) + + brd_perpoint_mod <- brd_perpoint_mod %>% + mutate(minimumElevationInMeters = adjElevation, + maximumElevationInMeters = adjElevation, + countryCode = "US", + habitat = paste("NLCD:", nlcdClass, sep = " "), + samplingProtocol = paste0("https://data.neonscience.org/api/v0/documents/", + samplingProtocolVersion), + samplingEffort = "6 minutes", + sampleSizeValue = 49087.38, # sampled area actually varies based on visibility and acoustics, but this value is the area of 125 m radius circle around observer at a single sampling pointID + sampleSizeUnit = "square metre", + truncDate = lubridate::date(eventDate), + datasetID = "https://doi.org/10.48443/00pg-vm19" + ) + + + brd_perpoint_mod$eventID <- ifelse(brd_perpoint_mod$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "UNDE_009.B2.2021-06-10", brd_perpoint_mod$eventID) # change pointID from C3 to B2 to resolve duplicate and allow merge + brd_perpoint_mod$pointID <- ifelse(brd_perpoint_mod$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "B2", brd_perpoint_mod$pointID) # change pointID from C3 to B2 to resolve duplicate and allow merge + data_bird <- dplyr::left_join( - allTabs_bird$brd_countdata, - dplyr::select(allTabs_bird$brd_perpoint, - -uid, - -startDate)) + brd_countdata, + dplyr::select(brd_perpoint_mod, + -uid)) + # table(data_bird$samplingImpractical) # all NA # table(data_bird$samplingImpracticalRemarks) data_bird <- dplyr::select( data_bird, - # -uid, -identifiedBy, # -eventID, # it is just plotID, pointID, startDate -measuredBy, -samplingImpractical, -samplingImpracticalRemarks) - # remove invalde records - data_bird <- data_bird %>% - dplyr::filter( - is.finite(clusterSize), - clusterSize >= 0, - !is.na(clusterSize)) - - #location ---- - table_location_raw <- data_bird %>% dplyr::select(domainID, siteID, plotID, namedLocation, decimalLatitude, decimalLongitude, elevation, - nlcdClass, plotType, geodeticDatum) %>% + habitat, plotType, geodeticDatum, samplingProtocol,samplingEffort, countryCode, datasetID, eventRemarks) %>% dplyr::distinct() +# table_location_raw$coordinateUncertaintyInMeters <- as.character(table_location_raw$coordinateUncertaintyInMeters) # ecocomDP ancillary location table fields can't be numeric table_location <- make_neon_location_table( loc_info = table_location_raw, @@ -74,20 +103,23 @@ map_neon.ecocomdp.10003.001.001 <- function( table_location_ancillary <- make_neon_ancillary_location_table( loc_info = table_location_raw, loc_col_names = c("domainID", "siteID", "plotID", "namedLocation"), - ancillary_var_names = c("namedLocation", "nlcdClass", - "plotType","geodeticDatum")) + ancillary_var_names = c("namedLocation", "habitat", + "plotType","geodeticDatum","samplingProtocol","samplingEffort", "countryCode", "datasetID","eventRemarks") ) + #,"coordinateUncertaintyInMeters")) # taxon ---- - my_dots <- list(...) + # my_dots <- list(...) # gives "Error: '...' used in an incorrect context" + # + # if("token" %in% names(my_dots)){ + # my_token <- my_dots$token + # }else{ + # my_token <- NA + # } - if("token" %in% names(my_dots)){ - my_token <- my_dots$token - }else{ - my_token <- NA - } + my_token <- Sys.getenv('NEON_PAT') # get bird taxon table from NEON neon_bird_taxon_table <- neonOS::getTaxonList( @@ -96,7 +128,7 @@ map_neon.ecocomdp.10003.001.001 <- function( dplyr::filter(taxonID %in% data_bird$taxonID) table_taxon <- neon_bird_taxon_table %>% - dplyr::select(taxonID, taxonRank, scientificName, nameAccordingToID) %>% + dplyr::select(taxonID, taxonRank, scientificName, vernacularName, family, kingdom, nameAccordingToID) %>% dplyr::distinct() %>% dplyr::rename(taxon_id = taxonID, taxon_rank = taxonRank, From ffbcca274fb2cafa0c6184ee8c2b9b8e3fae5775 Mon Sep 17 00:00:00 2001 From: Sam Simkin Date: Fri, 16 May 2025 18:41:55 -0600 Subject: [PATCH 2/2] additional NEON bird updates --- R/map_neon.ecocomdp.10003.001.001.R | 100 +++++++++++++++------------- 1 file changed, 53 insertions(+), 47 deletions(-) diff --git a/R/map_neon.ecocomdp.10003.001.001.R b/R/map_neon.ecocomdp.10003.001.001.R index 56cf19b..71e4483 100644 --- a/R/map_neon.ecocomdp.10003.001.001.R +++ b/R/map_neon.ecocomdp.10003.001.001.R @@ -12,7 +12,6 @@ map_neon.ecocomdp.10003.001.001 <- function( # devtools::install_github('NEONScience/NEON-geolocation/geoNEON', force = T) library(geoNEON) - #NEON target taxon group is BIRD neon_method_id <- "neon.ecocomdp.10003.001.001" @@ -27,99 +26,102 @@ map_neon.ecocomdp.10003.001.001 <- function( " and cannot be mapped using method ", neon_method_id) - allTabs_bird <- neon.data.list - # extract NEON data tables from list object ---- brd_countdata <- tidyr::as_tibble(allTabs_bird$brd_countdata) brd_perpoint <- tidyr::as_tibble(allTabs_bird$brd_perpoint) - + my_token = Sys.getenv('NEON_PAT') + if(my_token == ""){my_token <- NA} + # get pointID-specific location information brd_perpoint_locs <- geoNEON::getLocTOS(data = brd_perpoint, - dataProd = "brd_perpoint", - token = neon_token) + dataProd = "brd_perpoint", token = my_token) # Create the event table (location for specific pointIDs) brd_perpoint_mod <- brd_perpoint_locs %>% select(c(-decimalLatitude, -decimalLongitude)) %>% rename( locality = points, - eventRemarks = remarks, + eventID_raw = eventID, +# eventRemarks = remarks, eventDate = startDate, decimalLatitude = adjDecimalLatitude, decimalLongitude = adjDecimalLongitude, coordinateUncertaintyInMeters = adjCoordinateUncertainty) brd_perpoint_mod <- brd_perpoint_mod %>% - mutate(minimumElevationInMeters = adjElevation, + mutate(eventID = paste(eventID_raw, plotID, pointID, sep = "."), + minimumElevationInMeters = adjElevation, maximumElevationInMeters = adjElevation, + country = "United States", countryCode = "US", + continent = "North America", habitat = paste("NLCD:", nlcdClass, sep = " "), samplingProtocol = paste0("https://data.neonscience.org/api/v0/documents/", samplingProtocolVersion), samplingEffort = "6 minutes", sampleSizeValue = 49087.38, # sampled area actually varies based on visibility and acoustics, but this value is the area of 125 m radius circle around observer at a single sampling pointID sampleSizeUnit = "square metre", - truncDate = lubridate::date(eventDate), - datasetID = "https://doi.org/10.48443/00pg-vm19" + datasetID = substr(neon.data.list[grep("citation_", names(neon.data.list), ignore.case = TRUE)], 7, 40) + # if the name of citation item, the location of the DOI url or the length of the url changes the datasetID extraction will produce unexpected results ) + brd_perpoint_mod$reportedWeather <- paste0("startCloudCoverInPercent:", brd_perpoint_mod$startCloudCoverPercentage,"; ", "endCloudCoverInPercent:", brd_perpoint_mod$endCloudCoverPercentage,"; ", + "startRelativeHumidityInPercent:", brd_perpoint_mod$startRH,"; ", "endRelativeHumidityInPercent:", brd_perpoint_mod$endRH,"; ", + "observedAirTemperatureInDegreesCelsius:", brd_perpoint_mod$observedAirTemp,"; ", "ObservedWindSpeedinKmPerHour:", brd_perpoint_mod$kmPerHourObservedWindSpeed) + # reportedWeather should be a supported core field (from Humboldt extension) in event table brd_perpoint_mod$eventID <- ifelse(brd_perpoint_mod$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "UNDE_009.B2.2021-06-10", brd_perpoint_mod$eventID) # change pointID from C3 to B2 to resolve duplicate and allow merge brd_perpoint_mod$pointID <- ifelse(brd_perpoint_mod$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "B2", brd_perpoint_mod$pointID) # change pointID from C3 to B2 to resolve duplicate and allow merge + brd_countdata$eventID <- ifelse(brd_countdata$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "UNDE_009.B2.2021-06-10", brd_countdata$eventID) # change pointID from C3 to B2 to resolve duplicate and allow merge + brd_countdata$pointID <- ifelse(brd_countdata$uid == "0c2a3910-fbd4-4343-a419-193a3329a8ca", "B2", brd_countdata$pointID) # change pointID from C3 to B2 to resolve duplicate and allow merge + + brd_countdata_mod <- brd_countdata %>% rename( + eventID_raw = eventID, eventDate = startDate) %>% + mutate(eventID = paste(eventID_raw, plotID, pointID, sep = ".")) + data_bird <- dplyr::left_join( - brd_countdata, + brd_countdata_mod, dplyr::select(brd_perpoint_mod, -uid)) - # table(data_bird$samplingImpractical) # all NA # table(data_bird$samplingImpracticalRemarks) data_bird <- dplyr::select( data_bird, -identifiedBy, - # -eventID, # it is just plotID, pointID, startDate -measuredBy, - -samplingImpractical, -samplingImpracticalRemarks) - + -samplingImpractical, + -samplingImpracticalRemarks) + #location ---- table_location_raw <- data_bird %>% - dplyr::select(domainID, siteID, plotID, namedLocation, - decimalLatitude, decimalLongitude, elevation, - habitat, plotType, geodeticDatum, samplingProtocol,samplingEffort, countryCode, datasetID, eventRemarks) %>% + dplyr::select(domainID, siteID, plotID, namedLocation, locality, + decimalLatitude, decimalLongitude, elevation, habitat, plotType, geodeticDatum, samplingProtocol, datasetID, + country, countryCode, continent, samplingEffort, sampleSizeValue, sampleSizeUnit) %>% dplyr::distinct() -# table_location_raw$coordinateUncertaintyInMeters <- as.character(table_location_raw$coordinateUncertaintyInMeters) # ecocomDP ancillary location table fields can't be numeric + table_location_raw$sampleSizeValue <- as.character(table_location_raw$sampleSizeValue) # ecocomDP ancillary location table fields can't be numeric table_location <- make_neon_location_table( loc_info = table_location_raw, - loc_col_names = c("domainID", "siteID", "plotID", "namedLocation")) + loc_col_names = c("domainID", "siteID", "namedLocation","locality")) table_location_ancillary <- make_neon_ancillary_location_table( loc_info = table_location_raw, loc_col_names = c("domainID", "siteID", "plotID", "namedLocation"), - ancillary_var_names = c("namedLocation", "habitat", - "plotType","geodeticDatum","samplingProtocol","samplingEffort", "countryCode", "datasetID","eventRemarks") ) - #,"coordinateUncertaintyInMeters")) - - - + ancillary_var_names = c("namedLocation", "habitat", "plotType","geodeticDatum","samplingProtocol","samplingEffort", "datasetID", + "country","countryCode","continent","samplingEffort","sampleSizeValue","sampleSizeUnit") ) + table_location_ancillary$unit <- ifelse(table_location_ancillary$variable_name == "samplingEffort", "minute", NA) + table_location_ancillary$unit <- ifelse(table_location_ancillary$variable_name == "sampleSizeValue", "square metre", table_location_ancillary$unit) + # taxon ---- - # my_dots <- list(...) # gives "Error: '...' used in an incorrect context" - # - # if("token" %in% names(my_dots)){ - # my_token <- my_dots$token - # }else{ - # my_token <- NA - # } - - my_token <- Sys.getenv('NEON_PAT') # get bird taxon table from NEON neon_bird_taxon_table <- neonOS::getTaxonList( @@ -139,7 +141,7 @@ map_neon.ecocomdp.10003.001.001 <- function( taxon_name, authority_system) - + # Put vernacularName, family, and kingdom in taxon_ancillary table? @@ -149,19 +151,21 @@ map_neon.ecocomdp.10003.001.001 <- function( table_observation_wide_all <- data_bird %>% # dplyr::rename(location_id, plotID, trapID) %>% - dplyr::rename(location_id = namedLocation) %>% + dplyr::rename(location_id = locality) %>% # package id dplyr::mutate(package_id = my_package_id) %>% dplyr:: rename( observation_id = uid, event_id = eventID, - datetime = startDate, + datetime = eventDate, taxon_id = taxonID, - value = clusterSize) %>% + individualCount = clusterSize) %>% dplyr::mutate( - variable_name = "cluster size", + variable_name = "individual count", unit = "count of individuals") %>% dplyr::filter(!is.na(taxon_id)) + table_observation_wide_all$value <- ifelse(is.na(table_observation_wide_all$individualCount) & table_observation_wide_all$targetTaxaPresent == "Y", 1, table_observation_wide_all$individualCount) + table_observation <- table_observation_wide_all %>% dplyr::select( @@ -188,19 +192,21 @@ map_neon.ecocomdp.10003.001.001 <- function( "sexOrAge", "clusterCode", "nativeStatusCode", - "endCloudCoverPercentage", "observedHabitat", - "observedAirTemp", - "startCloudCoverPercentage", - "endCloudCoverPercentage", - "startRH", - "endRH", - "kmPerHourObservedWindSpeed", + # "startCloudCoverPercentage", + # "endCloudCoverPercentage", + # "startRH", + # "endRH", + # "observedAirTemp", + # "kmPerHourObservedWindSpeed", + "reportedWeather", "laboratoryName", "samplingProtocolVersion", "remarks", "release", "publicationDate")) + table_observation_ancillary$unit <- ifelse(table_observation_ancillary$variable_name == "pointCountMinute", "minute", table_observation_ancillary$unit) + table_observation_ancillary$unit <- ifelse(table_observation_ancillary$variable_name == "observerDistance", "meter", table_observation_ancillary$unit) # data summary ---- # make dataset_summary -- required table