From 4f88b69d95d8bd16187df65a9b7eee7b92325812 Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Tue, 24 Sep 2024 19:11:20 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- mod_data-disc.html | 100 +++++++++++++++++++-------------------------- search.json | 2 +- sitemap.xml | 54 ++++++++++++------------ 4 files changed, 72 insertions(+), 86 deletions(-) diff --git a/.nojekyll b/.nojekyll index f41caf1..e9cc063 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -d2afa541 \ No newline at end of file +0677f0ba \ No newline at end of file diff --git a/mod_data-disc.html b/mod_data-disc.html index a5a342e..b11c24a 100644 --- a/mod_data-disc.html +++ b/mod_data-disc.html @@ -704,38 +704,24 @@

Downloading Data

-

If you’re quite lucky, the data you want might be stored in a repository that developed (and maintains!) an R package. These packages may or may not be on CRAN (packages can often also be found on GitHub or Bioconductor). Typically these packages have a short “vignette” that demonstrates how their functions should be used.

-

Consider the following example adapted from the dataone package vignette.

+

If you’re quite lucky, the data you want might be stored in a repository that developed (and maintains!) an R package. These packages may or may not be on CRAN (packages can often also be found on GitHub or Bioconductor). Typically these packages have a “vignette” that demonstrates how their functions should be used.

+

Consider the following example adapted from the USGS dataRetrieval package vignette. Visit the USGS National Water Dashboard interactive map to find site numbers and check data availability.

-
# Load needed packages
-## install.packages("librarian")
-librarian::shelf(dataone)
-
-# DataONE requires "coordinating nodes" so make one
-cn <- dataone::CNode()
-
-# Get a reference to a node based on its identifier
-mn <- dataone::getMNode(x = cn, "urn:node:KNB")
-
-# Generate a query
-query_list <- list(q = "id:Blandy.77.1", fl = "resourceMap")
-
-# Use it to search DataONE
-query_result <- dataone::query(x = cn, solrQuery = query_list, as = "data.frame")
-
-# Identify package ID
-pkg_id <- query_result[1, 1]
-
-# Download the data
-temp_file_name <- dataone::getPackage(x = mn, id = pkg_id)
-
-
-
1
-
-dataone downloads data to a “temporary directory” and returns the name of the file/path. You’ll need that to read in the data so be sure to assign it to an object! -
-
-
+
# Load needed packages
+## install.packages("librarian")
+librarian::shelf(dataRetrieval)
+
+# Set up the parameters for the Santa Ynez River site
+siteNumber <- "11133000"  # USGS site number for Santa Ynez River at Narrows
+parameterCd <- "00060"    # Parameter code for discharge (cubic feet per second)
+startDate <- "2024-01-01" # Start date
+endDate <- "2024-01-31"   # End date
+
+# Retrieve daily discharge data
+dischargeData <- dataRetrieval::readNWISdv(siteNumber, parameterCd, startDate, endDate)
+
+# View the first few rows of the data
+head(dischargeData)
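A quick follow-up check on what came back can be useful. In this minimal sketch, the "X_00060_00003" column name is an assumption about how NWIS labels daily discharge values; confirm it against names(dischargeData) before plotting.

# Inspect the column names returned by NWIS
names(dischargeData)

# Plot daily discharge over the requested window
# ("X_00060_00003" is an assumed column name; adjust it to match your output)
plot(x = dischargeData$Date, y = dischargeData$X_00060_00003,
     type = "l", xlab = "Date", ylab = "Discharge (cfs)")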
@@ -861,19 +847,19 @@

Downloading Data

Step 1: Using the “subset/Get Data” tab on the right-hand side of the data page, generate a list of download links for your specified target area and time period. Download that list as a TXT file named “list.txt”. Be sure to document the target area and temporal coverage you selected in your data inventory table.

-
https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0802_v003-2023m0804t120832.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
-https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0805_v003-2023m0807t093718.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
-https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0806_v003-2023m0809t092629.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
-https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0807_v003-2023m0809t092635.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
-https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0808_v003-2023m0810t092721.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
-https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0809_v003-2023m0811t101920.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
+
https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0802_v003-2023m0804t120832.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
+https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0805_v003-2023m0807t093718.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
+https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0806_v003-2023m0809t092629.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
+https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0807_v003-2023m0809t092635.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
+https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0808_v003-2023m0810t092721.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]
+https://acdisc.gesdisc.eosdis.nasa.gov/opendap/HDF-EOS5/ncml/Aura_OMI_Level3/OMSO2e.003/2023/OMI-Aura_L3-OMSO2e_2023m0809_v003-2023m0811t101920.he5.ncml.nc4?ColumnAmountSO2[119:659][0:1439],lat[119:659],lon[0:1439]

Step 2: Open a command line window and execute the wget command below. Replace the username and password placeholders (XXX) with your EarthData login credentials.

-
wget -nc --load-cookies ..\.urs_cookies --save-cookies ..\.urs_cookies --keep-session-cookies --user=XXX --password=XXX
---content-disposition -i list.txt
+
wget -nc --load-cookies ..\.urs_cookies --save-cookies ..\.urs_cookies --keep-session-cookies --user=XXX --password=XXX
+--content-disposition -i list.txt

If you encounter any issues, follow this step-by-step guide on using wget and curl specifically with the GES DISC data system.
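If you would rather stay in R, the same batch download can be scripted with utils::download.file(). The sketch below is only an illustration under some assumptions: your EarthData credentials are already stored in a ~/.netrc file and the ~/.urs_cookies file exists (as described in the GES DISC guide linked above), and “list.txt” sits in your working directory.

# Read the list of links generated in Step 1
links <- readLines(con = "list.txt")

# Loop across links
for(link in links){

  # Build a local file name from the link (drop the query string, keep the base name)
  out_name <- basename(sub(pattern = "\\?.*$", replacement = "", x = link))

  # Download with curl, re-using the cookie files so authentication persists
  utils::download.file(url = link, destfile = out_name, method = "curl",
                       extra = "-n -b ~/.urs_cookies -c ~/.urs_cookies -L")
}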

@@ -886,24 +872,24 @@

Data format and structure

CSV and TXT are common formats for data storage. In addition, formats like NetCDF, HDF5, MATLAB (.mat), and RData/RDS are frequently used in research, along with spatial formats such as GeoTIFF, shapefiles, and other raster files (refer to the spatial module for more details). A short sketch for reading a couple of these formats follows the CSV example below.

In the R environment, data structure is typically checked using functions like the following.

-
library(dplyr)
-
-# Define URL as an object
-dt_url <- "https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/77/10/f32823fba432f58f66c06b589b7efac6" 
-
-# Read it into R
-lobster_df <- read.csv(file = dt_url,na=-99999)
-
-# Check the structure of the data
-head(lobster_df)
-
-summary(lobster_df)
-
-str(lobster_df)
-
-glimpse(lobster_df)
-
-anyNA(lobster_df)
+
library(dplyr) # needed for glimpse()
+
+# Define URL as an object
+dt_url <- "https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/77/10/f32823fba432f58f66c06b589b7efac6" 
+
+# Read it into R
+lobster_df <- read.csv(file = dt_url, na.strings = "-99999")
+
+# Check the structure of the data
+head(lobster_df)
+
+summary(lobster_df)
+
+str(lobster_df)
+
+glimpse(lobster_df)
+
+anyNA(lobster_df)
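The example above covers tabular (CSV) data. For a couple of the other formats mentioned earlier, here is a minimal sketch; the file names ("my_data.rds", "my_model_output.nc") are hypothetical placeholders, and "ColumnAmountSO2" is the variable requested in the OMI example above.

# Load needed packages
## install.packages("librarian")
librarian::shelf(ncdf4)

# RDS files hold a single R object and are read with base R
my_df <- readRDS(file = "my_data.rds")

# NetCDF files are opened as a connection first, then individual variables are extracted
nc <- ncdf4::nc_open(filename = "my_model_output.nc")
so2 <- ncdf4::ncvar_get(nc = nc, varid = "ColumnAmountSO2")
ncdf4::nc_close(nc)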