From 4fb3c692db706793280978230e4934313689eb0d Mon Sep 17 00:00:00 2001
From: Mengqi Zhao <mengqi.zhao@pnnl.gov>
Date: Mon, 14 Oct 2024 08:50:16 -0700
Subject: [PATCH] Fix paper typos and fix warning message in the test

---
 DESCRIPTION                 |  1 +
 R/yield_impacts_functions.R | 15 ++++++++++-----
 README.md                   |  2 +-
 inst/extras/devTests.R      |  2 +-
 paper/paper.md              |  6 +++---
 tests/testthat/helper.R     |  2 +-
 6 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f3443c8..5a1baae 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -36,6 +36,7 @@ Imports:
     raster (>= 3.6.20),
     reshape2 (>= 1.4.4),
     sandwich (>= 3.1.0),
+    sf (>= 1.0.16),
     sp (>= 2.1.1),
     stringr (>= 1.5.0),
     tibble (>= 3.2.1),
diff --git a/R/yield_impacts_functions.R b/R/yield_impacts_functions.R
index a653f58..b131a3e 100644
--- a/R/yield_impacts_functions.R
+++ b/R/yield_impacts_functions.R
@@ -68,6 +68,8 @@ weather_clean <- function(file = NULL,
   country_name <- iso <- NULL
 
   d <- data.table::fread( file, skip = 0, stringsAsFactors = FALSE, header = TRUE )
+  cols_to_num <- names(d)[!names(d) %in% c('year', 'month')]
+  d <- d[, (cols_to_num) := lapply(.SD, as.numeric), .SDcols = cols_to_num]
   d <- data.table::melt( d, id.vars = c('year', 'month'), variable.name = 'country_id' )
   d$country_id <- as.numeric( as.character( gsub( "X", "", d$country_id ) ) )
   d <- merge( d, country_id, by = "country_id", all.x = TRUE )
@@ -213,7 +215,7 @@ data_merge <- function(data = NULL,
   yield <- subset( yield, crop == crop_name )
   d <- merge( data, yield, by = c( "iso", "year", "crop" ) )
   d <- subset( d, !is.na( yield ) )
-  d$id <- NULL
+  # d$id <- NULL
   d <- merge( d, co2_hist, by = "year" )
   d <- merge_data( d, gdp_hist, "iso", "year" )
   d <- subset( d, select = c( "iso", "year", "gdp_pcap_ppp", "crop", "area_harvest",
@@ -349,9 +351,9 @@ prep_regression <- function(data = NULL)
   d$temp_mean_2 <- ( d$temp_mean )^2
   d$temp_max_2 <- ( d$temp_max )^2
   d$temp_min_2 <- ( d$temp_min )^2
-  d$ln_temp_mean <- log( d$temp_mean )
-  d$ln_temp_max <- log( d$temp_max )
-  d$ln_temp_min <- log( d$temp_min )
+  d$ln_temp_mean <- suppressWarnings( log( d$temp_mean ) )
+  d$ln_temp_max <- suppressWarnings( log( d$temp_max ) )
+  d$ln_temp_min <- suppressWarnings( log( d$temp_min ) )
   d$precip_mean_2 <- ( d$precip_mean )^2
   d$precip_max_2 <- ( d$precip_max )^2
   d$precip_min_2 <- ( d$precip_min )^2
@@ -459,7 +461,7 @@ plot_fit <- function(data = NULL,
 
   d <- data
 
-  p <- ggplot2::ggplot( d, ggplot2::aes_string( x = 'yield', y = fit_name, size = 'area_harvest', color = 'GCAM_region_name' ) ) +
+  p <- ggplot2::ggplot( d, ggplot2::aes( x = yield, y = .data[[fit_name]], size = area_harvest, color = GCAM_region_name ) ) +
     ggplot2::geom_point( shape = 21, stroke = 0.5 ) +
     ggplot2::scale_size_area( max_size = 20 ) +
     ggplot2::guides( color = ggplot2::guide_legend( ncol = 1 ) ) +
@@ -884,6 +886,9 @@ plot_projection <- function(data = NULL,
 
   year <- iso <- NULL
 
+  data <- data %>%
+    dplyr::filter(!is.na(yield_impact))
+
   p <- ggplot2::ggplot( data, ggplot2::aes( x = year, y = yield_impact, color = iso ) ) +
     ggplot2::geom_line( ) +
     ggplot2::facet_wrap( ~ GCAM_region_name, scales = 'free_y' ) +
diff --git a/README.md b/README.md
index cd16924..c617295 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <!-- badges: start -->
-[![R-CMD-check.yaml](https://github.com/JGCRI/gaia/actions/workflows/R-CDM-check.yaml/badge.svg)](https://github.com/JGCRI/gaia/actions/workflows/R-CDM-check.yaml)
+[![R-CMD-check.yaml](https://github.com/JGCRI/gaia/actions/workflows/R-CDM-check.yaml/badge.svg?branch=main)](https://github.com/JGCRI/gaia/actions/workflows/R-CDM-check.yaml)
 [![test-coverage.yaml](https://github.com/JGCRI/gaia/actions/workflows/test-coverage.yaml/badge.svg)](https://github.com/JGCRI/gaia/actions/workflows/test-coverage.yaml)
 [![codecov](https://codecov.io/gh/JGCRI/gaia/branch/main/graph/badge.svg?token=XQ913U4IYM)](https://codecov.io/gh/JGCRI/gaia)
 [![docs](https://github.com/JGCRI/gaia/actions/workflows/docs.yaml/badge.svg)](https://github.com/JGCRI/gaia/actions/workflows/docs.yaml)
diff --git a/inst/extras/devTests.R b/inst/extras/devTests.R
index f8e9151..ace5745 100644
--- a/inst/extras/devTests.R
+++ b/inst/extras/devTests.R
@@ -82,7 +82,7 @@ crop_cal <- gaia::crop_calendars(output_dir = output_dir)
 # test data_aggregation
 # climate_hist_dir <- file.path(output_dir, 'weighted_climate', 'country_climate_hist')
 
-climate_hist_dir <- file.path('C:/WorkSpace/github/test_scripts/gaia/output/climate/country_climate_txt')
+climate_hist_dir <- file.path('C:/WorkSpace/GCIMS/GCIMS_Yield/regression_analysis/data/data_raw/country_climate_txt')
 # climate_hist_dir <- file.path(output_dir, 'weighted_climate', 'canesm5_hist')
 climate_impact_dir <- file.path(output_dir, 'weighted_climate', 'canesm5')
 
diff --git a/paper/paper.md b/paper/paper.md
index 8a48f78..7da09a3 100644
--- a/paper/paper.md
+++ b/paper/paper.md
@@ -28,7 +28,7 @@ bibliography: paper.bib
 
 # Summary
 
-`gaia` is an open-source R package designed to estimate crop yield shocks in response to annual weather variations and CO^2 concentrations at the country scale for 12 major crops. It enables the projection of annual yield shocks under various future climate scenarios, differentiated by crop type, country, and year. This innovative tool streamlines the workflow from raw climate data processing to projections of annual shocks to crop yields at the country level, using the response surfaces developed and documented in @Waldhoff_2020, an empirical econometric model that leverages historical weather, CO^2, and crop yield data for robust empirical fitting for 12 crops. `gaia` uses these response surfaces with bias-corrected, gridded monthly temperature and precipitation projections (e.g., from the Coupled Model Intercomparison Project Phase 6 (CMIP6, @Oneil_2016) and Inter-Sectoral Impact Model Intercomparison Project (ISIMIP, @Warszawski_2014)) to project shocks that can be applied to agricultural productivity changes at the country-level for use in economic models. The historical and future projections use gridded, country-crop specific monthly growing season precipitation and temperature data, aggregated to the national level, weighted by cropland area derived from MIRCA [@Portmann_2010]. These annual, country, crop-specific yield shocks can be aggregated to different regional definitions, crop commodity definitions, and time periods (e.g., 20-year rolling-average trends), as needed by specific economic models. `gaia` serves as a lightweight, powerful model that equips researchers with projections of annual yield shocks for multiple crops, at a model-specific spatial resolution that is necessary to explore crop yields responses to a broad range of future climate projections, enhancing human-Earth system analysis capabilities.
+`gaia` is an open-source R package designed to estimate crop yield shocks in response to annual weather variations and CO~2~ concentrations at the country scale for 12 major crops. It enables the projection of annual yield shocks under various future climate scenarios, differentiated by crop type, country, and year. This innovative tool streamlines the workflow from raw climate data processing to projections of annual shocks to crop yields at the country level, using the response surfaces developed and documented in @Waldhoff_2020, an empirical econometric model that leverages historical weather, CO~2~, and crop yield data for robust empirical fitting for 12 crops. `gaia` uses these response surfaces with bias-corrected, gridded monthly temperature and precipitation projections (e.g., from the Coupled Model Intercomparison Project Phase 6 (CMIP6, @Oneil_2016) and Inter-Sectoral Impact Model Intercomparison Project (ISIMIP, @Warszawski_2014)) to project shocks that can be applied to agricultural productivity changes at the country level for use in economic models. The historical and future projections use gridded, country-crop specific monthly growing season precipitation and temperature data, aggregated to the national level, weighted by cropland area derived from MIRCA [@Portmann_2010]. These annual, country, crop-specific yield shocks can be aggregated to different regional definitions, crop commodity definitions, and time periods (e.g., 20-year rolling-average trends), as needed by specific economic models. `gaia` serves as a lightweight, powerful model that equips researchers with projections of annual yield shocks for multiple crops, at a model-specific spatial resolution that is necessary to explore crop yield responses to a broad range of future climate projections, enhancing human-Earth system analysis capabilities.
 
 
 # Statement of need
@@ -55,8 +55,8 @@ The primary functionality of `gaia` is encapsulated in the `yield_impact` wrappe
 1. `weighted_climate`: Processes CMIP-ISIMIP climate NetCDF data and calculates cropland-weighted precipitation and temperature at the country level, differentiated by crop type and irrigation type. The function accepts both daily or monthly climate data that are consistent with the CMIP-ISIMIP NetCDF data format
 2. `crop_calenders`: Generates crop planting months for each country and crop based on crop calendar data [@Sacks_2010].
 3. `data_aggregation`: Calculates crop growing seasons using climate variables processed by `weighted_climate` and crop calendars for both historical and projected periods. This function prepares climate and yield data for subsequent model fitting.
-4. `yield_regression`: Performs regression analysis fitted with historical annual crop yields, monthly growing season temperature and precipitation, CO^2 concentrations, GDP per capita, and year. The default econometric model applied in `gaia` is from @Waldhoff_2020. User can specify alternative formulas that are consistent with the data processed in `data_aggregation`.
-5. `yield_shock_projection`: Projects yield shocks for future climate scenarios using the fitted model and temperature, precipitation, and CO^2 projections from the climate scenario.
+4. `yield_regression`: Performs regression analysis fitted with historical annual crop yields, monthly growing season temperature and precipitation, CO~2~ concentrations, GDP per capita, and year. The default econometric model applied in `gaia` is from @Waldhoff_2020. Users can specify alternative formulas that are consistent with the data processed in `data_aggregation`.
+5. `yield_shock_projection`: Projects yield shocks for future climate scenarios using the fitted model and temperature, precipitation, and CO~2~ projections from the climate scenario.
 6. `gcam_agprodchange`: Remaps country-level yield shocks to GCAM-required spatial scales (i.e., region, basin, technology intersections), based on harvested areas, and aggregates crops to GCAM commodities. This function applies the projected shocks to GCAM scenario agricultural productivity growth rates (the unit used to project future yields in GCAM) and creates ready-to-use XML outputs for GCAM.
 
 
diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
index 94faeaf..a009002 100644
--- a/tests/testthat/helper.R
+++ b/tests/testthat/helper.R
@@ -21,7 +21,7 @@ start_year_i = 2015
 end_year_i = 2100
 smooth_window_i = 20
 
-diagnostics_i <- F
+diagnostics_i <- T
 use_default_coeff_i <- F