fix(FIMSFrame): Assume yyyy-mm-dd format for dates

and check column names in FIMSFrame The column names of the S4@data object were being checked with the validation function but there was no check in FIMSFrame and to do the calculations within FIMSFrame all of the columns need to be present. Right now we are checking it twice, once in FIMSFrame and once in the validator of the FIMSFrame class. We might need a different class like an input data class and we could check the class upon input to FIMSFrame but this works for right now. @k-doering-NOAA do you have any ideas here? Had to change the test of a data frame without the proper columns to just error out because no warnings are given just a stop() command. When writing a data frame to a csv and reading it back in, the date formatting can be lost, e.g., 0001-01-01 turns into 1-1-1. FIMS requires a yyyy-mm-dd format. Now the use of the as.Date() function will create a date object from a character object but only if it is in the correct format. If it is in the wrong format, e.g., yyyy/mm/dd, then the function will error out. Right now the start_year and end_year are formatted as integers because for plotting, I thought it would be better to have year 1 versus year 0001 but we can change this. @ian-taylor-NOAA what do you think? Close #639 --------- Co-authored-by: Kelli.Johnson <1536491690113302@mil>
NOAA-FIMS · Jul 12, 2024 · 39d0743 · 39d0743
1 parent 8aacee0
commit 39d0743
Show file tree

Hide file tree

Showing 2 changed files with 71 additions and 31 deletions.
diff --git a/R/fimsframe.R b/R/fimsframe.R
@@ -225,31 +225,25 @@ setValidity(
       errors <- c(errors, "data must have at least one row")
     }
 
-    # Check columns
-    if (!"type" %in% colnames(object@data)) {
-      errors <- c(errors, "data must contain 'type'")
-    }
-    if (!"datestart" %in% colnames(object@data)) {
-      errors <- c(errors, "data must contain 'datestart'")
-    }
-    if (!"dateend" %in% colnames(object@data)) {
-      errors <- c(errors, "data must contain 'dateend'")
-    }
-    if (!"dateend" %in% colnames(object@data)) {
-      errors <- c(errors, "data must contain 'value'")
-    }
-    if (!"dateend" %in% colnames(object@data)) {
-      errors <- c(errors, "data must contain 'unit'")
-    }
-    if (!"dateend" %in% colnames(object@data)) {
-      errors <- c(errors, "data must contain 'uncertainty'")
+    errors <- c(errors, validate_data_colnames(object@data))
+
+    # Add checks for other slots
+    # Check the format for acceptable variants of the ideal yyyy-mm-dd
+    grepl_datestart <- grepl(
+      "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1-2}",
+      data_mile1[["datestart"]]
+    )
+    grepl_dateend <- grepl(
+      "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1-2}",
+      data_mile1[["dateend"]]
+    )
+    if (!all(grepl_datestart)) {
+      errors <- c(errors, "datestart must be in 'yyyy-mm-dd' format")
     }
-    if (!"age" %in% colnames(object@data)) {
-      errors <- c(errors, "data must contain 'age'")
+    if (!all(grepl_dateend)) {
+      errors <- c(errors, "dateend must be in 'yyyy-mm-dd' format")
     }
 
-    # TODO: Add checks for other slots
-
     # Return
     if (length(errors) == 0) {
       return(TRUE)
@@ -259,6 +253,36 @@ setValidity(
   }
 )
 
+validate_data_colnames <- function(data) {
+  the_column_names <- colnames(data)
+  errors <- character()
+  if (!"type" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'type'")
+  }
+  if (!"name" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'name'")
+  }
+  if (!"datestart" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'datestart'")
+  }
+  if (!"dateend" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'dateend'")
+  }
+  if (!"dateend" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'value'")
+  }
+  if (!"dateend" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'unit'")
+  }
+  if (!"dateend" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'uncertainty'")
+  }
+  if (!"age" %in% the_column_names) {
+    errors <- c(errors, "data must contain 'age'")
+  }
+  return(errors)
+}
+
 # Constructors ----
 # All constructors in this file are documented in 1 roxygen file via @rdname.
 
@@ -281,14 +305,30 @@ setValidity(
 #' on the child class. Use [showClass()] to see all available slots.
 #' @export
 FIMSFrame <- function(data) {
-  # Get the earliest and latest year of data and use to calculate n years for
-  # population simulation
-  start_year <- as.integer(
-    strsplit(min(data[["datestart"]], na.rm = TRUE), "-")[[1]][1]
-  )
-  end_year <- as.integer(
-    strsplit(max(data[["dateend"]], na.rm = TRUE), "-")[[1]][1]
-  )
+  errors <- validate_data_colnames(data)
+  if (length(errors) > 0) {
+    stop(
+      "Check the columns of your data, the following are missing:\n",
+      paste(errors, sep = "\n", collapse = "\n")
+    )
+  }
+  # datestart and dateend need to be date classes so leading zeros are present
+  # but writing and reading from csv file removes the classes so they must be
+  # enforced here
+  # e.g., 0004-01-01 for January 01 0004
+  date_formats <- c("%Y-%m-%d")
+  data[["datestart"]] <- as.Date(data[["datestart"]], tryFormats = date_formats)
+  data[["dateend"]] <- as.Date(data[["dateend"]], tryFormats = date_formats)
+
+  # Get the earliest and latest year formatted as a string of 4 integers
+  start_year <- as.integer(format(
+    as.Date(min(data[["datestart"]], na.rm = TRUE), tryFormats = date_formats),
+    "%Y"
+  ))
+  end_year <- as.integer(format(
+    as.Date(max(data[["dateend"]], na.rm = TRUE), tryFormats = date_formats),
+    "%Y"
+  ))
   n_years <- as.integer(end_year - start_year + 1)
   years <- start_year:end_year
 

diff --git a/tests/testthat/test-fimsframe.R b/tests/testthat/test-fimsframe.R
@@ -73,7 +73,7 @@ test_that("Show method works as expected", {
 
 test_that("Validators work as expected", {
   bad_input <- data.frame(test = 1, test2 = 2)
-  expect_warning(expect_error(FIMSFrame(bad_input)))
+  expect_error(FIMSFrame(bad_input))
 })
 
 n_years <- fims_frame@n_years