Capstone Function Testing.Rmd

---
title: "Functions Test"
author: "Adam White"
date: '`r Sys.Date()`'
output: pdf_document
---


```{r}
# DATA COLLECTION SECTION 1
 
library(dplyr)
library(tidyverse) # if not installed run install.packages('tidyverse')
library(arrow) # install.packages('arrow')
library(lubridate) # install.packages('lubridate')
# Observed meteorology (targets file for site fcre)
obs_met <- readr::read_csv(
  "https://s3.flare-forecast.org/targets/fcre_v2/fcre/observed-met_fcre.csv"
)

# Remote bucket that holds the NOAA GEFS forecasts (read anonymously)
forecast_dir <- arrow::s3_bucket(
  bucket = "drivers/noaa/gefs-v12/stage2/parquet/0",
  endpoint_override = "s3.flare-forecast.org",
  anonymous = TRUE
)

# One forecast reference date for every day of June 2021
forecast_dates <- seq(
  from = lubridate::as_date("2021-06-01"),
  to = lubridate::as_date("2021-06-30"),
  by = "day"
)

# Printing the dataset handle lists its column names
arrow::open_dataset(forecast_dir)

# The dataset is VERY large: filter it first, then collect() only the
# matching rows into the local environment
forecasted_met <- arrow::open_dataset(forecast_dir) |>
  dplyr::filter(
    site_id == "fcre", # Falling Creek Reservoir (site_id code); sunp = Sunapee
    reference_datetime %in% forecast_dates
  ) |>
  # other columns of the dataset can be used to filter/select here as well
  dplyr::collect()

#===================================#
# Join forecast and observations
# Step 1: bring the forecast into the same long layout as the observations
forecasted_met <- tidyr::pivot_wider(
  forecasted_met,
  id_cols = c(horizon, parameter, reference_datetime, datetime),
  names_from = variable,
  values_from = prediction
) |>
  # wind speed is the magnitude of the eastward/northward wind components
  dplyr::mutate(wind_speed = sqrt(eastward_wind^2 + northward_wind^2)) |>
  # site_id and height are intentionally not selected
  dplyr::select(
    horizon,
    parameter,
    reference_datetime,
    datetime,
    air_temperature,
    air_pressure,
    relative_humidity,
    surface_downwelling_longwave_flux_in_air,
    surface_downwelling_shortwave_flux_in_air,
    precipitation_flux,
    wind_speed
  ) |>
  # back to long format: one row per variable, value stored in `prediction`
  tidyr::pivot_longer(
    cols = air_temperature:wind_speed,
    names_to = "variable",
    values_to = "prediction"
  )

# Step 2: keep only the rows that have both a forecast and an observation
# for the same datetime and variable
met_joined <- forecasted_met |>
  dplyr::inner_join(obs_met, by = c("datetime", "variable"))
```


```{r}
# Smoke test of the helpers defined in "Capstone Functions.R":
# ensemble_parameters_seperate(), ensemble_converter() and ensemble_plot().
# Requires `met_joined` from the data-collection chunk.
# NOTE(review): absolute, machine-specific path -- the document only knits
# where this file exists; consider a project-relative path.
source("/Users/Adam/Capstone Functions.R")

param <- ensemble_parameters_seperate(met_joined)

param

# flatten the returned parameters into a plain numeric vector
p <- as.numeric(c(param))

final_ens <- ensemble_converter(met_joined, Parameters = p)

head(final_ens) # TODO: switch to long format


ref_datetime <- lubridate::as_date("2021-06-20")
air_temp <- met_joined[which(met_joined$variable == "air_temperature"), ]

# Wide table of the observations: one row per (horizon, datetime) and one
# column per value of `parameter`.
# (A `prediction` version of this table used to be built here first, but it
# was overwritten immediately and never used, so it has been removed.)
obs_temp <- pivot_wider(
  data = air_temp,
  id_cols = c(horizon, datetime),
  names_from = parameter,
  values_from = "observation"
)

obs_temp <- arrange(obs_temp, datetime)
# keep only the column for parameter 1
# (presumably the observation is the same for every parameter -- TODO confirm)
obs_temp <- obs_temp$`1`
# plain assignment: `<<-` is not needed at the top level of a chunk
obs <- as.numeric(obs_temp)

# columns 4 to 33 of the converted ensemble
# (presumably the 30 ensemble-member columns -- TODO confirm against the
# output of ensemble_converter())
ens_just_data <- final_ens[4:33]

plt <- ensemble_plot(
  ens_just_data,
  obs,
  ref_datetime = ref_datetime,
  Ens_Obs_Joined = met_joined,
  horizon_interval = 50:300
)

# TODO: plot the obs as points

plt


# parse_date_time()

# Notes on packaging (kept as comments: the original bare `vignette` and the
# dangling `pkgdown::` were not valid code and stopped the chunk from parsing)
# - vignette: essentially a how-to for using a package
# - pkgdown: will build the directory and makes building an R package very easy
#   - needs a GitHub repo ready
```


```{r}

# Run ensemble_parameters() (defined outside this file) on the joined
# forecast/observation table and keep its result in `par`
par <- met_joined |> ensemble_parameters()


```


```{r}
# Build the raw June air-temperature ensemble matrix and plot it against the
# observations. Requires `met_joined` (data-collection chunk) and `obs`
# (function-test chunk).

# keep only the air-temperature rows of the joined data
air_temp_june <- met_joined[which(met_joined$variable == "air_temperature"), ]

# order by parameter (ensemble number)
air_temp_june <- arrange(air_temp_june, parameter)

# every 24 hours FOR NOW -------------------- CHANGE LATER
#air_temp_june = air_temp_june[which(air_temp_june$horizon %% 24 == 0),]

# wide tables: one column per value of `parameter`
obs_june_temp <- pivot_wider(
  data = air_temp_june,
  id_cols = c(horizon, datetime),
  names_from = parameter,
  values_from = "observation"
)
air_temp_june <- pivot_wider(
  data = air_temp_june,
  id_cols = c(horizon, datetime, reference_datetime),
  names_from = parameter,
  values_from = "prediction"
)

# ordering by datetime, not horizon
air_temp_june <- arrange(air_temp_june, datetime)

# save the id columns for later use before they are dropped
horizon <- air_temp_june$horizon
ref_datetime <- air_temp_june$reference_datetime
date_time <- air_temp_june$datetime

# drop the three id columns (horizon, datetime, reference_datetime) ...
air_temp_june <- air_temp_june[-(1:3)]
# ... and then the 31st remaining column, which contains NA
# (assumes the column order produced by pivot_wider above -- TODO confirm)
air_temp_june <- air_temp_june[-31]

# convert the data frame to a matrix
# (plain assignment: `<<-` is not needed at the top level of a chunk)
temp_june <- as.matrix(air_temp_june)

# shift every forecast value by 3
# NOTE(review): presumably a deliberate offset for testing the plot -- confirm
temp_june <- temp_june + 3

# NOTE(review): `ref_datetime` is a whole column here, whereas the earlier
# ensemble_plot() call passed a single date -- confirm which one is expected
plt <- ensemble_plot(
  temp_june,
  obs,
  ref_datetime = ref_datetime,
  Ens_Obs_Joined = met_joined,
  horizon_interval = 50:200
)
plt
```


```{r}
# Plot the raw ensemble, the converted ensemble and the observations together
# for the forecast issued on 2021-06-20. Requires `met_joined`
# (data-collection chunk) plus `ens_just_data` and `obs` (function-test chunk).

# keep only the air-temperature rows of the joined data
air_temp_june <- met_joined[which(met_joined$variable == "air_temperature"), ]

# order by parameter (ensemble number)
air_temp_june <- arrange(air_temp_june, parameter)

# every 24 hours FOR NOW -------------------- CHANGE LATER
#air_temp_june = air_temp_june[which(air_temp_june$horizon %% 24 == 0),]

# wide tables: one column per value of `parameter`
obs_june_temp <- pivot_wider(
  data = air_temp_june,
  id_cols = c(horizon, datetime),
  names_from = parameter,
  values_from = "observation"
)
air_temp_june <- pivot_wider(
  data = air_temp_june,
  id_cols = c(horizon, datetime, reference_datetime),
  names_from = parameter,
  values_from = "prediction"
)

# ordering by datetime, not horizon
air_temp_june <- arrange(air_temp_june, datetime)

# save the id columns for later use before they are dropped
horizon <- air_temp_june$horizon
ref_datetime <- air_temp_june$reference_datetime
date_time <- air_temp_june$datetime

# drop the three id columns (horizon, datetime, reference_datetime) ...
air_temp_june <- air_temp_june[-(1:3)]
# ... and then the 31st remaining column, which contains NA
# (assumes the column order produced by pivot_wider above -- TODO confirm)
air_temp_june <- air_temp_june[-31]

# convert the data frame to a matrix
# (plain assignment: `<<-` is not needed at the top level of a chunk)
temp_june <- as.matrix(air_temp_june)


# raw ensemble | converted ensemble | observations, side by side
all_data <- cbind(temp_june, ens_just_data, obs)

# rows belonging to the forecast issued on 2021-06-20; both sides are
# converted with as_date() so the comparison also works when
# reference_datetime is a date-time (POSIXct) rather than a Date
subset <- all_data[
  which(lubridate::as_date(ref_datetime) == lubridate::as_date("2021-06-20")),
]

# one colour per column of all_data: 30 gray, 30 blue and 1 red, in the
# column order of the cbind() above
color <- c(rep("darkgray", 30), rep("blue", 30), "red")

# rows of `subset` shown in the plot
plot_rows <- 105:155

# NOTE(review): ggmatplot() is not attached by any library() call visible in
# this file -- confirm it is loaded (e.g. by the sourced functions file)
plt1 <- ggmatplot(
  x = plot_rows,
  y = subset[plot_rows, ],
  plot_type = "line",
  linetype = "solid",
  color = color
)

plt1

```