From b51427a34d31dc1e7a33fc2a8f04c47b160ab098 Mon Sep 17 00:00:00 2001
From: "J. Allen Baron" <jabaron.phd@gmail.com>
Date: Sat, 1 May 2021 21:37:14 -0400
Subject: [PATCH 1/3] Generate example deliverable: Combined rank - Civ Assoc

---
 Reports/CA_rank_model-EXAMPLE.Rmd | 165 ++++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 Reports/CA_rank_model-EXAMPLE.Rmd

diff --git a/Reports/CA_rank_model-EXAMPLE.Rmd b/Reports/CA_rank_model-EXAMPLE.Rmd
new file mode 100644
index 0000000..d344248
--- /dev/null
+++ b/Reports/CA_rank_model-EXAMPLE.Rmd
@@ -0,0 +1,165 @@
+---
+title: "Ranking Civic Associations with Combined Measures"
+subtitle: "EXAMPLE"
+author: "J. Allen Baron"
+date: "4/21/2021"
+geometry: margin=0.5in
+output:
+  pdf_document: default
+---
+
+```{r setup, include=FALSE}
+library(here)
+library(tidyverse)
+library(scales)
+library(gridExtra)
+library(kableExtra)
+
+knitr::opts_chunk$set(echo = FALSE)
+knitr::opts_knit$set(root.dir = here::here())
+```
+
+```{r functions, include = FALSE}
+source("src/load_data.R")
+
+# Draw the shapes in `sf_df`, filled by the column named in string `fill_var`.
+plot_sf <- function(sf_df, fill_var) {
+    ggplot(sf_df) +
+        # aes_string() is deprecated; .data[[fill_var]] looks the column up by name
+        geom_sf(aes(geometry = geometry, fill = .data[[fill_var]])) +
+        scale_fill_viridis_c() +
+        theme_minimal() +
+        theme(
+            axis.text = element_blank(), axis.ticks = element_blank(),
+            axis.title = element_blank(), legend.title = element_blank()
+        ) +
+        labs(title = fill_var)
+}
+```
+
+```{r message = FALSE, results = 'hide', warning = FALSE}
+# NOTE: My preference is to use canopy and plantable land separately in this
+#   approach, instead of using open plantable land; currently using canopy ONLY
+# Load the four measures used below plus the two join keys (geo_id, civ_name).
+ca_data <- 'data/civ_stats.csv' %>%
+    readr::read_csv(
+        col_types = readr::cols_only(  # only the columns listed here are read
+            geo_id = col_double(),
+            civ_name = col_character(),
+            pct_in_poverty = col_double(),
+            #rank_pct_in_poverty = col_double(),
+            pct_nonwhite = col_double(),
+            #rank_pct_nonwhite = col_double(),
+            #canopy_sq_ft_per_capita = col_double(),
+            #rank_canopy_sq_ft_per_capita = col_double(),
+            thousand_ppl_per_sq_mile = col_double(),
+            #rank_thousand_ppl_per_sq_mile = col_double(),
+            pct_canopy = col_double()
+            #rank_pct_canopy = col_double(),
+            #pct_open_plantable = col_double(),
+            #rank_pct_open_plantable = col_double()
+        )
+    )
+# read_geos_civ_assoc() is not defined here; presumably from src/load_data.R
+shape_files <- read_geos_civ_assoc()
+```
+
+```{r}
+ca_sf <- ca_data %>%  # full join keeps rows found in only one of the inputs
+    dplyr::full_join(shape_files, by = c("civ_name", "geo_id"))
+```
+
+
+# Original Measures of Interest
+
+```{r, fig.width = 7, fig.asp = 1, fig.align = "center"}
+# set_names() without a names argument names each element after itself, so
+#   the plot list returned by map() is named by measure
+c(
+  "pct_in_poverty", "pct_nonwhite", "thousand_ppl_per_sq_mile", "pct_canopy"
+) %>%
+  purrr::set_names() %>%
+  purrr::map(~ plot_sf(ca_sf, fill_var = .x)) %>%
+  gridExtra::grid.arrange(grobs = ., ncol = 2)
+```
+\newpage
+
+# Rescale All Measures of Interest 
+
+Rescale values to the range 1-10 such that 1 represents the lowest interest in marketing for a given measure and 10 represents the greatest interest. Measures where larger values are of lower interest are reversed (e.g. percent canopy).
+
+```{r}
+cols_direct <- c("pct_in_poverty", "pct_nonwhite", "thousand_ppl_per_sq_mile")
+cols_reverse <- "pct_canopy"
+
+# Rescale every measure to 1-10 (reversed for cols_reverse), then go long with
+#   one row per input row and measure. Lambdas replace the deprecated
+#   across(.fns, ...) argument passing; the pivot reuses the column vectors.
+ca_rescaled <- ca_sf %>%
+    dplyr::mutate(
+        dplyr::across(tidyselect::all_of(cols_reverse),
+                      ~ scales::rescale(.x, to = c(10, 1))),
+        dplyr::across(tidyselect::all_of(cols_direct),
+                      ~ scales::rescale(.x, to = c(1, 10)))
+    ) %>%
+    tidyr::pivot_longer(
+        cols = tidyselect::all_of(c(cols_direct, cols_reverse)),
+        names_to = "Measure",
+        values_to = "Rescaled_Value"
+    )
+```
+
+
+```{r, fig.width = 7, fig.asp = 1, fig.align = "center"}
+plot_sf(ca_rescaled, "Rescaled_Value") +
+    facet_wrap(~ Measure)  # one panel per measure; the fill scale is shared
+```
+\newpage
+
+# Combine Rescaled Measures into a Single Ranking
+
+In this basic example, rescaled measures are treated equally and simply added.
+```{r}
+ca_single <- ca_rescaled %>%
+    dplyr::group_by(geo_id, civ_name) %>%
+    dplyr::summarize(
+        geometry = geometry[1],  # groups are assumed to share one shape
+        Unweighted_Importance = sum(Rescaled_Value),  # equal-weight total
+        color = dplyr::if_else(Unweighted_Importance < 20, "white", "black"),
+        .groups = "drop"
+    )
+```
+
+```{r fig.width = 5.5, fig.asp = 1, fig.align = "center", warning = FALSE}
+plot_sf(ca_single, "Unweighted_Importance") +
+    geom_sf_text(  # overlay each row's geo_id as a text label
+        aes(label = geo_id, geometry = geometry),
+        size = 3, color = ca_single$color,  # per-row color set outside aes()
+        fontface = "bold"
+    )
+```
+
+```{r, results = "asis"}
+split <- nrow(ca_single) / 3  # width of each GEO_ID band (one third of rows)
+# Build three GEO_ID/name lookup tables; kbl() below receives them as a list.
+purrr::map(
+  1:3,
+  function(n) {
+    ca_single %>%
+      dplyr::select(GEO_ID = geo_id, Civic_Association = civ_name) %>%
+      dplyr::mutate(
+        Civic_Association = stringr::str_replace(Civic_Association, " - ", "-")
+      ) %>%
+      dplyr::filter(  # NOTE(review): assumes geo_id runs 1..nrow — confirm
+        GEO_ID <= n * split,
+        GEO_ID > (n-1) * split
+      )
+  }
+) %>%
+  kableExtra::kbl(booktabs = TRUE) %>%
+  kableExtra::kable_styling(
+    latex_options = c("striped", "hold_position"),
+    font_size = 8
+  )
+```
+

From 4fb027ac35a3e295d88acfb93f3fefa5c9d583e7 Mon Sep 17 00:00:00 2001
From: Chellison <cle9aa@virginia.edu>
Date: Mon, 16 Aug 2021 20:35:09 -0400
Subject: [PATCH 2/3] Overview Maps

Create heatmaps of a few different variables across all civic associations.
---
 Reports/civAssociation_variable.Rmd | 156 ++++++++++++++++++++++++++++
 scripts/prepBGforRanking.R          | 114 ++++++++++++++++++++
 2 files changed, 270 insertions(+)
 create mode 100644 Reports/civAssociation_variable.Rmd
 create mode 100644 scripts/prepBGforRanking.R

diff --git a/Reports/civAssociation_variable.Rmd b/Reports/civAssociation_variable.Rmd
new file mode 100644
index 0000000..bf31dcb
--- /dev/null
+++ b/Reports/civAssociation_variable.Rmd
@@ -0,0 +1,156 @@
+---
+title: "Civic Associations Variables"
+author: "DataKind Tree Team"
+date: "8/16/2021"
+#always_allow_html: true
+output:
+  pdf_document: default
+  word_document: default
+  html_document:
+    df_print: paged
+geometry: margin=0.5in
+subtitle: EXAMPLE
+---
+
+```{r setup, include=FALSE}
+#This document prints a visual summary of civic association threshold values
+#User can enter variable (% poverty, % not white, canopy cover, trees per area
+#already planted by ecoAction) and rank threshold.
+#Based on Allen's code
+#Need to have civ stat data (with variabl values and ranks) as well as civic
+#associatin geometry
+
+#TO DO: -Remove warning messages when printing maps
+#       -make labels/titles clearly and fix any other visuals
+
+#get libraries
+library(here)
+library(tidyverse)
+library(scales)
+library(gridExtra)
+library(kableExtra)
+library(dplyr)
+library(sf)
+library(units)
+library(ggmap)
+library(flextable)
+library(knitr)
+library(webshot)
+library(ggplot2)
+#library(ggsflabel)
+
+knitr::opts_chunk$set(echo = FALSE)
+knitr::opts_knit$set(root.dir = here::here())
+```
+
+```{r functions, include = FALSE}
+#setwd('C:/Users/cle9a/Documents/Trees/GitHub_02-07-2021/EcoAction')
+source("src/load_data.R")
+
+#get stats for civic association (including rank) 
+df_ranks <- read.csv('data/civ_stats_wTree.csv')
+#for two variables- recalculate rank so that 1 is civic association of greatest
+#interest and 63 is civic is civic association of least interest
+df_ranks$rank_pct_canopy = 64-df_ranks$rank_pct_canopy
+df_ranks$rank_tree_area = 64-df_ranks$rank_tree_area
+#get and merge geometry
+civ_geo_df <- read_geos_civ_assoc()
+civ <- merge(df_ranks, civ_geo_df, by.x = "geo_id", by.y="geo_id")
+
+#get map background, zoom=13 seems to have a good amount of detail
+b <- st_bbox(civ_geo_df)
+civ2_g <- get_stamenmap(bbox = c(left = b[[1]], bottom = b[[2]], right = b[[3]], top = b[[4]]), zoom = 13)
+
+plot_civVar <- function(varCol) {
+
+    geo_data1 <- merge(df_ranks, civ_geo_df, by.x = "geo_id", by.y="geo_id")
+    geo_data1$Var <- geo_data1[,varCol]
+    #make title using variable and rank
+    mapTitle <- paste('Variable:', varCol)
+    
+    #geo_data1$rank_pct_in_poverty
+    
+    #make map- includes civic associations of interest with geo_id labels
+    ggmap(civ2_g)+
+          geom_sf(data = civ_geo_df$geometry, fill=alpha("blue",0), inherit.aes = FALSE)+
+          geom_sf(data = geo_data1$geometry, aes(fill=geo_data1$Var), inherit.aes = FALSE)+
+        geom_sf_text( data = geo_data1$geometry, inherit.aes = FALSE,
+        aes(label = geo_data1$geo_id),
+        size = 3, color = "white",
+        fontface = "bold"
+    )+ggtitle(mapTitle)
+}
+
+```
+
+
+```{r, results = "asis"}
+#print out geo_id and civic assocation names
+split <- nrow(df_ranks) / 3
+
+purrr::map(
+  1:3,
+  function(n) {
+    df_ranks %>%
+      dplyr::select(GEO_ID = geo_id, Civic_Association = civ_name) %>%
+      dplyr::mutate(
+        Civic_Association = stringr::str_replace(Civic_Association, " - ", "-")
+      ) %>%
+      dplyr::filter(
+        GEO_ID <= n * split,
+        GEO_ID > (n-1) * split
+      )
+  }
+) %>%
+  kableExtra::kbl(booktabs = TRUE) %>%
+  kableExtra::kable_styling(
+    latex_options = c("striped", "hold_position"),
+    font_size = 8
+  )
+```
+
+```{r, fig.width = 7, fig.asp = 1, fig.align = "center", warnings=FALSE, message = FALSE, results = FALSE}
+plot_civVar('pct_in_poverty')
+plot_civVar('pct_canopy')
+plot_civVar('pct_open_plantable')
+plot_civVar('tree_area')
+```
+
+
+# A summary of several variables is shown below.
+
+* pov = % of population below poverty line
+* can = % of area covered by tree canopy
+* plant = % of area that is plantable and open (residential and not covered by canopy)
+* EcoTree = number of trees per m^2 (divide number by 10^5)
+
+```{r, results = "asis"}
+#print out geo_id and civic assocation names
+df_ranks$tree_area5 = df_ranks$tree_area*100000
+split <- nrow(df_ranks)
+
+purrr::map(
+  1,
+  function(n) {
+    df_ranks %>%
+      dplyr::select(GEO_ID = geo_id, Civic_Association = civ_name, pov = pct_in_poverty,
+                    can = pct_canopy, plant = pct_open_plantable, tree = tree_area5) %>%
+      dplyr::mutate(
+        Civic_Association = stringr::str_replace(Civic_Association, " - ", "-"),
+        pov = round(pov,2),
+        can = round(can,2),
+        plant = round(plant,2),
+        tree = round(tree,2)
+      ) %>%
+      dplyr::filter(
+        GEO_ID <= n * split,
+        GEO_ID > (n-1) * split
+      )
+  }
+) %>%
+  kableExtra::kbl(booktabs = TRUE) %>%
+  kableExtra::kable_styling(
+    latex_options = c("striped", "hold_position"),
+    font_size = 8
+  )
+```
diff --git a/scripts/prepBGforRanking.R b/scripts/prepBGforRanking.R
new file mode 100644
index 0000000..986f358
--- /dev/null
+++ b/scripts/prepBGforRanking.R
@@ -0,0 +1,114 @@
+#prepare Block Group stats for ranking
+
+#libraries
+library(tidyverse)
+library(leaflet)
+library(RColorBrewer)
+library(htmltools)
+library(htmlwidgets)
+library(sf)
+library(sp)
+library(raster)
+library("rgdal")
+library(units)
+library(lwgeom)
+
+#get data and functions
+#setwd('Path_to/EcoAction')
+source("src/load_data.R")
+
+#get blockgroups demo data and canopy data- merge
+demo_bg <- read_demographics_block_group_csv()
+canopy_bg <- read_land_area_block_group_csv()
+
+bg <- merge(demo_bg, canopy_bg, by.x = "geo_id", by.y="geo_id")
+
+
+#get number of trees in bg
+trees <- read_tree_data()
+bg_loc <- read_geos_block_group()
+bg_loc$tree_count <- lengths(st_intersects(bg_loc, trees))
+
+bg <- merge(bg, bg_loc, by.x = "geo_id", by.y="geo_id")
+
+bg$tree_area <- bg$tree_count/bg$area_m_sq
+
+#get ranks for each column
+df_ranks <- bg
+
+columns <- c('pct_in_poverty', 'pct_nonwhite', 'pct_hisp',
+             'pct_canopy', 'pct_open_plantable', 'pct_plantable', 'tree_area')
+
+for (c_name in columns) {
+  df_sub <- dplyr::select(bg, geo_id, c_name)
+  df_sub_sorted <- df_sub[order(-df_sub[,c_name]),]
+
+  # Add column of rank in
+  df_sub_sorted[, ncol(df_sub_sorted) + 1] <- seq_len(nrow(df_sub))
+  # Name the column
+  colnames(df_sub_sorted)[ncol(df_sub_sorted)] <- paste0('rank_', c_name)
+
+  for_merge <- dplyr::select(df_sub_sorted, geo_id, paste0('rank_', c_name))
+  df_ranks <- sp::merge(df_ranks, for_merge, by = "geo_id")
+}
+
+df = subset(df_ranks, select = -c(geometry) )
+
+df_ranks_noGeo <- st_drop_geometry(df_ranks)
+write.csv(df, 'data/bg_stats.csv')
+
+
+#add tree_area to civ
+civ_loc <- read_geos_civ_assoc()
+df_ranks <- read.csv('data/civ_stats.csv')
+demo_civ <- read_demographics_civic_association_csv()
+trees <- read_tree_data()
+civ_loc$tree_count <- lengths(st_intersects(civ_loc, trees))
+
+canopy_civ <- read_land_area_civic_association_csv()
+
+df_ranks <- merge(df_ranks, civ_loc, by.x = "geo_id", by.y="geo_id")
+df_ranks <- merge(df_ranks, canopy_civ, by.x ="geo_id", by.y="geo_id")
+
+df_ranks$tree_area <- df_ranks$tree_count/df_ranks$area_m_sq
+
+columns <- c('tree_area')
+
+for (c_name in columns) {
+    df_sub <- dplyr::select(df_ranks, geo_id, c_name)
+    df_sub_sorted <- df_sub[order(-df_sub[,c_name]),]
+
+    # Add column of rank in
+    df_sub_sorted[, ncol(df_sub_sorted) + 1] <- seq_len(nrow(df_sub))
+    # Name the column
+    colnames(df_sub_sorted)[ncol(df_sub_sorted)] <- paste0('rank_', c_name)
+
+    for_merge <- dplyr::select(df_sub_sorted, geo_id, paste0('rank_', c_name))
+    df_ranks <- sp::merge(df_ranks, for_merge, by = "geo_id")
+}
+
+df = subset(df_ranks, select = -c(geometry) )
+
+df_ranks_noGeo <- st_drop_geometry(df_ranks)
+write.csv(df, 'data/civ_stats_toomany.csv')
+
+
+#get corresponding civic association
+#trying this again to create join csv with simplified
+simplified <- read.csv('data/blockgroup_simplified.csv')
+simplified <- read_excel('data/blockgroup_simplified.xlsx')
+bg_all <- read_geos_block_group()
+bg_all$geo_id <- as.numeric(bg_all$geo_id)
+bg_all <- merge(bg_all, simplified, by.x = 'geo_id', by.y = 'geo_id')
+civ_all <- read_geos_civ_assoc()
+civ <- dplyr::select(civ_all, civ_name, geo_id, geometry)
+
+b_loc_sf <- sf::st_sf(bg_all)
+civ_loc <- sf::st_sf(civ)
+
+combined <- sf::st_join(bg_all, civ, join = st_intersects)
+intersect <- st_intersection(bg_all, civ)
+comgined2 <- st_join(b_loc_sf, civ_loc, left=TRUE)
+
+intersect <- st_drop_geometry(intersect)
+write.csv(intersect, 'data/bg_ca_intersect.csv')

From e36b44b4be792007684a8cb4907f510f2f18184f Mon Sep 17 00:00:00 2001
From: Chellison <cle9aa@virginia.edu>
Date: Mon, 16 Aug 2021 20:45:37 -0400
Subject: [PATCH 3/3] Revert "Overview Maps"

This reverts commit 4fb027ac35a3e295d88acfb93f3fefa5c9d583e7.
---
 Reports/civAssociation_variable.Rmd | 156 ----------------------------
 scripts/prepBGforRanking.R          | 114 --------------------
 2 files changed, 270 deletions(-)
 delete mode 100644 Reports/civAssociation_variable.Rmd
 delete mode 100644 scripts/prepBGforRanking.R

diff --git a/Reports/civAssociation_variable.Rmd b/Reports/civAssociation_variable.Rmd
deleted file mode 100644
index bf31dcb..0000000
--- a/Reports/civAssociation_variable.Rmd
+++ /dev/null
@@ -1,156 +0,0 @@
----
-title: "Civic Associations Variables"
-author: "DataKind Tree Team"
-date: "8/16/2021"
-#always_allow_html: true
-output:
-  pdf_document: default
-  word_document: default
-  html_document:
-    df_print: paged
-geometry: margin=0.5in
-subtitle: EXAMPLE
----
-
-```{r setup, include=FALSE}
-#This document prints a visual summary of civic association threshold values
-#User can enter variable (% poverty, % not white, canopy cover, trees per area
-#already planted by ecoAction) and rank threshold.
-#Based on Allen's code
-#Need to have civ stat data (with variabl values and ranks) as well as civic
-#associatin geometry
-
-#TO DO: -Remove warning messages when printing maps
-#       -make labels/titles clearly and fix any other visuals
-
-#get libraries
-library(here)
-library(tidyverse)
-library(scales)
-library(gridExtra)
-library(kableExtra)
-library(dplyr)
-library(sf)
-library(units)
-library(ggmap)
-library(flextable)
-library(knitr)
-library(webshot)
-library(ggplot2)
-#library(ggsflabel)
-
-knitr::opts_chunk$set(echo = FALSE)
-knitr::opts_knit$set(root.dir = here::here())
-```
-
-```{r functions, include = FALSE}
-#setwd('C:/Users/cle9a/Documents/Trees/GitHub_02-07-2021/EcoAction')
-source("src/load_data.R")
-
-#get stats for civic association (including rank) 
-df_ranks <- read.csv('data/civ_stats_wTree.csv')
-#for two variables- recalculate rank so that 1 is civic association of greatest
-#interest and 63 is civic is civic association of least interest
-df_ranks$rank_pct_canopy = 64-df_ranks$rank_pct_canopy
-df_ranks$rank_tree_area = 64-df_ranks$rank_tree_area
-#get and merge geometry
-civ_geo_df <- read_geos_civ_assoc()
-civ <- merge(df_ranks, civ_geo_df, by.x = "geo_id", by.y="geo_id")
-
-#get map background, zoom=13 seems to have a good amount of detail
-b <- st_bbox(civ_geo_df)
-civ2_g <- get_stamenmap(bbox = c(left = b[[1]], bottom = b[[2]], right = b[[3]], top = b[[4]]), zoom = 13)
-
-plot_civVar <- function(varCol) {
-
-    geo_data1 <- merge(df_ranks, civ_geo_df, by.x = "geo_id", by.y="geo_id")
-    geo_data1$Var <- geo_data1[,varCol]
-    #make title using variable and rank
-    mapTitle <- paste('Variable:', varCol)
-    
-    #geo_data1$rank_pct_in_poverty
-    
-    #make map- includes civic associations of interest with geo_id labels
-    ggmap(civ2_g)+
-          geom_sf(data = civ_geo_df$geometry, fill=alpha("blue",0), inherit.aes = FALSE)+
-          geom_sf(data = geo_data1$geometry, aes(fill=geo_data1$Var), inherit.aes = FALSE)+
-        geom_sf_text( data = geo_data1$geometry, inherit.aes = FALSE,
-        aes(label = geo_data1$geo_id),
-        size = 3, color = "white",
-        fontface = "bold"
-    )+ggtitle(mapTitle)
-}
-
-```
-
-
-```{r, results = "asis"}
-#print out geo_id and civic assocation names
-split <- nrow(df_ranks) / 3
-
-purrr::map(
-  1:3,
-  function(n) {
-    df_ranks %>%
-      dplyr::select(GEO_ID = geo_id, Civic_Association = civ_name) %>%
-      dplyr::mutate(
-        Civic_Association = stringr::str_replace(Civic_Association, " - ", "-")
-      ) %>%
-      dplyr::filter(
-        GEO_ID <= n * split,
-        GEO_ID > (n-1) * split
-      )
-  }
-) %>%
-  kableExtra::kbl(booktabs = TRUE) %>%
-  kableExtra::kable_styling(
-    latex_options = c("striped", "hold_position"),
-    font_size = 8
-  )
-```
-
-```{r, fig.width = 7, fig.asp = 1, fig.align = "center", warnings=FALSE, message = FALSE, results = FALSE}
-plot_civVar('pct_in_poverty')
-plot_civVar('pct_canopy')
-plot_civVar('pct_open_plantable')
-plot_civVar('tree_area')
-```
-
-
-# A summary of several variables is shown below.
-
-* pov = % of population below poverty line
-* can = % of area covered by tree canopy
-* plant = % of area that is plantable and open (residential and not covered by canopy)
-* EcoTree = number of trees per m^2 (divide number by 10^5)
-
-```{r, results = "asis"}
-#print out geo_id and civic assocation names
-df_ranks$tree_area5 = df_ranks$tree_area*100000
-split <- nrow(df_ranks)
-
-purrr::map(
-  1,
-  function(n) {
-    df_ranks %>%
-      dplyr::select(GEO_ID = geo_id, Civic_Association = civ_name, pov = pct_in_poverty,
-                    can = pct_canopy, plant = pct_open_plantable, tree = tree_area5) %>%
-      dplyr::mutate(
-        Civic_Association = stringr::str_replace(Civic_Association, " - ", "-"),
-        pov = round(pov,2),
-        can = round(can,2),
-        plant = round(plant,2),
-        tree = round(tree,2)
-      ) %>%
-      dplyr::filter(
-        GEO_ID <= n * split,
-        GEO_ID > (n-1) * split
-      )
-  }
-) %>%
-  kableExtra::kbl(booktabs = TRUE) %>%
-  kableExtra::kable_styling(
-    latex_options = c("striped", "hold_position"),
-    font_size = 8
-  )
-```
diff --git a/scripts/prepBGforRanking.R b/scripts/prepBGforRanking.R
deleted file mode 100644
index 986f358..0000000
--- a/scripts/prepBGforRanking.R
+++ /dev/null
@@ -1,114 +0,0 @@
-#prepare Block Group stats for ranking
-
-#libraries
-library(tidyverse)
-library(leaflet)
-library(RColorBrewer)
-library(htmltools)
-library(htmlwidgets)
-library(sf)
-library(sp)
-library(raster)
-library("rgdal")
-library(units)
-library(lwgeom)
-
-#get data and functions
-#setwd('Path_to/EcoAction')
-source("src/load_data.R")
-
-#get blockgroups demo data and canopy data- merge
-demo_bg <- read_demographics_block_group_csv()
-canopy_bg <- read_land_area_block_group_csv()
-
-bg <- merge(demo_bg, canopy_bg, by.x = "geo_id", by.y="geo_id")
-
-
-#get number of trees in bg
-trees <- read_tree_data()
-bg_loc <- read_geos_block_group()
-bg_loc$tree_count <- lengths(st_intersects(bg_loc, trees))
-
-bg <- merge(bg, bg_loc, by.x = "geo_id", by.y="geo_id")
-
-bg$tree_area <- bg$tree_count/bg$area_m_sq
-
-#get ranks for each column
-df_ranks <- bg
-
-columns <- c('pct_in_poverty', 'pct_nonwhite', 'pct_hisp',
-             'pct_canopy', 'pct_open_plantable', 'pct_plantable', 'tree_area')
-
-for (c_name in columns) {
-  df_sub <- dplyr::select(bg, geo_id, c_name)
-  df_sub_sorted <- df_sub[order(-df_sub[,c_name]),]
-
-  # Add column of rank in
-  df_sub_sorted[, ncol(df_sub_sorted) + 1] <- seq_len(nrow(df_sub))
-  # Name the column
-  colnames(df_sub_sorted)[ncol(df_sub_sorted)] <- paste0('rank_', c_name)
-
-  for_merge <- dplyr::select(df_sub_sorted, geo_id, paste0('rank_', c_name))
-  df_ranks <- sp::merge(df_ranks, for_merge, by = "geo_id")
-}
-
-df = subset(df_ranks, select = -c(geometry) )
-
-df_ranks_noGeo <- st_drop_geometry(df_ranks)
-write.csv(df, 'data/bg_stats.csv')
-
-
-#add tree_area to civ
-civ_loc <- read_geos_civ_assoc()
-df_ranks <- read.csv('data/civ_stats.csv')
-demo_civ <- read_demographics_civic_association_csv()
-trees <- read_tree_data()
-civ_loc$tree_count <- lengths(st_intersects(civ_loc, trees))
-
-canopy_civ <- read_land_area_civic_association_csv()
-
-df_ranks <- merge(df_ranks, civ_loc, by.x = "geo_id", by.y="geo_id")
-df_ranks <- merge(df_ranks, canopy_civ, by.x ="geo_id", by.y="geo_id")
-
-df_ranks$tree_area <- df_ranks$tree_count/df_ranks$area_m_sq
-
-columns <- c('tree_area')
-
-for (c_name in columns) {
-    df_sub <- dplyr::select(df_ranks, geo_id, c_name)
-    df_sub_sorted <- df_sub[order(-df_sub[,c_name]),]
-
-    # Add column of rank in
-    df_sub_sorted[, ncol(df_sub_sorted) + 1] <- seq_len(nrow(df_sub))
-    # Name the column
-    colnames(df_sub_sorted)[ncol(df_sub_sorted)] <- paste0('rank_', c_name)
-
-    for_merge <- dplyr::select(df_sub_sorted, geo_id, paste0('rank_', c_name))
-    df_ranks <- sp::merge(df_ranks, for_merge, by = "geo_id")
-}
-
-df = subset(df_ranks, select = -c(geometry) )
-
-df_ranks_noGeo <- st_drop_geometry(df_ranks)
-write.csv(df, 'data/civ_stats_toomany.csv')
-
-
-#get corresponding civic association
-#trying this again to create join csv with simplified
-simplified <- read.csv('data/blockgroup_simplified.csv')
-simplified <- read_excel('data/blockgroup_simplified.xlsx')
-bg_all <- read_geos_block_group()
-bg_all$geo_id <- as.numeric(bg_all$geo_id)
-bg_all <- merge(bg_all, simplified, by.x = 'geo_id', by.y = 'geo_id')
-civ_all <- read_geos_civ_assoc()
-civ <- dplyr::select(civ_all, civ_name, geo_id, geometry)
-
-b_loc_sf <- sf::st_sf(bg_all)
-civ_loc <- sf::st_sf(civ)
-
-combined <- sf::st_join(bg_all, civ, join = st_intersects)
-intersect <- st_intersection(bg_all, civ)
-comgined2 <- st_join(b_loc_sf, civ_loc, left=TRUE)
-
-intersect <- st_drop_geometry(intersect)
-write.csv(intersect, 'data/bg_ca_intersect.csv')