---
title: "Covid_Greenspace_Analysis_step3"
author: "Colleen E. Reid, ES Rieves"
date: "1/14/2021"
output: html_document
---

```{r}
# bring in libraries that I need for script
library(tidyverse)
library(lubridate)
library(car)
library(gt)
library(gtsummary)
library(ggplot2)
library(ggpubr)
library(stats)
library(corrplot)
library(broom)
```

# I. Data preparation + cleaning
## Loading
```{r}
#read in the greenspace+covid data
# Emma's base directory
 base_dir <- "/Users/esrieves/Documents/GitHub/grnspc/"

# set directory to export files to
 export_dir <- "/Users/esrieves/Documents/GitHub/grnspc/Perceived_Greenspace/outputs"

# Colleen's base directory
# base_dir <- "C:/github/grnspc"
# export_dir <- "C:/github/grnspc/Perceived_Greenspace"

# Read in data
# includes all responses (n=911)
all_data <- read.csv(file.path(base_dir,"/publication_dataset.csv"))
```

## creating dataset for publication
```{r}
# pub_data = all_data %>% dplyr::select(time_period,age,age_group,sex,ethnicity,race2,cohabit,insured,looking_for_work,BAplus,income,
#                                MN19_300,MN19_500,Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,pss_score,cesd_score,mmpi_score,
#                                diagnosed,covid_financial,covid_lossincome,covid_resources,covid_workexp,
#                                covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green,
#                                num_diagnosis)
# write.csv(pub_data, file.path(base_dir,"publication_dataset.csv"))
```


## Cleaning
```{r}
# Makes the date a factor
all_data$time_period = factor(all_data$time_period, levels=c("before covid period","stay at home period","reopening period","fall wave period"))

# Test success, examine breakdown by time period
summary(all_data$time_period)
count(all_data)
```

```{r}
# Reordering greenspace questions: Strongly Disagree < Disagree < Agree < Strongly Agree
all_data$Q33_1=factor(all_data$Q33_1, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree"))
all_data$Q33_2=factor(all_data$Q33_2, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree"))
all_data$Q33_3=factor(all_data$Q33_3, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree"))
all_data$Q33_4=factor(all_data$Q33_4, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree"))
all_data$Q33_5=factor(all_data$Q33_5, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree"))
```

```{r}
# Reordering sociodemographic questions

# order income in ascending order
all_data$income = factor(all_data$income,levels=c("Less than $25,000","$25,000 to $50,000","$50,000 to $75,000","$75,000 to $100,000","$100,000 to $150,000","Greater than $150,000"))

# refactor so 'not looking' is reference group
all_data$looking_for_work = factor(all_data$looking_for_work, levels=c("Not looking","Looking - part-time","Looking"))

# establishes White (most prevalent) as reference group; all subsequent groups in alphabetical order
all_data$race2 = factor(all_data$race2, levels=c("White","Asian/Pacific Islander","Black/African American","Multiracial","Native American"))

# establishes Non-Hispanic (most prevalent) as reference group
all_data$ethnicity = factor(all_data$ethnicity, levels=c("Not Hispanic/Latino","Hispanic/Latino"))
```


## Create datsets for analyses
```{r}
## TIME ANALYSIS DATASET -- all_data (n=911)

## SOCIODEMOGRAPHIC ANALYSIS DATASET -- soc_data (n=809)
## Remove rows without objective greenspace, remove 'before covid' respondents

# drop if any objective greenspace measures have NA observations
soc_data = all_data %>%
  # remove before covid observations
  filter(time_period != "before covid period") %>%
  # remove participants without objective greenspace information
  drop_na(MN19_300,MN19_500)

# check to confirm success
# count = 809
count(soc_data)

## checking count
# 816 excluding before covid
all_data %>%
  filter(time_period != "before covid period") %>%
  summarise(n=n())

# 7 NAs (excluding before covid people); therefore 816 - 7 = 809, checks out
all_data %>%
  filter(time_period != "before covid period") %>%
  dplyr::select(MN19_300,MN19_500) %>%
  summarise_all(funs(sum(is.na(.))))
```


```{r}
## COVID ANALYSIS DATASET -- covid_data (n=431)
## Remove rows with NA answer for any covid questions

# drop if pre-covid to ensure that subsequent regressions are not done on pre-covid people
# and excluding survey respondents missing objective greenspace information
# soc_data n=809

covid_data = soc_data %>%
  #filter(time_period != "before covid period") %>%
  drop_na(covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green)

# check - removing before covid, n = 809
summary(soc_data$time_period != "before covid period")

# check - removing before covid (n=809), there are 378 NAs for diagnosed (368 for all other categories); should mean new n = 431
soc_data %>%
  dplyr::select(time_period,covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green) %>%
  summarise_all(funs(sum(is.na(.))))

# n = 431, checks out
count(covid_data)

covid_data %>%
  dplyr::select(covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green,Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,MN19_300,MN19_500,pss_score,cesd_score,mmpi_score) %>%
  summarise_all(funs(sum(is.na(.))))
```

# II. Correlation betweeen measures
```{r}
# Coded such that high agreement score will indicate positive corrrelation with high NDVI score (avoids negative correlation between agreeing greenspace measures)
green_recode = soc_data %>% mutate(Q33_1=dplyr::recode(Q33_1,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0),
                      Q33_2=dplyr::recode(Q33_2,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0),
                      Q33_3=dplyr::recode(Q33_3,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0),
                      Q33_4=dplyr::recode(Q33_4,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0),
                      Q33_5=dplyr::recode(Q33_5,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0))

green_matrix = green_recode %>% dplyr::select(Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,MN19_300,MN19_500)

# use covid dataset for covid analyses
covid_matrix = covid_data %>% dplyr::select(covid_financial,covid_lossincome, covid_resources,covid_workexp,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green,num_diagnosis)

# Green data: create correlation matrix using Spearman method
green_matrix_res = cor(green_matrix, method=c("spearman"), use = "complete.obs")
green_matrix_res = as.table(green_matrix_res)
corrplot.mixed(green_matrix_res)

# Covid data: create correlation matrix using Spearman method
covid_matrix_res = cor(covid_matrix, method=c("spearman"), use = "complete.obs")
covid_matrix_res = as.table(covid_matrix_res)
corrplot.mixed(covid_matrix_res)
```


# III. Mental health across COVID periods
## A. Time period analysis
```{r}
pss_anova = aov(pss_score~time_period,data=all_data)
pss_tukey = TukeyHSD(pss_anova, ordered=TRUE)
pss_tukey = tidy(pss_tukey)
p1 = pss_tukey %>% dplyr::select(contrast,estimate,adj.p.value)
```

```{r}
cesd_anova = aov(cesd_score~time_period,data=all_data)
cesd_tukey = TukeyHSD(cesd_anova, ordered=TRUE)
cesd_tukey = tidy(cesd_tukey)
p2 = cesd_tukey %>% dplyr::select(contrast,estimate,adj.p.value) 
```

```{r}
mmpi_anova = aov(mmpi_score~time_period,data=all_data)
mmpi_tukey = TukeyHSD(mmpi_anova, ordered=TRUE)
mmpi_tukey = tidy(mmpi_tukey)
p3 = mmpi_tukey %>% dplyr::select(contrast,estimate,adj.p.value) 
```

```{r} 
## Merge dataframes to create gt table
# contrast = time period comparisons
time_scores = left_join(p1,p2, by = "contrast")
time_scores = left_join(time_scores, p3, by = "contrast")

# double checking join
time_scores$estimate.x == p1$estimate # estimate.x is p1 is pss
time_scores$estimate.y == p2$estimate # estimate.y is p2 is cesd
time_scores$estimate == p3$estimate # estimate is p3 is mmpi
#time_scores = as.data.frame(time_scores)

## get Ns for table
# PSS, N = 909 (2 = NA)
all_data %>%
  dplyr::select(pss_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# CES-D, N = 901 (10 = NA)
all_data %>%
  dplyr::select(cesd_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# MMPI, N = 885 (26 = NA)
all_data %>%
  dplyr::select(mmpi_score) %>%
  summarise_all(funs(sum(!is.na(.))))


## Merge tables
time_scores_tbl = time_scores %>% 
  gt() %>% 
  fmt_number(columns = vars(estimate.x,estimate.y,estimate), decimals = 2) %>%
  fmt_number(columns = vars(adj.p.value.x,adj.p.value.y,adj.p.value), decimals = 3) %>%
  tab_spanner(
    label = md("**PSS Stress, N = 909**"),
    columns = c(estimate.x,adj.p.value.x)
  ) %>%
  tab_spanner(
    label = md("**CES-D-10 Depression, N = 901**"),
    columns = c(estimate.y,adj.p.value.y)
  ) %>%
  tab_spanner(
    label = md("**MMPI-2 Anxiety, N = 885**"),
    columns = c(estimate,adj.p.value)
  ) %>%
  cols_label(contrast=md("**Time period**"), estimate=md("**Coefficient**"),estimate.x=md("**Coefficient**"),
             estimate.y=md("**Coefficient**"),adj.p.value=md("**P-value**"),adj.p.value.x=md("**P-value**"),
             adj.p.value.y=md("**P-value**")) %>%
  tab_header(title = "Coefficients (p-values) for Tukey’s HSD between Minnesota Multiphasic Personality Inventory scores during different time periods related to the COVID-19 pandemic in Denver, CO")

  
time_scores_tbl
```


## B. Graphical outputs
```{r}
comps = list(c("before covid period","stay at home period"), c("before covid period","reopening period"),c("before covid period","fall wave period"), c("stay at home period","reopening period"), c("stay at home period","fall wave period"), c("reopening period","fall wave period"))

pss_timeplot = ggplot(all_data, aes(x=time_period,y=pss_score,color=time_period)) + geom_violin() 
pss_timeplot = pss_timeplot + 
  geom_boxplot(width=0.1, color="black") + 
  geom_jitter(alpha=.1) +
  stat_compare_means(comparisons = comps, label="p.signif",hide.ns = TRUE, color="black",
                     step.increase = 0.2, method = "wilcox.test") +
  ylab("PSS score") +
  scale_color_brewer(palette = "Dark2") +
  theme_light() +
  theme(legend.position="bottom",
        axis.title.x=element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  # New title for the legend
  labs(color = "COVID-19 time periods")

cesd_timeplot = ggplot(all_data, aes(x=time_period,y=cesd_score,color=time_period)) + geom_violin()
cesd_timeplot = cesd_timeplot +
  geom_boxplot(width=0.1, color="black") +
  geom_jitter(alpha=.1) +
  ylab("CES-D-10 score") +
  stat_compare_means(comparisons = comps, label="p.signif",hide.ns = TRUE, color="black",
                     step.increase = 0.2, method = "wilcox.test") +
  scale_color_brewer(palette = "Dark2") +
  theme_light() +
  theme(legend.position="none",
        axis.title.x=element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

mmpi_timeplot = ggplot(all_data, aes(x=time_period,y=mmpi_score,color=time_period)) + geom_violin()
mmpi_timeplot = mmpi_timeplot +
  geom_boxplot(width=0.1, color="black") +
  geom_jitter(alpha=.1) +
  ylab("MMPI-2 score") +
  stat_compare_means(comparisons = comps, label="p.signif",hide.ns = TRUE, color="black",
                     step.increase = 0.2, method = "wilcox.test") +
  scale_color_brewer(palette = "Dark2") +
  theme_light() + 
  theme(legend.position = "none",
        axis.title.x=element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

allscores_timeplot = ggarrange(pss_timeplot,cesd_timeplot,mmpi_timeplot,ncol=3, nrow= 1, common.legend = TRUE)
#allscores_timeplot

# save output plot
tiff("figure2.tiff", compression = "lzw", unit = "in", width = 7.5, height = 5.5, res=500)
print(allscores_timeplot)
dev.off()
```


# IV. Univariate regressions -- *loop logic*
Use soc_data as dataset for perceived greenspace and sociodemographic univariate regressions
Use covid_data as dataset for covid univariate regressions


## Greenspace
```{r}
# independent variables for greenspace uv regressions
grn_uv_iv_list = soc_data %>%
  dplyr::select(Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,MN19_300,MN19_500)


## PSS score
lm_tests_grn_pss = vector("list",length(grn_uv_iv_list))
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_grn_pss[[i]] = glm(reformulate(names(grn_uv_iv_list[i]),"pss_score"),data=soc_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_grn_pss[[i]]))
}


## CES-D score
lm_tests_grn_cesd = vector("list",length(grn_uv_iv_list))
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_grn_cesd[[i]] = glm(reformulate(names(grn_uv_iv_list[i]),"cesd_score"),data=soc_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_grn_cesd[[i]]))
}


## MMPI score
lm_tests_grn_mmpi = vector("list",length(grn_uv_iv_list))
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_grn_mmpi[[i]] = glm(reformulate(names(grn_uv_iv_list[i]),"mmpi_score"),data=soc_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_grn_mmpi[[i]]))
}

```

## Sociodemographics
```{r}
# independent variables for sociodemographic uv regressions
socdem_uv_iv_list = soc_data %>%
  dplyr::select(sex,income,race2,ethnicity,age_group,cohabit,BAplus,looking_for_work,insured)


## PSS score
lm_tests_soc_pss = vector("list",length(socdem_uv_iv_list))
for(i in seq_along(socdem_uv_iv_list)) {
  lm_tests_soc_pss[[i]] = glm(reformulate(names(socdem_uv_iv_list[i]),"pss_score"),data=soc_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_soc_pss[[i]]))
}


## CES-D score
lm_tests_soc_cesd = vector("list",length(socdem_uv_iv_list))
for(i in seq_along(socdem_uv_iv_list)) {
  lm_tests_soc_cesd[[i]] = glm(reformulate(names(socdem_uv_iv_list[i]),"cesd_score"),data=soc_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_soc_cesd[i]))
}


## MMPI score
lm_tests_soc_mmpi = vector("list",length(socdem_uv_iv_list))
for(i in seq_along(socdem_uv_iv_list)) {
  lm_tests_soc_mmpi[[i]] = glm(reformulate(names(socdem_uv_iv_list[i]),"mmpi_score"),data=soc_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_soc_mmpi[[i]]))
}

```

## Covid questions
```{r}
# independent variables for covid uv regressions
cov_uv_iv_list = covid_data %>%
  dplyr::select(covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closesymp,covid_closeexp,covid_online,covid_green)


## PSS score
lm_tests_cov_pss = vector("list",length(cov_uv_iv_list))
for(i in seq_along(cov_uv_iv_list)) {
  lm_tests_cov_pss[[i]] = glm(reformulate(names(cov_uv_iv_list[i]),"pss_score"),data=covid_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_cov_pss[[i]]))
}


## CES-D score
lm_tests_cov_cesd = vector("list",length(cov_uv_iv_list))
for(i in seq_along(cov_uv_iv_list)) {
  lm_tests_cov_cesd[[i]] = glm(reformulate(names(cov_uv_iv_list[i]),"cesd_score"),data=covid_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_cov_cesd[[i]]))
}


## MMPI score
lm_tests_cov_mmpi = vector("list",length(cov_uv_iv_list))
for(i in seq_along(cov_uv_iv_list)) {
  lm_tests_cov_mmpi[[i]] = glm(reformulate(names(cov_uv_iv_list[i]),"mmpi_score"),data=covid_data)
  # prints summary outputs for each model
  #print(summary(lm_tests_cov_mmpi[[i]]))
}

```

# V. Univariate Outputs
```{r}
# Labels for all univariate and multivariate output tables
grnspc_labels_list = c("“There is a lot of vegetation/greenery in my neighborhood”","“I can see vegetation/greenery from my home”",
                       "“The nearest vegetated park/green space is easy for me to access”","“I spend a lot of time in spaces with natural vegetation”",
                       "“The green spaces near my home are very high quality”","NAIP NDVI – 300 m buffer","NAIP NDVI – 500 m buffer")

soc_labels_list = c("Sex","Income","Race","Ethnicity","Age","Cohabitation status","Educational attainment","Looking for work?","Insurance status")

cov_labels_list = c("COVID-19 has impacted me negatively from a financial point of view","I have lost job-related income due to COVID-19",
                    "I have had a hard time getting needed resources (food, toilet paper) due to COVID-19",
                    "I had to continue to work even though I was in close contact with people who might be infected (e.g. customers, patients, co-workers)",
                    "I have been diagnosed with COVID-19","I have had COVID-19-like symptoms at some point in the last two months",
                    "I have been in close proximity with someone who has been diagnosed with COVID-19",
                    "I have been in close proximity with someone who has had COVID-19-like symptoms in the last two months",
                    "I spend a huge percentage of my time trying to find updates online or on TB about COVID-19",
                    "I have spent more time outside on parks, on trails, and near nature in the past month compared to the same time last year")
```


creates GT tables and also determines which variables need to be included in the multivariate regressions
## Greenspace
```{r}
# for loop initializes variable name for each regression table
# naming convention:
#     - uv_gp# is greenspace pss uv reg
#     - uv_gc# is greenspace cesd uv reg
#     - uv_gm# is greenspace mmpi uv reg
# then creates and formats gt table


## PSS
for(i in seq_along(lm_tests_grn_pss)) {
  # creates variable names
  var_nm = paste0("uv_gp",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_grn_pss[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## CES-D
for(i in seq_along(lm_tests_grn_cesd)) {
  # creates variable names
  var_nm = paste0("uv_gc",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_grn_cesd[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## MMPI
for(i in seq_along(lm_tests_grn_mmpi)) {
  # creates variable names
  var_nm = paste0("uv_gm",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_grn_mmpi[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

```

```{r}
## tbl_stack
# stack pss univariate regression objects in table
uvreg_grn_pss_stack = tbl_stack(tbls= list(uv_gp1,uv_gp2,uv_gp3,uv_gp4,uv_gp5,uv_gp6,uv_gp7))


# stack cesd univariate regression objects in table
uvreg_grn_cesd_stack = tbl_stack(tbls= list(uv_gc1,uv_gc2,uv_gc3,uv_gc4,uv_gc5,uv_gc6,uv_gc7))


# stack mmpi univariate regression objects in table
uvreg_grn_mmpi_stack = tbl_stack(tbls= list(uv_gm1,uv_gm2,uv_gm3,uv_gm4,uv_gm5,uv_gm6,uv_gm7))


## get Ns for table
# PSS, N = 807 (2 = NA)
soc_data %>%
  dplyr::select(pss_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# CES-D, N = 801 (8 = NA)
soc_data %>%
  dplyr::select(cesd_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# MMPI, N = 785 (24 = NA)
soc_data %>%
  dplyr::select(mmpi_score) %>%
  summarise_all(funs(sum(!is.na(.))))


## tbl_merge
uvreg_green_table = tbl_merge(
  tbls = list(uvreg_grn_pss_stack,uvreg_grn_cesd_stack,uvreg_grn_mmpi_stack)) %>%
  modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>%
  modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>%
  modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>%
  modify_header(update = label ~ "**Greenspace measure**") %>%
  as_gt() %>%
  opt_row_striping()

uvreg_green_table
```


## Sociodemographics
```{r}
# for loop initializes variable name for each regression table
# naming convention:
#     - uv_sp# is sociodemographic pss uv reg
#     - uv_sc# is sociodemographic cesd uv reg
#     - uv_sm# is sociodemographic mmpi uv reg
# then creates and formats gt table

## PSS
for(i in seq_along(lm_tests_soc_pss)) {
  # creates variable names
  var_nm = paste0("uv_sp",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_soc_pss[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(socdem_uv_iv_list[i]) ~ soc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## CES-D
for(i in seq_along(lm_tests_soc_cesd)) {
  # creates variable names
  var_nm = paste0("uv_sc",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_soc_cesd[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(socdem_uv_iv_list[i]) ~ soc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## MMPI
for(i in seq_along(lm_tests_soc_mmpi)) {
  # creates variable names
  var_nm = paste0("uv_sm",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_soc_mmpi[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(socdem_uv_iv_list[i]) ~ soc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

```

```{r}
## tbl_stack
# stack pss univariate regression objects in table
uvreg_soc_pss_stack = tbl_stack(tbls= list(uv_sp1,uv_sp2,uv_sp3,uv_sp4,uv_sp5,uv_sp6,uv_sp7,uv_sp8,uv_sp9))


# stack cesd univariate regression objects in table
uvreg_soc_cesd_stack = tbl_stack(tbls= list(uv_sc1,uv_sc2,uv_sc3,uv_sc4,uv_sc5,uv_sc6,uv_sc7,uv_sc8,uv_sc9))


# stack mmpi univariate regression objects in table
uvreg_soc_mmpi_stack = tbl_stack(tbls= list(uv_sm1,uv_sm2,uv_sm3,uv_sm4,uv_sm5,uv_sm6,uv_sm7,uv_sm8,uv_sm9))


## get Ns for table
# PSS, N = 807 (2 = NA)
soc_data %>%
  dplyr::select(pss_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# CES-D, N = 801 (8 = NA)
soc_data %>%
  dplyr::select(cesd_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# MMPI, N = 785 (24 = NA)
soc_data %>%
  dplyr::select(mmpi_score) %>%
  summarise_all(funs(sum(!is.na(.))))

## tbl_merge
uvreg_soc_table = tbl_merge(
  tbls = list(uvreg_soc_pss_stack,uvreg_soc_cesd_stack,uvreg_soc_mmpi_stack)) %>%
  modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>%
  modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>%
  modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>%
  modify_header(update = label ~ "**Sociodemographic measure**") %>%
  as_gt() %>%
  opt_row_striping()

uvreg_soc_table
```


## Covid
```{r}
# for loop initializes variable name for each regression table
# naming convention:
#     - uv_cp# is covid pss uv reg
#     - uv_cc# is covid cesd uv reg
#     - uv_cm# is covid mmpi uv reg
# then creates and formats gt table

## PSS
for(i in seq_along(lm_tests_cov_pss)) {
  # creates variable names
  var_nm = paste0("uv_cp",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_cov_pss[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(cov_uv_iv_list[i]) ~ cov_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## CES-D
for(i in seq_along(lm_tests_cov_cesd)) {
  # creates variable names
  var_nm = paste0("uv_cc",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_cov_cesd[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(cov_uv_iv_list[i]) ~ cov_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## MMPI
for(i in seq_along(lm_tests_cov_mmpi)) {
  # creates variable names
  var_nm = paste0("uv_cm",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_cov_mmpi[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           label = names(cov_uv_iv_list[i]) ~ cov_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

```

```{r}
## tbl_stack
# stack pss univariate regression objects in table
uvreg_cov_pss_stack = tbl_stack(tbls= list(uv_cp1,uv_cp2,uv_cp3,uv_cp4,uv_cp5,uv_cp6,uv_cp7,uv_cp8,uv_cp9,uv_cp10))


# stack cesd univariate regression objects in table
uvreg_cov_cesd_stack = tbl_stack(tbls= list(uv_cc1,uv_cc2,uv_cc3,uv_cc4,uv_cc5,uv_cc6,uv_cc7,uv_cc8,uv_cc9,uv_cc10))


# stack mmpi univariate regression objects in table
uvreg_cov_mmpi_stack = tbl_stack(tbls= list(uv_cm1,uv_cm2,uv_cm3,uv_cm4,uv_cm5,uv_cm6,uv_cm7,uv_cm8,uv_cm9,uv_cm10))


## get Ns for table
# PSS, N = 431 
covid_data %>%
  dplyr::select(pss_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# CES-D, N = 427
covid_data %>%
  dplyr::select(cesd_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# MMPI, N = 420
covid_data %>%
  dplyr::select(mmpi_score) %>%
  summarise_all(funs(sum(!is.na(.))))


## tbl_merge
uvreg_cov_table = tbl_merge(
  tbls = list(uvreg_cov_pss_stack,uvreg_cov_cesd_stack,uvreg_cov_mmpi_stack)) %>%
  modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 431") %>%
  modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 427") %>%
  modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 420") %>%
  modify_header(update = label ~ "**COVID-19 impact variables**") %>%
  as_gt() %>%
  opt_row_striping()

uvreg_cov_table

```

# VI. Multivariate regressions -- *loop logic*
## A. Greenspace + Sociodemographics
SIG VARS
pss: sex, income, age, cohabit, BAplus, looking for work, insured
cesd: income, age, cohabit, BAplus, looking for work, insured
mmpi: income, ethnicity, age, cohabit, BAplus, looking for work, insured


```{r}
## PSS

# dplyr::select variables sig in pss uv sociodemographic regressions
sig_soc_pss = soc_data %>% 
  dplyr::select(sex, income, age_group, cohabit, BAplus, looking_for_work, insured)

# create a variable set that pairs each greenspace variable with sig soc variables
var_set_mvsg_pss = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsg_pss)) {
  var_set_mvsg_pss[[i]] = c(names(sig_soc_pss),names(grn_uv_iv_list[i]))
}

# "base" sociodemographic only regression (for anova comparison)
pss_soc_mod = glm(pss_score ~ sex+income+cohabit+age_group+BAplus+looking_for_work+insured, data=soc_data)
# all vifs under 2, little indication of multicollinearity
vif(pss_soc_mod)
summary(pss_soc_mod)

# run mv glm tests for pss with set of sig soc variables + each greenspace variable
lm_tests_mvsg_pss = vector("list",length(grn_uv_iv_list))

for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_pss[[i]] = glm(reformulate(var_set_mvsg_pss[[i]],"pss_score"),data=soc_data)
  # print model summaries
  # print(summary(lm_tests_mvsg_pss[[i]]))
  # print anova results
  # print(anova(pss_soc_mod,lm_tests_mvsg_pss[[i]]))
  # print vif outcomes
  # print(vif(lm_tests_mvsg_pss[[i]]))
}

```

```{r}
## CES-D

# dplyr::select variables sig in cesd uv sociodemographic regressions
sig_soc_cesd = soc_data %>%
  dplyr::select(income, age_group, cohabit, BAplus, looking_for_work, insured)

# create a variable set that pairs each greenspace variable with sig soc variables
var_set_mvsg_cesd = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsg_cesd)) {
  var_set_mvsg_cesd[[i]] = c(names(sig_soc_cesd),names(grn_uv_iv_list[i]))
}

# "base" sociodemographic only regression (for anova comparison)
cesd_soc_mod = glm(cesd_score ~ income+cohabit+age_group+BAplus+looking_for_work+insured, data=soc_data)
# all vifs under 2, little indication of multicollinearity
vif(cesd_soc_mod)
summary(cesd_soc_mod)

# run mv glm tests for cesd with set of sig soc variables + each greenspace variable
lm_tests_mvsg_cesd = vector("list",length(grn_uv_iv_list))

for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_cesd[[i]] = glm(reformulate(var_set_mvsg_cesd[[i]],"cesd_score"),data=soc_data)
  # print model summaries
  # print(summary(lm_tests_mvsg_cesd[[i]]))
  # print anova results
  # print(anova(cesd_soc_mod,lm_tests_mvsg_cesd[[i]]))
  # print vif outcomes
  # print(vif(lm_tests_mvsg_cesd[[i]]))
}
```

```{r}
## MMPI

# dplyr::select variables sig in mmpi uv sociodemographic regressions
sig_soc_mmpi = soc_data %>%
  dplyr::select(income, ethnicity, age_group, cohabit, BAplus, looking_for_work, insured)

# create a variable set that pairs each greenspace variable with sig soc variables
var_set_mvsg_mmpi = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsg_mmpi)) {
  var_set_mvsg_mmpi[[i]] = c(names(sig_soc_mmpi),names(grn_uv_iv_list[i]))
}

# "base" sociodemographic only regression (for anova comparison)
mmpi_soc_mod = glm(mmpi_score ~ income+ethnicity+cohabit+age_group+BAplus+looking_for_work+insured, data=soc_data)
# all vifs under 2, little indication of multicollinearity
vif(mmpi_soc_mod)
summary(mmpi_soc_mod)

# run mv glm tests for mmpi with set of sig soc variables + each greenspace variable
lm_tests_mvsg_mmpi = vector("list",length(grn_uv_iv_list))

for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_mmpi[[i]] = glm(reformulate(var_set_mvsg_mmpi[[i]],"mmpi_score"),data=soc_data)
  # print model summaries
  # summary(lm_tests_mvsg_mmpi[[i]])
  # print anova results
  # anova(cesd_soc_mod,lm_tests_mvsg_mmpi[[i]])
  # print vif outcomes
  # print(vif(lm_tests_mvsg_mmpi[[i]]))
}
```

## B. Greenspace + Sociodemographics + Covid
Use covid_data

SIG VARS
pss: sex, income, age, cohabit, BAplus, looking for work, insured; covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online

cesd: income, age, cohabit, BAplus, looking for work, insured; covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online

mmpi: income, ethnicity, age, cohabit, BAplus, looking for work, insured; covid_financial, covid_lossincome, covid_resources, covid_workexp, covid_symptoms, covid_closesymp, covid_online
```{r}
## PSS

# dplyr::select variables sig in pss uv sociodemographic regressions
sig_soc_cov_pss = covid_data %>% 
  dplyr::select(sex, income, age_group, cohabit, BAplus, looking_for_work, insured, covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online)

# create a variable set that pairs each greenspace variable with sig soc + cov variables
var_set_mvsgc_pss = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsgc_pss)) {
  var_set_mvsgc_pss[[i]] = c(names(sig_soc_cov_pss),names(grn_uv_iv_list[i]))
}

# "base" sociodemographic only regression (for anova comparison)
pss_base_soc_cov_mod = glm(pss_score ~ sex+income+age_group+cohabit+BAplus+looking_for_work+insured+covid_financial+covid_lossincome+covid_resources+
                             covid_symptoms+covid_closesymp+covid_online,data=covid_data)
# summary of base mod
summary(pss_base_soc_cov_mod)
# all vifs under 2, little indication of multicollinearity
vif(pss_base_soc_cov_mod)

# run mv glm tests for pss with set of sig soc variables + each greenspace variable
lm_tests_mvsgc_pss = vector("list",length(grn_uv_iv_list))

for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsgc_pss[[i]] = glm(reformulate(var_set_mvsgc_pss[[i]],"pss_score"),data=covid_data)
  # print model summaries
  # print(summary(lm_tests_mvsgc_pss[[i]]))
  # print anova results
  # print(anova(pss_base_soc_cov_mod,lm_tests_mvsgc_pss[[i]],test="Chi"))
  # print vif outcomes
  # print(vif(lm_tests_mvsgc_pss[[i]]))
}

```

```{r}
## CES-D

# dplyr::select variables sig in cesd uv sociodemographic regressions
sig_soc_cov_cesd = covid_data %>%
  dplyr::select(income, age_group, cohabit, BAplus, looking_for_work, insured, covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online)

# create a variable set that pairs each greenspace variable with sig soc variables
var_set_mvsgc_cesd = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsgc_cesd)) {
  var_set_mvsgc_cesd[[i]] = c(names(sig_soc_cov_cesd),names(grn_uv_iv_list[i]))
}

# "base" sociodemographic and covid only regression (for anova comparison)
cesd_soc_cov_mod = glm(cesd_score ~ income+cohabit+age_group+BAplus+looking_for_work+insured+covid_financial+covid_lossincome+
                         covid_resources+covid_symptoms+covid_closesymp+covid_online, data=covid_data)

# all vifs under 2, little indication of multicollinearity
vif(cesd_soc_cov_mod)
summary(cesd_soc_cov_mod)

# run mv glm tests for cesd with set of sig soc variables + each greenspace variable
lm_tests_mvsgc_cesd = vector("list",length(grn_uv_iv_list))

for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsgc_cesd[[i]] = glm(reformulate(var_set_mvsgc_cesd[[i]],"cesd_score"),data=covid_data)
  # print model summmaries
  # print(summary(lm_tests_mvsg_cesd[[i]]))
  # print anova results
  # anova(cesd_soc_cov_mod,lm_tests_mvsgc_cesd[[i]])
  # print vif outcomes
  # print(vif(lm_tests_mvsgc_cesd[[i]]))
}
```

```{r}
## MMPI

# dplyr::select variables sig in mmpi uv sociodemographic regressions
sig_soc_cov_mmpi = covid_data %>%
  dplyr::select(income, ethnicity, age_group, cohabit, BAplus, looking_for_work, insured, covid_financial, covid_lossincome, covid_resources, covid_workexp, covid_symptoms, covid_closesymp, covid_online)

# create a variable set that pairs each greenspace variable with sig soc variables
var_set_mvsgc_mmpi = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsgc_mmpi)) {
  var_set_mvsgc_mmpi[[i]] = c(names(sig_soc_cov_mmpi),names(grn_uv_iv_list[i]))
}

# "base" sociodemographic only regression (for anova comparison)
mmpi_soc_cov_mod = glm(mmpi_score ~ income+ethnicity+cohabit+age_group+BAplus+looking_for_work+insured+covid_financial+covid_lossincome+
                         covid_resources+covid_workexp+covid_symptoms+covid_closesymp+covid_online, data=covid_data)
# all vifs under 2, little indication of multicollinearity
vif(mmpi_soc_cov_mod)
summary(mmpi_soc_cov_mod)

# run mv glm tests for mmpi with set of sig soc variables + each greenspace variable
lm_tests_mvsgc_mmpi = vector("list",length(grn_uv_iv_list))

for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsgc_mmpi[[i]] = glm(reformulate(var_set_mvsgc_mmpi[[i]],"mmpi_score"),data=covid_data)
  # print model summaries
  #print(summary(lm_tests_mvsg_mmpi[[i]]))
  # print anova results
  # print(anova(mmpi_soc_cov_mod,lm_tests_mvsgc_mmpi[[i]]))
  # print vif outcomes
  # print(vif(lm_tests_mvsgc_mmpi[[i]]))
}
```

## C. Sensitivity analysis: Greenspace + Sociodemographics
```{r}
## Create three variable sets for sensitivity analysis
# pss has sex only (run 2,3)
# cesd has neither sex nor ethnicity (run 1-3)
# mmpi has ethnicity only (run 1,3)

sig_vars1 = soc_data %>% 
  dplyr::select(sex, income, age_group, cohabit, BAplus, looking_for_work, insured)

sig_vars2 = soc_data %>% 
  dplyr::select(ethnicity, income, age_group, cohabit, BAplus, looking_for_work, insured)

sig_vars3 = soc_data %>% 
  dplyr::select(sex, ethnicity, income, age_group, cohabit, BAplus, looking_for_work, insured)
```

```{r}
# create a variable set for set1
var_set_mvsg_alt1 = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsg_alt1)) {
  var_set_mvsg_alt1[[i]] = c(names(sig_vars1),names(grn_uv_iv_list[i]))
}

# run mv glm tests for cesd and mmpi with set 1 variables + each greenspace variable
lm_tests_mvsg_cesd_alt1 = vector("list",length(grn_uv_iv_list))
lm_tests_mvsg_mmpi_alt1 = vector("list",length(grn_uv_iv_list))

## Results
# cesd
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_cesd_alt1[[i]] = glm(reformulate(var_set_mvsg_alt1[[i]],"cesd_score"),data=soc_data)
  # print(summary(lm_tests_mvsg_cesd_alt1[[i]]))
}
# significant differences from standard variable set: N/A

# mmpi
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_mmpi_alt1[[i]] = glm(reformulate(var_set_mvsg_alt1[[i]],"mmpi_score"),data=soc_data)
  # print(summary(lm_tests_mvsg_mmpi_alt1[[i]]))
}
# significant differences from standard variable set: N/A
```

```{r}
# create a variable set for set2
var_set_mvsg_alt2 = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsg_alt2)) {
  var_set_mvsg_alt2[[i]] = c(names(sig_vars2),names(grn_uv_iv_list[i]))
}

# run mv glm tests for pss and cesd with set 2 variables + each greenspace variable
lm_tests_mvsg_pss_alt2 = vector("list",length(grn_uv_iv_list))
lm_tests_mvsg_cesd_alt2 = vector("list",length(grn_uv_iv_list))

## Results
# pss
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_pss_alt2[[i]] = glm(reformulate(var_set_mvsg_alt2[[i]],"pss_score"),data=soc_data)
  # print(summary(lm_tests_mvsg_pss_alt2[[i]]))
}
# significant differences from standard variable set: N/A

# cesd
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_cesd_alt2[[i]] = glm(reformulate(var_set_mvsg_alt2[[i]],"cesd_score"),data=soc_data)
  # print(summary(lm_tests_mvsg_cesd_alt2[[i]]))
}
# significant differences from standard variable set: N/A
```

```{r}
# create a variable set for set3
var_set_mvsg_alt3 = vector("list",length(grn_uv_iv_list))
for(i in seq_along(var_set_mvsg_alt3)) {
  var_set_mvsg_alt3[[i]] = c(names(sig_vars3),names(grn_uv_iv_list[i]))
}

# run mv glm tests for pss, cesd, and mmpi2 with set 3 variables + each greenspace variable
lm_tests_mvsg_pss_alt3 = vector("list",length(grn_uv_iv_list))
lm_tests_mvsg_cesd_alt3 = vector("list",length(grn_uv_iv_list))
lm_tests_mvsg_mmpi_alt3 = vector("list",length(grn_uv_iv_list))

## Results
# pss
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_pss_alt3[[i]] = glm(reformulate(var_set_mvsg_alt3[[i]],"pss_score"),data=soc_data)
  # print(summary(lm_tests_mvsg_pss_alt3[[i]]))
}
# significant differences from standard variable set: Q33_2 strongly agree becomes sig (comp'd to strongly disagree, agree is borderline)

# cesd
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_cesd_alt3[[i]] = glm(reformulate(var_set_mvsg_alt3[[i]],"cesd_score"),data=soc_data)
  # print(summary(lm_tests_mvsg_cesd_alt3[[i]]))
}
# significant differences from standard variable set: N/A

# mmpi
for(i in seq_along(grn_uv_iv_list)) {
  lm_tests_mvsg_mmpi_alt3[[i]] = glm(reformulate(var_set_mvsg_alt3[[i]],"mmpi_score"),data=soc_data)
  # print(summary(lm_tests_mvsg_mmpi_alt3[[i]]))
}
# significant differences from standard variable set: N/A
```


# VII. Multivariate outputs
## A. Greenspace + Sociodemographics
```{r}
# for loop initializes variable name for each regression table
# naming convention:
#     - mv_sgp# is sociodemographic greenspace pss mv reg
#     - mv_sgc# is sociodemographic greenspace cesd mv reg
#     - mv_sgm# is sociodemographic greenspace mmpi mv reg
# then creates and formats gt table

## PSS
for(i in seq_along(lm_tests_mvsg_pss)) {
  # creates variable names
  var_nm = paste0("mv_sgp",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_pss[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

## CES-D
for(i in seq_along(lm_tests_mvsg_cesd)) {
  # creates variable names
  var_nm = paste0("mv_sgc",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_cesd[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## MMPI
for(i in seq_along(lm_tests_mvsg_mmpi)) {
  # creates variable names
  var_nm = paste0("mv_sgm",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_mmpi[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

```

```{r}
## tbl_stack
# stack pss univariate regression objects in table
mvreg_soc_grn_pss_stack = tbl_stack(tbls= list(mv_sgp1,mv_sgp2,mv_sgp3,mv_sgp4,mv_sgp5,mv_sgp6,mv_sgp7)) 

# stack cesd univariate regression objects in table
mvreg_soc_grn_cesd_stack = tbl_stack(tbls= list(mv_sgc1,mv_sgc2,mv_sgc3,mv_sgc4,mv_sgc5,mv_sgc6,mv_sgc7))

# stack mmpi univariate regression objects in table
mvreg_soc_grn_mmpi_stack = tbl_stack(tbls= list(mv_sgm1,mv_sgm2,mv_sgm3,mv_sgm4,mv_sgm5,mv_sgm6,mv_sgm7))


## get Ns for table
# PSS, N = 807 (2 = NA)
soc_data %>%
  dplyr::select(pss_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# CES-D, N = 801 (8 = NA)
soc_data %>%
  dplyr::select(cesd_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# MMPI, N = 785 (24 = NA)
soc_data %>%
  dplyr::select(mmpi_score) %>%
  summarise_all(funs(sum(!is.na(.))))


## tbl_merge
mvreg_soc_green_table = tbl_merge(
  tbls = list(mvreg_soc_grn_pss_stack,mvreg_soc_grn_cesd_stack,mvreg_soc_grn_mmpi_stack)) %>%
  # rename columns and spanners
  modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>%
  modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>%
  modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>%
  modify_header(update = label ~ "**Greenspace measure**") %>%
  as_gt() %>%
  # add footnotes for adjustment variables
  tab_footnote(
    footnote = "Adjusted for: sex, income, age, cohabitation status, educational attainment, employment status, insurance status",
    locations = cells_column_spanners(contains("PSS"))
  ) %>%
  tab_footnote(
    footnote = "Adjusted for: income, age, cohabitation status, educational attainment, employment status, insurance status",
    locations = cells_column_spanners(contains("CES-D"))
  ) %>%
  tab_footnote(
    footnote = "Adjusted for: income, ethnicity, age, cohabitation status, educational attainment, employment status, insurance status",
    locations = cells_column_spanners(contains("MMPI"))
  ) %>%
  # add stripes
  opt_row_striping()

mvreg_soc_green_table
```

## B. Greenspace + Sociodemographics + Covid
```{r}
# for loop initializes variable name for each regression table
# naming convention:
#     - mv_sgcp# is sociodemographic covid greenspace pss mv reg
#     - mv_sgcc# is sociodemographic covid greenspace cesd mv reg
#     - mv_sgcm# is sociodemographic sovid greenspace mmpi mv reg
# then creates and formats gt table

## PSS
for(i in seq_along(lm_tests_mvsgc_pss)) {
  # creates variable names
  var_nm = paste0("mv_sgcp",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsgc_pss[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

## CES-D
for(i in seq_along(lm_tests_mvsgc_cesd)) {
  # creates variable names
  var_nm = paste0("mv_sgcc",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsgc_cesd[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}


## MMPI
for(i in seq_along(lm_tests_mvsgc_mmpi)) {
  # creates variable names
  var_nm = paste0("mv_sgcm",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsgc_mmpi[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

```

```{r}
## tbl_stack
# stack pss univariate regression objects in table
mvreg_soc_cov_grn_pss_stack = tbl_stack(tbls= list(mv_sgcp1,mv_sgcp2,mv_sgcp3,mv_sgcp4,mv_sgcp5,mv_sgcp6,mv_sgcp7))

# stack cesd univariate regression objects in table
mvreg_soc_cov_grn_cesd_stack = tbl_stack(tbls= list(mv_sgcc1,mv_sgcc2,mv_sgcc3,mv_sgcc4,mv_sgcc5,mv_sgcc6,mv_sgcc7))

# stack mmpi univariate regression objects in table
mvreg_soc_cov_grn_mmpi_stack = tbl_stack(tbls= list(mv_sgcm1,mv_sgcm2,mv_sgcm3,mv_sgcm4,mv_sgcm5,mv_sgcm6,mv_sgcm7))


## get Ns for table
# PSS, N = 807 (2 = NA)
soc_data %>%
  dplyr::select(pss_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# CES-D, N = 801 (8 = NA)
soc_data %>%
  dplyr::select(cesd_score) %>%
  summarise_all(funs(sum(!is.na(.))))

# MMPI, N = 785 (24 = NA)
soc_data %>%
  dplyr::select(mmpi_score) %>%
  summarise_all(funs(sum(!is.na(.))))


## tbl_merge
mvreg_soc_cov_green_table = tbl_merge(
  tbls = list(mvreg_soc_cov_grn_pss_stack,mvreg_soc_cov_grn_cesd_stack,mvreg_soc_cov_grn_mmpi_stack)) %>%
  # rename columns and spanners
  modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>%
  modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>%
  modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>%
  modify_header(update = label ~ "**Greenspace measure**") %>%
  as_gt() %>%
  # add footnotes for adjustment variables
  tab_footnote(
    footnote = "Adjusted for: sex, income, age, cohabitation status, educational attainment, employment status, insurance status, and COVID-19 related: financial stress, lost income, resource scarcity, symptoms, close proximity to symptoms, and online engagement",
    locations = cells_column_spanners(contains("PSS"))
  ) %>%
  tab_footnote(
    footnote = "Adjusted for: income, age, cohabitation status, educational attainment, employment status, insurance status, and COVID-19 related: financial stress, lost income, resource scarcity, symptoms, close proximity to symptoms, and online engagement",
    locations = cells_column_spanners(contains("CES-D"))
  ) %>%
  tab_footnote(
    footnote = "Adjusted for: income, ethnicity, age, cohabitation status, educational attainment, employment status, insurance status, and COVID-19 related: financial stress, lost income, resource scarcity, potential workplace exposure, symptoms, close proximity to symptoms, and online engagement",
    locations = cells_column_spanners(contains("MMPI"))
  ) %>%
  # add stripes
  opt_row_striping()

mvreg_soc_cov_green_table
```

## C. Sensitivity outputs : Greenspace + Sociodemographics

```{r}
# Create three variable sets for sensitivity analysis
# pss has sex only (run 2,3)
# cesd has neither sex nor ethnicity (run 1-3)
# mmpi has ethnicity only (run 1,3)


# for loop initializes variable name for each regression table
# naming convention:
#     - mvsg_[score]_alt#
# then creates and formats gt table
```
### 1. PSS
```{r}
# run alt2, alt3
# naming convention: mvsg_pss_alt#

for(i in seq_along(lm_tests_mvsg_pss_alt2)) {
  # creates variable names
  var_nm = paste0("mv_sgp2",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_pss_alt2[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

for(i in seq_along(lm_tests_mvsg_pss_alt3)) {
  # creates variable names
  var_nm = paste0("mv_sgp3",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_pss_alt3[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}
```

```{r}
# stack pss alt2
mvreg_soc_grn_pss_alt2_stack = tbl_stack(tbls= list(mv_sgp21,mv_sgp22,mv_sgp23,mv_sgp24,mv_sgp25,mv_sgp26,mv_sgp27))

# stack pss alt3
mvreg_soc_grn_pss_alt3_stack = tbl_stack(tbls= list(mv_sgp31,mv_sgp32,mv_sgp33,mv_sgp34,mv_sgp35,mv_sgp36,mv_sgp37))

pss_alt_soc_table = tbl_merge(tbls = list(mvreg_soc_grn_pss_alt2_stack,mvreg_soc_grn_pss_alt3_stack),
                              tab_spanner = c("**PSS and ethnicity**", "**PSS, sex, and ethnicity**"))

pss_alt_soc_table
```

### 2. CES-D
```{r}
# run alt1, alt2, alt3
# naming convention: mvsg_cesd_alt#

for(i in seq_along(lm_tests_mvsg_cesd_alt1)) {
  # creates variable names
  var_nm = paste0("mv_sgc1",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_cesd_alt1[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

for(i in seq_along(lm_tests_mvsg_cesd_alt2)) {
  # creates variable names
  var_nm = paste0("mv_sgc2",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_cesd_alt2[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

for(i in seq_along(lm_tests_mvsg_cesd_alt3)) {
  # creates variable names
  var_nm = paste0("mv_sgc3",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_cesd_alt3[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}
```

```{r}
# stack mmpi alt1
mvreg_soc_grn_cesd_alt1_stack = tbl_stack(tbls= list(mv_sgc11,mv_sgc12,mv_sgc13,mv_sgc14,mv_sgc15,mv_sgc16,mv_sgc17))

# stack mmpi alt3
mvreg_soc_grn_cesd_alt2_stack = tbl_stack(tbls= list(mv_sgc21,mv_sgc22,mv_sgc23,mv_sgc24,mv_sgc25,mv_sgc26,mv_sgc27))

# stack mmpi alt3
mvreg_soc_grn_cesd_alt3_stack = tbl_stack(tbls= list(mv_sgc31,mv_sgc32,mv_sgc33,mv_sgc34,mv_sgc35,mv_sgc36,mv_sgc37))

cesd_alt_soc_table = tbl_merge(tbls = list(mvreg_soc_grn_cesd_alt1_stack,mvreg_soc_grn_cesd_alt2_stack,mvreg_soc_grn_cesd_alt3_stack),
                              tab_spanner = c("**CES-D-10 and sex**", "**CES-D-10 and ethnicity**","**CES-D-10, sex, and ethnicity**"))

cesd_alt_soc_table
```


### 3. MMPI
```{r}
# run alt1, alt3
# naming convention: mvsg_mmpi_alt#

for(i in seq_along(lm_tests_mvsg_mmpi_alt1)) {
  # creates variable names
  var_nm = paste0("mv_sgm1",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_mmpi_alt1[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}

for(i in seq_along(lm_tests_mvsg_mmpi_alt3)) {
  # creates variable names
  var_nm = paste0("mv_sgm3",i)
  # assigns variable to corresponding lm test value
  assign(var_nm,lm_tests_mvsg_mmpi_alt3[[i]]
         # gt table creation/formatting
         %>% tbl_regression(
           exponentiate=FALSE,
           pvalue_fun = function(x) style_pvalue(x, digits = 3),
           estimate_fun = function(x) style_number(x, digits = 2),
           include = names(grn_uv_iv_list[i]),
           label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>%
        bold_p(t=0.05) %>%
        bold_labels() %>%
        italicize_levels())
}
```

```{r}
# stack mmpi alt1
mvreg_soc_grn_mmpi_alt1_stack = tbl_stack(tbls= list(mv_sgm11,mv_sgm12,mv_sgm13,mv_sgm14,mv_sgm15,mv_sgm16,mv_sgm17))

# stack mmpi alt3
mvreg_soc_grn_mmpi_alt3_stack = tbl_stack(tbls= list(mv_sgm31,mv_sgm32,mv_sgm33,mv_sgm34,mv_sgm35,mv_sgm36,mv_sgm37))

mmpi_alt_soc_table = tbl_merge(tbls = list(mvreg_soc_grn_mmpi_alt1_stack,mvreg_soc_grn_mmpi_alt3_stack),
                              tab_spanner = c("**MMPI-2 and sex**", "**MMPI-2, sex, and ethnicity**"))

mmpi_alt_soc_table
```


# VIII. Summary statistics
## Sociodemographics summary statistics table
```{r}
soc_summary_table = all_data %>%
  dplyr::select(time_period,sex,age_group,income,ethnicity,race2,cohabit,BAplus,looking_for_work,insured) %>%
  tbl_summary(
    # group by time period
    by=time_period,
    # change variable row names
    label = list(sex ~ "Sex", 
                 age_group ~ "Age",
                 income ~ "Income",
                 ethnicity ~ "Ethnicity",
                 race2 ~ "Race",
                 cohabit ~ "Cohabitation status",
                 BAplus ~ "Educational attainment",
                 looking_for_work ~ "Looking for work?",
                 insured ~ "Insurance status")) %>%
  # add total column
  add_overall() %>%
  bold_labels() %>%
  italicize_levels() %>%
  # set spanning header
  modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>%
  modify_header(
    # change table column names
    update = list(label ~ "**Sociodemographic variables**",
                  stat_0 ~ "**Total** N={N}",
                  stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)",
                  stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)",
                  stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)",
                  stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>%
  modify_footnote(
    update = starts_with("stat") ~
      "Before COVID-19 (before 3/12/21); Stay at home (3/13/20-5/8/20); Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21)"
  ) %>% 
  as_gt() %>%
  opt_row_striping()
soc_summary_table
```

```{r}
supp_soc_table = soc_data %>%
  dplyr::select(time_period,sex,age_group,income,ethnicity,race2,cohabit,BAplus,looking_for_work,insured) %>%
  tbl_summary(
    # group by time period
    by=time_period,
    # change variable row names
    label = list(sex ~ "Sex", 
                 age_group ~ "Age",
                 income ~ "Income",
                 ethnicity ~ "Ethnicity",
                 race2 ~ "Race",
                 cohabit ~ "Cohabitation status",
                 BAplus ~ "Educational attainment",
                 looking_for_work ~ "Looking for work?",
                 insured ~ "Insurance status")) %>%
  # add total column
  add_overall() %>%
  bold_labels() %>%
  italicize_levels() %>%
  # set spanning header
  modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>%
  modify_header(
    # change table column names
    update = list(label ~ "**Sociodemographic variables**",
                  stat_0 ~ "**Total** N={N}",
                  stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)",
                  stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)",
                  stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)",
                  stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>%
  modify_footnote(
    update = starts_with("stat") ~
      "Before COVID-19 (before 3/12/21); Stay at home (3/13/20-5/8/20); Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21)"
  ) %>% 
  as_gt() %>%
  # hides empty before covid column
  cols_hide(
    columns = vars(stat_1)
  ) %>%
  opt_row_striping()

supp_soc_table
```

## COVID-19 summary statistics table
### Ungrouped
```{r}
sum_table_covid = covid_data %>%
  dplyr::select(time_period,covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closesymp,covid_closeexp,covid_online,covid_green) %>%
  tbl_summary(
    # group by time period
    by=time_period,
    # rename covid variables
    label = list(covid_financial ~ "COVID-19 has impacted me negatively from a financial point of view",
                 covid_lossincome ~ "I have lost job-related income due to COVID-19",
                 covid_resources ~ "I have had a hard time getting needed resources (food, toilet paper) due to COVID-19",
                 covid_workexp ~ "I had to continue to work even though I was in close contact with people who might be infected (e.g. customers, patients, co-workers)",
                 diagnosed ~ "I have been diagnosed with COVID-19",
                 covid_symptoms ~ "I have had COVID-19-like symptoms at some point in the last two months",
                 covid_closesymp ~"I have been in close proximity with someone who has been diagnosed with COVID-19",
                 covid_closeexp ~ "I have been in close proximity with someone who has had COVID-19-like symptoms in the last two months",
                 covid_online ~ "I spend a huge percentage of my time trying to find updates online or on TB about COVID-19",
                 covid_green ~ "I have spent more time outside on parks, on trails, and near nature in the past month compared to the same time last year")) %>%
  # add total column
  add_overall() %>%
  bold_labels() %>%
  italicize_levels() %>%
  # set spanning header
  modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>%
  modify_header(
    # change table column names
    update = list(label ~ "**COVID-19 impact variables**",
                  stat_0 ~ "**Total** N={N}",
                  stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)",
                  stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)",
                  stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)",
                  stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>%
  modify_footnote(list(
    update = starts_with("stat") ~
      "Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21). No respondents completed COVID-19 questions in Before COVID-19 (before 3/12/21) or Stay at home (3/13/20-5/8/20) periods.",
      label ~ "Where 1 = Not true of me at all and 7 = Very true of me")) %>% 
  as_gt() %>%
  # hides empty before covid and stay at home columns
  cols_hide(
    columns = c(stat_1,stat_2)
  ) %>%
  opt_row_striping()

sum_table_covid
```
### Grouped
```{r}
## Grouping covid impact questions into three groups: 1-3 = not true of me at all/little impact, 4 = neutral, 5-7 = very true/substantial impact
covid_recode = function(x) {
  dplyr::recode(x,"1"  = "Little impact","2"  = "Little impact","3"  = "Little impact",
                          "4"  = "Neutral",
                          "5"  = "Substantial impact","6"  = "Substantial impact","7"  = "Substantial impact")
}

sum_table_covid_grouped = covid_data %>%
  dplyr::select(time_period,covid_financial,covid_lossincome,covid_resources,covid_workexp,covid_symptoms,covid_closesymp,covid_closeexp,covid_online,covid_green) %>%
  mutate_at(c("covid_financial","covid_lossincome","covid_resources","covid_workexp","covid_symptoms","covid_closesymp","covid_closeexp","covid_online","covid_green"),covid_recode) %>%
  tbl_summary(
    # group by time period
    by=time_period,
    # rename covid variables
    label = list(covid_financial ~ "COVID-19 has impacted me negatively from a financial point of view",
                 covid_lossincome ~ "I have lost job-related income due to COVID-19",
                 covid_resources ~ "I have had a hard time getting needed resources (food, toilet paper) due to COVID-19",
                 covid_workexp ~ "I had to continue to work even though I was in close contact with people who might be infected (e.g. customers, patients, co-workers)",
                 covid_symptoms ~ "I have had COVID-19-like symptoms at some point in the last two months",
                 covid_closesymp ~"I have been in close proximity with someone who has been diagnosed with COVID-19",
                 covid_closeexp ~ "I have been in close proximity with someone who has had COVID-19-like symptoms in the last two months",
                 covid_online ~ "I spend a huge percentage of my time trying to find updates online or on TB about COVID-19",
                 covid_green ~ "I have spent more time outside on parks, on trails, and near nature in the past month compared to the same time last year")) %>%
  # add total column
  add_overall() %>%
  bold_labels() %>%
  italicize_levels() %>%
  # set spanning header
  modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>%
  modify_header(
    # change table column names
    update = list(label ~ "**COVID-19 impact variables**",
                  stat_0 ~ "**Total** N={N}",
                  stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)",
                  stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)",
                  stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)",
                  stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>%
 modify_footnote(list(
    update = starts_with("stat") ~
      "Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21). No respondents completed COVID-19 questions in Before COVID-19 (before 3/12/21) or Stay at home (3/13/20-5/8/20) periods.",
      label ~ "Where 1-3 = Little impact, 4 = Neutral, and 5-7 = Substantial impact."))

sum_table_covid_diagnosed_grouped = covid_data %>%
  dplyr::select(time_period, diagnosed) %>%
  tbl_summary(
    by = time_period,
    label = diagnosed ~ "I have been diagnosed with COVID-19") %>%
  add_overall() %>%
  bold_labels() %>%
  italicize_labels() %>%
  modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>%
  modify_header(
    # change table column names
    update = list(label ~ "**COVID-19 impact variables**",
                  stat_0 ~ "**Total** N={N}",
                  stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)",
                  stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)",
                  stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)",
                  stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) 
  
sum_tab_cov_grouped = tbl_stack(tbls= list(sum_table_covid_grouped,sum_table_covid_diagnosed_grouped)) %>%
  as_gt() %>%
  # hides empty before covid and stay at home columns
  cols_hide(
    columns = c(stat_1,stat_2)
  ) %>%
  opt_row_striping()

sum_tab_cov_grouped
```


# IX. Chi-squared tests to compare observed (survey) versus expected (census) population proportions
Result: all of the observed values (from the sample) are statistically significantly different from the expected values (based on the Denver Census data).

```{r}
neigh_acs18 = read.csv("/Users/esrieves/Documents/GitHub/grnspc/2018_neighborhood_5yracs.csv")
neigh_acs18

den_acs19 = read.csv("/Users/esrieves/Documents/GitHub/grnspc/2019_acs5yr.csv")
den_acs19
```


```{r}
# Sex chi

# order: female, male
ob_sex = all_data %>%
  count(sex) %>%
  mutate(pct = n/sum(n))
ob_sex_df = ob_sex
ob_sex=as.numeric(ob_sex$n)
ob_sex

## ACS 2019 - Denver County comparison
# order: female,male 
ex_sex = den_acs19[4:3,4]/100
ex_sex
sex_chi_res = chisq.test(ob_sex, p=ex_sex)
sex_chi_res
sex_chi_res_p = as.numeric(sex_chi_res[3])

## ACS 2018 - Neighborhood comparison
# order: female, male
ex_sex_nei = neigh_acs18[30:29,2]/100
ex_sex_nei
sex_nei_chi = chisq.test(ob_sex, p=ex_sex_nei)
sex_nei_chi
sex_nei_chi_p = as.numeric(sex_nei_chi[3])

ob_sex_df = ob_sex_df %>%
  mutate(Denver_obs = ex_sex) %>%
  mutate(Den_p = sex_chi_res_p) %>%
  mutate(Neigh_obs = ex_sex_nei) %>%
  mutate(Neigh_p = sex_nei_chi_p) %>%
  rename(category = sex)
  

ob_sex_df
```

```{r}
# Ethnicity chi 

# order: not hisp, hisp
ob_eth = all_data %>%
  count(ethnicity,sort=TRUE) %>%
   mutate(pct = n/sum(n))
ob_eth_df = ob_eth
ob_eth
ob_eth = as.numeric(ob_eth$n)
ob_eth

## ACS 2019 - Denver County comparison
# order: Not Hispanic,Hispanic
ex_eth = den_acs19[22:21,4]/100
ex_eth
eth_chi_res = chisq.test(ob_eth, p=ex_eth)
eth_chi_res
eth_chi_res_p =as.numeric(eth_chi_res[3])

## ACS 2018 - Neighborhood comparison
# order: not hispanic, hispanic
ex_eth_nei = neigh_acs18[20:19,2]/100
ex_eth_nei
eth_nei_chi = chisq.test(ob_eth,p=ex_eth_nei)
eth_nei_chi
eth_nei_chi_p = as.numeric(eth_nei_chi[3])

ob_eth_df = ob_eth_df %>% 
  mutate(Denver_obs = ex_eth) %>%
  mutate(Den_p = eth_chi_res_p) %>%
  mutate(Neigh_obs = ex_eth_nei) %>%
  mutate(Neigh_p = eth_nei_chi_p) %>%
  rename(category = ethnicity)
ob_eth_df
```

```{r}
# age group chi

# re-filter age to match ACS groupings
# there are 15 NA, who are the < 20 y.o. 
all_data$acs_age_groups = ifelse(20 <= all_data$age & all_data$age < 35, "20 to 34 years",
                 ifelse(35 <= all_data$age & all_data$age < 55, "35 to 54 years",
                 ifelse(55 <= all_data$age & all_data$age < 65, "55 to 64 years",
                 ifelse(65 <= all_data$age,"65+",NA))))
# ascending order
ob_age = all_data %>%
  filter(!is.na(acs_age_groups)) %>%
  count(acs_age_groups,sort=TRUE) %>%
  arrange(acs_age_groups) %>%
   mutate(pct = n/sum(n))
ob_age_df = ob_age
ob_age = as.numeric(ob_age$n)
ob_age
# order: 20-34,35-54,55-64,65+,

## ACS 2019 - Denver County comparison
ex_age = den_acs19[5:8,4]/100
ex_age
age_chi_res = chisq.test(ob_age, p=ex_age)
age_chi_res
age_chi_res_p = as.numeric(age_chi_res[3])

## ACS 2018 - Neighborhood comparison
# NOTE < 20s have been removed.. not a perfect comparison
# order: ascending
ex_age_nei = neigh_acs18[33:36,2]/100
age_nei_chi = chisq.test(ob_age, p=ex_age_nei,rescale.p = TRUE)
age_nei_chi
age_nei_chi_p = as.numeric(age_nei_chi[3])

ob_age_df = ob_age_df %>% 
  mutate(Denver_obs = ex_age) %>%
  mutate(Den_p = age_chi_res_p) %>%
  mutate(Neigh_obs = ex_age_nei) %>%
  mutate(Neigh_p = age_nei_chi_p) %>%
  rename(category = acs_age_groups)
ob_age_df
```

```{r}
# income chi
# descending order
ob_inc = all_data %>%
  filter(!is.na(income)) %>%
  count(income,sort=TRUE) %>%
   mutate(pct = n/sum(n))
ob_inc_df = ob_inc
ob_inc = as.numeric(ob_inc$n)
ob_inc

## 2019 ACS - Denver County comparisons
# order: descending
ex_inc = den_acs19[44:39,4]/100
ex_inc
inc_chi_res = chisq.test(ob_inc, p=c(ex_inc))
inc_chi_res_p = as.numeric(inc_chi_res[3])
inc_chi_res
inc_chi_res_p = as.numeric(inc_chi_res[3])

## 2018 ACS - Neighborhood comparisons
# order: descending
ex_inc_nei = neigh_acs18[11:6,2]/100
inc_nei_chi = chisq.test(ob_inc,p=ex_inc_nei,rescale.p = TRUE)
inc_nei_chi
inc_nei_chi_p = as.numeric(inc_nei_chi[3])

ob_inc_df = ob_inc_df %>% 
  mutate(Denver_obs = ex_inc) %>%
  mutate(Den_p = inc_chi_res_p) %>%
  mutate(Neigh_obs = ex_inc_nei) %>%
  mutate(Neigh_p = inc_nei_chi_p) %>%
  rename(category = income)
ob_inc_df
```


```{r}
# race chi

# order initially: white, NA, Black, multiracial, Native American, Asian
ob_race = all_data %>%
  count(race2,sort=TRUE) %>%
   mutate(pct = n/sum(n))
# reorder to be: Multiracial, White, Black, Asian, Native American, NA
ob_race$race2 = ob_race$race2[c(4,1,3,6,5,2)]
ob_race_df = ob_race
ob_race = as.numeric(ob_race$n)
ob_race

## 2019 ACS - Denver County comparisons
# order: Multiracial, White, Black, Asian, Native American, "Some other race" (NA)
ex_race1 = den_acs19[12:18,4]/100
# remove "one race" total category (acs_5yr[13])
ex_race = ex_race1[c(-2)]
# probability sums to 100.1257, therefore use rescale.p in chi test below
race_chi_res = chisq.test(ob_race, p = ex_race,rescale.p = TRUE)
race_chi_res
race_chi_res_p = as.numeric(race_chi_res[3])

## 2018 ACS - Neighborhood comparisons
ex_race_nei1 = neigh_acs18[22:27,2]/100
# reordering to match above orders: Multiracial, White, Black, Asian, Native American, "Some other race" (NA)
ex_race_nei = ex_race_nei1[c(6,1,2,4,3,5)]
race_nei_chi = chisq.test(ob_race, p=ex_race_nei,rescale.p = TRUE)
race_nei_chi
race_nei_chi_p = as.numeric(race_nei_chi[3])

ob_race_df = ob_race_df %>% 
  mutate(Denver_obs = ex_race) %>%
  mutate(Den_p = race_chi_res_p) %>%
  mutate(Neigh_obs = ex_race_nei)%>%
  mutate(Neigh_p = race_nei_chi_p) %>%
  rename(category = race2)
ob_race_df
```

```{r}
# Education chi 

# order: Baplus, ltBA
ob_edu = all_data %>%
  filter(!is.na(BAplus)) %>%
  count(BAplus,sort=TRUE) %>%
   mutate(pct = n/sum(n))
ob_edu_df = ob_edu
ob_edu = as.numeric(ob_edu$n)
ob_edu

## 2019 ACS - Denver County comparisons
# order: BAplus, ltBA
ex_edu = den_acs19[29:28,4]/100
# probability sums to just under 100, therefore use rscale.p in test
edu_chi_res = chisq.test(ob_edu, p=ex_edu,rescale.p = TRUE)
edu_chi_res
edu_chi_res_p = as.numeric(edu_chi_res[3])

## 2018 ACS - Neighborhood comparisons
# order: BAplus, ltBA
ex_edu_nei = neigh_acs18[14:13,2]/100
edu_nei_chi = chisq.test(ob_edu, p=ex_edu_nei)
edu_nei_chi
edu_nei_chi_p = as.numeric(edu_nei_chi[3])

ob_edu_df = ob_edu_df %>% 
  mutate(Denver_obs = ex_edu) %>%
  mutate(Den_p = edu_chi_res_p) %>%
  mutate(Neigh_obs = ex_edu_nei) %>%
  mutate(Neigh_p = edu_nei_chi_p) %>%
  rename(category = BAplus)
ob_edu_df
```

```{r}
# Insurance chi

# order: insured, uninsured
ob_ins = all_data %>%
  filter(!is.na(insured)) %>%
  count(insured,sort=TRUE) %>%
   mutate(pct = n/sum(n))
ob_ins_df = ob_ins
ob_ins = as.numeric(ob_ins$n)
ob_ins

## 2019 ACS - Denver County comparisons
# order: with insurance, without insurance
ex_ins = den_acs19[47:48,4]/100
ex_ins
ins_chi_res = chisq.test(ob_ins, p=c(ex_ins))
ins_chi_res
ins_chi_res_p = as.numeric(ins_chi_res[3])

## 2018 ACS - Neighborhood comparisons
# order: insured, uninsured
# NOT SIG P!!!
ex_ins_nei = neigh_acs18[2:1,2]/100
ins_nei_chi = chisq.test(ob_ins, p=ex_ins_nei)
ins_nei_chi
ins_nei_chi_p = as.numeric(ins_nei_chi[3])

ob_ins_df = ob_ins_df %>% 
  mutate(Denver_obs = ex_ins) %>%
  mutate(Den_p = ins_chi_res_p) %>%
  mutate(Neigh_obs = ex_ins_nei) %>%
  mutate(Neigh_p = ins_nei_chi_p) %>%
  rename(category = insured)
ob_ins_df
```


```{r}
## Create dataframe of results
# initializes an empty dataframe to fill with chi_results
demo_comps = rbind(ob_sex_df,ob_age_df,ob_eth_df,ob_edu_df,ob_inc_df,ob_race_df,ob_ins_df)
demo_comps = data.frame(demo_comps)
demo_comps
table = demo_comps %>%
  gt() %>%
  fmt_percent(
  columns = c(pct,Denver_obs,Neigh_obs),
  decimals = 1
  )
table
```