--- title: "Covid_Greenspace_Analysis_step3" author: "Colleen E. Reid, ES Rieves" date: "1/14/2021" output: html_document --- ```{r} # bring in libraries that I need for script library(tidyverse) library(lubridate) library(car) library(gt) library(gtsummary) library(ggplot2) library(ggpubr) library(stats) library(corrplot) library(broom) ``` # I. Data preparation + cleaning ## Loading ```{r} #read in the greenspace+covid data # Emma's base directory base_dir <- "/Users/esrieves/Documents/GitHub/grnspc/" # set directory to export files to export_dir <- "/Users/esrieves/Documents/GitHub/grnspc/Perceived_Greenspace/outputs" # Colleen's base directory # base_dir <- "C:/github/grnspc" # export_dir <- "C:/github/grnspc/Perceived_Greenspace" # Read in data # includes all responses (n=911) all_data <- read.csv(file.path(base_dir,"/publication_dataset.csv")) ``` ## creating dataset for publication ```{r} # pub_data = all_data %>% dplyr::select(time_period,age,age_group,sex,ethnicity,race2,cohabit,insured,looking_for_work,BAplus,income, # MN19_300,MN19_500,Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,pss_score,cesd_score,mmpi_score, # diagnosed,covid_financial,covid_lossincome,covid_resources,covid_workexp, # covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green, # num_diagnosis) # write.csv(pub_data, file.path(base_dir,"publication_dataset.csv")) ``` ## Cleaning ```{r} # Makes the date a factor all_data$time_period = factor(all_data$time_period, levels=c("before covid period","stay at home period","reopening period","fall wave period")) # Test success, examine breakdown by time period summary(all_data$time_period) count(all_data) ``` ```{r} # Reordering greenspace questions: Strongly Disagree < Disagree < Agree < Strongly Agree all_data$Q33_1=factor(all_data$Q33_1, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree")) all_data$Q33_2=factor(all_data$Q33_2, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree")) all_data$Q33_3=factor(all_data$Q33_3, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree")) all_data$Q33_4=factor(all_data$Q33_4, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree")) all_data$Q33_5=factor(all_data$Q33_5, levels = c("Strongly Disagree","Disagree","Agree","Strongly Agree")) ``` ```{r} # Reordering sociodemographic questions # order income in ascending order all_data$income = factor(all_data$income,levels=c("Less than $25,000","$25,000 to $50,000","$50,000 to $75,000","$75,000 to $100,000","$100,000 to $150,000","Greater than $150,000")) # refactor so 'not looking' is reference group all_data$looking_for_work = factor(all_data$looking_for_work, levels=c("Not looking","Looking - part-time","Looking")) # establishes White (most prevalent) as reference group; all subsequent groups in alphabetical order all_data$race2 = factor(all_data$race2, levels=c("White","Asian/Pacific Islander","Black/African American","Multiracial","Native American")) # establishes Non-Hispanic (most prevalent) as reference group all_data$ethnicity = factor(all_data$ethnicity, levels=c("Not Hispanic/Latino","Hispanic/Latino")) ``` ## Create datsets for analyses ```{r} ## TIME ANALYSIS DATASET -- all_data (n=911) ## SOCIODEMOGRAPHIC ANALYSIS DATASET -- soc_data (n=809) ## Remove rows without objective greenspace, remove 'before covid' respondents # drop if any objective greenspace measures have NA observations soc_data = all_data %>% # remove before covid observations filter(time_period != "before covid period") %>% # remove participants without objective greenspace information drop_na(MN19_300,MN19_500) # check to confirm success # count = 809 count(soc_data) ## checking count # 816 excluding before covid all_data %>% filter(time_period != "before covid period") %>% summarise(n=n()) # 7 NAs (excluding before covid people); therefore 816 - 7 = 809, checks out all_data %>% filter(time_period != "before covid period") %>% dplyr::select(MN19_300,MN19_500) %>% summarise_all(funs(sum(is.na(.)))) ``` ```{r} ## COVID ANALYSIS DATASET -- covid_data (n=431) ## Remove rows with NA answer for any covid questions # drop if pre-covid to ensure that subsequent regressions are not done on pre-covid people # and excluding survey respondents missing objective greenspace information # soc_data n=809 covid_data = soc_data %>% #filter(time_period != "before covid period") %>% drop_na(covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green) # check - removing before covid, n = 809 summary(soc_data$time_period != "before covid period") # check - removing before covid (n=809), there are 378 NAs for diagnosed (368 for all other categories); should mean new n = 431 soc_data %>% dplyr::select(time_period,covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green) %>% summarise_all(funs(sum(is.na(.)))) # n = 431, checks out count(covid_data) covid_data %>% dplyr::select(covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green,Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,MN19_300,MN19_500,pss_score,cesd_score,mmpi_score) %>% summarise_all(funs(sum(is.na(.)))) ``` # II. Correlation betweeen measures ```{r} # Coded such that high agreement score will indicate positive corrrelation with high NDVI score (avoids negative correlation between agreeing greenspace measures) green_recode = soc_data %>% mutate(Q33_1=dplyr::recode(Q33_1,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0), Q33_2=dplyr::recode(Q33_2,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0), Q33_3=dplyr::recode(Q33_3,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0), Q33_4=dplyr::recode(Q33_4,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0), Q33_5=dplyr::recode(Q33_5,"Strongly Agree"=3,"Agree"=2,"Disagree"=1,"Strongly Disagree"=0)) green_matrix = green_recode %>% dplyr::select(Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,MN19_300,MN19_500) # use covid dataset for covid analyses covid_matrix = covid_data %>% dplyr::select(covid_financial,covid_lossincome, covid_resources,covid_workexp,covid_symptoms,covid_closeexp,covid_closesymp,covid_online,covid_green,num_diagnosis) # Green data: create correlation matrix using Spearman method green_matrix_res = cor(green_matrix, method=c("spearman"), use = "complete.obs") green_matrix_res = as.table(green_matrix_res) corrplot.mixed(green_matrix_res) # Covid data: create correlation matrix using Spearman method covid_matrix_res = cor(covid_matrix, method=c("spearman"), use = "complete.obs") covid_matrix_res = as.table(covid_matrix_res) corrplot.mixed(covid_matrix_res) ``` # III. Mental health across COVID periods ## A. Time period analysis ```{r} pss_anova = aov(pss_score~time_period,data=all_data) pss_tukey = TukeyHSD(pss_anova, ordered=TRUE) pss_tukey = tidy(pss_tukey) p1 = pss_tukey %>% dplyr::select(contrast,estimate,adj.p.value) ``` ```{r} cesd_anova = aov(cesd_score~time_period,data=all_data) cesd_tukey = TukeyHSD(cesd_anova, ordered=TRUE) cesd_tukey = tidy(cesd_tukey) p2 = cesd_tukey %>% dplyr::select(contrast,estimate,adj.p.value) ``` ```{r} mmpi_anova = aov(mmpi_score~time_period,data=all_data) mmpi_tukey = TukeyHSD(mmpi_anova, ordered=TRUE) mmpi_tukey = tidy(mmpi_tukey) p3 = mmpi_tukey %>% dplyr::select(contrast,estimate,adj.p.value) ``` ```{r} ## Merge dataframes to create gt table # contrast = time period comparisons time_scores = left_join(p1,p2, by = "contrast") time_scores = left_join(time_scores, p3, by = "contrast") # double checking join time_scores$estimate.x == p1$estimate # estimate.x is p1 is pss time_scores$estimate.y == p2$estimate # estimate.y is p2 is cesd time_scores$estimate == p3$estimate # estimate is p3 is mmpi #time_scores = as.data.frame(time_scores) ## get Ns for table # PSS, N = 909 (2 = NA) all_data %>% dplyr::select(pss_score) %>% summarise_all(funs(sum(!is.na(.)))) # CES-D, N = 901 (10 = NA) all_data %>% dplyr::select(cesd_score) %>% summarise_all(funs(sum(!is.na(.)))) # MMPI, N = 885 (26 = NA) all_data %>% dplyr::select(mmpi_score) %>% summarise_all(funs(sum(!is.na(.)))) ## Merge tables time_scores_tbl = time_scores %>% gt() %>% fmt_number(columns = vars(estimate.x,estimate.y,estimate), decimals = 2) %>% fmt_number(columns = vars(adj.p.value.x,adj.p.value.y,adj.p.value), decimals = 3) %>% tab_spanner( label = md("**PSS Stress, N = 909**"), columns = c(estimate.x,adj.p.value.x) ) %>% tab_spanner( label = md("**CES-D-10 Depression, N = 901**"), columns = c(estimate.y,adj.p.value.y) ) %>% tab_spanner( label = md("**MMPI-2 Anxiety, N = 885**"), columns = c(estimate,adj.p.value) ) %>% cols_label(contrast=md("**Time period**"), estimate=md("**Coefficient**"),estimate.x=md("**Coefficient**"), estimate.y=md("**Coefficient**"),adj.p.value=md("**P-value**"),adj.p.value.x=md("**P-value**"), adj.p.value.y=md("**P-value**")) %>% tab_header(title = "Coefficients (p-values) for Tukey’s HSD between Minnesota Multiphasic Personality Inventory scores during different time periods related to the COVID-19 pandemic in Denver, CO") time_scores_tbl ``` ## B. Graphical outputs ```{r} comps = list(c("before covid period","stay at home period"), c("before covid period","reopening period"),c("before covid period","fall wave period"), c("stay at home period","reopening period"), c("stay at home period","fall wave period"), c("reopening period","fall wave period")) pss_timeplot = ggplot(all_data, aes(x=time_period,y=pss_score,color=time_period)) + geom_violin() pss_timeplot = pss_timeplot + geom_boxplot(width=0.1, color="black") + geom_jitter(alpha=.1) + stat_compare_means(comparisons = comps, label="p.signif",hide.ns = TRUE, color="black", step.increase = 0.2, method = "wilcox.test") + ylab("PSS score") + scale_color_brewer(palette = "Dark2") + theme_light() + theme(legend.position="bottom", axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank()) + # New title for the legend labs(color = "COVID-19 time periods") cesd_timeplot = ggplot(all_data, aes(x=time_period,y=cesd_score,color=time_period)) + geom_violin() cesd_timeplot = cesd_timeplot + geom_boxplot(width=0.1, color="black") + geom_jitter(alpha=.1) + ylab("CES-D-10 score") + stat_compare_means(comparisons = comps, label="p.signif",hide.ns = TRUE, color="black", step.increase = 0.2, method = "wilcox.test") + scale_color_brewer(palette = "Dark2") + theme_light() + theme(legend.position="none", axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank()) mmpi_timeplot = ggplot(all_data, aes(x=time_period,y=mmpi_score,color=time_period)) + geom_violin() mmpi_timeplot = mmpi_timeplot + geom_boxplot(width=0.1, color="black") + geom_jitter(alpha=.1) + ylab("MMPI-2 score") + stat_compare_means(comparisons = comps, label="p.signif",hide.ns = TRUE, color="black", step.increase = 0.2, method = "wilcox.test") + scale_color_brewer(palette = "Dark2") + theme_light() + theme(legend.position = "none", axis.title.x=element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank()) allscores_timeplot = ggarrange(pss_timeplot,cesd_timeplot,mmpi_timeplot,ncol=3, nrow= 1, common.legend = TRUE) #allscores_timeplot # save output plot tiff("figure2.tiff", compression = "lzw", unit = "in", width = 7.5, height = 5.5, res=500) print(allscores_timeplot) dev.off() ``` # IV. Univariate regressions -- *loop logic* Use soc_data as dataset for perceived greenspace and sociodemographic univariate regressions Use covid_data as dataset for covid univariate regressions ## Greenspace ```{r} # independent variables for greenspace uv regressions grn_uv_iv_list = soc_data %>% dplyr::select(Q33_1,Q33_2,Q33_3,Q33_4,Q33_5,MN19_300,MN19_500) ## PSS score lm_tests_grn_pss = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_grn_pss[[i]] = glm(reformulate(names(grn_uv_iv_list[i]),"pss_score"),data=soc_data) # prints summary outputs for each model #print(summary(lm_tests_grn_pss[[i]])) } ## CES-D score lm_tests_grn_cesd = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_grn_cesd[[i]] = glm(reformulate(names(grn_uv_iv_list[i]),"cesd_score"),data=soc_data) # prints summary outputs for each model #print(summary(lm_tests_grn_cesd[[i]])) } ## MMPI score lm_tests_grn_mmpi = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_grn_mmpi[[i]] = glm(reformulate(names(grn_uv_iv_list[i]),"mmpi_score"),data=soc_data) # prints summary outputs for each model #print(summary(lm_tests_grn_mmpi[[i]])) } ``` ## Sociodemographics ```{r} # independent variables for sociodemographic uv regressions socdem_uv_iv_list = soc_data %>% dplyr::select(sex,income,race2,ethnicity,age_group,cohabit,BAplus,looking_for_work,insured) ## PSS score lm_tests_soc_pss = vector("list",length(socdem_uv_iv_list)) for(i in seq_along(socdem_uv_iv_list)) { lm_tests_soc_pss[[i]] = glm(reformulate(names(socdem_uv_iv_list[i]),"pss_score"),data=soc_data) # prints summary outputs for each model #print(summary(lm_tests_soc_pss[[i]])) } ## CES-D score lm_tests_soc_cesd = vector("list",length(socdem_uv_iv_list)) for(i in seq_along(socdem_uv_iv_list)) { lm_tests_soc_cesd[[i]] = glm(reformulate(names(socdem_uv_iv_list[i]),"cesd_score"),data=soc_data) # prints summary outputs for each model #print(summary(lm_tests_soc_cesd[i])) } ## MMPI score lm_tests_soc_mmpi = vector("list",length(socdem_uv_iv_list)) for(i in seq_along(socdem_uv_iv_list)) { lm_tests_soc_mmpi[[i]] = glm(reformulate(names(socdem_uv_iv_list[i]),"mmpi_score"),data=soc_data) # prints summary outputs for each model #print(summary(lm_tests_soc_mmpi[[i]])) } ``` ## Covid questions ```{r} # independent variables for covid uv regressions cov_uv_iv_list = covid_data %>% dplyr::select(covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closesymp,covid_closeexp,covid_online,covid_green) ## PSS score lm_tests_cov_pss = vector("list",length(cov_uv_iv_list)) for(i in seq_along(cov_uv_iv_list)) { lm_tests_cov_pss[[i]] = glm(reformulate(names(cov_uv_iv_list[i]),"pss_score"),data=covid_data) # prints summary outputs for each model #print(summary(lm_tests_cov_pss[[i]])) } ## CES-D score lm_tests_cov_cesd = vector("list",length(cov_uv_iv_list)) for(i in seq_along(cov_uv_iv_list)) { lm_tests_cov_cesd[[i]] = glm(reformulate(names(cov_uv_iv_list[i]),"cesd_score"),data=covid_data) # prints summary outputs for each model #print(summary(lm_tests_cov_cesd[[i]])) } ## MMPI score lm_tests_cov_mmpi = vector("list",length(cov_uv_iv_list)) for(i in seq_along(cov_uv_iv_list)) { lm_tests_cov_mmpi[[i]] = glm(reformulate(names(cov_uv_iv_list[i]),"mmpi_score"),data=covid_data) # prints summary outputs for each model #print(summary(lm_tests_cov_mmpi[[i]])) } ``` # V. Univariate Outputs ```{r} # Labels for all univariate and multivariate output tables grnspc_labels_list = c("“There is a lot of vegetation/greenery in my neighborhood”","“I can see vegetation/greenery from my home”", "“The nearest vegetated park/green space is easy for me to access”","“I spend a lot of time in spaces with natural vegetation”", "“The green spaces near my home are very high quality”","NAIP NDVI – 300 m buffer","NAIP NDVI – 500 m buffer") soc_labels_list = c("Sex","Income","Race","Ethnicity","Age","Cohabitation status","Educational attainment","Looking for work?","Insurance status") cov_labels_list = c("COVID-19 has impacted me negatively from a financial point of view","I have lost job-related income due to COVID-19", "I have had a hard time getting needed resources (food, toilet paper) due to COVID-19", "I had to continue to work even though I was in close contact with people who might be infected (e.g. customers, patients, co-workers)", "I have been diagnosed with COVID-19","I have had COVID-19-like symptoms at some point in the last two months", "I have been in close proximity with someone who has been diagnosed with COVID-19", "I have been in close proximity with someone who has had COVID-19-like symptoms in the last two months", "I spend a huge percentage of my time trying to find updates online or on TB about COVID-19", "I have spent more time outside on parks, on trails, and near nature in the past month compared to the same time last year") ``` creates GT tables and also determines which variables need to be included in the multivariate regressions ## Greenspace ```{r} # for loop initializes variable name for each regression table # naming convention: # - uv_gp# is greenspace pss uv reg # - uv_gc# is greenspace cesd uv reg # - uv_gm# is greenspace mmpi uv reg # then creates and formats gt table ## PSS for(i in seq_along(lm_tests_grn_pss)) { # creates variable names var_nm = paste0("uv_gp",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_grn_pss[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## CES-D for(i in seq_along(lm_tests_grn_cesd)) { # creates variable names var_nm = paste0("uv_gc",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_grn_cesd[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## MMPI for(i in seq_along(lm_tests_grn_mmpi)) { # creates variable names var_nm = paste0("uv_gm",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_grn_mmpi[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} ## tbl_stack # stack pss univariate regression objects in table uvreg_grn_pss_stack = tbl_stack(tbls= list(uv_gp1,uv_gp2,uv_gp3,uv_gp4,uv_gp5,uv_gp6,uv_gp7)) # stack cesd univariate regression objects in table uvreg_grn_cesd_stack = tbl_stack(tbls= list(uv_gc1,uv_gc2,uv_gc3,uv_gc4,uv_gc5,uv_gc6,uv_gc7)) # stack mmpi univariate regression objects in table uvreg_grn_mmpi_stack = tbl_stack(tbls= list(uv_gm1,uv_gm2,uv_gm3,uv_gm4,uv_gm5,uv_gm6,uv_gm7)) ## get Ns for table # PSS, N = 807 (2 = NA) soc_data %>% dplyr::select(pss_score) %>% summarise_all(funs(sum(!is.na(.)))) # CES-D, N = 801 (8 = NA) soc_data %>% dplyr::select(cesd_score) %>% summarise_all(funs(sum(!is.na(.)))) # MMPI, N = 785 (24 = NA) soc_data %>% dplyr::select(mmpi_score) %>% summarise_all(funs(sum(!is.na(.)))) ## tbl_merge uvreg_green_table = tbl_merge( tbls = list(uvreg_grn_pss_stack,uvreg_grn_cesd_stack,uvreg_grn_mmpi_stack)) %>% modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>% modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>% modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>% modify_header(update = label ~ "**Greenspace measure**") %>% as_gt() %>% opt_row_striping() uvreg_green_table ``` ## Sociodemographics ```{r} # for loop initializes variable name for each regression table # naming convention: # - uv_sp# is sociodemographic pss uv reg # - uv_sc# is sociodemographic cesd uv reg # - uv_sm# is sociodemographic mmpi uv reg # then creates and formats gt table ## PSS for(i in seq_along(lm_tests_soc_pss)) { # creates variable names var_nm = paste0("uv_sp",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_soc_pss[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(socdem_uv_iv_list[i]) ~ soc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## CES-D for(i in seq_along(lm_tests_soc_cesd)) { # creates variable names var_nm = paste0("uv_sc",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_soc_cesd[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(socdem_uv_iv_list[i]) ~ soc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## MMPI for(i in seq_along(lm_tests_soc_mmpi)) { # creates variable names var_nm = paste0("uv_sm",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_soc_mmpi[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(socdem_uv_iv_list[i]) ~ soc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} ## tbl_stack # stack pss univariate regression objects in table uvreg_soc_pss_stack = tbl_stack(tbls= list(uv_sp1,uv_sp2,uv_sp3,uv_sp4,uv_sp5,uv_sp6,uv_sp7,uv_sp8,uv_sp9)) # stack cesd univariate regression objects in table uvreg_soc_cesd_stack = tbl_stack(tbls= list(uv_sc1,uv_sc2,uv_sc3,uv_sc4,uv_sc5,uv_sc6,uv_sc7,uv_sc8,uv_sc9)) # stack mmpi univariate regression objects in table uvreg_soc_mmpi_stack = tbl_stack(tbls= list(uv_sm1,uv_sm2,uv_sm3,uv_sm4,uv_sm5,uv_sm6,uv_sm7,uv_sm8,uv_sm9)) ## get Ns for table # PSS, N = 807 (2 = NA) soc_data %>% dplyr::select(pss_score) %>% summarise_all(funs(sum(!is.na(.)))) # CES-D, N = 801 (8 = NA) soc_data %>% dplyr::select(cesd_score) %>% summarise_all(funs(sum(!is.na(.)))) # MMPI, N = 785 (24 = NA) soc_data %>% dplyr::select(mmpi_score) %>% summarise_all(funs(sum(!is.na(.)))) ## tbl_merge uvreg_soc_table = tbl_merge( tbls = list(uvreg_soc_pss_stack,uvreg_soc_cesd_stack,uvreg_soc_mmpi_stack)) %>% modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>% modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>% modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>% modify_header(update = label ~ "**Sociodemographic measure**") %>% as_gt() %>% opt_row_striping() uvreg_soc_table ``` ## Covid ```{r} # for loop initializes variable name for each regression table # naming convention: # - uv_cp# is covid pss uv reg # - uv_cc# is covid cesd uv reg # - uv_cm# is covid mmpi uv reg # then creates and formats gt table ## PSS for(i in seq_along(lm_tests_cov_pss)) { # creates variable names var_nm = paste0("uv_cp",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_cov_pss[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(cov_uv_iv_list[i]) ~ cov_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## CES-D for(i in seq_along(lm_tests_cov_cesd)) { # creates variable names var_nm = paste0("uv_cc",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_cov_cesd[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(cov_uv_iv_list[i]) ~ cov_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## MMPI for(i in seq_along(lm_tests_cov_mmpi)) { # creates variable names var_nm = paste0("uv_cm",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_cov_mmpi[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), label = names(cov_uv_iv_list[i]) ~ cov_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} ## tbl_stack # stack pss univariate regression objects in table uvreg_cov_pss_stack = tbl_stack(tbls= list(uv_cp1,uv_cp2,uv_cp3,uv_cp4,uv_cp5,uv_cp6,uv_cp7,uv_cp8,uv_cp9,uv_cp10)) # stack cesd univariate regression objects in table uvreg_cov_cesd_stack = tbl_stack(tbls= list(uv_cc1,uv_cc2,uv_cc3,uv_cc4,uv_cc5,uv_cc6,uv_cc7,uv_cc8,uv_cc9,uv_cc10)) # stack mmpi univariate regression objects in table uvreg_cov_mmpi_stack = tbl_stack(tbls= list(uv_cm1,uv_cm2,uv_cm3,uv_cm4,uv_cm5,uv_cm6,uv_cm7,uv_cm8,uv_cm9,uv_cm10)) ## get Ns for table # PSS, N = 431 covid_data %>% dplyr::select(pss_score) %>% summarise_all(funs(sum(!is.na(.)))) # CES-D, N = 427 covid_data %>% dplyr::select(cesd_score) %>% summarise_all(funs(sum(!is.na(.)))) # MMPI, N = 420 covid_data %>% dplyr::select(mmpi_score) %>% summarise_all(funs(sum(!is.na(.)))) ## tbl_merge uvreg_cov_table = tbl_merge( tbls = list(uvreg_cov_pss_stack,uvreg_cov_cesd_stack,uvreg_cov_mmpi_stack)) %>% modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 431") %>% modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 427") %>% modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 420") %>% modify_header(update = label ~ "**COVID-19 impact variables**") %>% as_gt() %>% opt_row_striping() uvreg_cov_table ``` # VI. Multivariate regressions -- *loop logic* ## A. Greenspace + Sociodemographics SIG VARS pss: sex, income, age, cohabit, BAplus, looking for work, insured cesd: income, age, cohabit, BAplus, looking for work, insured mmpi: income, ethnicity, age, cohabit, BAplus, looking for work, insured ```{r} ## PSS # dplyr::select variables sig in pss uv sociodemographic regressions sig_soc_pss = soc_data %>% dplyr::select(sex, income, age_group, cohabit, BAplus, looking_for_work, insured) # create a variable set that pairs each greenspace variable with sig soc variables var_set_mvsg_pss = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsg_pss)) { var_set_mvsg_pss[[i]] = c(names(sig_soc_pss),names(grn_uv_iv_list[i])) } # "base" sociodemographic only regression (for anova comparison) pss_soc_mod = glm(pss_score ~ sex+income+cohabit+age_group+BAplus+looking_for_work+insured, data=soc_data) # all vifs under 2, little indication of multicollinearity vif(pss_soc_mod) summary(pss_soc_mod) # run mv glm tests for pss with set of sig soc variables + each greenspace variable lm_tests_mvsg_pss = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_pss[[i]] = glm(reformulate(var_set_mvsg_pss[[i]],"pss_score"),data=soc_data) # print model summaries # print(summary(lm_tests_mvsg_pss[[i]])) # print anova results # print(anova(pss_soc_mod,lm_tests_mvsg_pss[[i]])) # print vif outcomes # print(vif(lm_tests_mvsg_pss[[i]])) } ``` ```{r} ## CES-D # dplyr::select variables sig in cesd uv sociodemographic regressions sig_soc_cesd = soc_data %>% dplyr::select(income, age_group, cohabit, BAplus, looking_for_work, insured) # create a variable set that pairs each greenspace variable with sig soc variables var_set_mvsg_cesd = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsg_cesd)) { var_set_mvsg_cesd[[i]] = c(names(sig_soc_cesd),names(grn_uv_iv_list[i])) } # "base" sociodemographic only regression (for anova comparison) cesd_soc_mod = glm(cesd_score ~ income+cohabit+age_group+BAplus+looking_for_work+insured, data=soc_data) # all vifs under 2, little indication of multicollinearity vif(cesd_soc_mod) summary(cesd_soc_mod) # run mv glm tests for cesd with set of sig soc variables + each greenspace variable lm_tests_mvsg_cesd = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_cesd[[i]] = glm(reformulate(var_set_mvsg_cesd[[i]],"cesd_score"),data=soc_data) # print model summaries # print(summary(lm_tests_mvsg_cesd[[i]])) # print anova results # print(anova(cesd_soc_mod,lm_tests_mvsg_cesd[[i]])) # print vif outcomes # print(vif(lm_tests_mvsg_cesd[[i]])) } ``` ```{r} ## MMPI # dplyr::select variables sig in mmpi uv sociodemographic regressions sig_soc_mmpi = soc_data %>% dplyr::select(income, ethnicity, age_group, cohabit, BAplus, looking_for_work, insured) # create a variable set that pairs each greenspace variable with sig soc variables var_set_mvsg_mmpi = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsg_mmpi)) { var_set_mvsg_mmpi[[i]] = c(names(sig_soc_mmpi),names(grn_uv_iv_list[i])) } # "base" sociodemographic only regression (for anova comparison) mmpi_soc_mod = glm(mmpi_score ~ income+ethnicity+cohabit+age_group+BAplus+looking_for_work+insured, data=soc_data) # all vifs under 2, little indication of multicollinearity vif(mmpi_soc_mod) summary(mmpi_soc_mod) # run mv glm tests for mmpi with set of sig soc variables + each greenspace variable lm_tests_mvsg_mmpi = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_mmpi[[i]] = glm(reformulate(var_set_mvsg_mmpi[[i]],"mmpi_score"),data=soc_data) # print model summaries # summary(lm_tests_mvsg_mmpi[[i]]) # print anova results # anova(cesd_soc_mod,lm_tests_mvsg_mmpi[[i]]) # print vif outcomes # print(vif(lm_tests_mvsg_mmpi[[i]])) } ``` ## B. Greenspace + Sociodemographics + Covid Use covid_data SIG VARS pss: sex, income, age, cohabit, BAplus, looking for work, insured; covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online cesd: income, age, cohabit, BAplus, looking for work, insured; covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online mmpi: income, ethnicity, age, cohabit, BAplus, looking for work, insured; covid_financial, covid_lossincome, covid_resources, covid_workexp, covid_symptoms, covid_closesymp, covid_online ```{r} ## PSS # dplyr::select variables sig in pss uv sociodemographic regressions sig_soc_cov_pss = covid_data %>% dplyr::select(sex, income, age_group, cohabit, BAplus, looking_for_work, insured, covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online) # create a variable set that pairs each greenspace variable with sig soc + cov variables var_set_mvsgc_pss = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsgc_pss)) { var_set_mvsgc_pss[[i]] = c(names(sig_soc_cov_pss),names(grn_uv_iv_list[i])) } # "base" sociodemographic only regression (for anova comparison) pss_base_soc_cov_mod = glm(pss_score ~ sex+income+age_group+cohabit+BAplus+looking_for_work+insured+covid_financial+covid_lossincome+covid_resources+ covid_symptoms+covid_closesymp+covid_online,data=covid_data) # summary of base mod summary(pss_base_soc_cov_mod) # all vifs under 2, little indication of multicollinearity vif(pss_base_soc_cov_mod) # run mv glm tests for pss with set of sig soc variables + each greenspace variable lm_tests_mvsgc_pss = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsgc_pss[[i]] = glm(reformulate(var_set_mvsgc_pss[[i]],"pss_score"),data=covid_data) # print model summaries # print(summary(lm_tests_mvsgc_pss[[i]])) # print anova results # print(anova(pss_base_soc_cov_mod,lm_tests_mvsgc_pss[[i]],test="Chi")) # print vif outcomes # print(vif(lm_tests_mvsgc_pss[[i]])) } ``` ```{r} ## CES-D # dplyr::select variables sig in cesd uv sociodemographic regressions sig_soc_cov_cesd = covid_data %>% dplyr::select(income, age_group, cohabit, BAplus, looking_for_work, insured, covid_financial, covid_lossincome, covid_resources, covid_symptoms, covid_closesymp, covid_online) # create a variable set that pairs each greenspace variable with sig soc variables var_set_mvsgc_cesd = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsgc_cesd)) { var_set_mvsgc_cesd[[i]] = c(names(sig_soc_cov_cesd),names(grn_uv_iv_list[i])) } # "base" sociodemographic and covid only regression (for anova comparison) cesd_soc_cov_mod = glm(cesd_score ~ income+cohabit+age_group+BAplus+looking_for_work+insured+covid_financial+covid_lossincome+ covid_resources+covid_symptoms+covid_closesymp+covid_online, data=covid_data) # all vifs under 2, little indication of multicollinearity vif(cesd_soc_cov_mod) summary(cesd_soc_cov_mod) # run mv glm tests for cesd with set of sig soc variables + each greenspace variable lm_tests_mvsgc_cesd = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsgc_cesd[[i]] = glm(reformulate(var_set_mvsgc_cesd[[i]],"cesd_score"),data=covid_data) # print model summmaries # print(summary(lm_tests_mvsg_cesd[[i]])) # print anova results # anova(cesd_soc_cov_mod,lm_tests_mvsgc_cesd[[i]]) # print vif outcomes # print(vif(lm_tests_mvsgc_cesd[[i]])) } ``` ```{r} ## MMPI # dplyr::select variables sig in mmpi uv sociodemographic regressions sig_soc_cov_mmpi = covid_data %>% dplyr::select(income, ethnicity, age_group, cohabit, BAplus, looking_for_work, insured, covid_financial, covid_lossincome, covid_resources, covid_workexp, covid_symptoms, covid_closesymp, covid_online) # create a variable set that pairs each greenspace variable with sig soc variables var_set_mvsgc_mmpi = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsgc_mmpi)) { var_set_mvsgc_mmpi[[i]] = c(names(sig_soc_cov_mmpi),names(grn_uv_iv_list[i])) } # "base" sociodemographic only regression (for anova comparison) mmpi_soc_cov_mod = glm(mmpi_score ~ income+ethnicity+cohabit+age_group+BAplus+looking_for_work+insured+covid_financial+covid_lossincome+ covid_resources+covid_workexp+covid_symptoms+covid_closesymp+covid_online, data=covid_data) # all vifs under 2, little indication of multicollinearity vif(mmpi_soc_cov_mod) summary(mmpi_soc_cov_mod) # run mv glm tests for mmpi with set of sig soc variables + each greenspace variable lm_tests_mvsgc_mmpi = vector("list",length(grn_uv_iv_list)) for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsgc_mmpi[[i]] = glm(reformulate(var_set_mvsgc_mmpi[[i]],"mmpi_score"),data=covid_data) # print model summaries #print(summary(lm_tests_mvsg_mmpi[[i]])) # print anova results # print(anova(mmpi_soc_cov_mod,lm_tests_mvsgc_mmpi[[i]])) # print vif outcomes # print(vif(lm_tests_mvsgc_mmpi[[i]])) } ``` ## C. Sensitivity analysis: Greenspace + Sociodemographics ```{r} ## Create three variable sets for sensitivity analysis # pss has sex only (run 2,3) # cesd has neither sex nor ethnicity (run 1-3) # mmpi has ethnicity only (run 1,3) sig_vars1 = soc_data %>% dplyr::select(sex, income, age_group, cohabit, BAplus, looking_for_work, insured) sig_vars2 = soc_data %>% dplyr::select(ethnicity, income, age_group, cohabit, BAplus, looking_for_work, insured) sig_vars3 = soc_data %>% dplyr::select(sex, ethnicity, income, age_group, cohabit, BAplus, looking_for_work, insured) ``` ```{r} # create a variable set for set1 var_set_mvsg_alt1 = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsg_alt1)) { var_set_mvsg_alt1[[i]] = c(names(sig_vars1),names(grn_uv_iv_list[i])) } # run mv glm tests for cesd and mmpi with set 1 variables + each greenspace variable lm_tests_mvsg_cesd_alt1 = vector("list",length(grn_uv_iv_list)) lm_tests_mvsg_mmpi_alt1 = vector("list",length(grn_uv_iv_list)) ## Results # cesd for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_cesd_alt1[[i]] = glm(reformulate(var_set_mvsg_alt1[[i]],"cesd_score"),data=soc_data) # print(summary(lm_tests_mvsg_cesd_alt1[[i]])) } # significant differences from standard variable set: N/A # mmpi for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_mmpi_alt1[[i]] = glm(reformulate(var_set_mvsg_alt1[[i]],"mmpi_score"),data=soc_data) # print(summary(lm_tests_mvsg_mmpi_alt1[[i]])) } # significant differences from standard variable set: N/A ``` ```{r} # create a variable set for set2 var_set_mvsg_alt2 = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsg_alt2)) { var_set_mvsg_alt2[[i]] = c(names(sig_vars2),names(grn_uv_iv_list[i])) } # run mv glm tests for pss and cesd with set 2 variables + each greenspace variable lm_tests_mvsg_pss_alt2 = vector("list",length(grn_uv_iv_list)) lm_tests_mvsg_cesd_alt2 = vector("list",length(grn_uv_iv_list)) ## Results # pss for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_pss_alt2[[i]] = glm(reformulate(var_set_mvsg_alt2[[i]],"pss_score"),data=soc_data) # print(summary(lm_tests_mvsg_pss_alt2[[i]])) } # significant differences from standard variable set: N/A # cesd for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_cesd_alt2[[i]] = glm(reformulate(var_set_mvsg_alt2[[i]],"cesd_score"),data=soc_data) # print(summary(lm_tests_mvsg_cesd_alt2[[i]])) } # significant differences from standard variable set: N/A ``` ```{r} # create a variable set for set3 var_set_mvsg_alt3 = vector("list",length(grn_uv_iv_list)) for(i in seq_along(var_set_mvsg_alt3)) { var_set_mvsg_alt3[[i]] = c(names(sig_vars3),names(grn_uv_iv_list[i])) } # run mv glm tests for pss, cesd, and mmpi2 with set 3 variables + each greenspace variable lm_tests_mvsg_pss_alt3 = vector("list",length(grn_uv_iv_list)) lm_tests_mvsg_cesd_alt3 = vector("list",length(grn_uv_iv_list)) lm_tests_mvsg_mmpi_alt3 = vector("list",length(grn_uv_iv_list)) ## Results # pss for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_pss_alt3[[i]] = glm(reformulate(var_set_mvsg_alt3[[i]],"pss_score"),data=soc_data) # print(summary(lm_tests_mvsg_pss_alt3[[i]])) } # significant differences from standard variable set: Q33_2 strongly agree becomes sig (comp'd to strongly disagree, agree is borderline) # cesd for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_cesd_alt3[[i]] = glm(reformulate(var_set_mvsg_alt3[[i]],"cesd_score"),data=soc_data) # print(summary(lm_tests_mvsg_cesd_alt3[[i]])) } # significant differences from standard variable set: N/A # mmpi for(i in seq_along(grn_uv_iv_list)) { lm_tests_mvsg_mmpi_alt3[[i]] = glm(reformulate(var_set_mvsg_alt3[[i]],"mmpi_score"),data=soc_data) # print(summary(lm_tests_mvsg_mmpi_alt3[[i]])) } # significant differences from standard variable set: N/A ``` # VII. Multivariate outputs ## A. Greenspace + Sociodemographics ```{r} # for loop initializes variable name for each regression table # naming convention: # - mv_sgp# is sociodemographic greenspace pss mv reg # - mv_sgc# is sociodemographic greenspace cesd mv reg # - mv_sgm# is sociodemographic greenspace mmpi mv reg # then creates and formats gt table ## PSS for(i in seq_along(lm_tests_mvsg_pss)) { # creates variable names var_nm = paste0("mv_sgp",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_pss[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## CES-D for(i in seq_along(lm_tests_mvsg_cesd)) { # creates variable names var_nm = paste0("mv_sgc",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_cesd[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## MMPI for(i in seq_along(lm_tests_mvsg_mmpi)) { # creates variable names var_nm = paste0("mv_sgm",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_mmpi[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} ## tbl_stack # stack pss univariate regression objects in table mvreg_soc_grn_pss_stack = tbl_stack(tbls= list(mv_sgp1,mv_sgp2,mv_sgp3,mv_sgp4,mv_sgp5,mv_sgp6,mv_sgp7)) # stack cesd univariate regression objects in table mvreg_soc_grn_cesd_stack = tbl_stack(tbls= list(mv_sgc1,mv_sgc2,mv_sgc3,mv_sgc4,mv_sgc5,mv_sgc6,mv_sgc7)) # stack mmpi univariate regression objects in table mvreg_soc_grn_mmpi_stack = tbl_stack(tbls= list(mv_sgm1,mv_sgm2,mv_sgm3,mv_sgm4,mv_sgm5,mv_sgm6,mv_sgm7)) ## get Ns for table # PSS, N = 807 (2 = NA) soc_data %>% dplyr::select(pss_score) %>% summarise_all(funs(sum(!is.na(.)))) # CES-D, N = 801 (8 = NA) soc_data %>% dplyr::select(cesd_score) %>% summarise_all(funs(sum(!is.na(.)))) # MMPI, N = 785 (24 = NA) soc_data %>% dplyr::select(mmpi_score) %>% summarise_all(funs(sum(!is.na(.)))) ## tbl_merge mvreg_soc_green_table = tbl_merge( tbls = list(mvreg_soc_grn_pss_stack,mvreg_soc_grn_cesd_stack,mvreg_soc_grn_mmpi_stack)) %>% # rename columns and spanners modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>% modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>% modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>% modify_header(update = label ~ "**Greenspace measure**") %>% as_gt() %>% # add footnotes for adjustment variables tab_footnote( footnote = "Adjusted for: sex, income, age, cohabitation status, educational attainment, employment status, insurance status", locations = cells_column_spanners(contains("PSS")) ) %>% tab_footnote( footnote = "Adjusted for: income, age, cohabitation status, educational attainment, employment status, insurance status", locations = cells_column_spanners(contains("CES-D")) ) %>% tab_footnote( footnote = "Adjusted for: income, ethnicity, age, cohabitation status, educational attainment, employment status, insurance status", locations = cells_column_spanners(contains("MMPI")) ) %>% # add stripes opt_row_striping() mvreg_soc_green_table ``` ## B. Greenspace + Sociodemographics + Covid ```{r} # for loop initializes variable name for each regression table # naming convention: # - mv_sgcp# is sociodemographic covid greenspace pss mv reg # - mv_sgcc# is sociodemographic covid greenspace cesd mv reg # - mv_sgcm# is sociodemographic sovid greenspace mmpi mv reg # then creates and formats gt table ## PSS for(i in seq_along(lm_tests_mvsgc_pss)) { # creates variable names var_nm = paste0("mv_sgcp",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsgc_pss[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## CES-D for(i in seq_along(lm_tests_mvsgc_cesd)) { # creates variable names var_nm = paste0("mv_sgcc",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsgc_cesd[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ## MMPI for(i in seq_along(lm_tests_mvsgc_mmpi)) { # creates variable names var_nm = paste0("mv_sgcm",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsgc_mmpi[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} ## tbl_stack # stack pss univariate regression objects in table mvreg_soc_cov_grn_pss_stack = tbl_stack(tbls= list(mv_sgcp1,mv_sgcp2,mv_sgcp3,mv_sgcp4,mv_sgcp5,mv_sgcp6,mv_sgcp7)) # stack cesd univariate regression objects in table mvreg_soc_cov_grn_cesd_stack = tbl_stack(tbls= list(mv_sgcc1,mv_sgcc2,mv_sgcc3,mv_sgcc4,mv_sgcc5,mv_sgcc6,mv_sgcc7)) # stack mmpi univariate regression objects in table mvreg_soc_cov_grn_mmpi_stack = tbl_stack(tbls= list(mv_sgcm1,mv_sgcm2,mv_sgcm3,mv_sgcm4,mv_sgcm5,mv_sgcm6,mv_sgcm7)) ## get Ns for table # PSS, N = 807 (2 = NA) soc_data %>% dplyr::select(pss_score) %>% summarise_all(funs(sum(!is.na(.)))) # CES-D, N = 801 (8 = NA) soc_data %>% dplyr::select(cesd_score) %>% summarise_all(funs(sum(!is.na(.)))) # MMPI, N = 785 (24 = NA) soc_data %>% dplyr::select(mmpi_score) %>% summarise_all(funs(sum(!is.na(.)))) ## tbl_merge mvreg_soc_cov_green_table = tbl_merge( tbls = list(mvreg_soc_cov_grn_pss_stack,mvreg_soc_cov_grn_cesd_stack,mvreg_soc_cov_grn_mmpi_stack)) %>% # rename columns and spanners modify_spanning_header(ends_with("1") ~ "**PSS Stress**, N = 807") %>% modify_spanning_header(ends_with("2") ~ "**CES-D-10 Depression**, N = 801") %>% modify_spanning_header(ends_with("3") ~ "**MMPI-2 Anxiety**, N = 785") %>% modify_header(update = label ~ "**Greenspace measure**") %>% as_gt() %>% # add footnotes for adjustment variables tab_footnote( footnote = "Adjusted for: sex, income, age, cohabitation status, educational attainment, employment status, insurance status, and COVID-19 related: financial stress, lost income, resource scarcity, symptoms, close proximity to symptoms, and online engagement", locations = cells_column_spanners(contains("PSS")) ) %>% tab_footnote( footnote = "Adjusted for: income, age, cohabitation status, educational attainment, employment status, insurance status, and COVID-19 related: financial stress, lost income, resource scarcity, symptoms, close proximity to symptoms, and online engagement", locations = cells_column_spanners(contains("CES-D")) ) %>% tab_footnote( footnote = "Adjusted for: income, ethnicity, age, cohabitation status, educational attainment, employment status, insurance status, and COVID-19 related: financial stress, lost income, resource scarcity, potential workplace exposure, symptoms, close proximity to symptoms, and online engagement", locations = cells_column_spanners(contains("MMPI")) ) %>% # add stripes opt_row_striping() mvreg_soc_cov_green_table ``` ## C. Sensitivity outputs : Greenspace + Sociodemographics ```{r} # Create three variable sets for sensitivity analysis # pss has sex only (run 2,3) # cesd has neither sex nor ethnicity (run 1-3) # mmpi has ethnicity only (run 1,3) # for loop initializes variable name for each regression table # naming convention: # - mvsg_[score]_alt# # then creates and formats gt table ``` ### 1. PSS ```{r} # run alt2, alt3 # naming convention: mvsg_pss_alt# for(i in seq_along(lm_tests_mvsg_pss_alt2)) { # creates variable names var_nm = paste0("mv_sgp2",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_pss_alt2[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } for(i in seq_along(lm_tests_mvsg_pss_alt3)) { # creates variable names var_nm = paste0("mv_sgp3",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_pss_alt3[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} # stack pss alt2 mvreg_soc_grn_pss_alt2_stack = tbl_stack(tbls= list(mv_sgp21,mv_sgp22,mv_sgp23,mv_sgp24,mv_sgp25,mv_sgp26,mv_sgp27)) # stack pss alt3 mvreg_soc_grn_pss_alt3_stack = tbl_stack(tbls= list(mv_sgp31,mv_sgp32,mv_sgp33,mv_sgp34,mv_sgp35,mv_sgp36,mv_sgp37)) pss_alt_soc_table = tbl_merge(tbls = list(mvreg_soc_grn_pss_alt2_stack,mvreg_soc_grn_pss_alt3_stack), tab_spanner = c("**PSS and ethnicity**", "**PSS, sex, and ethnicity**")) pss_alt_soc_table ``` ### 2. CES-D ```{r} # run alt1, alt2, alt3 # naming convention: mvsg_cesd_alt# for(i in seq_along(lm_tests_mvsg_cesd_alt1)) { # creates variable names var_nm = paste0("mv_sgc1",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_cesd_alt1[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } for(i in seq_along(lm_tests_mvsg_cesd_alt2)) { # creates variable names var_nm = paste0("mv_sgc2",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_cesd_alt2[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } for(i in seq_along(lm_tests_mvsg_cesd_alt3)) { # creates variable names var_nm = paste0("mv_sgc3",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_cesd_alt3[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} # stack mmpi alt1 mvreg_soc_grn_cesd_alt1_stack = tbl_stack(tbls= list(mv_sgc11,mv_sgc12,mv_sgc13,mv_sgc14,mv_sgc15,mv_sgc16,mv_sgc17)) # stack mmpi alt3 mvreg_soc_grn_cesd_alt2_stack = tbl_stack(tbls= list(mv_sgc21,mv_sgc22,mv_sgc23,mv_sgc24,mv_sgc25,mv_sgc26,mv_sgc27)) # stack mmpi alt3 mvreg_soc_grn_cesd_alt3_stack = tbl_stack(tbls= list(mv_sgc31,mv_sgc32,mv_sgc33,mv_sgc34,mv_sgc35,mv_sgc36,mv_sgc37)) cesd_alt_soc_table = tbl_merge(tbls = list(mvreg_soc_grn_cesd_alt1_stack,mvreg_soc_grn_cesd_alt2_stack,mvreg_soc_grn_cesd_alt3_stack), tab_spanner = c("**CES-D-10 and sex**", "**CES-D-10 and ethnicity**","**CES-D-10, sex, and ethnicity**")) cesd_alt_soc_table ``` ### 3. MMPI ```{r} # run alt1, alt3 # naming convention: mvsg_mmpi_alt# for(i in seq_along(lm_tests_mvsg_mmpi_alt1)) { # creates variable names var_nm = paste0("mv_sgm1",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_mmpi_alt1[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } for(i in seq_along(lm_tests_mvsg_mmpi_alt3)) { # creates variable names var_nm = paste0("mv_sgm3",i) # assigns variable to corresponding lm test value assign(var_nm,lm_tests_mvsg_mmpi_alt3[[i]] # gt table creation/formatting %>% tbl_regression( exponentiate=FALSE, pvalue_fun = function(x) style_pvalue(x, digits = 3), estimate_fun = function(x) style_number(x, digits = 2), include = names(grn_uv_iv_list[i]), label = names(grn_uv_iv_list[i]) ~ grnspc_labels_list[i]) %>% bold_p(t=0.05) %>% bold_labels() %>% italicize_levels()) } ``` ```{r} # stack mmpi alt1 mvreg_soc_grn_mmpi_alt1_stack = tbl_stack(tbls= list(mv_sgm11,mv_sgm12,mv_sgm13,mv_sgm14,mv_sgm15,mv_sgm16,mv_sgm17)) # stack mmpi alt3 mvreg_soc_grn_mmpi_alt3_stack = tbl_stack(tbls= list(mv_sgm31,mv_sgm32,mv_sgm33,mv_sgm34,mv_sgm35,mv_sgm36,mv_sgm37)) mmpi_alt_soc_table = tbl_merge(tbls = list(mvreg_soc_grn_mmpi_alt1_stack,mvreg_soc_grn_mmpi_alt3_stack), tab_spanner = c("**MMPI-2 and sex**", "**MMPI-2, sex, and ethnicity**")) mmpi_alt_soc_table ``` # VIII. Summary statistics ## Sociodemographics summary statistics table ```{r} soc_summary_table = all_data %>% dplyr::select(time_period,sex,age_group,income,ethnicity,race2,cohabit,BAplus,looking_for_work,insured) %>% tbl_summary( # group by time period by=time_period, # change variable row names label = list(sex ~ "Sex", age_group ~ "Age", income ~ "Income", ethnicity ~ "Ethnicity", race2 ~ "Race", cohabit ~ "Cohabitation status", BAplus ~ "Educational attainment", looking_for_work ~ "Looking for work?", insured ~ "Insurance status")) %>% # add total column add_overall() %>% bold_labels() %>% italicize_levels() %>% # set spanning header modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>% modify_header( # change table column names update = list(label ~ "**Sociodemographic variables**", stat_0 ~ "**Total** N={N}", stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)", stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)", stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)", stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>% modify_footnote( update = starts_with("stat") ~ "Before COVID-19 (before 3/12/21); Stay at home (3/13/20-5/8/20); Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21)" ) %>% as_gt() %>% opt_row_striping() soc_summary_table ``` ```{r} supp_soc_table = soc_data %>% dplyr::select(time_period,sex,age_group,income,ethnicity,race2,cohabit,BAplus,looking_for_work,insured) %>% tbl_summary( # group by time period by=time_period, # change variable row names label = list(sex ~ "Sex", age_group ~ "Age", income ~ "Income", ethnicity ~ "Ethnicity", race2 ~ "Race", cohabit ~ "Cohabitation status", BAplus ~ "Educational attainment", looking_for_work ~ "Looking for work?", insured ~ "Insurance status")) %>% # add total column add_overall() %>% bold_labels() %>% italicize_levels() %>% # set spanning header modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>% modify_header( # change table column names update = list(label ~ "**Sociodemographic variables**", stat_0 ~ "**Total** N={N}", stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)", stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)", stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)", stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>% modify_footnote( update = starts_with("stat") ~ "Before COVID-19 (before 3/12/21); Stay at home (3/13/20-5/8/20); Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21)" ) %>% as_gt() %>% # hides empty before covid column cols_hide( columns = vars(stat_1) ) %>% opt_row_striping() supp_soc_table ``` ## COVID-19 summary statistics table ### Ungrouped ```{r} sum_table_covid = covid_data %>% dplyr::select(time_period,covid_financial,covid_lossincome,covid_resources,covid_workexp,diagnosed,covid_symptoms,covid_closesymp,covid_closeexp,covid_online,covid_green) %>% tbl_summary( # group by time period by=time_period, # rename covid variables label = list(covid_financial ~ "COVID-19 has impacted me negatively from a financial point of view", covid_lossincome ~ "I have lost job-related income due to COVID-19", covid_resources ~ "I have had a hard time getting needed resources (food, toilet paper) due to COVID-19", covid_workexp ~ "I had to continue to work even though I was in close contact with people who might be infected (e.g. customers, patients, co-workers)", diagnosed ~ "I have been diagnosed with COVID-19", covid_symptoms ~ "I have had COVID-19-like symptoms at some point in the last two months", covid_closesymp ~"I have been in close proximity with someone who has been diagnosed with COVID-19", covid_closeexp ~ "I have been in close proximity with someone who has had COVID-19-like symptoms in the last two months", covid_online ~ "I spend a huge percentage of my time trying to find updates online or on TB about COVID-19", covid_green ~ "I have spent more time outside on parks, on trails, and near nature in the past month compared to the same time last year")) %>% # add total column add_overall() %>% bold_labels() %>% italicize_levels() %>% # set spanning header modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>% modify_header( # change table column names update = list(label ~ "**COVID-19 impact variables**", stat_0 ~ "**Total** N={N}", stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)", stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)", stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)", stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>% modify_footnote(list( update = starts_with("stat") ~ "Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21). No respondents completed COVID-19 questions in Before COVID-19 (before 3/12/21) or Stay at home (3/13/20-5/8/20) periods.", label ~ "Where 1 = Not true of me at all and 7 = Very true of me")) %>% as_gt() %>% # hides empty before covid and stay at home columns cols_hide( columns = c(stat_1,stat_2) ) %>% opt_row_striping() sum_table_covid ``` ### Grouped ```{r} ## Grouping covid impact questions into three groups: 1-3 = not true of me at all/little impact, 4 = neutral, 5-7 = very true/substantial impact covid_recode = function(x) { dplyr::recode(x,"1" = "Little impact","2" = "Little impact","3" = "Little impact", "4" = "Neutral", "5" = "Substantial impact","6" = "Substantial impact","7" = "Substantial impact") } sum_table_covid_grouped = covid_data %>% dplyr::select(time_period,covid_financial,covid_lossincome,covid_resources,covid_workexp,covid_symptoms,covid_closesymp,covid_closeexp,covid_online,covid_green) %>% mutate_at(c("covid_financial","covid_lossincome","covid_resources","covid_workexp","covid_symptoms","covid_closesymp","covid_closeexp","covid_online","covid_green"),covid_recode) %>% tbl_summary( # group by time period by=time_period, # rename covid variables label = list(covid_financial ~ "COVID-19 has impacted me negatively from a financial point of view", covid_lossincome ~ "I have lost job-related income due to COVID-19", covid_resources ~ "I have had a hard time getting needed resources (food, toilet paper) due to COVID-19", covid_workexp ~ "I had to continue to work even though I was in close contact with people who might be infected (e.g. customers, patients, co-workers)", covid_symptoms ~ "I have had COVID-19-like symptoms at some point in the last two months", covid_closesymp ~"I have been in close proximity with someone who has been diagnosed with COVID-19", covid_closeexp ~ "I have been in close proximity with someone who has had COVID-19-like symptoms in the last two months", covid_online ~ "I spend a huge percentage of my time trying to find updates online or on TB about COVID-19", covid_green ~ "I have spent more time outside on parks, on trails, and near nature in the past month compared to the same time last year")) %>% # add total column add_overall() %>% bold_labels() %>% italicize_levels() %>% # set spanning header modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>% modify_header( # change table column names update = list(label ~ "**COVID-19 impact variables**", stat_0 ~ "**Total** N={N}", stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)", stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)", stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)", stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) %>% modify_footnote(list( update = starts_with("stat") ~ "Reopening (5/9/20-10/12/20); Fall wave (10/13/20-1/2/21). No respondents completed COVID-19 questions in Before COVID-19 (before 3/12/21) or Stay at home (3/13/20-5/8/20) periods.", label ~ "Where 1-3 = Little impact, 4 = Neutral, and 5-7 = Substantial impact.")) sum_table_covid_diagnosed_grouped = covid_data %>% dplyr::select(time_period, diagnosed) %>% tbl_summary( by = time_period, label = diagnosed ~ "I have been diagnosed with COVID-19") %>% add_overall() %>% bold_labels() %>% italicize_labels() %>% modify_spanning_header(c("stat_1","stat_2","stat_3","stat_4") ~ "**COVID-19 time period**") %>% modify_header( # change table column names update = list(label ~ "**COVID-19 impact variables**", stat_0 ~ "**Total** N={N}", stat_1 ~ "**Before COVID-19** N={n} ({style_percent(p)}%)", stat_2 ~ "**Stay at home period** N={n} ({style_percent(p)}%)", stat_3 ~ "**Reopening period** N={n} ({style_percent(p)}%)", stat_4 ~ "**Fall wave** N={n} ({style_percent(p)}%)")) sum_tab_cov_grouped = tbl_stack(tbls= list(sum_table_covid_grouped,sum_table_covid_diagnosed_grouped)) %>% as_gt() %>% # hides empty before covid and stay at home columns cols_hide( columns = c(stat_1,stat_2) ) %>% opt_row_striping() sum_tab_cov_grouped ``` # IX. Chi-squared tests to compare observed (survey) versus expected (census) population proportions Result: all of the observed values (from the sample) are statistically significantly different from the expected values (based on the Denver Census data). ```{r} neigh_acs18 = read.csv("/Users/esrieves/Documents/GitHub/grnspc/2018_neighborhood_5yracs.csv") neigh_acs18 den_acs19 = read.csv("/Users/esrieves/Documents/GitHub/grnspc/2019_acs5yr.csv") den_acs19 ``` ```{r} # Sex chi # order: female, male ob_sex = all_data %>% count(sex) %>% mutate(pct = n/sum(n)) ob_sex_df = ob_sex ob_sex=as.numeric(ob_sex$n) ob_sex ## ACS 2019 - Denver County comparison # order: female,male ex_sex = den_acs19[4:3,4]/100 ex_sex sex_chi_res = chisq.test(ob_sex, p=ex_sex) sex_chi_res sex_chi_res_p = as.numeric(sex_chi_res[3]) ## ACS 2018 - Neighborhood comparison # order: female, male ex_sex_nei = neigh_acs18[30:29,2]/100 ex_sex_nei sex_nei_chi = chisq.test(ob_sex, p=ex_sex_nei) sex_nei_chi sex_nei_chi_p = as.numeric(sex_nei_chi[3]) ob_sex_df = ob_sex_df %>% mutate(Denver_obs = ex_sex) %>% mutate(Den_p = sex_chi_res_p) %>% mutate(Neigh_obs = ex_sex_nei) %>% mutate(Neigh_p = sex_nei_chi_p) %>% rename(category = sex) ob_sex_df ``` ```{r} # Ethnicity chi # order: not hisp, hisp ob_eth = all_data %>% count(ethnicity,sort=TRUE) %>% mutate(pct = n/sum(n)) ob_eth_df = ob_eth ob_eth ob_eth = as.numeric(ob_eth$n) ob_eth ## ACS 2019 - Denver County comparison # order: Not Hispanic,Hispanic ex_eth = den_acs19[22:21,4]/100 ex_eth eth_chi_res = chisq.test(ob_eth, p=ex_eth) eth_chi_res eth_chi_res_p =as.numeric(eth_chi_res[3]) ## ACS 2018 - Neighborhood comparison # order: not hispanic, hispanic ex_eth_nei = neigh_acs18[20:19,2]/100 ex_eth_nei eth_nei_chi = chisq.test(ob_eth,p=ex_eth_nei) eth_nei_chi eth_nei_chi_p = as.numeric(eth_nei_chi[3]) ob_eth_df = ob_eth_df %>% mutate(Denver_obs = ex_eth) %>% mutate(Den_p = eth_chi_res_p) %>% mutate(Neigh_obs = ex_eth_nei) %>% mutate(Neigh_p = eth_nei_chi_p) %>% rename(category = ethnicity) ob_eth_df ``` ```{r} # age group chi # re-filter age to match ACS groupings # there are 15 NA, who are the < 20 y.o. all_data$acs_age_groups = ifelse(20 <= all_data$age & all_data$age < 35, "20 to 34 years", ifelse(35 <= all_data$age & all_data$age < 55, "35 to 54 years", ifelse(55 <= all_data$age & all_data$age < 65, "55 to 64 years", ifelse(65 <= all_data$age,"65+",NA)))) # ascending order ob_age = all_data %>% filter(!is.na(acs_age_groups)) %>% count(acs_age_groups,sort=TRUE) %>% arrange(acs_age_groups) %>% mutate(pct = n/sum(n)) ob_age_df = ob_age ob_age = as.numeric(ob_age$n) ob_age # order: 20-34,35-54,55-64,65+, ## ACS 2019 - Denver County comparison ex_age = den_acs19[5:8,4]/100 ex_age age_chi_res = chisq.test(ob_age, p=ex_age) age_chi_res age_chi_res_p = as.numeric(age_chi_res[3]) ## ACS 2018 - Neighborhood comparison # NOTE < 20s have been removed.. not a perfect comparison # order: ascending ex_age_nei = neigh_acs18[33:36,2]/100 age_nei_chi = chisq.test(ob_age, p=ex_age_nei,rescale.p = TRUE) age_nei_chi age_nei_chi_p = as.numeric(age_nei_chi[3]) ob_age_df = ob_age_df %>% mutate(Denver_obs = ex_age) %>% mutate(Den_p = age_chi_res_p) %>% mutate(Neigh_obs = ex_age_nei) %>% mutate(Neigh_p = age_nei_chi_p) %>% rename(category = acs_age_groups) ob_age_df ``` ```{r} # income chi # descending order ob_inc = all_data %>% filter(!is.na(income)) %>% count(income,sort=TRUE) %>% mutate(pct = n/sum(n)) ob_inc_df = ob_inc ob_inc = as.numeric(ob_inc$n) ob_inc ## 2019 ACS - Denver County comparisons # order: descending ex_inc = den_acs19[44:39,4]/100 ex_inc inc_chi_res = chisq.test(ob_inc, p=c(ex_inc)) inc_chi_res_p = as.numeric(inc_chi_res[3]) inc_chi_res inc_chi_res_p = as.numeric(inc_chi_res[3]) ## 2018 ACS - Neighborhood comparisons # order: descending ex_inc_nei = neigh_acs18[11:6,2]/100 inc_nei_chi = chisq.test(ob_inc,p=ex_inc_nei,rescale.p = TRUE) inc_nei_chi inc_nei_chi_p = as.numeric(inc_nei_chi[3]) ob_inc_df = ob_inc_df %>% mutate(Denver_obs = ex_inc) %>% mutate(Den_p = inc_chi_res_p) %>% mutate(Neigh_obs = ex_inc_nei) %>% mutate(Neigh_p = inc_nei_chi_p) %>% rename(category = income) ob_inc_df ``` ```{r} # race chi # order initially: white, NA, Black, multiracial, Native American, Asian ob_race = all_data %>% count(race2,sort=TRUE) %>% mutate(pct = n/sum(n)) # reorder to be: Multiracial, White, Black, Asian, Native American, NA ob_race$race2 = ob_race$race2[c(4,1,3,6,5,2)] ob_race_df = ob_race ob_race = as.numeric(ob_race$n) ob_race ## 2019 ACS - Denver County comparisons # order: Multiracial, White, Black, Asian, Native American, "Some other race" (NA) ex_race1 = den_acs19[12:18,4]/100 # remove "one race" total category (acs_5yr[13]) ex_race = ex_race1[c(-2)] # probability sums to 100.1257, therefore use rescale.p in chi test below race_chi_res = chisq.test(ob_race, p = ex_race,rescale.p = TRUE) race_chi_res race_chi_res_p = as.numeric(race_chi_res[3]) ## 2018 ACS - Neighborhood comparisons ex_race_nei1 = neigh_acs18[22:27,2]/100 # reordering to match above orders: Multiracial, White, Black, Asian, Native American, "Some other race" (NA) ex_race_nei = ex_race_nei1[c(6,1,2,4,3,5)] race_nei_chi = chisq.test(ob_race, p=ex_race_nei,rescale.p = TRUE) race_nei_chi race_nei_chi_p = as.numeric(race_nei_chi[3]) ob_race_df = ob_race_df %>% mutate(Denver_obs = ex_race) %>% mutate(Den_p = race_chi_res_p) %>% mutate(Neigh_obs = ex_race_nei)%>% mutate(Neigh_p = race_nei_chi_p) %>% rename(category = race2) ob_race_df ``` ```{r} # Education chi # order: Baplus, ltBA ob_edu = all_data %>% filter(!is.na(BAplus)) %>% count(BAplus,sort=TRUE) %>% mutate(pct = n/sum(n)) ob_edu_df = ob_edu ob_edu = as.numeric(ob_edu$n) ob_edu ## 2019 ACS - Denver County comparisons # order: BAplus, ltBA ex_edu = den_acs19[29:28,4]/100 # probability sums to just under 100, therefore use rscale.p in test edu_chi_res = chisq.test(ob_edu, p=ex_edu,rescale.p = TRUE) edu_chi_res edu_chi_res_p = as.numeric(edu_chi_res[3]) ## 2018 ACS - Neighborhood comparisons # order: BAplus, ltBA ex_edu_nei = neigh_acs18[14:13,2]/100 edu_nei_chi = chisq.test(ob_edu, p=ex_edu_nei) edu_nei_chi edu_nei_chi_p = as.numeric(edu_nei_chi[3]) ob_edu_df = ob_edu_df %>% mutate(Denver_obs = ex_edu) %>% mutate(Den_p = edu_chi_res_p) %>% mutate(Neigh_obs = ex_edu_nei) %>% mutate(Neigh_p = edu_nei_chi_p) %>% rename(category = BAplus) ob_edu_df ``` ```{r} # Insurance chi # order: insured, uninsured ob_ins = all_data %>% filter(!is.na(insured)) %>% count(insured,sort=TRUE) %>% mutate(pct = n/sum(n)) ob_ins_df = ob_ins ob_ins = as.numeric(ob_ins$n) ob_ins ## 2019 ACS - Denver County comparisons # order: with insurance, without insurance ex_ins = den_acs19[47:48,4]/100 ex_ins ins_chi_res = chisq.test(ob_ins, p=c(ex_ins)) ins_chi_res ins_chi_res_p = as.numeric(ins_chi_res[3]) ## 2018 ACS - Neighborhood comparisons # order: insured, uninsured # NOT SIG P!!! ex_ins_nei = neigh_acs18[2:1,2]/100 ins_nei_chi = chisq.test(ob_ins, p=ex_ins_nei) ins_nei_chi ins_nei_chi_p = as.numeric(ins_nei_chi[3]) ob_ins_df = ob_ins_df %>% mutate(Denver_obs = ex_ins) %>% mutate(Den_p = ins_chi_res_p) %>% mutate(Neigh_obs = ex_ins_nei) %>% mutate(Neigh_p = ins_nei_chi_p) %>% rename(category = insured) ob_ins_df ``` ```{r} ## Create dataframe of results # initializes an empty dataframe to fill with chi_results demo_comps = rbind(ob_sex_df,ob_age_df,ob_eth_df,ob_edu_df,ob_inc_df,ob_race_df,ob_ins_df) demo_comps = data.frame(demo_comps) demo_comps table = demo_comps %>% gt() %>% fmt_percent( columns = c(pct,Denver_obs,Neigh_obs), decimals = 1 ) table ```