From 4d948ec5437352f256c1797b3fe97926ab776423 Mon Sep 17 00:00:00 2001 From: Nathan Muncy Date: Thu, 29 Apr 2021 12:18:02 -0400 Subject: [PATCH 1/7] beginning refactor --- afq_step3_stats.R | 277 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 275 insertions(+), 2 deletions(-) diff --git a/afq_step3_stats.R b/afq_step3_stats.R index 1c2d689..1b3aaeb 100644 --- a/afq_step3_stats.R +++ b/afq_step3_stats.R @@ -11,6 +11,15 @@ library("dplyr") library("lme4") +### TODO: +# +# 1) Combine plot_diff functions +# +# 2) find group diffs from plot_diff when +# group n=3 + + + ### Set Up # Orienting paths - set globally dataDir <- "/Users/nmuncy/Projects/emu_AFQ/analyses/" @@ -21,6 +30,7 @@ plotDir_lm <- paste0(dataDir, "plots_lm/") statsDir_lm <- paste0(dataDir, "stats_lm/") tableDir <- paste0(dataDir, "tables/") + # set lists groupType <- 1:2 tractList <- c("UNC_L", "UNC_R", "FA") @@ -303,6 +313,12 @@ func_stat_mem <- function(df_afq){ } func_switch_g1 <- function(value){ + + ### --- Notes: + # + # Switch for determining group, coloring + # for grouping set 1 + x_col <- switch( value, "0" = "blue", @@ -318,6 +334,12 @@ func_switch_g1 <- function(value){ } func_switch_g2 <- function(value){ + + ### --- Notes: + # + # Switch for determining group, coloring + # for grouping set 2 + x_col <- switch( value, "0" = "blue", @@ -335,12 +357,17 @@ func_switch_g2 <- function(value){ } func_switch_name <- function(tract){ + + ### --- Notes: + # + # Switch for determining name + # of tracts + x_tract <- switch( tract, "UNC_L" = "L. Uncinate", "UNC_R" = "R. Uncinate", "FA" = "A. Forceps", - "CST_R" = "R. Cortico-spinal Tract", ) return(x_tract) } @@ -405,7 +432,8 @@ func_plot_diff <- function(model, tract, g_type){ # # This function will determine, plot differences # between two splines. - # The difference values will be returned. + # + # A table of significant nodes is written. # # wrapped by func_stat_diff @@ -720,6 +748,7 @@ func_stat_diff <- function(model, tract, g_type){ } for(comp in compList){ + # get table made by func_plot_diff h_cmd = paste0( "tail -n +10 ", tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, @@ -1072,6 +1101,250 @@ func_stat_lm_new <- function(df_lm, tract, gType, h_str, comp){ } + +func_plot_diff1 <- function(model, tract){ + + ### --- Notes: + # + # This will draw plots and write tables of sig + # node differences for GAM when group style=1 + + png(filename = paste0( + plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png"), + width = 600, height = 600 + ) + + gA <- func_switch_g1("0")[[2]][1] + gB <- func_switch_g1("1")[[2]][1] + + par(mar=c(5,5,4,2)) + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + main = paste0("Difference Scores, ", gA, "-", gB), + ylab = "Est. FA difference", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 1.5), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_01.txt" + ) + ) + par(mar=c(5,4,4,2)) + dev.off() +} + +func_plot_diff2 <- function(model, tract){ + + ### --- Notes: + # + # This will draw plots and write tables of sig + # node differences for GAM when group style=2 + + png(filename = paste0( + plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png" + ), width = 1800, height = 600 + ) + + gA <- func_switch_g2("0")[[2]][1] + gB <- func_switch_g2("1")[[2]][1] + gC <- func_switch_g2("2")[[2]][1] + + par(mfrow=c(1,3), mar=c(5,5,4,2)) + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + main = paste0("Difference Scores, ", gA, "-", gB), + ylab = "Est. FA difference", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 1.5), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_01.txt" + ) + ) + + par(mar=c(5,3,4,2)) + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "2")), + rm.ranef = T, + main = paste0("Difference Scores, ", gA, "-", gC), + ylab = "", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 2), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_02.txt" + ) + ) + + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("1", "2")), + rm.ranef = T, + main = paste0("Difference Scores, ", gB, "-", gC), + ylab = "", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 2), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_12.txt" + ) + ) + par(mfrow=c(1,1), mar=c(5,4,4,2)) + dev.off() +} + +func_mkdf_diff1 <- function(model){ + + ### --- Notes: + # + # Returns a dataframe of difference + # scores for each node. + + p01 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + plot = F) + + colnames(p01) <- c(colnames(p01[,1:4]), "Comp") + p01$Comp <- "01" + return(p01) +} + +func_mkdf_diff2 <- function(model){ + + ### --- Notes: + # + # Returns a dataframe of difference + # scores for each node. + + p01 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + plot = F) + colnames(p01) <- c(colnames(p01[,1:4]), "Comp") + p01$Comp <- "01" + + p02 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "2")), + rm.ranef = T, + plot = F) + colnames(p02) <- c(colnames(p02[,1:4]), "Comp") + p02$Comp <- "02" + + p12 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("1", "2")), + rm.ranef = T, + plot = F) + colnames(p12) <- c(colnames(p12[,1:4]), "Comp") + p12$Comp <- "12" + + df_out <- as.data.frame(matrix(NA, nrow=3*dim(p01)[1], ncol=dim(p01)[2])) + colnames(df_out) <- colnames(p01) + df_out <- rbind(p01, p02, p12) + return(df_out) +} + + +func_stat_diff_new <- function(model, tract, g_type){ + + # 1) func_plot_diff + # make plots, write tables + # determine diff nodes + # + # 2) func_max_diff + # make dataframes + # + # 3) determine nodes where groups differ + # return df_diff. + # Account for 2/3 groups + + ## Step 1 - make plots and tables + if(g_type == 1){ + func_plot_diff1(model, tract) + }else if(g_type == 2){ + func_plot_diff2(model, tract) + } + + # use tables to get sig nodes, save in df_nodeDiff + df_nodeDiff <- as.data.frame(matrix(NA, nrow=1, ncol=4)) + colnames(df_nodeDiff) <- c("Comparison", "Section", "Start", "End") + + if(g_type == 1){ + compList <- "01" + }else if(g_type == 2){ + compList <- c("01", "02", "12") + } + + # get table made by func_plot_diff for e/comparison + for(comp in compList){ + + # write, use bash + h_cmd = paste0( + "tail -n +10 ", + tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, + ".txt | sed 's/-/,/g'" + ) + h_lines <- system(h_cmd, intern = T) + + # make df + h_df <- read.table( + text=paste(h_lines, collapse = "\n"), + header = F, + stringsAsFactors = F, + sep = "," + ) + + # write to df_nodeDiff + for(i in 1:dim(h_df)[1]){ + df_nodeDiff <- rbind(df_nodeDiff, c(comp, i, h_df[i,1], h_df[i,2])) + } + } + df_nodeDiff <- na.omit(df_nodeDiff) + + + ## Step 2 - get plot_diff dataframes + if(g_type == 1){ + df_estDiff <- func_mkdf_diff1(model) + }else if(g_type == 2){ + df_estDiff <- func_mkdf_diff2(model) + } + + + ## Step 3 - for grouping 2, determine + # intx nodes + nodeList <- unique(df_allDiff$nodeID) + + + +} + + + ### Work # Two analyses (grouping types): # 1) Con vs Anx From 6e37b2961ce85b72216a7aa8c2955149d2619aeb Mon Sep 17 00:00:00 2001 From: Nathan Muncy Date: Fri, 30 Apr 2021 09:58:33 -0400 Subject: [PATCH 2/7] found group diffs, adding max diff --- afq_step3_stats.R | 99 ++++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 36 deletions(-) diff --git a/afq_step3_stats.R b/afq_step3_stats.R index 1b3aaeb..312202c 100644 --- a/afq_step3_stats.R +++ b/afq_step3_stats.R @@ -1290,55 +1290,82 @@ func_stat_diff_new <- function(model, tract, g_type){ func_plot_diff2(model, tract) } - # use tables to get sig nodes, save in df_nodeDiff - df_nodeDiff <- as.data.frame(matrix(NA, nrow=1, ncol=4)) - colnames(df_nodeDiff) <- c("Comparison", "Section", "Start", "End") + # # use tables to get sig nodes, save in df_sigNodes + # df_sigNodes <- as.data.frame(matrix(NA, nrow=1, ncol=4)) + # colnames(df_sigNodes) <- c("Comparison", "Section", "Start", "End") + # + # if(g_type == 1){ + # compList <- "01" + # }else if(g_type == 2){ + # compList <- c("01", "02", "12") + # } + # + # # get table made by func_plot_diff for e/comparison + # for(comp in compList){ + # + # # write, use bash + # h_cmd = paste0( + # "tail -n +10 ", + # tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, + # ".txt | sed 's/-/,/g'" + # ) + # h_lines <- system(h_cmd, intern = T) + # + # # make df + # h_df <- read.table( + # text=paste(h_lines, collapse = "\n"), + # header = F, + # stringsAsFactors = F, + # sep = "," + # ) + # + # # write to df_sigNodes + # for(i in 1:dim(h_df)[1]){ + # df_sigNodes <- rbind(df_sigNodes, c(comp, i, h_df[i,1], h_df[i,2])) + # } + # } + # df_sigNodes <- na.omit(df_sigNodes) + + ## Step 2 - get plot_diff dataframes if(g_type == 1){ - compList <- "01" + df_estDiff <- func_mkdf_diff1(model) }else if(g_type == 2){ - compList <- c("01", "02", "12") + df_estDiff <- func_mkdf_diff2(model) } - # get table made by func_plot_diff for e/comparison - for(comp in compList){ - - # write, use bash - h_cmd = paste0( - "tail -n +10 ", - tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, - ".txt | sed 's/-/,/g'" - ) - h_lines <- system(h_cmd, intern = T) - - # make df - h_df <- read.table( - text=paste(h_lines, collapse = "\n"), - header = F, - stringsAsFactors = F, - sep = "," - ) - - # write to df_nodeDiff - for(i in 1:dim(h_df)[1]){ - df_nodeDiff <- rbind(df_nodeDiff, c(comp, i, h_df[i,1], h_df[i,2])) + + ## Step 3 - for grouping 2, determine + # nodes where all groups differ from + # e/o (diff) + nodeList <- unique(df_estDiff$nodeID) + diffList <- vector() + for(node in nodeList){ + ind_node <- which(df_estDiff$nodeID == node) + if(g_type == 1){ + if((abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0){ + diffList <- c(diffList, node) + } + }else if(g_type == 2){ + if( + (abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0 & + (abs(df_estDiff[ind_node[2],]$est) - df_estDiff[ind_node[2],]$CI) > 0 & + (abs(df_estDiff[ind_node[3],]$est) - df_estDiff[ind_node[3],]$CI) > 0 + ){ + diffList <- c(diffList, node) + } } } - df_nodeDiff <- na.omit(df_nodeDiff) - - ## Step 2 - get plot_diff dataframes + # find max diff + maxList <- vector() if(g_type == 1){ - df_estDiff <- func_mkdf_diff1(model) + ind_max <- max(abs(df_estDiff[which(df_estDiff$nodeID == diffList),]$est)) }else if(g_type == 2){ - df_estDiff <- func_mkdf_diff2(model) + sapply(diffList, ) } - ## Step 3 - for grouping 2, determine - # intx nodes - nodeList <- unique(df_allDiff$nodeID) - } From 515082227441e46d9b55b5cd22c03dba5d2e056f Mon Sep 17 00:00:00 2001 From: Nathan Muncy Date: Fri, 30 Apr 2021 11:11:16 -0400 Subject: [PATCH 3/7] found max node --- afq_step3_stats.R | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/afq_step3_stats.R b/afq_step3_stats.R index 312202c..c20161a 100644 --- a/afq_step3_stats.R +++ b/afq_step3_stats.R @@ -1336,8 +1336,7 @@ func_stat_diff_new <- function(model, tract, g_type){ ## Step 3 - for grouping 2, determine - # nodes where all groups differ from - # e/o (diff) + # nodes where all groups differ from e/o nodeList <- unique(df_estDiff$nodeID) diffList <- vector() for(node in nodeList){ @@ -1358,16 +1357,23 @@ func_stat_diff_new <- function(model, tract, g_type){ } # find max diff - maxList <- vector() if(g_type == 1){ - ind_max <- max(abs(df_estDiff[which(df_estDiff$nodeID == diffList),]$est)) + + h_df <- subset(df_estDiff, nodeID %in% diffList) + ind_max <- which(abs(h_df$est) == max(abs(h_df$est))) + node_max <- h_df[ind_max,]$nodeID + }else if(g_type == 2){ - sapply(diffList, ) + f_sum <- function(x){ + sum(abs(df_estDiff[which(df_estDiff$nodeID == x),]$est)) + } + h_sum <- sapply(diffList, f_sum) + names(h_sum) <- diffList + h_max <- which(h_sum == max(h_sum)) + node_max <- as.numeric(names(h_sum[h_max])) } - - - + return(list(diffList, node_max)) } From aab3ad3fb318e0b1aac0067daab22177d7df028c Mon Sep 17 00:00:00 2001 From: Nathan Muncy Date: Fri, 30 Apr 2021 12:24:58 -0400 Subject: [PATCH 4/7] commit before function deletion --- afq_step3_stats.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/afq_step3_stats.R b/afq_step3_stats.R index c20161a..58a5a01 100644 --- a/afq_step3_stats.R +++ b/afq_step3_stats.R @@ -36,7 +36,7 @@ groupType <- 1:2 tractList <- c("UNC_L", "UNC_R", "FA") -# Functions +# Functions - general func_df_master <- function(g_type){ ### --- Notes: @@ -372,6 +372,8 @@ func_switch_name <- function(tract){ return(x_tract) } + +# Functions - GAM func_plot_gam <- function(model, tract, g_type, df_tract){ ### --- Notes: @@ -771,6 +773,8 @@ func_stat_diff <- function(model, tract, g_type){ return(df_out) } + +# Functions - LM func_df_avg <- function(comp, df_tract, df_diff){ ### --- Notes: @@ -1269,7 +1273,6 @@ func_mkdf_diff2 <- function(model){ return(df_out) } - func_stat_diff_new <- function(model, tract, g_type){ # 1) func_plot_diff From 974390a0bc50754bdaba6f4f3fadb23e1f1cf544 Mon Sep 17 00:00:00 2001 From: Nathan Muncy Date: Fri, 30 Apr 2021 13:52:05 -0400 Subject: [PATCH 5/7] commit before deletion of lm functions --- afq_step3_stats.R | 1359 ++++++++++++++++++++------------------------- 1 file changed, 598 insertions(+), 761 deletions(-) diff --git a/afq_step3_stats.R b/afq_step3_stats.R index 58a5a01..94ae1bc 100644 --- a/afq_step3_stats.R +++ b/afq_step3_stats.R @@ -382,6 +382,8 @@ func_plot_gam <- function(model, tract, g_type, df_tract){ # dti data # # wrapped by func_stat_gam + # + # TODO update h_cols, etc to use switch # plot df_pred <- predict.bam( @@ -428,173 +430,6 @@ func_plot_gam <- function(model, tract, g_type, df_tract){ ggsave(paste0(plotDir_gam, "Plot_GAM_", tract, "_", "G", g_type, ".png")) } -func_plot_diff <- function(model, tract, g_type){ - - ### --- Notes: - # - # This function will determine, plot differences - # between two splines. - # - # A table of significant nodes is written. - # - # wrapped by func_stat_diff - - if(g_type == 2){ - - png(filename = paste0( - plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png" - ), width = 1800, height = 600 - ) - par(mfrow=c(1,3)) - par(mar=c(5,5,4,2)) - - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - main = "Difference Scores, Con-GAD", - ylab = "Est. FA difference", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 1.5), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_01.txt" - ) - ) - - par(mar=c(5,3,4,2)) - - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "2")), - rm.ranef = T, - main = "Difference Scores, Con-SAD", - ylab = "", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 2), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_02.txt" - ) - ) - - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("1", "2")), - rm.ranef = T, - main = "Difference Scores, GAD-SAD", - ylab = "", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 2), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_12.txt" - ) - ) - - par(mfrow=c(1,1)) - par(mar=c(5,4,4,2)) - dev.off() - - }else if(g_type == 1){ - - png(filename = paste0( - plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png"), - width = 600, height = 600 - ) - par(mar=c(5,5,4,2)) - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - main = "Difference Scores, Con-Anx", - ylab = "Est. FA difference", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 1.5), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_01.txt" - ) - ) - par(mar=c(5,4,4,2)) - dev.off() - } - - # return(list(p01,p02,p12)) -} - -func_max_diff <- function(model, g_type){ - - ### --- Notes: - # - # Return node of max diff per contrast - - if(g_type == 2){ - - p01 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - plot = F) - - m01 <- p01[which(abs(p01$est) == max(abs(p01$est))),]$nodeID - - p02 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "2")), - rm.ranef = T, - plot = F) - - m02 <- p02[which(abs(p02$est) == max(abs(p02$est))),]$nodeID - - p12 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("1", "2")), - rm.ranef = T, - plot = F) - - m12 <- p12[which(abs(p12$est) == max(abs(p12$est))),]$nodeID - - df_out <- as.data.frame(matrix(NA, nrow=3, ncol=2)) - colnames(df_out) <- c("Comparison", "Node") - df_out[,1] <- c("01", "02", "12") - df_out[,2] <- c(m01, m02, m12) - - }else if(g_type == 1){ - - p01 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - plot = F) - - m01 <- p01[which(abs(p01$est) == max(abs(p01$est))),]$nodeID - - df_out <- as.data.frame(matrix(NA, nrow=1, ncol=2)) - colnames(df_out) <- c("Comparison", "Node") - df_out[,1] <- "01" - df_out[,2] <- m01 - } - - return(df_out) -} - func_stat_gam <- function(tract, df_tract, g_type){ ### --- Notes: @@ -731,656 +566,650 @@ func_stat_gam <- function(tract, df_tract, g_type){ # func_plot_gam(df_pred, plot_title, tract, g_type) return(fit_cov_pds) } - -func_stat_diff <- function(model, tract, g_type){ - - ### --- Notes: - # - # Test for differences in GAM splines - - func_plot_diff(model, tract, g_type) - - # make table of sig regions - df_out <- as.data.frame(matrix(NA, nrow=1, ncol=4)) - colnames(df_out) <- c("Comparison", "Section", "Start", "End") - if(g_type == 1){ - compList <- "01" - }else if(g_type == 2){ - compList <- c("01", "02", "12") - } - for(comp in compList){ - - # get table made by func_plot_diff - h_cmd = paste0( - "tail -n +10 ", - tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, - ".txt | sed 's/-/,/g'" - ) - - h_lines <- system(h_cmd, intern = T) - h_df <- read.table( - text=paste(h_lines, collapse = "\n"), - header = F, - stringsAsFactors = F, - sep = "," - ) - - for(i in 1:dim(h_df)[1]){ - df_out <- rbind(df_out, c(comp, i, h_df[i,1], h_df[i,2])) - } - } - df_out <- na.omit(df_out) - return(df_out) -} - -# Functions - LM -func_df_avg <- function(comp, df_tract, df_diff){ +func_plot_diff1 <- function(model, tract, g_type){ ### --- Notes: - # - # Make a dataframe of averaged FA - # values from all regions that differ - # between two splines - - grpA <- as.numeric(substr(comp, start=1, stop=1)) - grpB <- as.numeric(substr(comp, start=2, stop=2)) - df_comp <- df_diff[which(df_diff$Comparison == comp),] + # + # This will draw plots and write tables of sig + # node differences for GAM when group style=1 - # make dataframe - subjList <- unique( - df_tract[which( - df_tract$Group == grpA | df_tract$Group == grpB - ),]$subjectID + png(filename = paste0( + plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png"), + width = 600, height = 600 ) - df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) - colnames(df_lm) <- c("Comp", "Subj", "FAvalue", - "Pars6", "Group", - "NeuLGI", "NeuLDI", - "NegLGI", "NegLDI") - df_lm$Subj <- subjList + gA <- func_switch_g1("0")[[2]][1] + gB <- func_switch_g1("1")[[2]][1] - for(subj in subjList){ - h_mean <- vector() - for(i in 1:dim(df_comp)[1]){ - - h_start <- which( - df_tract$subjectID == subj & - df_tract$nodeID == df_comp[i,]$Start - ) - - h_end <- which( - df_tract$subjectID == subj & - df_tract$nodeID == df_comp[i,]$End - ) - - h_mean <- c(h_mean, mean(df_tract[h_start:h_end,]$dti_fa)) - } - - ind_out <- which(df_lm$Subj == subj) - df_lm[ind_out,]$FAvalue <- round(mean(h_mean), 4) - - ind_subj <- which(df_tract$subjectID == subj)[1] - df_lm[ind_out,]$Pars6 <- df_tract[ind_subj,]$Pars6 - df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_subj,]$Group) - 1 - - df_lm[ind_out,]$NeuLGI <- df_tract[ind_subj,]$NeuLGI - df_lm[ind_out,]$NeuLDI <- df_tract[ind_subj,]$NeuLDI - df_lm[ind_out,]$NegLGI <- df_tract[ind_subj,]$NegLGI - df_lm[ind_out,]$NegLDI <- df_tract[ind_subj,]$NegLDI - - df_lm[ind_out,]$Comp <- comp - } - df_lm$Group <- factor(df_lm$Group) - return(df_lm) + par(mar=c(5,5,4,2)) + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + main = paste0("Difference Scores, ", gA, "-", gB), + ylab = "Est. FA difference", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 1.5), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_01.txt" + ) + ) + par(mar=c(5,4,4,2)) + dev.off() } -func_df_max <- function(comp, df_tract, df_max){ +func_plot_diff2 <- function(model, tract, g_type){ ### --- Notes: # - # make a dataframe of FA values - # from single region showing showing - # maximal A-B difference + # This will draw plots and write tables of sig + # node differences for GAM when group style=2 - grpA <- as.numeric(substr(comp, start=1, stop=1)) - grpB <- as.numeric(substr(comp, start=2, stop=2)) - node <- df_max[which(df_max$Comparison == comp),]$Node + png(filename = paste0( + plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png" + ), width = 1800, height = 600 + ) - # make dataframe - subjList <- unique( - df_tract[which( - df_tract$Group == grpA | df_tract$Group == grpB - ),]$subjectID + gA <- func_switch_g2("0")[[2]][1] + gB <- func_switch_g2("1")[[2]][1] + gC <- func_switch_g2("2")[[2]][1] + + par(mfrow=c(1,3), mar=c(5,5,4,2)) + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + main = paste0("Difference Scores, ", gA, "-", gB), + ylab = "Est. FA difference", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 1.5), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_01.txt" + ) ) - df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) - colnames(df_lm) <- c("Comp","Subj", "FAvalue", - "Pars6", "Group", - "NeuLGI", "NeuLDI", - "NegLGI", "NegLDI") - df_lm$Subj <- subjList - for(subj in subjList){ - - ind_data <- which(df_tract$subjectID == subj & df_tract$nodeID == node) - ind_out <- which(df_lm$Subj == subj) - - df_lm[ind_out,]$Comp <- comp - df_lm[ind_out,]$FAvalue <- df_tract[ind_data,]$dti_fa - df_lm[ind_out,]$Pars6 <- df_tract[ind_data,]$Pars6 - df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_data,]$Group)-1 - df_lm[ind_out,]$NeuLGI <- df_tract[ind_data,]$NeuLGI - df_lm[ind_out,]$NeuLDI <- df_tract[ind_data,]$NeuLDI - df_lm[ind_out,]$NegLGI <- df_tract[ind_data,]$NegLGI - df_lm[ind_out,]$NegLDI <- df_tract[ind_data,]$NegLDI - } - df_lm$Group <- factor(df_lm$Group) - return(df_lm) + par(mar=c(5,3,4,2)) + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "2")), + rm.ranef = T, + main = paste0("Difference Scores, ", gA, "-", gC), + ylab = "", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 2), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_02.txt" + ) + ) + + capture.output(plot_diff(model, + view="nodeID", + comp=list(Group=c("1", "2")), + rm.ranef = T, + main = paste0("Difference Scores, ", gB, "-", gC), + ylab = "", + xlab = "Tract Node", + cex.lab = 2, + cex.axis = 2, + cex.main = 2.5, + cex.sub = 2), + file = paste0(tableDir, + "Table_Diff_", + tract, "_", "G", + g_type, "_12.txt" + ) + ) + par(mfrow=c(1,1), mar=c(5,4,4,2)) + dev.off() } -func_stat_lm <- function(df_lm, tract, gType, h_str, comp){ +func_mkdf_diff1 <- function(model){ ### --- Notes: # - # Conduct linear model, test lines - # then make plots. - - fit.int <- lm(NegLGI ~ FAvalue*Group, data = df_lm) + # Returns a dataframe of difference + # scores for each node. - capture.output( - summary(fit.int), - file = paste0(statsDir_lm, - "Stats_LM-", h_str, "_", tract, "_G", gType, ".txt" - ) - ) - capture.output( - anova(fit.int), - file = paste0(statsDir_lm, - "Stats_AN-", h_str, "_", tract, "_G", gType, ".txt" - ) - ) + p01 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + plot = F) - # plot if group diff - anova_stat <- anova(fit.int) - if(anova_stat$`Pr(>F)`[2] < 0.05){ + colnames(p01) <- c(colnames(p01[,1:4]), "Comp") + p01$Comp <- "01" + return(p01) +} + +func_mkdf_diff2 <- function(model){ - comp1 <- substr(comp, start=1, stop=1) - comp2 <- substr(comp, start=2, stop=2) - - if(gType == 1){ - h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) - - }else if(gType == 2){ - h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) - } - - h_tract <- func_switch_name(tract) - # h_insert <- paste(h_str, h_tract) - # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") - - # p <- ggplot(df_lm) + - # aes(x=FAvalue, y=NegLGI, color=Group) + - # geom_point(aes(color=Group)) + - # geom_smooth(method = "lm") + - # ggtitle(h_title) + - # ylab("Neg LGI") + - # xlab("FA value") - # - # p + scale_color_manual( - # values = h_cols, - # breaks = h_breaks, - # labels = h_labels - # ) - # - # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) - - # --- update, plot separately - plot1.x <- df_lm[which(df_lm$Group == comp1),]$FAvalue - plot1.y <- df_lm[which(df_lm$Group == comp1),]$NegLGI - - plot2.x <- df_lm[which(df_lm$Group == comp2),]$FAvalue - plot2.y <- df_lm[which(df_lm$Group == comp2),]$NegLGI - - h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") - x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") - - png(filename = paste0( - plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, ".png" - ), - width = 8, - height = 4, - units = "in", - res = 600 - ) - - par(mfrow=c(1,2), oma=c(0,0,2,0), family="Times New Roman") - - plot(plot1.x, plot1.y, - xlab = x_title, - ylab = "Neg LGI", - ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI)), - main = h_labels[1]) - abline(lm(plot1.y ~ plot1.x)) - - plot(plot2.x, plot2.y, - xlab = x_title, - ylab = "", - main = h_labels[2], - ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI))) - abline(lm(plot2.y ~ plot2.x)) - - mtext(h_title, outer = T, cex = 1.5) - dev.off() - par(mfrow=c(1,1)) - } + ### --- Notes: + # + # Returns a dataframe of difference + # scores for each node. + + p01 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "1")), + rm.ranef = T, + plot = F) + colnames(p01) <- c(colnames(p01[,1:4]), "Comp") + p01$Comp <- "01" + + p02 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("0", "2")), + rm.ranef = T, + plot = F) + colnames(p02) <- c(colnames(p02[,1:4]), "Comp") + p02$Comp <- "02" + + p12 <- plot_diff(model, + view="nodeID", + comp=list(Group=c("1", "2")), + rm.ranef = T, + plot = F) + colnames(p12) <- c(colnames(p12[,1:4]), "Comp") + p12$Comp <- "12" + + df_out <- as.data.frame(matrix(NA, nrow=3*dim(p01)[1], ncol=dim(p01)[2])) + colnames(df_out) <- colnames(p01) + df_out <- rbind(p01, p02, p12) + return(df_out) } -func_stat_lm_new <- function(df_lm, tract, gType, h_str, comp){ +func_stat_diff <- function(model, tract, g_type){ ### --- Notes: # - # Conduct linear model, test lines - # then make plots. + # 1) Makes plots and tables + # Tables are not currently used, + # func_plot_diff and func_mkdf_diff could + # be combined. + # + # 2) Make dataframes of difference estimates + # + # 3) Determine nodes that differ in difference + # estimation. Repeats what is done in tables. + # Also gets max node. + # + # Returns list of nodes, max node - # "NeuLGI", "NeuLDI", - memList <- c("NegLGI", "NegLDI") + # make plots and tables + if(g_type == 1){ + func_plot_diff1(model, tract, g_type) + }else if(g_type == 2){ + func_plot_diff2(model, tract, g_type) + } - for(mem in memList){ - - ind_mem <- grep(mem, colnames(df_lm)) - - df_mem <- as.data.frame(matrix(NA, nrow = dim(df_lm)[1], ncol=4)) - colnames(df_mem) <- c("Comp", "FAvalue", "Group", "MemScore") - - df_mem$Comp <- df_lm$Comp - df_mem$FAvalue <- df_lm$FAvalue - df_mem$Group <- df_lm$Group - df_mem$MemScore <- df_lm[,ind_mem] - - fit.int <- lm(MemScore ~ FAvalue*Group, data = df_mem) + # # use tables to get sig nodes, save in df_sigNodes + # df_sigNodes <- as.data.frame(matrix(NA, nrow=1, ncol=4)) + # colnames(df_sigNodes) <- c("Comparison", "Section", "Start", "End") + # + # if(g_type == 1){ + # compList <- "01" + # }else if(g_type == 2){ + # compList <- c("01", "02", "12") + # } + # + # # get table made by func_plot_diff for e/comparison + # for(comp in compList){ + # + # # write, use bash + # h_cmd = paste0( + # "tail -n +10 ", + # tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, + # ".txt | sed 's/-/,/g'" + # ) + # h_lines <- system(h_cmd, intern = T) + # + # # make df + # h_df <- read.table( + # text=paste(h_lines, collapse = "\n"), + # header = F, + # stringsAsFactors = F, + # sep = "," + # ) + # + # # write to df_sigNodes + # for(i in 1:dim(h_df)[1]){ + # df_sigNodes <- rbind(df_sigNodes, c(comp, i, h_df[i,1], h_df[i,2])) + # } + # } + # df_sigNodes <- na.omit(df_sigNodes) + + + ## Step 2 - get plot_diff dataframes + if(g_type == 1){ + df_estDiff <- func_mkdf_diff1(model) + }else if(g_type == 2){ + df_estDiff <- func_mkdf_diff2(model) + } + + + ## Step 3 - for grouping 2, determine + # nodes where all groups differ from e/o + nodeList <- unique(df_estDiff$nodeID) + diffList <- vector() + for(node in nodeList){ + ind_node <- which(df_estDiff$nodeID == node) + if(g_type == 1){ + if((abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0){ + diffList <- c(diffList, node) + } + }else if(g_type == 2){ + if( + (abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0 & + (abs(df_estDiff[ind_node[2],]$est) - df_estDiff[ind_node[2],]$CI) > 0 & + (abs(df_estDiff[ind_node[3],]$est) - df_estDiff[ind_node[3],]$CI) > 0 + ){ + diffList <- c(diffList, node) + } + } + } + + # find max diff + if(g_type == 1){ - capture.output( - summary(fit.int), - file = paste0(statsDir_lm, - "Stats_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" - ) - ) - capture.output( - anova(fit.int), - file = paste0(statsDir_lm, - "Stats_AN-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" - ) - ) + h_df <- subset(df_estDiff, nodeID %in% diffList) + ind_max <- which(abs(h_df$est) == max(abs(h_df$est))) + node_max <- h_df[ind_max,]$nodeID - # plot if group diff - # anova_stat <- anova(fit.int) - # if(anova_stat$`Pr(>F)`[2] < 0.05){ - - comp1 <- substr(comp, start=1, stop=1) - comp2 <- substr(comp, start=2, stop=2) - - if(gType == 1){ - h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) - - }else if(gType == 2){ - h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) - } - - h_tract <- func_switch_name(tract) - # h_insert <- paste(h_str, h_tract) - # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") - - # p <- ggplot(df_lm) + - # aes(x=FAvalue, y=NegLGI, color=Group) + - # geom_point(aes(color=Group)) + - # geom_smooth(method = "lm") + - # ggtitle(h_title) + - # ylab("Neg LGI") + - # xlab("FA value") - # - # p + scale_color_manual( - # values = h_cols, - # breaks = h_breaks, - # labels = h_labels - # ) - # - # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) - - # --- update, plot separately - plot1.x <- df_mem[which(df_mem$Group == comp1),]$FAvalue - plot1.y <- df_mem[which(df_mem$Group == comp1),]$MemScore - - plot2.x <- df_mem[which(df_mem$Group == comp2),]$FAvalue - plot2.y <- df_mem[which(df_mem$Group == comp2),]$MemScore - - h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") - x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") - - png(filename = paste0( - plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".png" - ), - width = 8, - height = 4, - units = "in", - res = 600 - ) - - par(mfrow=c(1,2), - oma=c(0,0,2,0), - family="Times New Roman" - ) - - plot(plot1.x, plot1.y, - xlab = x_title, - ylab = mem, - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore)), - main = h_labels[1]) - abline(lm(plot1.y ~ plot1.x)) + }else if(g_type == 2){ + f_sum <- function(x){ + sum(abs(df_estDiff[which(df_estDiff$nodeID == x),]$est)) + } + h_sum <- sapply(diffList, f_sum) + names(h_sum) <- diffList + h_max <- which(h_sum == max(h_sum)) + node_max <- as.numeric(names(h_sum[h_max])) + } + + return(list(diffList, node_max)) +} + + +# Functions - LM +func_df_avg <- function(comp, df_tract, df_diff){ + + ### --- Notes: + # + # Make a dataframe of averaged FA + # values from all regions that differ + # between two splines + + grpA <- as.numeric(substr(comp, start=1, stop=1)) + grpB <- as.numeric(substr(comp, start=2, stop=2)) + df_comp <- df_diff[which(df_diff$Comparison == comp),] + + # make dataframe + subjList <- unique( + df_tract[which( + df_tract$Group == grpA | df_tract$Group == grpB + ),]$subjectID + ) + + df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) + colnames(df_lm) <- c("Comp", "Subj", "FAvalue", + "Pars6", "Group", + "NeuLGI", "NeuLDI", + "NegLGI", "NegLDI") + df_lm$Subj <- subjList + + for(subj in subjList){ + h_mean <- vector() + for(i in 1:dim(df_comp)[1]){ - plot(plot2.x, plot2.y, - xlab = x_title, - ylab = "", - main = h_labels[2], - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore))) - abline(lm(plot2.y ~ plot2.x)) + h_start <- which( + df_tract$subjectID == subj & + df_tract$nodeID == df_comp[i,]$Start + ) - mtext(h_title, outer = T, cex = 1.5) - dev.off() - par(mfrow=c(1,1)) - # } + h_end <- which( + df_tract$subjectID == subj & + df_tract$nodeID == df_comp[i,]$End + ) + + h_mean <- c(h_mean, mean(df_tract[h_start:h_end,]$dti_fa)) + } + + ind_out <- which(df_lm$Subj == subj) + df_lm[ind_out,]$FAvalue <- round(mean(h_mean), 4) + + ind_subj <- which(df_tract$subjectID == subj)[1] + df_lm[ind_out,]$Pars6 <- df_tract[ind_subj,]$Pars6 + df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_subj,]$Group) - 1 + + df_lm[ind_out,]$NeuLGI <- df_tract[ind_subj,]$NeuLGI + df_lm[ind_out,]$NeuLDI <- df_tract[ind_subj,]$NeuLDI + df_lm[ind_out,]$NegLGI <- df_tract[ind_subj,]$NegLGI + df_lm[ind_out,]$NegLDI <- df_tract[ind_subj,]$NegLDI + + df_lm[ind_out,]$Comp <- comp } + df_lm$Group <- factor(df_lm$Group) + return(df_lm) } - - -func_plot_diff1 <- function(model, tract){ +func_df_max <- function(comp, df_tract, df_max){ ### --- Notes: # - # This will draw plots and write tables of sig - # node differences for GAM when group style=1 - - png(filename = paste0( - plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png"), - width = 600, height = 600 - ) + # make a dataframe of FA values + # from single region showing showing + # maximal A-B difference - gA <- func_switch_g1("0")[[2]][1] - gB <- func_switch_g1("1")[[2]][1] + grpA <- as.numeric(substr(comp, start=1, stop=1)) + grpB <- as.numeric(substr(comp, start=2, stop=2)) + node <- df_max[which(df_max$Comparison == comp),]$Node - par(mar=c(5,5,4,2)) - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - main = paste0("Difference Scores, ", gA, "-", gB), - ylab = "Est. FA difference", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 1.5), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_01.txt" - ) + # make dataframe + subjList <- unique( + df_tract[which( + df_tract$Group == grpA | df_tract$Group == grpB + ),]$subjectID ) - par(mar=c(5,4,4,2)) - dev.off() + df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) + colnames(df_lm) <- c("Comp","Subj", "FAvalue", + "Pars6", "Group", + "NeuLGI", "NeuLDI", + "NegLGI", "NegLDI") + df_lm$Subj <- subjList + + for(subj in subjList){ + + ind_data <- which(df_tract$subjectID == subj & df_tract$nodeID == node) + ind_out <- which(df_lm$Subj == subj) + + df_lm[ind_out,]$Comp <- comp + df_lm[ind_out,]$FAvalue <- df_tract[ind_data,]$dti_fa + df_lm[ind_out,]$Pars6 <- df_tract[ind_data,]$Pars6 + df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_data,]$Group)-1 + df_lm[ind_out,]$NeuLGI <- df_tract[ind_data,]$NeuLGI + df_lm[ind_out,]$NeuLDI <- df_tract[ind_data,]$NeuLDI + df_lm[ind_out,]$NegLGI <- df_tract[ind_data,]$NegLGI + df_lm[ind_out,]$NegLDI <- df_tract[ind_data,]$NegLDI + } + df_lm$Group <- factor(df_lm$Group) + return(df_lm) } -func_plot_diff2 <- function(model, tract){ +func_stat_lm <- function(df_lm, tract, gType, h_str, comp){ ### --- Notes: # - # This will draw plots and write tables of sig - # node differences for GAM when group style=2 - - png(filename = paste0( - plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png" - ), width = 1800, height = 600 - ) - - gA <- func_switch_g2("0")[[2]][1] - gB <- func_switch_g2("1")[[2]][1] - gC <- func_switch_g2("2")[[2]][1] + # Conduct linear model, test lines + # then make plots. - par(mfrow=c(1,3), mar=c(5,5,4,2)) - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - main = paste0("Difference Scores, ", gA, "-", gB), - ylab = "Est. FA difference", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 1.5), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_01.txt" - ) - ) + fit.int <- lm(NegLGI ~ FAvalue*Group, data = df_lm) - par(mar=c(5,3,4,2)) - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "2")), - rm.ranef = T, - main = paste0("Difference Scores, ", gA, "-", gC), - ylab = "", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 2), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_02.txt" - ) + capture.output( + summary(fit.int), + file = paste0(statsDir_lm, + "Stats_LM-", h_str, "_", tract, "_G", gType, ".txt" + ) ) - - capture.output(plot_diff(model, - view="nodeID", - comp=list(Group=c("1", "2")), - rm.ranef = T, - main = paste0("Difference Scores, ", gB, "-", gC), - ylab = "", - xlab = "Tract Node", - cex.lab = 2, - cex.axis = 2, - cex.main = 2.5, - cex.sub = 2), - file = paste0(tableDir, - "Table_Diff_", - tract, "_", "G", - g_type, "_12.txt" - ) + capture.output( + anova(fit.int), + file = paste0(statsDir_lm, + "Stats_AN-", h_str, "_", tract, "_G", gType, ".txt" + ) ) - par(mfrow=c(1,1), mar=c(5,4,4,2)) - dev.off() -} - -func_mkdf_diff1 <- function(model){ - - ### --- Notes: - # - # Returns a dataframe of difference - # scores for each node. - p01 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - plot = F) + # plot if group diff + anova_stat <- anova(fit.int) + if(anova_stat$`Pr(>F)`[2] < 0.05){ - colnames(p01) <- c(colnames(p01[,1:4]), "Comp") - p01$Comp <- "01" - return(p01) + comp1 <- substr(comp, start=1, stop=1) + comp2 <- substr(comp, start=2, stop=2) + + if(gType == 1){ + h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) + + }else if(gType == 2){ + h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) + } + + h_tract <- func_switch_name(tract) + # h_insert <- paste(h_str, h_tract) + # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") + + # p <- ggplot(df_lm) + + # aes(x=FAvalue, y=NegLGI, color=Group) + + # geom_point(aes(color=Group)) + + # geom_smooth(method = "lm") + + # ggtitle(h_title) + + # ylab("Neg LGI") + + # xlab("FA value") + # + # p + scale_color_manual( + # values = h_cols, + # breaks = h_breaks, + # labels = h_labels + # ) + # + # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) + + # --- update, plot separately + plot1.x <- df_lm[which(df_lm$Group == comp1),]$FAvalue + plot1.y <- df_lm[which(df_lm$Group == comp1),]$NegLGI + + plot2.x <- df_lm[which(df_lm$Group == comp2),]$FAvalue + plot2.y <- df_lm[which(df_lm$Group == comp2),]$NegLGI + + h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") + x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") + + png(filename = paste0( + plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, ".png" + ), + width = 8, + height = 4, + units = "in", + res = 600 + ) + + par(mfrow=c(1,2), oma=c(0,0,2,0), family="Times New Roman") + + plot(plot1.x, plot1.y, + xlab = x_title, + ylab = "Neg LGI", + ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI)), + main = h_labels[1]) + abline(lm(plot1.y ~ plot1.x)) + + plot(plot2.x, plot2.y, + xlab = x_title, + ylab = "", + main = h_labels[2], + ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI))) + abline(lm(plot2.y ~ plot2.x)) + + mtext(h_title, outer = T, cex = 1.5) + dev.off() + par(mfrow=c(1,1)) + } } -func_mkdf_diff2 <- function(model){ +func_stat_lm_new <- function(df_lm, tract, gType, h_str, comp){ ### --- Notes: # - # Returns a dataframe of difference - # scores for each node. - - p01 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "1")), - rm.ranef = T, - plot = F) - colnames(p01) <- c(colnames(p01[,1:4]), "Comp") - p01$Comp <- "01" - - p02 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("0", "2")), - rm.ranef = T, - plot = F) - colnames(p02) <- c(colnames(p02[,1:4]), "Comp") - p02$Comp <- "02" + # Conduct linear model, test lines + # then make plots. - p12 <- plot_diff(model, - view="nodeID", - comp=list(Group=c("1", "2")), - rm.ranef = T, - plot = F) - colnames(p12) <- c(colnames(p12[,1:4]), "Comp") - p12$Comp <- "12" + # "NeuLGI", "NeuLDI", + memList <- c("NegLGI", "NegLDI") - df_out <- as.data.frame(matrix(NA, nrow=3*dim(p01)[1], ncol=dim(p01)[2])) - colnames(df_out) <- colnames(p01) - df_out <- rbind(p01, p02, p12) - return(df_out) + for(mem in memList){ + + ind_mem <- grep(mem, colnames(df_lm)) + + df_mem <- as.data.frame(matrix(NA, nrow = dim(df_lm)[1], ncol=4)) + colnames(df_mem) <- c("Comp", "FAvalue", "Group", "MemScore") + + df_mem$Comp <- df_lm$Comp + df_mem$FAvalue <- df_lm$FAvalue + df_mem$Group <- df_lm$Group + df_mem$MemScore <- df_lm[,ind_mem] + + fit.int <- lm(MemScore ~ FAvalue*Group, data = df_mem) + + capture.output( + summary(fit.int), + file = paste0(statsDir_lm, + "Stats_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" + ) + ) + capture.output( + anova(fit.int), + file = paste0(statsDir_lm, + "Stats_AN-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" + ) + ) + + # plot if group diff + # anova_stat <- anova(fit.int) + # if(anova_stat$`Pr(>F)`[2] < 0.05){ + + comp1 <- substr(comp, start=1, stop=1) + comp2 <- substr(comp, start=2, stop=2) + + if(gType == 1){ + h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) + + }else if(gType == 2){ + h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) + } + + h_tract <- func_switch_name(tract) + # h_insert <- paste(h_str, h_tract) + # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") + + # p <- ggplot(df_lm) + + # aes(x=FAvalue, y=NegLGI, color=Group) + + # geom_point(aes(color=Group)) + + # geom_smooth(method = "lm") + + # ggtitle(h_title) + + # ylab("Neg LGI") + + # xlab("FA value") + # + # p + scale_color_manual( + # values = h_cols, + # breaks = h_breaks, + # labels = h_labels + # ) + # + # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) + + # --- update, plot separately + plot1.x <- df_mem[which(df_mem$Group == comp1),]$FAvalue + plot1.y <- df_mem[which(df_mem$Group == comp1),]$MemScore + + plot2.x <- df_mem[which(df_mem$Group == comp2),]$FAvalue + plot2.y <- df_mem[which(df_mem$Group == comp2),]$MemScore + + h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") + x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") + + png(filename = paste0( + plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".png" + ), + width = 8, + height = 4, + units = "in", + res = 600 + ) + + par(mfrow=c(1,2), + oma=c(0,0,2,0), + family="Times New Roman" + ) + + plot(plot1.x, plot1.y, + xlab = x_title, + ylab = mem, + ylim = c(min(df_mem$MemScore), max(df_mem$MemScore)), + main = h_labels[1]) + abline(lm(plot1.y ~ plot1.x)) + + plot(plot2.x, plot2.y, + xlab = x_title, + ylab = "", + main = h_labels[2], + ylim = c(min(df_mem$MemScore), max(df_mem$MemScore))) + abline(lm(plot2.y ~ plot2.x)) + + mtext(h_title, outer = T, cex = 1.5) + dev.off() + par(mfrow=c(1,1)) + # } + } } -func_stat_diff_new <- function(model, tract, g_type){ - - # 1) func_plot_diff - # make plots, write tables - # determine diff nodes - # - # 2) func_max_diff - # make dataframes - # - # 3) determine nodes where groups differ - # return df_diff. - # Account for 2/3 groups - - ## Step 1 - make plots and tables - if(g_type == 1){ - func_plot_diff1(model, tract) - }else if(g_type == 2){ - func_plot_diff2(model, tract) - } - - # # use tables to get sig nodes, save in df_sigNodes - # df_sigNodes <- as.data.frame(matrix(NA, nrow=1, ncol=4)) - # colnames(df_sigNodes) <- c("Comparison", "Section", "Start", "End") - # - # if(g_type == 1){ - # compList <- "01" - # }else if(g_type == 2){ - # compList <- c("01", "02", "12") - # } - # - # # get table made by func_plot_diff for e/comparison - # for(comp in compList){ - # - # # write, use bash - # h_cmd = paste0( - # "tail -n +10 ", - # tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, - # ".txt | sed 's/-/,/g'" - # ) - # h_lines <- system(h_cmd, intern = T) - # - # # make df - # h_df <- read.table( - # text=paste(h_lines, collapse = "\n"), - # header = F, - # stringsAsFactors = F, - # sep = "," - # ) - # - # # write to df_sigNodes - # for(i in 1:dim(h_df)[1]){ - # df_sigNodes <- rbind(df_sigNodes, c(comp, i, h_df[i,1], h_df[i,2])) - # } - # } - # df_sigNodes <- na.omit(df_sigNodes) - - - ## Step 2 - get plot_diff dataframes - if(g_type == 1){ - df_estDiff <- func_mkdf_diff1(model) - }else if(g_type == 2){ - df_estDiff <- func_mkdf_diff2(model) - } - + +func_df_avg_new <- function(df_tract, node_list, g_type, avg_max){ - ## Step 3 - for grouping 2, determine - # nodes where all groups differ from e/o - nodeList <- unique(df_estDiff$nodeID) - diffList <- vector() - for(node in nodeList){ - ind_node <- which(df_estDiff$nodeID == node) - if(g_type == 1){ - if((abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0){ - diffList <- c(diffList, node) - } - }else if(g_type == 2){ - if( - (abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0 & - (abs(df_estDiff[ind_node[2],]$est) - df_estDiff[ind_node[2],]$CI) > 0 & - (abs(df_estDiff[ind_node[3],]$est) - df_estDiff[ind_node[3],]$CI) > 0 - ){ - diffList <- c(diffList, node) - } - } - } + subjList <- unique(df_tract$subjectID) + df_out <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=8)) + colnames(df_out) <- c("Subj", "FAvalue", "Pars6", "Group", + "NeuLGI", "NeuLDI","NegLGI", "NegLDI") + df_out$Subj <- subjList - # find max diff - if(g_type == 1){ - - h_df <- subset(df_estDiff, nodeID %in% diffList) - ind_max <- which(abs(h_df$est) == max(abs(h_df$est))) - node_max <- h_df[ind_max,]$nodeID + for(subj in subjList){ - }else if(g_type == 2){ - f_sum <- function(x){ - sum(abs(df_estDiff[which(df_estDiff$nodeID == x),]$est)) + ind_out <- which(df_out$Subj == subj) + df_subj <- df_tract[which(df_tract$subjectID == subj),] + + df_out[ind_out,]$Pars6 <- df_subj[1,]$Pars6 + df_out[ind_out,]$Group <- df_subj[1,]$Group + df_out[ind_out,]$NeuLGI <- df_subj[1,]$NeuLGI + df_out[ind_out,]$NeuLDI <- df_subj[1,]$NeuLDI + df_out[ind_out,]$NegLGI <- df_subj[1,]$NegLGI + df_out[ind_out,]$NegLDI <- df_subj[1,]$NegLDI + + if(avg_max == "avg"){ + df_node <- subset(df_subj, nodeID %in% node_list) + df_out[ind_out,]$FAvalue <- round(mean(df_node$dti_fa), 4) + }else if(avg_max == "max"){ + ind_max <- which(df_subj$nodeID == node_list) + df_out[ind_out,]$FAvalue <- round(df_subj[ind_max,]$dti_fa, 4) } - h_sum <- sapply(diffList, f_sum) - names(h_sum) <- diffList - h_max <- which(h_sum == max(h_sum)) - node_max <- as.numeric(names(h_sum[h_max])) } - - return(list(diffList, node_max)) + return(df_out) } + ### Work # Two analyses (grouping types): # 1) Con vs Anx @@ -1420,24 +1249,32 @@ for(gType in groupType){ func_plot_gam(gam_model, tract, gType, df_tract) # determine group differences - df_diff <- func_stat_diff(gam_model, tract, gType) - compList <- unique(df_diff$Comparison) + nodeList <- func_stat_diff(gam_model, tract, gType) + avg_nodeList <- nodeList[[1]] + max_nodeList <- nodeList[[2]] - # pairwise lm, since plot_diff does pairwise spline tests - for(comp in compList){ - - # predict mem score from dti average diff - df_lm <- func_df_avg(comp, df_tract, df_diff) - # func_stat_lm(df_lm, tract, gType, "Avg", comp) - func_stat_lm_new(df_lm, tract, gType, "Avg", comp) - - # predict mem score from dti max diff - # just rerun plot_diff to get table of max diff - df_max <- func_max_diff(gam_model, gType) - df_lm <- func_df_max(comp, df_tract, df_max) - # func_stat_lm(df_lm, tract, gType, "Max", comp) - func_stat_lm_new(df_lm, tract, gType, "Max", comp) - } + # avg lm + + + + + # compList <- unique(df_diff$Comparison) + # + # # pairwise lm, since plot_diff does pairwise spline tests + # for(comp in compList){ + # + # # predict mem score from dti average diff + # df_lm <- func_df_avg(comp, df_tract, df_diff) + # # func_stat_lm(df_lm, tract, gType, "Avg", comp) + # func_stat_lm_new(df_lm, tract, gType, "Avg", comp) + # + # # predict mem score from dti max diff + # # just rerun plot_diff to get table of max diff + # df_max <- func_max_diff(gam_model, gType) + # df_lm <- func_df_max(comp, df_tract, df_max) + # # func_stat_lm(df_lm, tract, gType, "Max", comp) + # func_stat_lm_new(df_lm, tract, gType, "Max", comp) + # } } } From 9bb3522adc1e1464b743e0358f5fe3d7049b455e Mon Sep 17 00:00:00 2001 From: Nathan Muncy Date: Fri, 30 Apr 2021 15:03:09 -0400 Subject: [PATCH 6/7] updating LM --- afq_step3_stats.R | 914 +++++++++++++++++++++++++--------------------- 1 file changed, 504 insertions(+), 410 deletions(-) diff --git a/afq_step3_stats.R b/afq_step3_stats.R index 94ae1bc..01291de 100644 --- a/afq_step3_stats.R +++ b/afq_step3_stats.R @@ -37,7 +37,7 @@ tractList <- c("UNC_L", "UNC_R", "FA") # Functions - general -func_df_master <- function(g_type){ +func_df_master <- function(gType){ ### --- Notes: # @@ -95,7 +95,7 @@ func_df_master <- function(g_type){ # # ugly - maybe convert to case switch? - if(g_type == 1){ + if(gType == 1){ h_search <- c("Anxiety", "Phobia") @@ -107,7 +107,7 @@ func_df_master <- function(g_type){ next } - }else if(g_type == 2){ + }else if(gType == 2){ h_search <- c("Separation", "Social") @@ -208,7 +208,7 @@ func_df_master <- function(g_type){ # clean NA (from Group skip), write csv df_out <- df_afq[complete.cases(df_afq$Group),] - outFile <- paste0(dataDir, "Master_dataframe_G", g_type,".csv") + outFile <- paste0(dataDir, "Master_dataframe_G", gType,".csv") write.csv(df_out, file=outFile, quote=F, row.names = F) # return(df_out) } @@ -374,15 +374,13 @@ func_switch_name <- function(tract){ # Functions - GAM -func_plot_gam <- function(model, tract, g_type, df_tract){ +func_plot_gam <- function(model, tract, gType, df_tract){ ### --- Notes: # # Will plot the GAM model of # dti data # - # wrapped by func_stat_gam - # # TODO update h_cols, etc to use switch # plot @@ -405,14 +403,27 @@ func_plot_gam <- function(model, tract, g_type, df_tract){ h_tract <- func_switch_name(tract) h_title = paste0("GAM Fit of ", h_tract," FA Values") - if(g_type == 1){ - h_cols <- c("0" = "blue", "1" = "darkred") + if(gType == 1){ + + h_cols = c(func_switch_g1("0")[[1]][1], func_switch_g1("1")[[1]][1]) + names(h_cols) <- c("0", "1") h_breaks <- c("0", "1") - h_labels <- c("Con", "Anx") - }else if(g_type == 2){ - h_cols <- c("0" = "blue", "1" = "darkred", "2" = "black") + h_labels <- c(func_switch_g1("0")[[2]][1], func_switch_g1("1")[[2]][1]) + + }else if(gType == 2){ + + h_cols = c( + func_switch_g2("0")[[1]][1], + func_switch_g2("1")[[1]][1], + func_switch_g2("2")[[1]][1] + ) + names(h_cols) <- c("0", "1", "2") h_breaks <- c("0", "1", "2") - h_labels <- c("Con", "GAD", "SAD") + h_labels <- c( + func_switch_g2("0")[[2]][1], + func_switch_g2("1")[[2]][1], + func_switch_g2("2")[[2]][1] + ) } p <- ggplot(data = df_pred) + @@ -427,10 +438,10 @@ func_plot_gam <- function(model, tract, g_type, df_tract){ labels = h_labels ) - ggsave(paste0(plotDir_gam, "Plot_GAM_", tract, "_", "G", g_type, ".png")) + ggsave(paste0(plotDir_gam, "Plot_GAM_", tract, "_", "G", gType, ".png")) } -func_stat_gam <- function(tract, df_tract, g_type){ +func_stat_gam <- function(tract, df_tract, gType){ ### --- Notes: # @@ -509,7 +520,7 @@ func_stat_gam <- function(tract, df_tract, g_type){ capture.output( summary(fit_gamma), file = paste0( - statsDir_gam, "Stats_GAM-gamma_", tract, "_", "G", g_type, ".txt" + statsDir_gam, "Stats_GAM-gamma_", tract, "_", "G", gType, ".txt" ) ) @@ -530,44 +541,20 @@ func_stat_gam <- function(tract, df_tract, g_type){ capture.output( compareML(fit_gamma, fit_cov_pds), file = paste0( - statsDir_gam, "Stats_GAM-comp_", tract, "_", "G", g_type, ".txt" + statsDir_gam, "Stats_GAM-comp_", tract, "_", "G", gType, ".txt" ) ) capture.output( summary(fit_cov_pds), file = paste0( - statsDir_gam, "Stats_GAM-cov_", tract, "_", "G", g_type, ".txt" + statsDir_gam, "Stats_GAM-cov_", tract, "_", "G", gType, ".txt" ) ) - # # plot - # df_pred <- predict.bam( - # fit_cov_pds, - # exclude_terms = c("PDS", "Sex","subjectID"), - # values=list(PDS = NULL, Sex = NULL), - # se.fit=T, - # type="response") - # - # df_pred <- data.frame(Group=df_tract$Group, - # Sex=df_tract$Sex, - # subjectID=df_tract$subjectID, - # PDS=df_tract$PDS, - # nodeID=df_tract$nodeID, - # fit=df_pred$fit, - # se.fit=df_pred$se.fit) - # - # h_tract <- switch( - # tract, - # "UNC_L" = "L. Uncinate", - # "FA" = "A. Forceps" - # ) - # - # plot_title = paste0("GAM Fit of ", h_tract," FA Values") - # func_plot_gam(df_pred, plot_title, tract, g_type) return(fit_cov_pds) } -func_plot_diff1 <- function(model, tract, g_type){ +func_plot_diff1 <- function(model, tract, gType){ ### --- Notes: # @@ -575,7 +562,7 @@ func_plot_diff1 <- function(model, tract, g_type){ # node differences for GAM when group style=1 png(filename = paste0( - plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png"), + plotDir_gam, "Plot_Diff_", tract, "_", "G", gType, ".png"), width = 600, height = 600 ) @@ -597,14 +584,14 @@ func_plot_diff1 <- function(model, tract, g_type){ file = paste0(tableDir, "Table_Diff_", tract, "_", "G", - g_type, "_01.txt" + gType, "_01.txt" ) ) par(mar=c(5,4,4,2)) dev.off() } -func_plot_diff2 <- function(model, tract, g_type){ +func_plot_diff2 <- function(model, tract, gType){ ### --- Notes: # @@ -612,7 +599,7 @@ func_plot_diff2 <- function(model, tract, g_type){ # node differences for GAM when group style=2 png(filename = paste0( - plotDir_gam, "Plot_Diff_", tract, "_", "G", g_type, ".png" + plotDir_gam, "Plot_Diff_", tract, "_", "G", gType, ".png" ), width = 1800, height = 600 ) @@ -635,7 +622,7 @@ func_plot_diff2 <- function(model, tract, g_type){ file = paste0(tableDir, "Table_Diff_", tract, "_", "G", - g_type, "_01.txt" + gType, "_01.txt" ) ) @@ -654,7 +641,7 @@ func_plot_diff2 <- function(model, tract, g_type){ file = paste0(tableDir, "Table_Diff_", tract, "_", "G", - g_type, "_02.txt" + gType, "_02.txt" ) ) @@ -672,7 +659,7 @@ func_plot_diff2 <- function(model, tract, g_type){ file = paste0(tableDir, "Table_Diff_", tract, "_", "G", - g_type, "_12.txt" + gType, "_12.txt" ) ) par(mfrow=c(1,1), mar=c(5,4,4,2)) @@ -734,7 +721,7 @@ func_mkdf_diff2 <- function(model){ return(df_out) } -func_stat_diff <- function(model, tract, g_type){ +func_stat_diff <- function(model, tract, gType){ ### --- Notes: # @@ -752,19 +739,19 @@ func_stat_diff <- function(model, tract, g_type){ # Returns list of nodes, max node # make plots and tables - if(g_type == 1){ - func_plot_diff1(model, tract, g_type) - }else if(g_type == 2){ - func_plot_diff2(model, tract, g_type) + if(gType == 1){ + func_plot_diff1(model, tract, gType) + }else if(gType == 2){ + func_plot_diff2(model, tract, gType) } # # use tables to get sig nodes, save in df_sigNodes # df_sigNodes <- as.data.frame(matrix(NA, nrow=1, ncol=4)) # colnames(df_sigNodes) <- c("Comparison", "Section", "Start", "End") # - # if(g_type == 1){ + # if(gType == 1){ # compList <- "01" - # }else if(g_type == 2){ + # }else if(gType == 2){ # compList <- c("01", "02", "12") # } # @@ -774,7 +761,7 @@ func_stat_diff <- function(model, tract, g_type){ # # write, use bash # h_cmd = paste0( # "tail -n +10 ", - # tableDir, "Table_Diff_", tract, "_", "G", g_type, "_", comp, + # tableDir, "Table_Diff_", tract, "_", "G", gType, "_", comp, # ".txt | sed 's/-/,/g'" # ) # h_lines <- system(h_cmd, intern = T) @@ -796,9 +783,9 @@ func_stat_diff <- function(model, tract, g_type){ ## Step 2 - get plot_diff dataframes - if(g_type == 1){ + if(gType == 1){ df_estDiff <- func_mkdf_diff1(model) - }else if(g_type == 2){ + }else if(gType == 2){ df_estDiff <- func_mkdf_diff2(model) } @@ -809,11 +796,11 @@ func_stat_diff <- function(model, tract, g_type){ diffList <- vector() for(node in nodeList){ ind_node <- which(df_estDiff$nodeID == node) - if(g_type == 1){ + if(gType == 1){ if((abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0){ diffList <- c(diffList, node) } - }else if(g_type == 2){ + }else if(gType == 2){ if( (abs(df_estDiff[ind_node[1],]$est) - df_estDiff[ind_node[1],]$CI) > 0 & (abs(df_estDiff[ind_node[2],]$est) - df_estDiff[ind_node[2],]$CI) > 0 & @@ -825,13 +812,13 @@ func_stat_diff <- function(model, tract, g_type){ } # find max diff - if(g_type == 1){ + if(gType == 1){ h_df <- subset(df_estDiff, nodeID %in% diffList) ind_max <- which(abs(h_df$est) == max(abs(h_df$est))) node_max <- h_df[ind_max,]$nodeID - }else if(g_type == 2){ + }else if(gType == 2){ f_sum <- function(x){ sum(abs(df_estDiff[which(df_estDiff$nodeID == x),]$est)) } @@ -846,197 +833,118 @@ func_stat_diff <- function(model, tract, g_type){ # Functions - LM -func_df_avg <- function(comp, df_tract, df_diff){ - - ### --- Notes: - # - # Make a dataframe of averaged FA - # values from all regions that differ - # between two splines - - grpA <- as.numeric(substr(comp, start=1, stop=1)) - grpB <- as.numeric(substr(comp, start=2, stop=2)) - df_comp <- df_diff[which(df_diff$Comparison == comp),] +func_mkdf_lm <- function(df_tract, node_list, gType, avg_max){ - # make dataframe - subjList <- unique( - df_tract[which( - df_tract$Group == grpA | df_tract$Group == grpB - ),]$subjectID - ) - - df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) - colnames(df_lm) <- c("Comp", "Subj", "FAvalue", - "Pars6", "Group", - "NeuLGI", "NeuLDI", - "NegLGI", "NegLDI") - df_lm$Subj <- subjList + subjList <- unique(df_tract$subjectID) + df_out <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=8)) + colnames(df_out) <- c("Subj", "FAvalue", "Pars6", "Group", + "NeuLGI", "NeuLDI","NegLGI", "NegLDI") + df_out$Subj <- subjList for(subj in subjList){ - h_mean <- vector() - for(i in 1:dim(df_comp)[1]){ - - h_start <- which( - df_tract$subjectID == subj & - df_tract$nodeID == df_comp[i,]$Start - ) - - h_end <- which( - df_tract$subjectID == subj & - df_tract$nodeID == df_comp[i,]$End - ) - - h_mean <- c(h_mean, mean(df_tract[h_start:h_end,]$dti_fa)) - } - - ind_out <- which(df_lm$Subj == subj) - df_lm[ind_out,]$FAvalue <- round(mean(h_mean), 4) - - ind_subj <- which(df_tract$subjectID == subj)[1] - df_lm[ind_out,]$Pars6 <- df_tract[ind_subj,]$Pars6 - df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_subj,]$Group) - 1 - - df_lm[ind_out,]$NeuLGI <- df_tract[ind_subj,]$NeuLGI - df_lm[ind_out,]$NeuLDI <- df_tract[ind_subj,]$NeuLDI - df_lm[ind_out,]$NegLGI <- df_tract[ind_subj,]$NegLGI - df_lm[ind_out,]$NegLDI <- df_tract[ind_subj,]$NegLDI - df_lm[ind_out,]$Comp <- comp - } - df_lm$Group <- factor(df_lm$Group) - return(df_lm) -} - -func_df_max <- function(comp, df_tract, df_max){ - - ### --- Notes: - # - # make a dataframe of FA values - # from single region showing showing - # maximal A-B difference - - grpA <- as.numeric(substr(comp, start=1, stop=1)) - grpB <- as.numeric(substr(comp, start=2, stop=2)) - node <- df_max[which(df_max$Comparison == comp),]$Node - - # make dataframe - subjList <- unique( - df_tract[which( - df_tract$Group == grpA | df_tract$Group == grpB - ),]$subjectID - ) - df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) - colnames(df_lm) <- c("Comp","Subj", "FAvalue", - "Pars6", "Group", - "NeuLGI", "NeuLDI", - "NegLGI", "NegLDI") - df_lm$Subj <- subjList - - for(subj in subjList){ + ind_out <- which(df_out$Subj == subj) + df_subj <- df_tract[which(df_tract$subjectID == subj),] - ind_data <- which(df_tract$subjectID == subj & df_tract$nodeID == node) - ind_out <- which(df_lm$Subj == subj) + df_out[ind_out,]$Pars6 <- df_subj[1,]$Pars6 + df_out[ind_out,]$Group <- as.numeric(df_subj[1,]$Group) - 1 + df_out[ind_out,]$NeuLGI <- df_subj[1,]$NeuLGI + df_out[ind_out,]$NeuLDI <- df_subj[1,]$NeuLDI + df_out[ind_out,]$NegLGI <- df_subj[1,]$NegLGI + df_out[ind_out,]$NegLDI <- df_subj[1,]$NegLDI - df_lm[ind_out,]$Comp <- comp - df_lm[ind_out,]$FAvalue <- df_tract[ind_data,]$dti_fa - df_lm[ind_out,]$Pars6 <- df_tract[ind_data,]$Pars6 - df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_data,]$Group)-1 - df_lm[ind_out,]$NeuLGI <- df_tract[ind_data,]$NeuLGI - df_lm[ind_out,]$NeuLDI <- df_tract[ind_data,]$NeuLDI - df_lm[ind_out,]$NegLGI <- df_tract[ind_data,]$NegLGI - df_lm[ind_out,]$NegLDI <- df_tract[ind_data,]$NegLDI + if(avg_max == "Avg"){ + df_node <- subset(df_subj, nodeID %in% node_list) + df_out[ind_out,]$FAvalue <- round(mean(df_node$dti_fa), 4) + }else if(avg_max == "Max"){ + ind_max <- which(df_subj$nodeID == node_list) + df_out[ind_out,]$FAvalue <- round(df_subj[ind_max,]$dti_fa, 4) + } } - df_lm$Group <- factor(df_lm$Group) - return(df_lm) + return(df_out) } -func_stat_lm <- function(df_lm, tract, gType, h_str, comp){ +func_stat_lm <- function(df_lm, tract, gType, avg_max){ ### --- Notes: # # Conduct linear model, test lines # then make plots. - fit.int <- lm(NegLGI ~ FAvalue*Group, data = df_lm) - - capture.output( - summary(fit.int), - file = paste0(statsDir_lm, - "Stats_LM-", h_str, "_", tract, "_G", gType, ".txt" - ) - ) - capture.output( - anova(fit.int), - file = paste0(statsDir_lm, - "Stats_AN-", h_str, "_", tract, "_G", gType, ".txt" - ) - ) - - # plot if group diff - anova_stat <- anova(fit.int) - if(anova_stat$`Pr(>F)`[2] < 0.05){ + # "NeuLGI", "NeuLDI", + memList <- c("NegLGI", "NegLDI") - comp1 <- substr(comp, start=1, stop=1) - comp2 <- substr(comp, start=2, stop=2) + for(mem in memList){ - if(gType == 1){ - h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) - - }else if(gType == 2){ - h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) - } + df_mem <- as.data.frame(matrix(NA, nrow = dim(df_lm)[1], ncol=4)) + colnames(df_mem) <- c("Subj", "FAvalue", "Group", "MemScore") - h_tract <- func_switch_name(tract) - # h_insert <- paste(h_str, h_tract) - # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") + df_mem$Subj <- df_lm$Subj + df_mem$FAvalue <- df_lm$FAvalue + df_mem$Group <- df_lm$Group - # p <- ggplot(df_lm) + - # aes(x=FAvalue, y=NegLGI, color=Group) + - # geom_point(aes(color=Group)) + - # geom_smooth(method = "lm") + - # ggtitle(h_title) + - # ylab("Neg LGI") + - # xlab("FA value") + ind_mem <- grep(mem, colnames(df_lm)) + df_mem$MemScore <- df_lm[,ind_mem] + + fit.int <- lm(MemScore ~ FAvalue*Group, data = df_mem) + capture.output( + summary(fit.int), + file = paste0(statsDir_lm, + "Stats_LM-", h_str, "_", tract, "_G", gType, "_", mem, ".txt" + ) + ) + capture.output( + anova(fit.int), + file = paste0(statsDir_lm, + "Stats_AN-", h_str, "_", tract, "_G", gType, "_", mem, ".txt" + ) + ) + + # # plot if group diff + # if(gType == 1){ + # h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) + # names(h_cols) <- c(comp1, comp2) + # h_breaks <- c(comp1, comp2) + # h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) # - # p + scale_color_manual( - # values = h_cols, - # breaks = h_breaks, - # labels = h_labels - # ) - # - # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) + # }else if(gType == 2){ + # h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) + # names(h_cols) <- c(comp1, comp2) + # h_breaks <- c(comp1, comp2) + # h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) + # } + + h_tract <- func_switch_name(tract) + # --- update, plot separately - plot1.x <- df_lm[which(df_lm$Group == comp1),]$FAvalue - plot1.y <- df_lm[which(df_lm$Group == comp1),]$NegLGI + plot1.x <- df_mem[which(df_mem$Group == comp1),]$FAvalue + plot1.y <- df_mem[which(df_mem$Group == comp1),]$MemScore - plot2.x <- df_lm[which(df_lm$Group == comp2),]$FAvalue - plot2.y <- df_lm[which(df_lm$Group == comp2),]$NegLGI + plot2.x <- df_mem[which(df_mem$Group == comp2),]$FAvalue + plot2.y <- df_mem[which(df_mem$Group == comp2),]$MemScore h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") png(filename = paste0( - plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, ".png" - ), + plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".png" + ), width = 8, height = 4, units = "in", res = 600 ) - par(mfrow=c(1,2), oma=c(0,0,2,0), family="Times New Roman") + par(mfrow=c(1,2), + oma=c(0,0,2,0), + family="Times New Roman" + ) plot(plot1.x, plot1.y, xlab = x_title, - ylab = "Neg LGI", - ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI)), + ylab = mem, + ylim = c(min(df_mem$MemScore), max(df_mem$MemScore)), main = h_labels[1]) abline(lm(plot1.y ~ plot1.x)) @@ -1044,209 +952,55 @@ func_stat_lm <- function(df_lm, tract, gType, h_str, comp){ xlab = x_title, ylab = "", main = h_labels[2], - ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI))) + ylim = c(min(df_mem$MemScore), max(df_mem$MemScore))) abline(lm(plot2.y ~ plot2.x)) mtext(h_title, outer = T, cex = 1.5) dev.off() par(mfrow=c(1,1)) + # } } } -func_stat_lm_new <- function(df_lm, tract, gType, h_str, comp){ + + +### Work +# Two analyses (grouping types): +# 1) Con vs Anx +# 2) Con vs GAD vs SAD +for(gType in groupType){ + + # make/get data, assign factor + dataFile <- paste0(dataDir, "Master_dataframe_G", gType,".csv") - ### --- Notes: - # - # Conduct linear model, test lines - # then make plots. + if( ! file.exists(dataFile)){ + func_df_master(gType) + } - # "NeuLGI", "NeuLDI", - memList <- c("NegLGI", "NegLDI") + df_afq <- read.csv(dataFile) + df_afq$Group <- factor(df_afq$Group) + df_afq$Sex <- factor(df_afq$Sex) - for(mem in memList){ - - ind_mem <- grep(mem, colnames(df_lm)) + # # Check Memory behavior + # func_stat_mem() + + for(tract in tractList){ - df_mem <- as.data.frame(matrix(NA, nrow = dim(df_lm)[1], ncol=4)) - colnames(df_mem) <- c("Comp", "FAvalue", "Group", "MemScore") + # subset df_afq for tract + df_tract <- df_afq[which(df_afq$tractID == tract), ] + df_tract$dti_fa <- round(df_tract$dti_fa, 3) - df_mem$Comp <- df_lm$Comp - df_mem$FAvalue <- df_lm$FAvalue - df_mem$Group <- df_lm$Group - df_mem$MemScore <- df_lm[,ind_mem] + # run gam, plot + gamFile <- paste0(dataDir, "G", gType, "_gam_", tract, ".Rda") - fit.int <- lm(MemScore ~ FAvalue*Group, data = df_mem) + if( ! file.exists(gamFile)){ + h_gam <- func_stat_gam(tract, df_tract, gType) + saveRDS(h_gam, file=gamFile) + rm(h_gam) + } - capture.output( - summary(fit.int), - file = paste0(statsDir_lm, - "Stats_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" - ) - ) - capture.output( - anova(fit.int), - file = paste0(statsDir_lm, - "Stats_AN-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" - ) - ) - - # plot if group diff - # anova_stat <- anova(fit.int) - # if(anova_stat$`Pr(>F)`[2] < 0.05){ - - comp1 <- substr(comp, start=1, stop=1) - comp2 <- substr(comp, start=2, stop=2) - - if(gType == 1){ - h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) - - }else if(gType == 2){ - h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) - } - - h_tract <- func_switch_name(tract) - # h_insert <- paste(h_str, h_tract) - # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") - - # p <- ggplot(df_lm) + - # aes(x=FAvalue, y=NegLGI, color=Group) + - # geom_point(aes(color=Group)) + - # geom_smooth(method = "lm") + - # ggtitle(h_title) + - # ylab("Neg LGI") + - # xlab("FA value") - # - # p + scale_color_manual( - # values = h_cols, - # breaks = h_breaks, - # labels = h_labels - # ) - # - # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) - - # --- update, plot separately - plot1.x <- df_mem[which(df_mem$Group == comp1),]$FAvalue - plot1.y <- df_mem[which(df_mem$Group == comp1),]$MemScore - - plot2.x <- df_mem[which(df_mem$Group == comp2),]$FAvalue - plot2.y <- df_mem[which(df_mem$Group == comp2),]$MemScore - - h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") - x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") - - png(filename = paste0( - plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".png" - ), - width = 8, - height = 4, - units = "in", - res = 600 - ) - - par(mfrow=c(1,2), - oma=c(0,0,2,0), - family="Times New Roman" - ) - - plot(plot1.x, plot1.y, - xlab = x_title, - ylab = mem, - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore)), - main = h_labels[1]) - abline(lm(plot1.y ~ plot1.x)) - - plot(plot2.x, plot2.y, - xlab = x_title, - ylab = "", - main = h_labels[2], - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore))) - abline(lm(plot2.y ~ plot2.x)) - - mtext(h_title, outer = T, cex = 1.5) - dev.off() - par(mfrow=c(1,1)) - # } - } -} - - -func_df_avg_new <- function(df_tract, node_list, g_type, avg_max){ - - subjList <- unique(df_tract$subjectID) - df_out <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=8)) - colnames(df_out) <- c("Subj", "FAvalue", "Pars6", "Group", - "NeuLGI", "NeuLDI","NegLGI", "NegLDI") - df_out$Subj <- subjList - - for(subj in subjList){ - - ind_out <- which(df_out$Subj == subj) - df_subj <- df_tract[which(df_tract$subjectID == subj),] - - df_out[ind_out,]$Pars6 <- df_subj[1,]$Pars6 - df_out[ind_out,]$Group <- df_subj[1,]$Group - df_out[ind_out,]$NeuLGI <- df_subj[1,]$NeuLGI - df_out[ind_out,]$NeuLDI <- df_subj[1,]$NeuLDI - df_out[ind_out,]$NegLGI <- df_subj[1,]$NegLGI - df_out[ind_out,]$NegLDI <- df_subj[1,]$NegLDI - - if(avg_max == "avg"){ - df_node <- subset(df_subj, nodeID %in% node_list) - df_out[ind_out,]$FAvalue <- round(mean(df_node$dti_fa), 4) - }else if(avg_max == "max"){ - ind_max <- which(df_subj$nodeID == node_list) - df_out[ind_out,]$FAvalue <- round(df_subj[ind_max,]$dti_fa, 4) - } - } - return(df_out) -} - - - - -### Work -# Two analyses (grouping types): -# 1) Con vs Anx -# 2) Con vs GAD vs SAD -for(gType in groupType){ - - # make/get data, assign factor - dataFile <- paste0(dataDir, "Master_dataframe_G", gType,".csv") - - if( ! file.exists(dataFile)){ - func_df_master(gType) - } - - df_afq <- read.csv(dataFile) - df_afq$Group <- factor(df_afq$Group) - df_afq$Sex <- factor(df_afq$Sex) - - # # Check Memory behavior - # func_stat_mem() - - for(tract in tractList){ - - # subset df_afq for tract - df_tract <- df_afq[which(df_afq$tractID == tract), ] - df_tract$dti_fa <- round(df_tract$dti_fa, 3) - - # run gam, plot - gamFile <- paste0(dataDir, "G", gType, "_gam_", tract, ".Rda") - - if( ! file.exists(gamFile)){ - h_gam <- func_stat_gam(tract, df_tract, gType) - saveRDS(h_gam, file=gamFile) - rm(h_gam) - } - - gam_model <- readRDS(gamFile) - func_plot_gam(gam_model, tract, gType, df_tract) + gam_model <- readRDS(gamFile) + func_plot_gam(gam_model, tract, gType, df_tract) # determine group differences nodeList <- func_stat_diff(gam_model, tract, gType) @@ -1254,7 +1008,12 @@ for(gType in groupType){ max_nodeList <- nodeList[[2]] # avg lm - + df_avg <- func_mkdf_lm(df_tract, avg_nodeList, gType, "Avg") + + + + # max lm + df_max <- func_mkdf_lm(df_tract, max_nodeList, gType, "Max") @@ -1280,6 +1039,7 @@ for(gType in groupType){ + # # For working out df construction syntax # # df_adis <- read.delim(paste0(privateDir, "emuR01_adis.csv"), sep = ",", header=T) @@ -1305,13 +1065,13 @@ for(gType in groupType){ # } # # write.csv(df_practice, file="~/Desktop/dana_table.csv", quote = F, row.names=F, col.names = T) # -# g_type <- 2 +# gType <- 2 # df_practice$Group <- NA # for(i in 1:dim(df_practice)[1]){ # # ind_adis <- which(df_adis$Participant.ID == df_practice[i,]$Subject) # -# if(g_type == 1){ +# if(gType == 1){ # h_search <- c("Anxiety", "Phobia") # if(sum(grep(paste(h_search, collapse = "|"), df_adis[ind_adis,])) != 0){ # df_practice[i,]$Group <- "Anx" @@ -1319,7 +1079,7 @@ for(gType in groupType){ # df_practice[i,]$Group <- "Con" # }else{df_practice[i,]$Group <- "Excl"} # -# }else if(g_type == 2){ +# }else if(gType == 2){ # # h_search <- c("Separation", "Social") # @@ -1343,4 +1103,338 @@ for(gType in groupType){ +# Functions - old LM +func_df_avg <- function(comp, df_tract, df_diff){ + + ### --- Notes: + # + # Make a dataframe of averaged FA + # values from all regions that differ + # between two splines + + grpA <- as.numeric(substr(comp, start=1, stop=1)) + grpB <- as.numeric(substr(comp, start=2, stop=2)) + df_comp <- df_diff[which(df_diff$Comparison == comp),] + + # make dataframe + subjList <- unique( + df_tract[which( + df_tract$Group == grpA | df_tract$Group == grpB + ),]$subjectID + ) + + df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) + colnames(df_lm) <- c("Comp", "Subj", "FAvalue", + "Pars6", "Group", + "NeuLGI", "NeuLDI", + "NegLGI", "NegLDI") + df_lm$Subj <- subjList + + for(subj in subjList){ + h_mean <- vector() + for(i in 1:dim(df_comp)[1]){ + + h_start <- which( + df_tract$subjectID == subj & + df_tract$nodeID == df_comp[i,]$Start + ) + + h_end <- which( + df_tract$subjectID == subj & + df_tract$nodeID == df_comp[i,]$End + ) + + h_mean <- c(h_mean, mean(df_tract[h_start:h_end,]$dti_fa)) + } + + ind_out <- which(df_lm$Subj == subj) + df_lm[ind_out,]$FAvalue <- round(mean(h_mean), 4) + + ind_subj <- which(df_tract$subjectID == subj)[1] + df_lm[ind_out,]$Pars6 <- df_tract[ind_subj,]$Pars6 + df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_subj,]$Group) - 1 + + df_lm[ind_out,]$NeuLGI <- df_tract[ind_subj,]$NeuLGI + df_lm[ind_out,]$NeuLDI <- df_tract[ind_subj,]$NeuLDI + df_lm[ind_out,]$NegLGI <- df_tract[ind_subj,]$NegLGI + df_lm[ind_out,]$NegLDI <- df_tract[ind_subj,]$NegLDI + + df_lm[ind_out,]$Comp <- comp + } + df_lm$Group <- factor(df_lm$Group) + return(df_lm) +} + +func_df_max <- function(comp, df_tract, df_max){ + + ### --- Notes: + # + # make a dataframe of FA values + # from single region showing showing + # maximal A-B difference + + grpA <- as.numeric(substr(comp, start=1, stop=1)) + grpB <- as.numeric(substr(comp, start=2, stop=2)) + node <- df_max[which(df_max$Comparison == comp),]$Node + + # make dataframe + subjList <- unique( + df_tract[which( + df_tract$Group == grpA | df_tract$Group == grpB + ),]$subjectID + ) + df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) + colnames(df_lm) <- c("Comp","Subj", "FAvalue", + "Pars6", "Group", + "NeuLGI", "NeuLDI", + "NegLGI", "NegLDI") + df_lm$Subj <- subjList + + for(subj in subjList){ + + ind_data <- which(df_tract$subjectID == subj & df_tract$nodeID == node) + ind_out <- which(df_lm$Subj == subj) + + df_lm[ind_out,]$Comp <- comp + df_lm[ind_out,]$FAvalue <- df_tract[ind_data,]$dti_fa + df_lm[ind_out,]$Pars6 <- df_tract[ind_data,]$Pars6 + df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_data,]$Group)-1 + df_lm[ind_out,]$NeuLGI <- df_tract[ind_data,]$NeuLGI + df_lm[ind_out,]$NeuLDI <- df_tract[ind_data,]$NeuLDI + df_lm[ind_out,]$NegLGI <- df_tract[ind_data,]$NegLGI + df_lm[ind_out,]$NegLDI <- df_tract[ind_data,]$NegLDI + } + df_lm$Group <- factor(df_lm$Group) + return(df_lm) +} + +func_stat_lm <- function(df_lm, tract, gType, h_str, comp){ + + ### --- Notes: + # + # Conduct linear model, test lines + # then make plots. + + fit.int <- lm(NegLGI ~ FAvalue*Group, data = df_lm) + + capture.output( + summary(fit.int), + file = paste0(statsDir_lm, + "Stats_LM-", h_str, "_", tract, "_G", gType, ".txt" + ) + ) + capture.output( + anova(fit.int), + file = paste0(statsDir_lm, + "Stats_AN-", h_str, "_", tract, "_G", gType, ".txt" + ) + ) + + # plot if group diff + anova_stat <- anova(fit.int) + if(anova_stat$`Pr(>F)`[2] < 0.05){ + + comp1 <- substr(comp, start=1, stop=1) + comp2 <- substr(comp, start=2, stop=2) + + if(gType == 1){ + h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) + + }else if(gType == 2){ + h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) + } + + h_tract <- func_switch_name(tract) + # h_insert <- paste(h_str, h_tract) + # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") + + # p <- ggplot(df_lm) + + # aes(x=FAvalue, y=NegLGI, color=Group) + + # geom_point(aes(color=Group)) + + # geom_smooth(method = "lm") + + # ggtitle(h_title) + + # ylab("Neg LGI") + + # xlab("FA value") + # + # p + scale_color_manual( + # values = h_cols, + # breaks = h_breaks, + # labels = h_labels + # ) + # + # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) + + # --- update, plot separately + plot1.x <- df_lm[which(df_lm$Group == comp1),]$FAvalue + plot1.y <- df_lm[which(df_lm$Group == comp1),]$NegLGI + + plot2.x <- df_lm[which(df_lm$Group == comp2),]$FAvalue + plot2.y <- df_lm[which(df_lm$Group == comp2),]$NegLGI + + h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") + x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") + + png(filename = paste0( + plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, ".png" + ), + width = 8, + height = 4, + units = "in", + res = 600 + ) + + par(mfrow=c(1,2), oma=c(0,0,2,0), family="Times New Roman") + + plot(plot1.x, plot1.y, + xlab = x_title, + ylab = "Neg LGI", + ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI)), + main = h_labels[1]) + abline(lm(plot1.y ~ plot1.x)) + + plot(plot2.x, plot2.y, + xlab = x_title, + ylab = "", + main = h_labels[2], + ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI))) + abline(lm(plot2.y ~ plot2.x)) + + mtext(h_title, outer = T, cex = 1.5) + dev.off() + par(mfrow=c(1,1)) + } +} + +func_stat_lm_new <- function(df_lm, tract, gType, h_str, comp){ + + ### --- Notes: + # + # Conduct linear model, test lines + # then make plots. + + # "NeuLGI", "NeuLDI", + memList <- c("NegLGI", "NegLDI") + + for(mem in memList){ + + ind_mem <- grep(mem, colnames(df_lm)) + + df_mem <- as.data.frame(matrix(NA, nrow = dim(df_lm)[1], ncol=4)) + colnames(df_mem) <- c("Comp", "FAvalue", "Group", "MemScore") + + df_mem$Comp <- df_lm$Comp + df_mem$FAvalue <- df_lm$FAvalue + df_mem$Group <- df_lm$Group + df_mem$MemScore <- df_lm[,ind_mem] + + fit.int <- lm(MemScore ~ FAvalue*Group, data = df_mem) + + capture.output( + summary(fit.int), + file = paste0(statsDir_lm, + "Stats_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" + ) + ) + capture.output( + anova(fit.int), + file = paste0(statsDir_lm, + "Stats_AN-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" + ) + ) + + # plot if group diff + # anova_stat <- anova(fit.int) + # if(anova_stat$`Pr(>F)`[2] < 0.05){ + + comp1 <- substr(comp, start=1, stop=1) + comp2 <- substr(comp, start=2, stop=2) + + if(gType == 1){ + h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) + + }else if(gType == 2){ + h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) + names(h_cols) <- c(comp1, comp2) + h_breaks <- c(comp1, comp2) + h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) + } + + h_tract <- func_switch_name(tract) + # h_insert <- paste(h_str, h_tract) + # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") + + # p <- ggplot(df_lm) + + # aes(x=FAvalue, y=NegLGI, color=Group) + + # geom_point(aes(color=Group)) + + # geom_smooth(method = "lm") + + # ggtitle(h_title) + + # ylab("Neg LGI") + + # xlab("FA value") + # + # p + scale_color_manual( + # values = h_cols, + # breaks = h_breaks, + # labels = h_labels + # ) + # + # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) + + # --- update, plot separately + plot1.x <- df_mem[which(df_mem$Group == comp1),]$FAvalue + plot1.y <- df_mem[which(df_mem$Group == comp1),]$MemScore + + plot2.x <- df_mem[which(df_mem$Group == comp2),]$FAvalue + plot2.y <- df_mem[which(df_mem$Group == comp2),]$MemScore + + h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") + x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") + + png(filename = paste0( + plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".png" + ), + width = 8, + height = 4, + units = "in", + res = 600 + ) + + par(mfrow=c(1,2), + oma=c(0,0,2,0), + family="Times New Roman" + ) + + plot(plot1.x, plot1.y, + xlab = x_title, + ylab = mem, + ylim = c(min(df_mem$MemScore), max(df_mem$MemScore)), + main = h_labels[1]) + abline(lm(plot1.y ~ plot1.x)) + + plot(plot2.x, plot2.y, + xlab = x_title, + ylab = "", + main = h_labels[2], + ylim = c(min(df_mem$MemScore), max(df_mem$MemScore))) + abline(lm(plot2.y ~ plot2.x)) + + mtext(h_title, outer = T, cex = 1.5) + dev.off() + par(mfrow=c(1,1)) + # } + } +} + + + + + From 6b614d48215df60d021d8a387e6acc22787fc331 Mon Sep 17 00:00:00 2001 From: Nathan Muncy Date: Fri, 30 Apr 2021 17:21:55 -0400 Subject: [PATCH 7/7] refactored --- afq_step3_stats.R | 653 +++++++++++----------------------------------- 1 file changed, 153 insertions(+), 500 deletions(-) diff --git a/afq_step3_stats.R b/afq_step3_stats.R index 01291de..f982d64 100644 --- a/afq_step3_stats.R +++ b/afq_step3_stats.R @@ -11,19 +11,12 @@ library("dplyr") library("lme4") -### TODO: -# -# 1) Combine plot_diff functions -# -# 2) find group diffs from plot_diff when -# group n=3 - - ### Set Up # Orienting paths - set globally dataDir <- "/Users/nmuncy/Projects/emu_AFQ/analyses/" privateDir <- "/Users/nmuncy/Projects/emu_private/" + plotDir_gam <- paste0(dataDir, "plots_gam/") statsDir_gam <- paste0(dataDir, "stats_gam/") plotDir_lm <- paste0(dataDir, "plots_lm/") @@ -811,6 +804,11 @@ func_stat_diff <- function(model, tract, gType){ } } + if(length(diffList) == 0){ + return(NA) + stop + } + # find max diff if(gType == 1){ @@ -835,6 +833,12 @@ func_stat_diff <- function(model, tract, gType){ # Functions - LM func_mkdf_lm <- function(df_tract, node_list, gType, avg_max){ + ### --- Notes: + # + # Returns a dataframe for linear models + # with either average FA or max FA + # difference + subjList <- unique(df_tract$subjectID) df_out <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=8)) colnames(df_out) <- c("Subj", "FAvalue", "Pars6", "Group", @@ -864,15 +868,132 @@ func_mkdf_lm <- function(df_tract, node_list, gType, avg_max){ return(df_out) } +func_plot_lm1 <- function(df_plot, avg_max, mem){ + + ### --- Notes: + # + # Plots A and B + + # plot if group diff + h_labels <- c(func_switch_g1("0")[[2]][1], func_switch_g1("1")[[2]][1]) + h_tract <- func_switch_name(tract) + + plot1.x <- df_plot[which(df_plot$Group == 0),]$FAvalue + plot1.y <- df_plot[which(df_plot$Group == 0),]$MemScore + + plot2.x <- df_plot[which(df_plot$Group == 1),]$FAvalue + plot2.y <- df_plot[which(df_plot$Group == 1),]$MemScore + + h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") + x_title <- ifelse(avg_max == "Avg", "Mean FA", "Max FA") + + png(filename = paste0( + plotDir_lm, "Plot_LM-", avg_max, "_", tract, "_G", gType, "_", mem, ".png" + ), + width = 8, + height = 4, + units = "in", + res = 600 + ) + + par(mfrow=c(1,2), + oma=c(0,0,2,0), + family="Times New Roman" + ) + + plot(plot1.x, plot1.y, + xlab = x_title, + ylab = mem, + ylim = c(min(df_plot$MemScore), max(df_plot$MemScore)), + main = h_labels[1]) + abline(lm(plot1.y ~ plot1.x)) + + plot(plot2.x, plot2.y, + xlab = x_title, + ylab = "", + main = h_labels[2], + ylim = c(min(df_plot$MemScore), max(df_plot$MemScore))) + abline(lm(plot2.y ~ plot2.x)) + + mtext(h_title, outer = T, cex = 1.5) + dev.off() + par(mfrow=c(1,1)) +} + +func_plot_lm2 <- function(df_plot, avg_max, mem){ + + ### --- Notes: + # + # Plots A, B, and C + + # plot if group diff + h_labels <- c( + func_switch_g2("0")[[2]][1], + func_switch_g2("1")[[2]][1], + func_switch_g2("2")[[2]][1] + ) + h_tract <- func_switch_name(tract) + + plot1.x <- df_plot[which(df_plot$Group == 0),]$FAvalue + plot1.y <- df_plot[which(df_plot$Group == 0),]$MemScore + + plot2.x <- df_plot[which(df_plot$Group == 1),]$FAvalue + plot2.y <- df_plot[which(df_plot$Group == 1),]$MemScore + + plot3.x <- df_plot[which(df_plot$Group == 2),]$FAvalue + plot3.y <- df_plot[which(df_plot$Group == 2),]$MemScore + + h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") + x_title <- ifelse(avg_max == "Avg", "Mean FA", "Max FA") + + png(filename = paste0( + plotDir_lm, "Plot_LM-", avg_max, "_", tract, "_G", gType, "_", mem, ".png" + ), + width = 8, + height = 4, + units = "in", + res = 600 + ) + + par(mfrow=c(1,3), + oma=c(0,0,2,0), + family="Times New Roman" + ) + + plot(plot1.x, plot1.y, + xlab = x_title, + ylab = mem, + ylim = c(min(df_plot$MemScore), max(df_plot$MemScore)), + main = h_labels[1]) + abline(lm(plot1.y ~ plot1.x)) + + plot(plot2.x, plot2.y, + xlab = x_title, + ylab = "", + main = h_labels[2], + ylim = c(min(df_plot$MemScore), max(df_plot$MemScore))) + abline(lm(plot2.y ~ plot2.x)) + + plot(plot3.x, plot3.y, + xlab = x_title, + ylab = "", + main = h_labels[3], + ylim = c(min(df_plot$MemScore), max(df_plot$MemScore))) + abline(lm(plot3.y ~ plot3.x)) + + mtext(h_title, outer = T, cex = 1.5) + dev.off() + par(mfrow=c(1,1)) +} + func_stat_lm <- function(df_lm, tract, gType, avg_max){ ### --- Notes: # - # Conduct linear model, test lines - # then make plots. + # Conduct linear model for list of mem scores + # then make plots. - # "NeuLGI", "NeuLDI", - memList <- c("NegLGI", "NegLDI") + memList <- c("NeuLGI", "NeuLDI","NegLGI", "NegLDI") for(mem in memList){ @@ -890,84 +1011,32 @@ func_stat_lm <- function(df_lm, tract, gType, avg_max){ capture.output( summary(fit.int), file = paste0(statsDir_lm, - "Stats_LM-", h_str, "_", tract, "_G", gType, "_", mem, ".txt" + "Stats_LM-", avg_max, "_", tract, "_G", gType, "_", mem, ".txt" ) ) capture.output( anova(fit.int), file = paste0(statsDir_lm, - "Stats_AN-", h_str, "_", tract, "_G", gType, "_", mem, ".txt" + "Stats_AN-", avg_max, "_", tract, "_G", gType, "_", mem, ".txt" ) ) - # # plot if group diff - # if(gType == 1){ - # h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) - # names(h_cols) <- c(comp1, comp2) - # h_breaks <- c(comp1, comp2) - # h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) - # - # }else if(gType == 2){ - # h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) - # names(h_cols) <- c(comp1, comp2) - # h_breaks <- c(comp1, comp2) - # h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) - # } - - h_tract <- func_switch_name(tract) - - - # --- update, plot separately - plot1.x <- df_mem[which(df_mem$Group == comp1),]$FAvalue - plot1.y <- df_mem[which(df_mem$Group == comp1),]$MemScore - - plot2.x <- df_mem[which(df_mem$Group == comp2),]$FAvalue - plot2.y <- df_mem[which(df_mem$Group == comp2),]$MemScore - - h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") - x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") - - png(filename = paste0( - plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".png" - ), - width = 8, - height = 4, - units = "in", - res = 600 - ) - - par(mfrow=c(1,2), - oma=c(0,0,2,0), - family="Times New Roman" - ) - - plot(plot1.x, plot1.y, - xlab = x_title, - ylab = mem, - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore)), - main = h_labels[1]) - abline(lm(plot1.y ~ plot1.x)) - - plot(plot2.x, plot2.y, - xlab = x_title, - ylab = "", - main = h_labels[2], - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore))) - abline(lm(plot2.y ~ plot2.x)) - - mtext(h_title, outer = T, cex = 1.5) - dev.off() - par(mfrow=c(1,1)) - # } + if(gType == 1){ + func_plot_lm1(df_mem, avg_max, mem) + }else if(gType == 2){ + func_plot_lm2(df_mem, avg_max, mem) + } } } -### Work -# Two analyses (grouping types): -# 1) Con vs Anx -# 2) Con vs GAD vs SAD +### --- Work: +# +# Two analyses (grouping types): +# 1) Con vs Anx +# 2) Con vs GAD vs SAD + for(gType in groupType){ # make/get data, assign factor @@ -1004,437 +1073,21 @@ for(gType in groupType){ # determine group differences nodeList <- func_stat_diff(gam_model, tract, gType) + + # deal w/no differences + if(is.na(nodeList)){ + next + } avg_nodeList <- nodeList[[1]] max_nodeList <- nodeList[[2]] # avg lm df_avg <- func_mkdf_lm(df_tract, avg_nodeList, gType, "Avg") - - + func_stat_lm(df_avg, tract, gType, "Avg") # max lm df_max <- func_mkdf_lm(df_tract, max_nodeList, gType, "Max") - - - - # compList <- unique(df_diff$Comparison) - # - # # pairwise lm, since plot_diff does pairwise spline tests - # for(comp in compList){ - # - # # predict mem score from dti average diff - # df_lm <- func_df_avg(comp, df_tract, df_diff) - # # func_stat_lm(df_lm, tract, gType, "Avg", comp) - # func_stat_lm_new(df_lm, tract, gType, "Avg", comp) - # - # # predict mem score from dti max diff - # # just rerun plot_diff to get table of max diff - # df_max <- func_max_diff(gam_model, gType) - # df_lm <- func_df_max(comp, df_tract, df_max) - # # func_stat_lm(df_lm, tract, gType, "Max", comp) - # func_stat_lm_new(df_lm, tract, gType, "Max", comp) - # } + func_stat_lm(df_max, tract, gType, "Max") } } - - - -# # For working out df construction syntax -# -# df_adis <- read.delim(paste0(privateDir, "emuR01_adis.csv"), sep = ",", header=T) -# df_pds <- read.delim(paste0(privateDir, "emuR01_pds_latest.csv"), sep = ",", header=T) -# -# subjList <- unique(df_tract$subjectID) -# df_practice <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) -# colnames(df_practice) <- c("Subject", "Age", "Sex", "PDS", "Pars6", "ADIS.1", "ADIS.2", "ADIS.3", "ADIS.4") -# df_practice$Subject <- subjList -# -# for(subj in subjList){ -# -# ind_subj <- grep(subj, df_tract$subjectID)[1] -# ind_adis <- grep(subj, df_adis$Participant.ID) -# ind_pds <- grep(subj, df_pds$emu_study_id) -# ind_out <- grep(subj, df_practice$Subject) -# -# df_practice[ind_out,]$Age <- df_tract[ind_subj,]$Age -# df_practice[ind_out,]$Sex <- df_pds[ind_pds,]$pinf_gender -# df_practice[ind_out,]$PDS <- as.integer(df_tract[ind_subj,]$PDS) -# df_practice[ind_out,]$Pars6 <- df_tract[ind_subj,]$Pars6 -# df_practice[ind_out,6:9] <- df_adis[ind_adis,3:6] -# } -# # write.csv(df_practice, file="~/Desktop/dana_table.csv", quote = F, row.names=F, col.names = T) -# -# gType <- 2 -# df_practice$Group <- NA -# for(i in 1:dim(df_practice)[1]){ -# -# ind_adis <- which(df_adis$Participant.ID == df_practice[i,]$Subject) -# -# if(gType == 1){ -# h_search <- c("Anxiety", "Phobia") -# if(sum(grep(paste(h_search, collapse = "|"), df_adis[ind_adis,])) != 0){ -# df_practice[i,]$Group <- "Anx" -# }else if(length(grep("None", df_adis[ind_adis,])) != 0){ -# df_practice[i,]$Group <- "Con" -# }else{df_practice[i,]$Group <- "Excl"} -# -# }else if(gType == 2){ -# -# h_search <- c("Separation", "Social") -# -# # GAD in dx.1, or in dx.2 but SAD not dx.1 -# if( -# grepl("Gen", df_adis[ind_adis,]$Diagnosis.1) == T | -# (sum(grep("Gen", df_adis[ind_adis,])) != 0 & -# sum(grep(paste(h_search, collapse = "|"), df_adis[ind_adis,])) == 0) -# ){ -# df_practice[i,]$Group <- "GAD" -# }else if(sum(grep(paste(h_search, collapse = "|"), df_adis[ind_adis,])) != 0){ -# df_practice[i,]$Group <- "SAD" -# }else if(length(grep("None", df_adis[ind_adis,])) != 0){ -# df_practice[i,]$Group <- "Con" -# }else{df_practice[i,]$Group <- "Excl"} -# -# } -# } - - - - - -# Functions - old LM -func_df_avg <- function(comp, df_tract, df_diff){ - - ### --- Notes: - # - # Make a dataframe of averaged FA - # values from all regions that differ - # between two splines - - grpA <- as.numeric(substr(comp, start=1, stop=1)) - grpB <- as.numeric(substr(comp, start=2, stop=2)) - df_comp <- df_diff[which(df_diff$Comparison == comp),] - - # make dataframe - subjList <- unique( - df_tract[which( - df_tract$Group == grpA | df_tract$Group == grpB - ),]$subjectID - ) - - df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) - colnames(df_lm) <- c("Comp", "Subj", "FAvalue", - "Pars6", "Group", - "NeuLGI", "NeuLDI", - "NegLGI", "NegLDI") - df_lm$Subj <- subjList - - for(subj in subjList){ - h_mean <- vector() - for(i in 1:dim(df_comp)[1]){ - - h_start <- which( - df_tract$subjectID == subj & - df_tract$nodeID == df_comp[i,]$Start - ) - - h_end <- which( - df_tract$subjectID == subj & - df_tract$nodeID == df_comp[i,]$End - ) - - h_mean <- c(h_mean, mean(df_tract[h_start:h_end,]$dti_fa)) - } - - ind_out <- which(df_lm$Subj == subj) - df_lm[ind_out,]$FAvalue <- round(mean(h_mean), 4) - - ind_subj <- which(df_tract$subjectID == subj)[1] - df_lm[ind_out,]$Pars6 <- df_tract[ind_subj,]$Pars6 - df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_subj,]$Group) - 1 - - df_lm[ind_out,]$NeuLGI <- df_tract[ind_subj,]$NeuLGI - df_lm[ind_out,]$NeuLDI <- df_tract[ind_subj,]$NeuLDI - df_lm[ind_out,]$NegLGI <- df_tract[ind_subj,]$NegLGI - df_lm[ind_out,]$NegLDI <- df_tract[ind_subj,]$NegLDI - - df_lm[ind_out,]$Comp <- comp - } - df_lm$Group <- factor(df_lm$Group) - return(df_lm) -} - -func_df_max <- function(comp, df_tract, df_max){ - - ### --- Notes: - # - # make a dataframe of FA values - # from single region showing showing - # maximal A-B difference - - grpA <- as.numeric(substr(comp, start=1, stop=1)) - grpB <- as.numeric(substr(comp, start=2, stop=2)) - node <- df_max[which(df_max$Comparison == comp),]$Node - - # make dataframe - subjList <- unique( - df_tract[which( - df_tract$Group == grpA | df_tract$Group == grpB - ),]$subjectID - ) - df_lm <- as.data.frame(matrix(NA, nrow=length(subjList), ncol=9)) - colnames(df_lm) <- c("Comp","Subj", "FAvalue", - "Pars6", "Group", - "NeuLGI", "NeuLDI", - "NegLGI", "NegLDI") - df_lm$Subj <- subjList - - for(subj in subjList){ - - ind_data <- which(df_tract$subjectID == subj & df_tract$nodeID == node) - ind_out <- which(df_lm$Subj == subj) - - df_lm[ind_out,]$Comp <- comp - df_lm[ind_out,]$FAvalue <- df_tract[ind_data,]$dti_fa - df_lm[ind_out,]$Pars6 <- df_tract[ind_data,]$Pars6 - df_lm[ind_out,]$Group <- as.numeric(df_tract[ind_data,]$Group)-1 - df_lm[ind_out,]$NeuLGI <- df_tract[ind_data,]$NeuLGI - df_lm[ind_out,]$NeuLDI <- df_tract[ind_data,]$NeuLDI - df_lm[ind_out,]$NegLGI <- df_tract[ind_data,]$NegLGI - df_lm[ind_out,]$NegLDI <- df_tract[ind_data,]$NegLDI - } - df_lm$Group <- factor(df_lm$Group) - return(df_lm) -} - -func_stat_lm <- function(df_lm, tract, gType, h_str, comp){ - - ### --- Notes: - # - # Conduct linear model, test lines - # then make plots. - - fit.int <- lm(NegLGI ~ FAvalue*Group, data = df_lm) - - capture.output( - summary(fit.int), - file = paste0(statsDir_lm, - "Stats_LM-", h_str, "_", tract, "_G", gType, ".txt" - ) - ) - capture.output( - anova(fit.int), - file = paste0(statsDir_lm, - "Stats_AN-", h_str, "_", tract, "_G", gType, ".txt" - ) - ) - - # plot if group diff - anova_stat <- anova(fit.int) - if(anova_stat$`Pr(>F)`[2] < 0.05){ - - comp1 <- substr(comp, start=1, stop=1) - comp2 <- substr(comp, start=2, stop=2) - - if(gType == 1){ - h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) - - }else if(gType == 2){ - h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) - } - - h_tract <- func_switch_name(tract) - # h_insert <- paste(h_str, h_tract) - # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") - - # p <- ggplot(df_lm) + - # aes(x=FAvalue, y=NegLGI, color=Group) + - # geom_point(aes(color=Group)) + - # geom_smooth(method = "lm") + - # ggtitle(h_title) + - # ylab("Neg LGI") + - # xlab("FA value") - # - # p + scale_color_manual( - # values = h_cols, - # breaks = h_breaks, - # labels = h_labels - # ) - # - # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) - - # --- update, plot separately - plot1.x <- df_lm[which(df_lm$Group == comp1),]$FAvalue - plot1.y <- df_lm[which(df_lm$Group == comp1),]$NegLGI - - plot2.x <- df_lm[which(df_lm$Group == comp2),]$FAvalue - plot2.y <- df_lm[which(df_lm$Group == comp2),]$NegLGI - - h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") - x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") - - png(filename = paste0( - plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, ".png" - ), - width = 8, - height = 4, - units = "in", - res = 600 - ) - - par(mfrow=c(1,2), oma=c(0,0,2,0), family="Times New Roman") - - plot(plot1.x, plot1.y, - xlab = x_title, - ylab = "Neg LGI", - ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI)), - main = h_labels[1]) - abline(lm(plot1.y ~ plot1.x)) - - plot(plot2.x, plot2.y, - xlab = x_title, - ylab = "", - main = h_labels[2], - ylim = c(min(df_lm$NegLGI), max(df_lm$NegLGI))) - abline(lm(plot2.y ~ plot2.x)) - - mtext(h_title, outer = T, cex = 1.5) - dev.off() - par(mfrow=c(1,1)) - } -} - -func_stat_lm_new <- function(df_lm, tract, gType, h_str, comp){ - - ### --- Notes: - # - # Conduct linear model, test lines - # then make plots. - - # "NeuLGI", "NeuLDI", - memList <- c("NegLGI", "NegLDI") - - for(mem in memList){ - - ind_mem <- grep(mem, colnames(df_lm)) - - df_mem <- as.data.frame(matrix(NA, nrow = dim(df_lm)[1], ncol=4)) - colnames(df_mem) <- c("Comp", "FAvalue", "Group", "MemScore") - - df_mem$Comp <- df_lm$Comp - df_mem$FAvalue <- df_lm$FAvalue - df_mem$Group <- df_lm$Group - df_mem$MemScore <- df_lm[,ind_mem] - - fit.int <- lm(MemScore ~ FAvalue*Group, data = df_mem) - - capture.output( - summary(fit.int), - file = paste0(statsDir_lm, - "Stats_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" - ) - ) - capture.output( - anova(fit.int), - file = paste0(statsDir_lm, - "Stats_AN-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".txt" - ) - ) - - # plot if group diff - # anova_stat <- anova(fit.int) - # if(anova_stat$`Pr(>F)`[2] < 0.05){ - - comp1 <- substr(comp, start=1, stop=1) - comp2 <- substr(comp, start=2, stop=2) - - if(gType == 1){ - h_cols = c(func_switch_g1(comp1)[[1]][1], func_switch_g1(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g1(comp1)[[2]][1], func_switch_g1(comp2)[[2]][1]) - - }else if(gType == 2){ - h_cols = c(func_switch_g2(comp1)[[1]][1], func_switch_g2(comp2)[[1]][1]) - names(h_cols) <- c(comp1, comp2) - h_breaks <- c(comp1, comp2) - h_labels <- c(func_switch_g2(comp1)[[2]][1], func_switch_g2(comp2)[[2]][1]) - } - - h_tract <- func_switch_name(tract) - # h_insert <- paste(h_str, h_tract) - # h_title <- paste0("Memory Index Predicted by ", h_insert, " Spline Difference") - - # p <- ggplot(df_lm) + - # aes(x=FAvalue, y=NegLGI, color=Group) + - # geom_point(aes(color=Group)) + - # geom_smooth(method = "lm") + - # ggtitle(h_title) + - # ylab("Neg LGI") + - # xlab("FA value") - # - # p + scale_color_manual( - # values = h_cols, - # breaks = h_breaks, - # labels = h_labels - # ) - # - # ggsave(paste0(plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, ".png")) - - # --- update, plot separately - plot1.x <- df_mem[which(df_mem$Group == comp1),]$FAvalue - plot1.y <- df_mem[which(df_mem$Group == comp1),]$MemScore - - plot2.x <- df_mem[which(df_mem$Group == comp2),]$FAvalue - plot2.y <- df_mem[which(df_mem$Group == comp2),]$MemScore - - h_title <- paste(h_tract, "Spline Differences Predicting Memory Performance") - x_title <- ifelse(h_str == "Avg", "Mean FA", "Max FA") - - png(filename = paste0( - plotDir_lm, "Plot_LM-", h_str, "_", tract, "_G", gType, "_", comp, "_", mem, ".png" - ), - width = 8, - height = 4, - units = "in", - res = 600 - ) - - par(mfrow=c(1,2), - oma=c(0,0,2,0), - family="Times New Roman" - ) - - plot(plot1.x, plot1.y, - xlab = x_title, - ylab = mem, - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore)), - main = h_labels[1]) - abline(lm(plot1.y ~ plot1.x)) - - plot(plot2.x, plot2.y, - xlab = x_title, - ylab = "", - main = h_labels[2], - ylim = c(min(df_mem$MemScore), max(df_mem$MemScore))) - abline(lm(plot2.y ~ plot2.x)) - - mtext(h_title, outer = T, cex = 1.5) - dev.off() - par(mfrow=c(1,1)) - # } - } -} - - - - - -