# SPR-A data processing space
# JW, last update 3/3/23
#
# Pipeline: (1) combine the four list files and keep single-word reading
# rows, (2) number and collapse word positions into regions, (3) extract
# comprehension accuracy, (4) trim reaction-time outliers, (5) plot mean RT
# by region.
#
# NOTE(review): every file path below is the same truncated literal
# ("/Users/josh/Library/CloudStorage/Box..."). They are reproduced verbatim;
# restore the distinct full paths before running.

library(plyr)
library(dplyr)
library(pbkrtest)
library(data.table)
library(stringr)
library(lme4)
library(lmerTest)
# read_csv()/write_csv() and ggplot()/ggsave() are called below, so their
# packages are attached explicitly instead of relying on session state.
library(readr)
library(ggplot2)

# Note on summarySE: I did not write this function. I would love to give
# credit to the person who wrote this function, but I have long lost track of
# where I found it. Thank you, summarySE writer. I did, however, modify it to
# also calculate IQR, Q1 and Q3.
# NOTE(review): this looks like summarySE() from the "Cookbook for R"
# (Winston Chang) - confirm and credit.
#
# Summarise one column within each combination of grouping columns.
#
# Arguments:
#   data          data frame to summarise
#   measurevar    name (string) of the column to summarise
#   groupvars     character vector of grouping column names
#   na.rm         if TRUE, NAs are ignored in every statistic and in N
#   conf.interval confidence level used for the `ci` half-width
#   .drop         passed through to plyr::ddply()
#
# Returns a data frame with one row per group holding the group columns, N,
# the group mean (stored in a column named after `measurevar`), sd, IQR,
# `Q3.75%`, `Q1.25%`, se and ci. The two quantile column names come from
# combining the Q3/Q1 labels with quantile()'s own "75%"/"25%" names; code
# further down refers to them by exactly those names.
summarySE <- function(data = NULL, measurevar, groupvars = NULL,
                      na.rm = FALSE, conf.interval = .95, .drop = TRUE) {
  # length() that can skip NAs: with na.rm = TRUE they are not counted.
  count_obs <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  # plyr is called through its namespace rather than attached here, so
  # calling this function never alters the search path.
  datac <- plyr::ddply(
    data, groupvars,
    .drop = .drop,
    .fun = function(xx, col) {
      c(
        N = count_obs(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        sd = sd(xx[[col]], na.rm = na.rm),
        IQR = IQR(xx[[col]], na.rm = na.rm),
        Q3 = quantile(xx[[col]], .75, na.rm = na.rm),
        Q1 = quantile(xx[[col]], .25, na.rm = na.rm)
      )
    },
    measurevar
  )

  # Rename the "mean" column to the name of the measured variable.
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Standard error of the mean.
  datac$se <- datac$sd / sqrt(datac$N)

  # Confidence-interval multiplier from the t distribution with df = N - 1:
  # e.g. conf.interval = .95 uses the .975 quantile (two-sided).
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  datac
}

# Add a `Region` column that numbers the rows of each Item within each
# participant (1, 2, 3, ... in row order), then stack the participants.
# NOTE(review): presumably rows are in presentation order and each
# participant sees each Item once, so the number is the word position -
# confirm against the raw export.
number_words_within_item <- function(trials) {
  per_participant <- split(trials, trials$`Participant External Session ID`)
  numbered <- lapply(per_participant, function(one_participant) {
    mutate(one_participant, Region = ave(Item, Item, FUN = seq_along))
  })
  rbindlist(numbered)
}

# Recode word positions into region codes. `scheme` is a list of
# list(code = <region>, positions = <word positions>). Membership is always
# tested against the original `position` vector, so one rule can never
# re-match a value written by an earlier rule. Positions not named in the
# scheme are returned unchanged.
collapse_regions <- function(position, scheme) {
  collapsed <- position
  for (rule in scheme) {
    collapsed[position %in% rule$positions] <- rule$code
  }
  collapsed
}

##### Processing

### main data
List1 <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)
List2 <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)
List3 <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)
List4 <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)

SPR_A_YA <- rbind(List1, List2, List3, List4)
write_csv(SPR_A_YA, "/Users/josh/Library/CloudStorage/Box...")

SPR_A_YA <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...")

# Keep test items with a non-empty Attempt, drop the comprehension-question
# rows, and keep only non-empty single-word responses that do not contain
# "assembled". The "." pattern is a regex matching any character, so those
# two filters remove empty and NA values.
SPR_A_YA <- SPR_A_YA %>%
  filter(str_detect(Item_type, "Test")) %>%
  filter(str_detect(Attempt, ".")) %>%
  filter(!str_detect(`Zone Type`, "response_keyboard_single")) %>%
  filter(str_detect(Response, ".")) %>%
  mutate(N_words = lengths(strsplit(Response, " "))) %>%
  filter(N_words == 1) %>%
  filter(!str_detect(Response, "assembled"))

by_sentence_type <- split(SPR_A_YA, SPR_A_YA$Sentence_type)
DO <- by_sentence_type[["DO"]]
PO <- by_sentence_type[["PO"]]

# Process DO for regions: word positions 1-3, 4-5, 6-7, 8-9 and 10-15 become
# regions 1 to 5.
do_region_scheme <- list(
  list(code = 1, positions = 1:3),
  list(code = 2, positions = 4:5),
  list(code = 3, positions = 6:7),
  list(code = 4, positions = 8:9),
  list(code = 5, positions = 10:15)
)
DO2 <- number_words_within_item(DO)
DO2$Region <- collapse_regions(DO2$Region, do_region_scheme)

# Process PO for regions: same scheme shifted by one word after position 5;
# position 6 gets its own code, 2.5.
po_region_scheme <- list(
  list(code = 1, positions = 1:3),
  list(code = 2, positions = 4:5),
  list(code = 2.5, positions = 6),
  list(code = 3, positions = 7:8),
  list(code = 4, positions = 9:10),
  list(code = 5, positions = 11:16)
)
PO2 <- number_words_within_item(PO)
PO2$Region <- collapse_regions(PO2$Region, po_region_scheme)

SPR_A_YA_r <- rbind(PO2, DO2)
write_csv(SPR_A_YA_r, "/Users/josh/Library/CloudStorage/Box...")

### comprehension accuracy
SPR_YA_comp <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...")

# Keep only the comprehension-question rows of test items and code accuracy
# as 1 (correct) or 0 (incorrect); 9 marks rows where neither rule matched.
SPR_YA_comp <- SPR_YA_comp %>%
  filter(str_detect(Item_type, "Test")) %>%
  filter(str_detect(Attempt, ".")) %>%
  filter(str_detect(`Zone Type`, "response_keyboard_single")) %>%
  mutate(comp_correct = case_when(
    `Zone Type` == "response_keyboard_single" & Correct == 1 ~ 1,
    `Zone Type` == "response_keyboard_single" & Correct == 0 ~ 0,
    TRUE ~ 9
  ))

# NOTE(review): the participant column is `Participant External Session ID`
# (with spaces) in the region processing above but
# Participant_External_Session_ID (underscores) from here on - confirm the
# input files really differ in this column name.
SPR_YA_comp_slim <- select(
  SPR_YA_comp, Participant_External_Session_ID, Item, comp_correct
)
write_csv(SPR_YA_comp_slim, "/Users/josh/Library/CloudStorage/Box...")

##### Trimming & visualization

### start trimming
# read in SPR data
SPR_YA <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...")
# read in comprehension question data
SPR_YA_comp_slim <- read_csv(file = "/Users/josh/Library/CloudStorage/Box...")
# merge comprehension question data
SPR_YA <- merge(
  SPR_YA, SPR_YA_comp_slim,
  by = c("Participant_External_Session_ID", "Item")
)

SPR_YA$Region <- as.factor(SPR_YA$Region)

# Upper outlier cutoff (Q3 + 1.5 * IQR) per participant x region x sentence
# type x condition cell.
cell_keys <- c(
  "Participant_External_Session_ID", "Region", "Sentence_type", "Condition"
)
SPR_YA_summary <- summarySE(
  SPR_YA,
  measurevar = "Reaction_Time",
  groupvars = cell_keys
)
SPR_YA_summary <- mutate(SPR_YA_summary, Cutoff = (`Q3.75%` + IQR * 1.5))
# Only the cell keys and the cutoff are needed for the merge below.
SPR_YA_summary <- SPR_YA_summary[, c(cell_keys, "Cutoff")]

SPR_YA2 <- merge(
  SPR_YA, SPR_YA_summary,
  by = c("Participant_External_Session_ID", "Condition", "Region", "Sentence_type")
)
# The file written here is the untrimmed data with its Cutoff column; the
# trimming below is applied in memory only.
write_csv(SPR_YA2, "/Users/josh/Library/CloudStorage/Box...")

# Drop RTs above the cell cutoff and RTs below 100 ms.
SPR_YA2 <- SPR_YA2 %>% filter(Reaction_Time <= Cutoff)
SPR_YA2 <- SPR_YA2 %>% filter(Reaction_Time >= 100)
# starting obs: 15880
# ending obs: 15070
# loss: 810
# proportion lost: 0.05100756 (about 5.1%)

### Start visualization
SPR_YA2_sesum <- summarySE(
  SPR_YA2,
  measurevar = "Reaction_Time",
  groupvars = c("Region", "Sentence_type", "Condition")
)

# NOTE(review): the x-axis limits list regions 1-5 only, so the PO-only
# region 2.5 is left off the plot - confirm this is intended.
rt_plot <- ggplot(
  SPR_YA2_sesum,
  aes(x = Region, y = Reaction_Time, group = Condition, color = Condition)
) +
  theme_classic() +
  theme(text = element_text(size = 16)) +
  # theme(legend.position="none")+
  geom_line(aes(linetype = Condition, color = Condition)) +
  # guides(shape = FALSE)
  geom_point(aes(color = Condition)) +
  geom_errorbar(
    aes(ymin = Reaction_Time - se, ymax = Reaction_Time + se),
    width = .1
  ) +
  expand_limits(y = c(300, 550)) +
  scale_y_continuous(breaks = seq(300, 550, 50)) +
  # scale_y_continuous(breaks = seq(300, 550, len = 50))+
  scale_x_discrete(
    limits = c("1", "2", "3", "4", "5"),
    labels = c(
      "1" = "S&V", "2" = "OBJ1", "3" = "OBJ2",
      "4" = "Spill1", "5" = "Spill2"
    )
  ) +
  theme(axis.text.x = element_text(size = 8)) +
  labs(y = "RT (ms.)") +
  facet_wrap(~Sentence_type) +
  scale_color_manual(
    values = c("#273746", "#b7950b", "#909497"),
    name = ("Condition")
  )

# Show the figure, then save that same object explicitly rather than relying
# on ggsave()'s implicit last-plot default.
print(rt_plot)
ggsave(
  "SPR_YA2.png",
  plot = rt_plot, dpi = 300, height = 6, width = 8, units = "in"
)