#SPR-A data processing space
#JW, last update 3/3/23

# Attach order matters: plyr must come before dplyr so that dplyr's verbs
# (mutate, summarise, rename, ...) mask plyr's versions, not the reverse.
library(plyr)
library(dplyr)
library(pbkrtest)
library(data.table)
library(stringr)
library(lme4)
library(lmerTest)
library(readr)    # read_csv(), write_csv()
library(ggplot2)  # ggplot(), ggsave()

#Note on summarySE: I did not write this function. I would love to give credit to the person who wrote this function, but I have long lost track of where I found it. Thank you, summarySE writer. I did, however, modify it to also calculate IQR, Q1 and Q3.
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  library(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm),
                     IQR = IQR     (xx[[col]], na.rm=na.rm),
                     Q3 = quantile (xx[[col]], .75, na.rm=na.rm), 
                     Q1 = quantile (xx[[col]], .25, na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- plyr::rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}

#####Processing

###main data

List1 <-  read_csv(file="/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)
List2 <-  read_csv(file="/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)
List3 <-  read_csv(file="/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)
List4 <-  read_csv(file="/Users/josh/Library/CloudStorage/Box...", col_names = TRUE)

SPR_A_YA <- rbind(List1, List2, List3, List4)

write_csv(SPR_A_YA, "/Users/josh/Library/CloudStorage/Box...")

SPR_A_YA <- read_csv(file="/Users/josh/Library/CloudStorage/Box...")

SPR_A_YA <- SPR_A_YA %>% filter(str_detect(Item_type, "Test"))

SPR_A_YA <- SPR_A_YA %>% filter(str_detect(Attempt, "."))

SPR_A_YA <- SPR_A_YA %>% filter(!str_detect(`Zone Type`, "response_keyboard_single"))

SPR_A_YA <- SPR_A_YA %>% filter(str_detect(Response, "."))

SPR_A_YA <- mutate(SPR_A_YA, N_words = sapply(strsplit(c(Response), " "), length))

SPR_A_YA <- filter(SPR_A_YA, N_words ==1)

SPR_A_YA <- SPR_A_YA %>% filter(!str_detect(Response, "assembled"))

split <- split(SPR_A_YA, SPR_A_YA$Sentence_type)

DO <- split$DO
PO <- split$PO

#Process DO for regions
split_DO <- split(DO, DO$`Participant External Session ID`)

result <- list()

for(i in 1:length(split_DO)) {
  output <- mutate(split_DO[[i]], Region = with(split_DO[[i]], ave(Item, Item, FUN = seq_along))) 
  result[[i]] <- output        
}

DO2 <- rbindlist(result)

###find the positions of the values 1:20 in Region in R.DO
i1 <- which(DO2$Region %in% c(1,2,3))
i2 <- which(DO2$Region %in% c(4,5))
i3 <- which(DO2$Region %in% c(6,7))
i4 <- which(DO2$Region %in% c(8,9))
i5 <- which(DO2$Region %in% c(10:15))

###replace values in R.DO with 
DO2$Region <- replace(DO2$Region, i1, 1)
DO2$Region <- replace(DO2$Region, i2, 2)
DO2$Region <- replace(DO2$Region, i3, 3)
DO2$Region <- replace(DO2$Region, i4, 4)
DO2$Region <- replace(DO2$Region, i5, 5)

#Process PO for regions
split_PO <- split(PO, PO$`Participant External Session ID`)

result <- list()

for(i in 1:length(split_PO)) {
  output <- mutate(split_PO[[i]], Region = with(split_PO[[i]], ave(Item, Item, FUN = seq_along))) 
  result[[i]] <- output        
}

PO2 <- rbindlist(result)

i1 <- which(PO2$Region %in% c(1,2,3))
i2 <- which(PO2$Region %in% c(4,5))
i2.5 <- which(PO2$Region %in% c(6))
i3 <- which(PO2$Region %in% c(7,8))
i4 <- which(PO2$Region %in% c(9,10))
i5 <- which(PO2$Region %in% c(11:16))

PO2$Region <- replace(PO2$Region, i1, 1)
PO2$Region <- replace(PO2$Region, i2, 2)
PO2$Region <- replace(PO2$Region, i2.5, 2.5)
PO2$Region <- replace(PO2$Region, i3, 3)
PO2$Region <- replace(PO2$Region, i4, 4)
PO2$Region <- replace(PO2$Region, i5, 5)

SPR_A_YA_r <- rbind(PO2, DO2)

write_csv(SPR_A_YA_r, "/Users/josh/Library/CloudStorage/Box...")

###comprehension accuracy

SPR_YA_comp <- read_csv(file="/Users/josh/Library/CloudStorage/Box...")

SPR_YA_comp <- SPR_YA_comp %>% filter(str_detect(Item_type, "Test"))

SPR_YA_comp <- SPR_YA_comp %>% filter(str_detect(Attempt, "."))

SPR_YA_comp <- SPR_YA_comp %>% filter(str_detect(`Zone Type`, "response_keyboard_single"))

SPR_YA_comp <- mutate(SPR_YA_comp, comp_correct = 9)

setDT(SPR_YA_comp)[(`Zone Type` == "response_keyboard_single") & (Correct ==1), comp_correct := 1]
setDT(SPR_YA_comp)[(`Zone Type` == "response_keyboard_single") & (Correct ==0), comp_correct := 0]

SPR_YA_comp_slim <- select(SPR_YA_comp, Participant_External_Session_ID, Item, comp_correct)

write_csv(SPR_YA_comp_slim, "/Users/josh/Library/CloudStorage/Box...")

#####Trimming & visualization

###start trimming
#read in SPR data
SPR_YA <- read_csv(file="/Users/josh/Library/CloudStorage/Box...")

#read in comprehension question data
SPR_YA_comp_slim <- read_csv(file="/Users/josh/Library/CloudStorage/Box...")

#merge comprehension question data
SPR_YA <- merge(SPR_YA, SPR_YA_comp_slim, by=c("Participant_External_Session_ID","Item"))

SPR_YA$Region <- as.factor(SPR_YA$Region)

SPR_YA_summary <- summarySE(SPR_YA, measurevar="Reaction_Time", groupvars=c("Participant_External_Session_ID", "Region","Sentence_type", "Condition"))

SPR_YA_summary <- mutate(SPR_YA_summary, Cutoff = (`Q3.75%` + IQR*1.5))

SPR_YA_summary$sd <- NULL
SPR_YA_summary$se <- NULL
SPR_YA_summary$ci <- NULL
SPR_YA_summary$N <- NULL
SPR_YA_summary$Reaction_Time <- NULL
SPR_YA_summary$IQR <- NULL
SPR_YA_summary$`Q3.75%` <- NULL
SPR_YA_summary$`Q1.25%` <- NULL

SPR_YA2 <- merge(SPR_YA, SPR_YA_summary, by=c("Participant_External_Session_ID","Condition","Region","Sentence_type"))

write_csv(SPR_YA2, "/Users/josh/Library/CloudStorage/Box...")

SPR_YA2 <- SPR_YA2 %>% filter(!Reaction_Time > Cutoff)
SPR_YA2 <- SPR_YA2 %>% filter(!Reaction_Time < 100)

#starting obs: 15880
#ending obs: 15070
#loss: 810
#%loss: 0.05100756

###Start visualization

SPR_YA2_sesum <- summarySE(SPR_YA2, measurevar="Reaction_Time", groupvars=c("Region","Sentence_type", "Condition"))

ggplot(SPR_YA2_sesum, aes(x=Region, y=Reaction_Time, group=Condition, color=Condition))+
  theme_classic()+
  theme(text=element_text(size=16))+
  #theme(legend.position="none")+
  geom_line(aes (linetype=Condition, color=Condition))+
  #guides(shape = FALSE)
  geom_point(aes (color=Condition))+
  geom_errorbar(aes(ymin=Reaction_Time-se, ymax=Reaction_Time+se), width=.1) +
  expand_limits(y=c(300,550))+
  scale_y_continuous(breaks=seq(300,550,50))+
  #scale_y_continuous(breaks = seq(300, 550, len = 50))+
  scale_x_discrete(limits=c("1", "2", "3", "4", "5"), labels=c("1" = "S&V", "2" = "OBJ1", "3" = "OBJ2", "4" = "Spill1", "5" = "Spill2"))+
  theme(axis.text.x=element_text(size=8))+
  labs(y ="RT (ms.)")+
  facet_wrap(~Sentence_type)+
  scale_color_manual(values=c("#273746", "#b7950b", "#909497"), name=("Condition"))

ggsave("SPR_YA2.png", dpi=300, height=6, width = 8, units="in")