####POWER ANALYSIS ABSOLUTELY RELATIVE CHICKS#### ####Load Packages#### library(dplyr) library(multimode) library(lme4) library(glmmTMB) library(effsize) library(pwr) library(FamilyRank) ####Calculating effect sizes from existing data#### discr <- read.csv("Data Absolutely Relative Chicks.csv", sep=";", dec=",", tryLogical=F) #last one ensures that sex does not become true or false str(discr) discr <- discr[discr$Subject<125,] discr$Subject <- as.factor(discr$Subject) discr$Homogenous_Heterogenous <- as.factor(discr$Homogenous_Heterogenous) discr$Sex <- as.factor(discr$Sex) discr$Training <- as.factor(discr$Training) discr$Positive_Training_Numerosity <- as.factor(discr$Positive_Training_Numerosity) discr$Positive_Higher_Lower <- as.factor(discr$Positive_Higher_Lower) discr$Test <- as.factor(discr$Test) discr$Side_Positive <- as.factor(discr$Side_Positive) discr$Zone_Left <- as.numeric(discr$Zone_Left) discr$Zone_Middle <- as.numeric(discr$Zone_Middle) discr$Zone_Right <- as.numeric(discr$Zone_Right) discr$Zone_Correct <- as.numeric(discr$Zone_Correct) discr$Zone_Incorrect <- as.numeric(discr$Zone_Incorrect) discr$Group <- as.factor(discr$Group) #Create a data set per individual discr_subj <- discr %>% group_by(Subject) %>% summarise( Zone_Correct_mean = mean(Zone_Correct), Zone_Correct = sum(Zone_Correct), Zone_Incorrect = sum(Zone_Incorrect), Zone_Middle = sum(Zone_Middle), Sex, Training, Positive_Training_Numerosity, Test, Side_Positive, Group, Preference_Index=mean(Preference_Index)) #take only every 6th row discr_subj <- discr_subj[seq(1, nrow(discr_subj ),6),] #Calculate cumulative preference index per subject discr_subj$Preference_Index_cum <- discr_subj$Zone_Correct/(discr_subj$Zone_Correct+discr_subj$Zone_Incorrect) summary(discr_subj) Group1 <- subset(discr_subj, Group==1) Group2 <- subset(discr_subj, Group==2) Group3 <- subset(discr_subj, Group==3) Group4 <- subset(discr_subj, Group==4) #control group ###Power analysis part 1: group level (main analysis)#### #Calculate the effect size for each experiment/group #formula: (mean1-mean2)/SD es_1 <- (mean(discr$Preference_Index[discr$Group==1], na.rm=T)-0.5)/sd(discr$Preference_Index[discr$Group==1], na.rm=T) es_2 <- (mean(discr$Preference_Index[discr$Group==2], na.rm=T)-0.5)/sd(discr$Preference_Index[discr$Group==2], na.rm=T) es_3 <- (mean(discr$Preference_Index[discr$Group==3], na.rm=T)-0.5)/sd(discr$Preference_Index[discr$Group==3], na.rm=T) es_4 <- (mean(discr$Preference_Index[discr$Group==4], na.rm=T)-0.5)/sd(discr$Preference_Index[discr$Group==4], na.rm=T) #pwr test for proportions for each group #Group 1: pwr.p.test(h = es_1, n = 40*6, sig.level = 0.05) # n is the number of subjects times 6 (6 minutes per trial, i.e., 6 values per subject) #0.3893013 #Group 2 pwr.p.test(h = es_2, n = 40*6, sig.level = 0.05) #0.06632401 #Group 3 pwr.p.test(h = es_3, n = 25*6, sig.level = 0.05) #0.1979655 #Group 4: pwr.p.test(h = es_4, n = 19*6, sig.level = 0.05) #0.06053942 ####Power analysis part 2: group level individual preference indices (main analysis)#### #calculate effect sizes es_ind_1 <- (mean(Group1$Preference_Index, na.rm=T)-0.5)/sd(Group1$Preference_Index, na.rm=T) es_ind_2 <- (mean(Group2$Preference_Index, na.rm=T)-0.5)/sd(Group2$Preference_Index, na.rm=T) es_ind_3 <- (mean(Group3$Preference_Index, na.rm=T)-0.5)/sd(Group3$Preference_Index, na.rm=T) es_ind_4 <- (mean(Group4$Preference_Index, na.rm=T)-0.5)/sd(Group4$Preference_Index, na.rm=T) #Group 1: pwr.p.test(h = es_ind_1, n = 40, sig.level = 0.05) # here n is equal to the number of subjects as one preference index was calculated per subject, across all 6 minutes #0.1880349 #Group 2 pwr.p.test(h = es_ind_2, n = 40, sig.level = 0.05) #0.0570012 #Group 3 pwr.p.test(h = es_ind_3, n = 25, sig.level = 0.05) #0.09776434 #Group 4: pwr.p.test(h = es_ind_4, n = 19, sig.level = 0.05) # 0.05597055 ####Power analysis part 3: mode-tests for the distribution of individual preference indices#### #To calculate the two means (one for each mode) and standard deviations needd for the simulation of the datasets, we used the outputs of the modetest and locmodes function (see main script) #for Group 1 these results were: #Excess mass = 0.143, p-value = 0.038 #Estimated location #Modes: 0.3733176 0.9575151 #Antimode: 0.682744 #We used the antimode as a cut-off point to divide our sample in two and subsequently calculated the standard deviation for each of the two sub-groups sdg1_a <- sd(Group1$Preference_Index_cum[Group1$Preference_Index_cum<0.682744 ], na.rm=T) sdg1_b <- sd(Group1$Preference_Index_cum[Group1$Preference_Index_cum>0.682744 ], na.rm=T) #number of simulations nsim <- 5000 #number of subjects (one value per subject) n <- 40 #create an empty vector to save the results of the simulation res_p <- c() #simulate a bimodal dataset 5000 times and perform a modetest on the dataset. Save the results in the vector. for (i in 1:nsim) { #the values for the means of the two modes are the two estimated modes of our sample (locmodes output, see line 107) #we used the antimode (see line 108) as the probablity of a value belonging to the first (rather than the second) mode Preference_Index <- c(rbinorm(n=n, mean1=0.3733176, mean2=0.9575151, sdg1_a, sdg1_b, prop=0.682744)) #when generating data using rbinorm, some values might be below 0 or above 1, unlike for our preference index #we rounded values below 0 to 0 anjd values above 1 to 1 Preference_Index[Preference_Index<0] <- 0 Preference_Index[Preference_Index>1] <- 1 resmod <- modetest(Preference_Index) res_p[i] <- resmod$p.value } #display the vector of p-values generated res_p #calculate how many of tehse p-values are smaller than or equal to 0.05 mean(res_p<=0.05) #72% #Note that the result can vary slightly between simulations. #Group 2: #The results of the locmode function (see main script) were: #Estimated location #Modes: 0.367636 0.9302031 #Antimode: 0.6725359 #calculate the standard deviations sdg2_a <- sd(Group2$Preference_Index_cum[Group2$Preference_Index_cum<0.6725359 ], na.rm=T) sdg2_b <- sd(Group2$Preference_Index_cum[Group2$Preference_Index_cum>0.6725359 ], na.rm=T) nsim <- 5000 n <- 40 res_p <- c() for (i in 1:nsim) { Preference_Index <- c(rbinorm(n=n, mean1=0.367636, mean2=0.9302031, sdg2_a, sdg2_b, prop=0.6725359)) Preference_Index[Preference_Index<0] <- 0 Preference_Index[Preference_Index>1] <- 1 resmod <- modetest(Preference_Index) res_p[i] <- resmod$p.value } res_p mean(res_p<=0.05) #51% #Group 3: #The results of the locmode function (see main script) were: #Estimated location #Modes: 0.02331283 0.5553202 #Antimode: 0.2792899 #calculate the standard deviations sdg3_a <- sd(Group3$Preference_Index_cum[Group3$Preference_Index_cum<0.2792899 ], na.rm=T) sdg3_b <- sd(Group3$Preference_Index_cum[Group3$Preference_Index_cum>0.2792899 ], na.rm=T) nsim <- 5000 n <- 25 res_p <- c() for (i in 1:nsim) { Preference_Index <- c(rbinorm(n=n, mean1=0.02331283, mean2=0.5553202, sdg3_a, sdg3_b, prop=0.2792899)) Preference_Index[Preference_Index<0] <- 0 Preference_Index[Preference_Index>1] <- 1 resmod <- modetest(Preference_Index) res_p[i] <- resmod$p.value } res_p mean(res_p<=0.05) #50% #Group 4: #Estimated location #Modes: 0.05187036 0.4094076 #Antimode: 0.1653512 #Note that here the modetest did not indicate that the distribution differs disgnificantly from unimodality. #We nevertheless had to simulate bimodal datasets for the power analysis. sdg4_a <- sd(Group4$Preference_Index_cum[Group4$Preference_Index_cum<0.1653512 ], na.rm=T) sdg4_b <- sd(Group4$Preference_Index_cum[Group4$Preference_Index_cum>0.1653512 ], na.rm=T) nsim <- 5000 n <- 19 res_p <- c() for (i in 1:nsim) { Preference_Index <- c(rbinorm(n=n, mean1=0.05187036, mean2=0.4094076, sdg4_a, sdg4_b, prop=0.1653512)) Preference_Index[Preference_Index<0] <- 0 Preference_Index[Preference_Index>1] <- 1 resmod <- modetest(Preference_Index) res_p[i] <- resmod$p.value } res_p mean(res_p<=0.05) #13%