setwd("~/Documents/Rstudio/Tony/") # change, ours is named Tony, this is the location where your data will end up! # This is later used to read the files, so ensure it is correct. # Ran on R 3.3.2 in OSX on 20th Dec. 2016 (via RStudio) # Paper uses seeds 1 and 2, trials= 2, samplesize 30 to 120 with increments of 10. # nsims was 100,000. Code is ugly and not built for speed. # Install packages below first. library(reshape2) # 1.4.1 library(plyr) # 1.8.4 library(ggplot2) # 2.1.0 library(ggthemes) # 3.2.0 library(dplyr) # 0.5.0 library(stargazer) #5.2 # set some parameters. seed1 <- 1 # this makes it replicable set.seed(seed1) samplesize <- 30 # change this to get different samples trials <- 2 # change increment_trials <- 2 # this creates the increments via the formula below probability <- .5 # the probability in a 2AFC task nsims <- 100000 #how many simulations to get. # main function, funtion takes the input () # you could use a running indicator for exp1 to exp10. accuracysim <- function (samplesize, trials, probability) { exp1 <- rbinom(samplesize, trials, probability) accuracy1 <- mean(exp1 / trials) exp2 <- rbinom(samplesize, (trials + increment_trials), probability) accuracy2 <- mean(exp2 / (trials + increment_trials)) exp3 <- rbinom(samplesize, (trials + (increment_trials * 2)), probability) accuracy3 <- mean(exp3 / (trials + (increment_trials * 2))) exp4 <- rbinom(samplesize, (trials + (increment_trials * 4)), probability) accuracy4 <- mean(exp4 / (trials + (increment_trials * 4))) exp5 <- rbinom(samplesize, (trials + (increment_trials * 6)), probability) accuracy5 <- mean(exp5 / (trials + (increment_trials * 6))) exp6 <- rbinom(samplesize, (trials + (increment_trials * 8)), probability) accuracy6 <- mean(exp6 / (trials + (increment_trials * 8))) exp7 <- rbinom(samplesize, (trials + (increment_trials * 10)), probability) accuracy7 <- mean(exp7 / (trials + (increment_trials * 10))) exp8 <- rbinom(samplesize, (trials + (increment_trials * 12)), probability) accuracy8 <- mean(exp8 / (trials + (increment_trials * 12))) exp9 <- rbinom(samplesize, (trials + (increment_trials * 14)), probability) accuracy9 <- mean(exp9 / (trials + (increment_trials * 14))) exp10 <- rbinom(samplesize, (trials + (increment_trials * 16)), probability) accuracy10 <- mean(exp10 / (trials + (increment_trials * 16))) results <- cbind( accuracy1, accuracy2, accuracy3, accuracy4, accuracy5, accuracy6, accuracy7, accuracy8, accuracy9, accuracy10 ) return(results) } result <- vector("list", nsims) # the resuls are a list for (i in 1:nsims) { result[[i]] <- print(accuracysim(samplesize, trials, probability)) } # this will print the results num.el <- sapply(result, length) # we need to,specify how large the container should be resultsmatrix <- cbind(unlist(result), rep(1:length(result), num.el)) # results are a list which we "unlist", we add a running indicator and num.el to check. resultsmatrix <- as.data.frame(resultsmatrix) # turn a matric into a dataframe. resultsID <- c(rep(1:10, nsims)) # simulation number resultsmatrix <- as.data.frame(cbind(resultsmatrix, resultsID)) # combine resultsmatrix2 <- NULL # make an empty matrix to store the data in. We want to reshape it resultsmatrix2 <- dcast(resultsmatrix, V2 ~ resultsID, value.var = "V1") means <- sapply(resultsmatrix2, mean) # get the means CI_95 <- sapply(resultsmatrix2, quantile, probs = c(0.025, 0.975)) # calculate the 95%CI export <- rbind(means, CI_95) # row bind so you can export it write.csv2(export, file = paste(seed1, samplesize, trials, ".csv"), row.names = FALSE) # export result to working directory, it stores the seed used # Now let's synthesize those simulations. # Read batch - ensure only .csv are in there. (These were all the simulations) file_list <- list.files("~/Documents/Rstudio/Tony/") # get all the files as a list dataset <- ldply(file_list, read.csv2) #apply read.csv2 to list samplesizegraph1 <- seq(30, 90, 10) # make the sample sizes for the graphs intervals used. samplesizegraph2 <- seq(100, 120, 10) # this is done in two chunks as the folder is ordered. samplesizegraph <- c(samplesizegraph2, samplesizegraph1) # the folder is ordered so that 100 is read first. samplesizegraph <- rep(samplesizegraph, each = 3) # We need the sample sizes repeated 3 times as we have three values (means; 95CI) dataset$samplesizegraph <- samplesizegraph #add it to our dataset # make averages across both run and synthesize. run1 <- dataset[1:30, ] run2 <- dataset[31:60, ] cor(run1, run2) #r >.99 for all. So it is OK for us to average result <- ((run1 + run2) / 2)# take average (paranoid extra brackets) result <- result[, -1] # remove the junk column colnames(result) <- c(2, 4, 6, 10, 14, 18, 22, 26, 30, 34, 'sample') # make column names result2 <- melt(result, id.vars = c("sample")) #reshape so that we can plot it resultmeans <- result2[seq(1, nrow(result2), 3),] resultmeans <- arrange(resultmeans, sample) # fix order so it is ordered resultlowci <- result2[seq(2, nrow(result2), 3),] resultlowci <- arrange(resultlowci, sample) resulthighci <- result2[seq(3, nrow(result2), 3),] resulthighci <- arrange(resulthighci, sample) # Figure 1. resultmeans$variable <- as.numeric(levels(resultmeans$variable)) # need numeric variable. fig <- ggplot(resultmeans, aes(x = resultmeans$variable, y = resultmeans$value)) # make a plot limits <- aes(ymax = resulthighci$value, ymin = resultlowci$value) # make CI limits for the graph. fig + geom_point() + facet_wrap( ~ sample, nrow = 2) + geom_errorbar(limits, width = 0.25) + ylab("True probability") + xlab("Number of trials") + scale_x_continuous(breaks = resultmeans$variable) + theme_gdocs(base_size = 14) # make a pretty graph. # Table resultmeans <- rename(resultmeans, means = value, trials = variable) # rename., Note that our dataframes are already resultlowci <- rename(resultlowci, lowCI = value) resultlowci <- select(resultlowci, lowCI) # only need lowCI resulthighci <- rename(resulthighci, highCI = value) resulthighci <- select(resulthighci, highCI) combined_means_ci <- cbind.data.frame(resultmeans, resultlowci, resulthighci) stargazer(combined_means_ci, type = "html", summary = F, out = "Appendix_Table.htm") # prints a htm table. Make sure to remove when re-running code! # Illustration with Little et al 2007, study 1. We use the parameters reported. samplesize_tony <- 110 trials_tony <- 8 probability_tony <- .5 nsims_tony <- 100000 set.seed(seed1) # make it replicable, this uses the seed you put above. # make a similar function. Similar to the one above. accuracysim_tony <- function (samplesize_tony, trials_tony, probability_tony) { exp1_tony <- rbinom(samplesize_tony, trials_tony, probability_tony) accuracy1_tony <- mean(exp1_tony / trials_tony) results_tony <- cbind(accuracy1_tony) return(results_tony) } result_tony <- vector("list", nsims_tony) for (i in 1:nsims_tony) { result_tony[[i]] <- print(accuracysim_tony(samplesize_tony, trials_tony, probability_tony)) } num.el_tony <- sapply(result_tony, length) resultsmatrix_tony <- cbind(unlist(result_tony), rep(1:length(result_tony), num.el_tony)) resultsmatrix_tony <- as.data.frame(resultsmatrix_tony) entriesoverorequalto57 <- filter(resultsmatrix_tony, V1 >= 0.57) # only one case, entry 44305. #Figure 2. plot1 <- ggplot(resultsmatrix_tony, aes(x = V1)) plot1 + geom_histogram(colour = "black", fill = "white") + ylab("Count") + xlab("Probability") + geom_vline(xintercept = .57, linetype = "longdash", show.legend = T) + theme_gdocs(base_size = 24) # simple histogram ############### ####The END ### ############### #▓▓▓▓ #▒▒▒▓▓ #▒▒▒▒▒▓ #▒▒▒▒▒▒▓ #▒▒▒▒▒▒▓ #▒▒▒▒▒▒▒▓ #▒▒▒▒▒▒▒▓▓▓ #▒▓▓▓▓▓▓░░░▓ #▒▓░░░░▓░░░░▓ #▓░░░░░░▓░▓░▓ #▓░░░░░░▓░░░▓ #▓░░▓░░░▓▓▓▓ #▒▓░░░░▓▒▒▒▒▓ #▒▒▓▓▓▓▒▒▒▒▒▓ #▒▒▒▒▒▒▒▒▓▓▓▓ #▒▒▒▒▒▓▓▓▒▒▒▒▓ #▒▒▒▒▓▒▒▒▒▒▒▒▒▓ #▒▒▒▓▒▒▒▒▒▒▒▒▒▓ #▒▒▓▒▒▒▒▒▒▒▒▒▒▒▓ #▒▓▒▓▒▒▒▒▒▒▒▒▒▓ #▒▓▒▓▓▓▓▓▓▓▓▓▓ #▒▓▒▒▒▒▒▒▒▓ #▒▒▓▒▒▒▒▒▓ ###