setwd("~/Documents/Rstudio/Tony/") # change, ours is named Tony, this is the location where your data will end up!
# This is later used to read the files, so ensure it is correct.

# Ran on R 3.3.2 in OSX on 20th Dec. 2016 (via RStudio)
# Paper uses seeds 1 and 2, trials= 2, samplesize 30 to 120 with increments of 10.
# nsims was 100,000. Code is ugly and not built for speed.
# Install packages below first.

library(reshape2) # 1.4.1
library(plyr) # 1.8.4
library(ggplot2) # 2.1.0
library(ggthemes) # 3.2.0
library(dplyr) # 0.5.0
library(stargazer) #5.2

# set some parameters.
seed1 <- 1 # this makes it replicable
set.seed(seed1)
samplesize <- 30 # change this to get different samples
trials <- 2 # change
increment_trials <-
  2 # this creates the increments via the formula below
probability <- .5 # the probability in a 2AFC task
nsims <- 100000 #how many simulations to get.

# main function, funtion takes the input ()
# you could use a running indicator for exp1 to exp10.
accuracysim <- function (samplesize, trials, probability) {
  exp1 <- rbinom(samplesize, trials, probability)
  accuracy1 <- mean(exp1 / trials)
  exp2 <- rbinom(samplesize, (trials + increment_trials), probability)
  accuracy2 <- mean(exp2 / (trials + increment_trials))
  exp3 <- rbinom(samplesize, (trials + (increment_trials * 2)), probability)
  accuracy3 <- mean(exp3 / (trials + (increment_trials * 2)))
  exp4 <- rbinom(samplesize, (trials + (increment_trials * 4)), probability)
  accuracy4 <- mean(exp4 / (trials + (increment_trials * 4)))
  exp5 <- rbinom(samplesize, (trials + (increment_trials * 6)), probability)
  accuracy5 <- mean(exp5 / (trials + (increment_trials * 6)))
  exp6 <- rbinom(samplesize, (trials + (increment_trials * 8)), probability)
  accuracy6 <- mean(exp6 / (trials + (increment_trials * 8)))
  exp7 <-
    rbinom(samplesize, (trials + (increment_trials * 10)), probability)
  accuracy7 <- mean(exp7 / (trials + (increment_trials * 10)))
  exp8 <-
    rbinom(samplesize, (trials + (increment_trials * 12)), probability)
  accuracy8 <- mean(exp8 / (trials + (increment_trials * 12)))
  exp9 <-
    rbinom(samplesize, (trials + (increment_trials * 14)), probability)
  accuracy9 <- mean(exp9 / (trials + (increment_trials * 14)))
  exp10 <-
    rbinom(samplesize, (trials + (increment_trials * 16)), probability)
  accuracy10 <- mean(exp10 / (trials + (increment_trials * 16)))
  results <-
    cbind(
      accuracy1,
      accuracy2,
      accuracy3,
      accuracy4,
      accuracy5,
      accuracy6,
      accuracy7,
      accuracy8,
      accuracy9,
      accuracy10
    )
  return(results)
}

result <- vector("list", nsims) # the resuls are a list
for (i in 1:nsims)
{
  result[[i]] <-
    print(accuracysim(samplesize, trials, probability))
} # this will print the results
num.el <-
  sapply(result, length) # we need to,specify how large the container should be
resultsmatrix <-
  cbind(unlist(result), rep(1:length(result), num.el)) # results are a list which we "unlist", we add a running indicator and num.el to check.
resultsmatrix <-
  as.data.frame(resultsmatrix) # turn a matric into a dataframe.
resultsID <- c(rep(1:10, nsims)) # simulation number
resultsmatrix <-
  as.data.frame(cbind(resultsmatrix, resultsID)) # combine
resultsmatrix2 <-
  NULL # make an empty matrix to store the data in. We want to reshape it
resultsmatrix2 <- dcast(resultsmatrix, V2 ~ resultsID, value.var = "V1")
means <- sapply(resultsmatrix2, mean) # get the means
CI_95 <-
  sapply(resultsmatrix2, quantile, probs = c(0.025, 0.975)) # calculate the 95%CI
export <- rbind(means, CI_95) # row bind so you can export it
write.csv2(export,
           file = paste(seed1, samplesize, trials, ".csv"),
           row.names = FALSE) # export result to working directory, it stores the seed used

# Now let's synthesize those simulations.
# Read batch - ensure only .csv are in there. (These were all the simulations)
file_list <-
  list.files("~/Documents/Rstudio/Tony/") # get all the files as a list
dataset <- ldply(file_list, read.csv2) #apply read.csv2 to list
samplesizegraph1 <-
  seq(30, 90, 10) # make the sample sizes for the graphs intervals used.
samplesizegraph2 <-
  seq(100, 120, 10) # this is done in two chunks as the folder is ordered.
samplesizegraph <-
  c(samplesizegraph2, samplesizegraph1) # the folder is ordered so that 100 is read first.
samplesizegraph <-
  rep(samplesizegraph, each = 3) # We need the sample sizes repeated 3 times as we have three values (means; 95CI)
dataset$samplesizegraph <- samplesizegraph #add it to our dataset
# make averages across both run and synthesize.
run1 <- dataset[1:30, ]
run2 <- dataset[31:60, ]
cor(run1, run2) #r >.99 for all. So it is OK for us to average
result <- ((run1 + run2) / 2)# take average (paranoid extra brackets)
result <- result[, -1] # remove the junk column
colnames(result) <-
  c(2, 4, 6, 10, 14, 18, 22, 26, 30, 34, 'sample') # make column names
result2 <-
  melt(result, id.vars = c("sample")) #reshape so that we can plot it
resultmeans <- result2[seq(1, nrow(result2), 3),]
resultmeans <-
  arrange(resultmeans, sample) # fix order so it is ordered
resultlowci <- result2[seq(2, nrow(result2), 3),]
resultlowci <- arrange(resultlowci, sample)
resulthighci <- result2[seq(3, nrow(result2), 3),]
resulthighci <- arrange(resulthighci, sample)
# Figure 1.
resultmeans$variable <-
  as.numeric(levels(resultmeans$variable)) # need numeric variable.
fig <-
  ggplot(resultmeans,
         aes(x = resultmeans$variable, y = resultmeans$value)) # make a plot
limits <-
  aes(ymax = resulthighci$value, ymin = resultlowci$value) # make CI limits for the graph.
fig + geom_point() + facet_wrap( ~ sample, nrow = 2) + geom_errorbar(limits, width =
                                                                       0.25) + ylab("True probability") + xlab("Number of trials") + scale_x_continuous(breaks =
                                                                                                                                                          resultmeans$variable) + theme_gdocs(base_size = 14) # make a pretty graph.
# Table
resultmeans <-
  rename(resultmeans, means = value, trials = variable) # rename., Note that our dataframes are already
resultlowci <- rename(resultlowci, lowCI = value)
resultlowci <- select(resultlowci, lowCI) # only need lowCI
resulthighci <- rename(resulthighci, highCI = value)
resulthighci <- select(resulthighci, highCI)
combined_means_ci <-
  cbind.data.frame(resultmeans, resultlowci, resulthighci)
stargazer(combined_means_ci,
          type = "html",
          summary = F,
          out = "Appendix_Table.htm") # prints a htm table. Make sure to remove when re-running code!

# Illustration with Little et al 2007, study 1. We use the parameters reported.
samplesize_tony <- 110
trials_tony <- 8
probability_tony <- .5
nsims_tony <- 100000
set.seed(seed1) # make it replicable, this uses the seed you put above.
# make a similar function. Similar to the one above.
accuracysim_tony <-
  function (samplesize_tony,
            trials_tony,
            probability_tony) {
    exp1_tony <- rbinom(samplesize_tony, trials_tony, probability_tony)
    accuracy1_tony <- mean(exp1_tony / trials_tony)
    results_tony <- cbind(accuracy1_tony)
    return(results_tony)
  }
result_tony <- vector("list", nsims_tony)
for (i in 1:nsims_tony)
{
  result_tony[[i]] <-
    print(accuracysim_tony(samplesize_tony, trials_tony, probability_tony))
}
num.el_tony <- sapply(result_tony, length)
resultsmatrix_tony <-
  cbind(unlist(result_tony), rep(1:length(result_tony), num.el_tony))
resultsmatrix_tony <- as.data.frame(resultsmatrix_tony)
entriesoverorequalto57 <-
  filter(resultsmatrix_tony, V1 >= 0.57) # only one case, entry 44305.
#Figure 2.
plot1 <- ggplot(resultsmatrix_tony, aes(x = V1))
plot1 + geom_histogram(colour = "black", fill = "white") + ylab("Count") + xlab("Probability") + geom_vline(xintercept = .57,
                                                                                                            linetype = "longdash",
                                                                                                            show.legend = T) + theme_gdocs(base_size = 24) # simple histogram

###############
####The END ###
###############
#▓▓▓▓
#▒▒▒▓▓
#▒▒▒▒▒▓
#▒▒▒▒▒▒▓
#▒▒▒▒▒▒▓
#▒▒▒▒▒▒▒▓
#▒▒▒▒▒▒▒▓▓▓
#▒▓▓▓▓▓▓░░░▓
#▒▓░░░░▓░░░░▓
#▓░░░░░░▓░▓░▓
#▓░░░░░░▓░░░▓
#▓░░▓░░░▓▓▓▓
#▒▓░░░░▓▒▒▒▒▓
#▒▒▓▓▓▓▒▒▒▒▒▓
#▒▒▒▒▒▒▒▒▓▓▓▓
#▒▒▒▒▒▓▓▓▒▒▒▒▓
#▒▒▒▒▓▒▒▒▒▒▒▒▒▓
#▒▒▒▓▒▒▒▒▒▒▒▒▒▓
#▒▒▓▒▒▒▒▒▒▒▒▒▒▒▓
#▒▓▒▓▒▒▒▒▒▒▒▒▒▓
#▒▓▒▓▓▓▓▓▓▓▓▓▓
#▒▓▒▒▒▒▒▒▒▓
#▒▒▓▒▒▒▒▒▓
###