### SOLUTIONS TO LING280 W13 Problem Set 1
### M. Wagers, January 18, 2013

### Problem 1
# Median of each condition (i.e. each component of the data frame)
median(islands$that.matrix)
median(islands$that.embedded)
median(islands$whether.matrix)
median(islands$whether.embedded)
# OR, for all conditions at once
apply(islands, 2, median)

# Standard deviation
sd(islands$that.matrix) # etc. ... OR
apply(islands, 2, sd)

# Mean
mean(islands$that.matrix) # etc. ... OR
apply(islands, 2, mean)
# OR
colMeans(islands)

# Inter-quartile range
IQR(islands$that.matrix) # etc. ... OR
apply(islands, 2, IQR)

### Problem 2
# The function apply applies the function table (its third argument)
# to the columns (its second argument, 2 for columns) of the data
# frame judge (its first argument). The function table gives the count
# for each level.
apply(judge, 2, table)

### Problem 3
# There are numerous ways to solve this problem; here are 3

# APPROACH 1
# Create a table of counts per rating category and condition.
# Tabulating against a fixed set of levels guarantees one row per rating in
# every condition, even if some rating was never used in a condition;
# without this, the per-column tables can have different lengths and
# selecting the "6" and "7" rows by position is no longer safe.
# NOTE(review): assumes ratings are on a 1-7 scale (6/7 is the top category).
rating.levels <- 1:7
islands.tab <- vapply(
  islands,
  function(ratings) table(factor(ratings, levels = rating.levels)),
  integer(length(rating.levels))
)
# Sum up just the 6/7 category (rows are selected by rating label)
hi.ratings <- c("6", "7")
islands.hi.sum <- colSums(islands.tab[hi.ratings, , drop = FALSE])
# Sum up each condition
islands.bycondition.sum <- colSums(islands.tab)
# Take the ratio
islands.hi.prop <- islands.hi.sum / islands.bycondition.sum

# APPROACH 2
# Use the prop.table function; the second argument, 2, means to
# proportionalize by column
islands.tab.prop <- prop.table(islands.tab, 2)
islands.hi.prop <- colSums(islands.tab.prop[hi.ratings, , drop = FALSE])

# APPROACH 3
# Calculate the empirical cumulative distribution function; it gives us the
# fraction of judgments less than or equal to its argument (5 in this case);
# subtract from 1 to obtain the proportion of judgments assigned 6 or 7.
Fn.tm <- ecdf(islands$that.matrix)
1 - Fn.tm(5)
# Repeat for each condition: 1 - ECDF(5) is the share of 6/7 ratings
vapply(islands, function(ratings) 1 - ecdf(ratings)(5), numeric(1))

### Problem 4
# Boxplot of every condition.
# dev.new() opens a fresh graphics device on any platform.
dev.new()
boxplot(islands)

### Problem 5
# Ratings are whole numbers, so the bins are centred on each rating. A bare
# `breaks = 7` yields breakpoints 1, 2, ..., 7, i.e. only six bins, and the
# first bin [1, 2] would lump ratings 1 and 2 together.
# NOTE(review): assumes ratings are on a 1-7 scale.
rating.breaks <- seq(0.5, 7.5, by = 1)

# freq = TRUE produces a histogram with the counts
dev.new()
hist(islands[, "whether.matrix"], breaks = rating.breaks, freq = TRUE)

# freq = FALSE produces a histogram with the probability density
dev.new()
hist(islands[, "whether.matrix"], breaks = rating.breaks, freq = FALSE)

### Problem 6
# This code fragment performs 5 different experiments which differ in the
# number of observations that each experiment involves (i.e. how many
# observations are sampled). Each experiment is replicated 1000 times and the
# mean of each replication is recorded; the means are then shown as one
# boxplot per experiment. The resulting boxplots show that the higher the
# number of observations the means are based on, the higher the precision of
# the estimate of the random variable.
island.judgments <- islands[, "whether.embedded"]
sample.sizes <- c(5, 10, 20, 40, 80)
n.replications <- 1000

# One row per replication, one column per sample size. Within a replication
# the samples are drawn in order of increasing size.
rep.experiment <- t(replicate(
  n.replications,
  vapply(
    sample.sizes,
    function(size) mean(sample(island.judgments, size, replace = TRUE)),
    numeric(1)
  )
))

dev.new()
boxplot(
  rep.experiment ~ col(rep.experiment),
  names = paste("n =", sample.sizes),
  ylim = c(1, 7),
  ylab = "mean rating",
  xlab = "sample size",
  main = "Simulated outcome of 1000 experiments"
)

### Problem 7
# This code fragment computes and plots the average squared deviation of the
# data from each number along the interval [0, 7]. The smallest average
# squared deviation is obtained when deviations are taken from the mean of
# the data set, and that smallest value is the variance. In other words, the
# mean is the value for which the average squared deviation is smallest:
# the mean minimizes variance!
# Create a vector `numbers` containing the numbers from 0 to 7 in 0.05
# increments; these are the candidate "centres" to measure deviations from.
numbers <- seq(0, 7, by = 0.05)

# For each candidate n: subtract island.judgments from n, square the
# resulting vector and sum its elements, then divide that sum of squares by
# the number of elements in island.judgments minus 1 (the same denominator
# the sample variance uses). The result has one entry per element of
# `numbers`, in the same order.
n.judgments <- length(island.judgments)
squared.deviation <- vapply(
  numbers,
  function(n) sum((n - island.judgments)^2) / (n.judgments - 1),
  numeric(1)
)

# dev.new() opens a fresh graphics device on any platform.
dev.new()
# Plot the values in squared.deviation with the numbers on the x axis
plot(numbers, squared.deviation, pch = "+")
# Draw a horizontal line through the minimum value of squared.deviation
abline(h = min(squared.deviation))