### These text files are skeleton documents for students in LING280 to create
### more fleshed-out annotated notes.
###
### Lines of the form `# > ...` are placeholders: replace them with your own
### commands. They are written as comments so the file can still be sourced.

### 1. DOWNLOAD DATA FROM
###    http://people.ucsc.edu/~mwagers/ling280/data/plurals.csv.gz

# Load data from a comma-separated values file.
# Replace the placeholder path with the location of your downloaded copy.
read.csv("/path/to/plurals.csv") -> plurals

### 2. SOME COMMANDS ON DATA FRAMES TO TRY:
###    WHAT'S GOING ON FOR EACH OF THESE?
plurals
summary(plurals)
head(plurals)
plurals$RT
plurals$match

# Draw out a smaller sample to work with:
# 50 values of RT without replacement, then 50 values with replacement.
sample(plurals$RT, 50) -> my.sample
sample(plurals$RT, 50, replace = TRUE) -> my.sample.2

### 3. CALCULATE DESCRIPTIVE STATISTICS
mean(my.sample) # mean; where is the sample's `center of mass` located?
var(my.sample)  # variance; how are the values dispersed about the mean?

### What is the standard deviation?
# > ...
# > ...

### What are the extrema?
max(my.sample) -> samp.max
# > ...

### 3A. WHERE ARE X% OF THE OBSERVATIONS??

# where's the 'middle'?
# The indices below (25:26, 1:49) rely on my.sample holding 50 observations.
sort(my.sample) -> my.sorted.sample
print(my.sorted.sample)
my.sorted.sample[25:26]
mean(my.sorted.sample[25:26])
median(my.sorted.sample)
median(my.sorted.sample[1:49])
# > ...

# quantiles
quantile(my.sample)
quantile(my.sample, seq(0, 1, 0.1))
quantile(my.sample, 0.29)
summary(my.sample)

# summary visualization of sample center/spread
boxplot(my.sample)
abline(h = median(my.sample))
# quantile() with default probabilities returns five values, one per colour.
abline(
  h = quantile(my.sample),
  lty = "dashed",
  col = c("red", "blue", "green", "blue", "red"),
  lwd = 3
)

# Keep only the observations below 800 and plot them again, overlaying the
# individual points on the boxplot.
my.sample[my.sample < 800] -> my.censored.sample
boxplot(my.censored.sample)
stripchart(
  my.censored.sample,
  method = "jitter",
  vertical = TRUE,
  add = TRUE,
  pch = 18,
  col = "darkgreen"
)

### 4. CUMULATIVE DISTRIBUTION FUNCTION

# What is the following command going to do?
# Hint: give the command `?plot` to find the arguments to plot()
plot(quantile(my.sample, seq(0, 1, 0.05)), seq(0, 1, 0.05))
abline(h = 0.5, v = median(my.sample))
# > ...
# > ...

# sample histogram
hist(my.sample)
hist(my.sample, breaks = 5)
hist(my.sample, breaks = 15)

# Bins are 50 units wide starting at 0. hist() stops with an error when the
# breaks do not span the data, so the upper limit is pushed past 1500 whenever
# the sample contains a larger value; otherwise it stays at 1500.
max(1500, 50 * ceiling(max(my.sample) / 50)) -> hist.upper
hist(
  my.sample,
  breaks = seq(0, hist.upper, 50),
  col = "blue",
  density = 25
) -> my.hist
# > ...
# Cumulative counts across the bins of `my.hist` (the histogram object saved
# from the hist() call with explicit breaks). The cumulative bars are drawn
# first, then the per-bin counts are overlaid on the same axes.
cumsum(my.hist$counts) -> my.hist.cumsum
barplot(
  my.hist.cumsum,
  ylim = c(0, 50),
  density = 35,
  col = "blue",
  angle = -45
)
barplot(
  my.hist$counts,
  ylim = c(0, 50),
  density = 45,
  col = "red",
  angle = 45,
  add = TRUE
)
# > ...

#### 5. HOW STABLE A MEASUREMENT IS THE MEAN OF A SAMPLE?
#### WE WILL SOLVE THIS QUESTION USING A LOOP

# Number of resamples to draw, and number of observations in each resample.
50 -> n.resamples
50 -> resample.size

# Preallocate a numeric vector with one slot per resample, then fill it with
# the mean of a fresh random sample on every pass through the loop.
# Note: the loop overwrites `my.sample` each time.
numeric(n.resamples) -> my.sample.means
for (i in seq_len(n.resamples)) {
  sample(plurals$RT, resample.size) -> my.sample
  mean(my.sample) -> my.sample.means[i]
}
# > ...
# > ...

### ### ###

# Empirical CDF of all log-transformed RTs, with the empirical CDF of each
# resample overlaid in red. The resamples are log-transformed as well so that
# they share the x-axis of the full-data curve.
plot(ecdf(log(plurals$RT)), lwd = 1.2)
for (i in seq_len(n.resamples)) {
  sample(plurals$RT, resample.size) -> my.sample
  plot(
    ecdf(log(my.sample)),
    col = "red",
    lwd = 0.6,
    add = TRUE,
    pch = "+"
  )
}