### These text files are skeleton documents for students in LING280 to create more fleshed-out annotated notes

### 1. DOWNLOAD DATA FROM http://people.ucsc.edu/~mwagers/ling280/data/plurals.csv.gz

# Read the comma-separated plurals data into a data frame called `plurals`.
# Replace the placeholder path with the location of your downloaded copy.
# (read.csv() opens the file through a connection that detects gzip
# compression, so the downloaded plurals.csv.gz can be read without unzipping.)
read.csv(file = "/path/to/plurals.csv") -> plurals


### 2. SOME COMMANDS ON DATA FRAMES TO TRY: WHAT'S GOING ON FOR EACH OF THESE?

# Typing a bare object name at the console auto-prints it: here, every row
# of the data frame.
plurals

# summary() gives a column-by-column overview of the data frame.
summary(plurals)

# head() shows only the first few rows (six by default).
head(plurals)

# `$` pulls a single column out of the data frame as a vector; here the RT
# column (presumably reaction times -- confirm against the data description).
plurals$RT

# The same kind of extraction for the `match` column.
plurals$match

# Draw two smaller random samples of 50 RT values to work with:
#   my.sample   -- drawn without replacement (the default for sample())
#   my.sample.2 -- drawn with replacement, so a value may appear more than once
sample(x = plurals$RT, size = 50) -> my.sample
sample(x = plurals$RT, size = 50, replace = TRUE) -> my.sample.2

### 3. CALCULATE DESCRIPTIVE STATISTICS
# mean: where is the sample's `center of mass` located?
mean(my.sample)
# variance: how are the values dispersed about the mean?
var(my.sample)

### What is the standard deviation?

> ...
> ...

### What are the extrema?

# Largest value in the sample, saved as samp.max
max(my.sample) -> samp.max
> ...

### 3A. WHERE ARE X% OF THE OBSERVATIONS?

# Where's the 'middle'?
# Put the 50 sampled values in increasing order and display them.
sort(my.sample) -> my.sorted.sample
print(my.sorted.sample)
# With 50 sorted values, positions 25 and 26 are the two central observations.
my.sorted.sample[25:26]
# Their average ...
mean(my.sorted.sample[25:26])
# ... is what median() reports for an even number of observations.
median(my.sorted.sample)

# Leaving out the largest value gives 49 observations (an odd count):
# compare this median with the one above.
median(my.sorted.sample[1:49])
> ...

# Quantiles
# With no `probs` given, quantile() reports the 0%, 25%, 50%, 75% and 100% points.
quantile(my.sample)
# Deciles: every tenth of the way from 0 to 1.
quantile(my.sample, probs = seq(from = 0, to = 1, by = 0.1))
# Any single probability can be requested, e.g. the 29% point.
quantile(my.sample, probs = 0.29)

# summary() of a numeric vector: the quartile points together with the mean.
summary(my.sample)

# Summary visualization of sample center/spread
boxplot(my.sample)
# Solid horizontal line at the sample median.
abline(h = median(my.sample))
# Thick dashed lines at each of the five default quantiles; the colors are
# paired symmetrically around the middle (green) line.
abline(
  h = quantile(my.sample),
  col = c("red", "blue", "green", "blue", "red"),
  lty = "dashed",
  lwd = 3
)

# Keep only the observations below 800; values of 800 or more are left out.
my.sample[my.sample < 800] -> my.censored.sample

# Boxplot of the censored sample, with the individual observations overlaid
# as jittered points on the same axes.
boxplot(my.censored.sample)
stripchart(
  my.censored.sample,
  vertical = TRUE, method = "jitter",
  pch = 18, col = "darkgreen",
  add = TRUE
)

### 4. CUMULATIVE DISTRIBUTION FUNCTION

# What is the following command going to do?
# Hint: give the command `?plot` to find the arguments to plot()
# (x: the sample quantiles at 5% steps; y: the matching cumulative proportions)
plot(
  x = quantile(my.sample, seq(0, 1, 0.05)),
  y = seq(0, 1, 0.05)
)
# Cross-hairs where the cumulative proportion 0.5 meets the sample median.
abline(h = 0.5, v = median(my.sample))
> ...

> ... 

# Sample histograms
# Default choice of bins.
hist(my.sample)
# A single number for `breaks` is only a suggestion for the number of bins.
hist(my.sample, breaks = 5)
hist(my.sample, breaks = 15)
# Explicit 50-unit bins starting at 0. hist() stops with an error if any value
# lies outside the supplied breaks, so the upper edge is 1500 or, when the
# random sample reaches beyond that, the next multiple of 50 at or above the
# sample maximum. (Assumes no negative values in the sample.)
# The histogram object is kept in my.hist so its counts can be reused.
hist(
  my.sample,
  breaks = seq(0, max(1500, 50 * ceiling(max(my.sample) / 50)), 50),
  col = "blue", density = 25
) -> my.hist

> ...

# Running total of the histogram's bin counts.
cumsum(my.hist$counts) -> my.hist.cumsum

# Cumulative counts as blue hatched bars; the y-axis runs from 0 to 50.
barplot(
  my.hist.cumsum,
  ylim = c(0, 50), col = "blue",
  density = 35, angle = -45
)

# Per-bin counts overlaid in red (hatched the other way) on the same axes.
barplot(
  my.hist$counts,
  ylim = c(0, 50), col = "red",
  density = 45, angle = 45,
  add = TRUE
)
		
> ...

#### 5. HOW STABLE A MEASUREMENT IS THE MEAN OF A SAMPLE?
#### WE WILL SOLVE THIS QUESTION USING A LOOP

# Draw 50 fresh samples of 50 RT values each and record the mean of each one.
# The result vector is preallocated as numeric: vector(length = 50) would start
# out logical and only be coerced to numeric by the first assignment.
numeric(50) -> my.sample.means

for (i in seq_along(my.sample.means)) {
  # NOTE: each pass overwrites the my.sample drawn earlier in the script.
  sample(plurals$RT, 50) -> my.sample
  mean(my.sample) -> my.sample.means[i]
}

> ...

> ...

###
###
###

# Empirical CDF of the full set of RTs on a log scale, with the ECDFs of 50
# random samples of 50 observations each overlaid in red.
plot.ecdf(log(plurals$RT), lwd = 1.2)
for (i in seq_len(50)) {
  # NOTE: each pass overwrites my.sample with a new raw (unlogged) sample.
  sample(plurals$RT, 50) -> my.sample
  # The base plot's x-axis is log(RT), so the sample is logged before its ECDF
  # is added; on the raw scale the red curves would fall outside the plotted
  # range.
  plot(ecdf(log(my.sample)), col = "red", lwd = 0.6, add = TRUE, pch = "+")
}