### MEETING 2
### LING280 CLASS TRANSCRIPT
### January 10, 2013

### CONTINUING FROM MEETING 1

### CUMULATIVE DISTRIBUTION FUNCTION

# What is the following command going to do?
# Hint: give the command `?plot` to find the arguments to plot()

# load the plurals data ...
# (the path below is a placeholder: point it at your copy of plurals.csv)
plurals <- read.csv("/path/to/plurals.csv")

# 50 values drawn at random from the RT column
my.sample <- sample(plurals$RT, 50)

# x: the 0%, 5%, ..., 100% quantiles of the sample; y: the matching fractions
plot(quantile(my.sample, seq(0, 1, 0.05)), seq(0, 1, 0.05))
abline(h = 0.5, v = median(my.sample))

# Plot another quantile (abline h = ...)
# > ...

# Histogram
hist(my.sample)
hist(my.sample, breaks = 5)
hist(my.sample, breaks = 15)
# hist() returns its bin description (counts, mids, ...) invisibly; keep it
my.hist <- hist(my.sample, breaks = seq(0, 1500, 50), col = "blue",
                density = 25)

# inspect the hist object
# > ...

# running total of the per-bin counts
my.hist.cumsum <- cumsum(my.hist$counts)

# cumulative counts (blue) with the per-bin counts (red) overlaid on the
# same axes; add = TRUE draws onto the existing barplot
barplot(my.hist.cumsum, ylim = c(0, 50), density = 35, col = "blue",
        angle = -45, names.arg = my.hist$mids)
barplot(my.hist$counts, ylim = c(0, 50), density = 45, col = "red",
        angle = 45, add = TRUE)

# Empirical CDF/PDF
# > ...
# > my.cumulative.fraction <- ...
# > ...

###
### BINOMIAL THEOREM, BINOMIAL DISTRIBUTION
###
rbinom(1, 10, 0.5)
rbinom(10, 1, 0.5)

# WHAT ...
#
# Args: n
#       size
#       prob
# Returns:

# Sample 'p'
sum(rbinom(10, 1, 0.5)) / 10
sum(rbinom(100, 1, 0.5)) / 100
sum(rbinom(10000000, 1, 0.5)) / 10000000

# Simulate a large number of experiments
my.experiments <- rbinom(100000, 40, 0.5)
# NOTE: hist() bins are right-closed by default, so with breaks = 0:40 the
# bar for count k is drawn over (k - 1, k]
hist(my.experiments, breaks = 0:40, freq = FALSE)

### COMPOUND EVENTS
### 4 CHOOSE 1
choose(4, 1)
### 4 CHOOSE 2
choose(4, 2)

choose(4, 0:4)

for (i in 1:10) {
  print(choose(i, 0:i))
}

### OK, now what about for 40 events?
num.outcomes <- choose(40, 0:40)

### How many total possibilities are there?
# > ....

### PDF
hist(my.experiments, breaks = 0:40, freq = FALSE)
# EXERCISE: replace the `...` with the y-values before running this line
lines(0:40, ..., type = "l", col = "red")

### CDF
# > ...

### Deriving the binomial distribution
# EXERCISE: replace the `...` before running this line
p <- ...

### COMPUTE THE DISTRIBUTION
binom.distr.40 <- choose(40, 0:40) * p

### SAME PLOT AS ABOVE
hist(my.experiments, breaks = 0:40, freq = FALSE)
# element i of binom.distr.40 belongs to count i - 1, so give x explicitly
lines(0:40, binom.distr.40)

### == 1
# EXERCISE: replace the `...` before running this line
sum( ... )

### WHAT IS THE EXACT PROBABILITY OF 20 occurring?
# > ....

### SUPPOSE THE LIKELIHOOD IS NOT p = 0.5, but p = 0.75
# > ....

### p = 0.75, 40 events, likelihood of count being 30
# > ....

### p = 0.75, 4 events, likelihood of count being 3
# > ....

### p = 0.75, 400 events, likelihood of count being 300
# > ....

### INTERVALS/RANGES
n.events <- 40
p.event <- 0.7

# 10000 simulated experiments, each giving a count out of n.events
sim.events <- rbinom(10000, n.events, p.event)

# exact probabilities of the counts 0, 1, ..., n.events.
# R vectors are 1-based, so probs[k + 1] is the probability of count k.
probs <- dbinom(0:n.events, n.events, p.event)

plot(0:n.events, probs, col = "red", lwd = 3, type = "l")
# NOTE: hist() bins are right-closed by default, so the bar for count k is
# drawn over (k - 1, k]
hist(sim.events, breaks = 0:n.events, freq = FALSE, col = "grey",
     density = 20, add = TRUE)
abline(v = mean(sim.events))

###
### +/- 1   (counts 27..29; the outer parentheses print the value)
(prob.within.1 <- sum(probs[(27:29) + 1]))
abline(v = c(27, 29), col = "blue", lwd = 2)

### +/- 2   (counts 26..30)
(prob.within.2 <- sum(probs[(26:30) + 1]))
abline(v = c(26, 30), col = "green", lwd = 2)

### +/- 6   (counts 22..34)
(prob.within.6 <- sum(probs[(22:34) + 1]))
abline(v = c(22, 34), col = "purple", lwd = 2)

#### TAKE HOME CHALLENGE
#### REPLICATE FIGURE 2.8 in V&B WITHOUT COPYING THEIR CODE VERBATIM