# STEP 1: READ DATA FILE
# The first column of the CSV supplies the row names; the remaining columns
# are the samples that are compared and clustered below.
trilo_data <- read.csv(
  "http://people.ucsc.edu/~mclapham/earth101/data/biogeog_data.csv",
  row.names = 1
)
# END OF STEP 1

# STEP 2: FORBES SIMILARITY COEFFICIENT
# We will use the Forbes coefficient.
# It avoids some of the problems of the more widely-used Jaccard coefficient.

# Forbes similarity between two samples.
#
# x, y: numeric vectors of equal length, one entry per taxon. An entry
#       greater than zero counts as "present"; NA entries count as absent.
#
# Returns a * n / ((a + b) * (a + c)), where a is the number of entries
# present in both samples, b and c are the numbers present only in x and
# only in y, and n = a + b + c (joint absences are ignored). For
# non-negative data the result lies in [0, 1]. It is NaN when either sample
# has no positive entries, because the denominator is then zero.
forbes <- function(x, y) {
  shared <- sum(x * y > 0, na.rm = TRUE)
  only_x <- sum(x > 0, na.rm = TRUE) - shared
  only_y <- sum(y > 0, na.rm = TRUE) - shared
  n_total <- shared + only_x + only_y
  shared * n_total / ((shared + only_x) * (shared + only_y))
}

# Pairwise Forbes dissimilarity (1 - similarity) between the columns of x.
#
# x: data frame or matrix with one column per sample.
#
# Returns a "dist" object labelled with the column names of x, suitable for
# passing to hclust().
forbes.dist <- function(x) {
  n_samples <- ncol(x)
  m <- matrix(nrow = n_samples, ncol = n_samples)
  # seq_len() (rather than 1:n) keeps the loops empty for zero-column input.
  for (i in seq_len(n_samples)) {
    for (j in seq_len(n_samples)) {
      m[i, j] <- forbes(x[, i], x[, j])
    }
  }
  colnames(m) <- colnames(x)
  rownames(m) <- colnames(x)
  # as.dist() keeps the lower triangle; subtracting from 1 turns the
  # similarities into dissimilarities while preserving the "dist" class.
  1 - as.dist(m)
}
# END OF STEP 2

# STEP 3: PERFORM CLUSTER ANALYSIS AND PLOT DENDROGRAM
# Average-linkage clustering of the samples; hang = -1 draws every leaf
# label down at the baseline of the plot.
plot(
  hclust(forbes.dist(trilo_data), method = "average"),
  hang = -1,
  main = "Ordovician Trilobite Localities",
  sub = "",
  xlab = "",
  ylab = "Forbes dissimilarity"
)
# END OF STEP 3