#STEP 1: READ DATA FILE
# Download the data set from the URL below; the first CSV column supplies
# the row names of the resulting data frame.
trilo_data <- read.csv(
  file = "http://people.ucsc.edu/~mclapham/earth101/data/biogeog_data.csv",
  row.names = 1
)
#END OF STEP 1

#STEP 2: FORBES SIMILARITY COEFFICIENT
#We will use the Forbes coefficient
#It avoids some of the problems of the more widely-used Jaccard coefficient
# Forbes similarity coefficient between two samples.
#
# x, y: numeric vectors of equal length, one entry per taxon; an entry > 0
#       means the taxon is present in that sample. NA entries are ignored.
# Returns a * n / ((a + b) * (a + c)), where a is the number of taxa present
# in both samples, b and c are the numbers present only in x / only in y, and
# n = a + b + c. The result is NaN when either sample has no positive entries.
forbes <- function(x, y) {
  in_x <- x > 0
  in_y <- y > 0
  # Test presence in each sample separately instead of the sign of x * y:
  # the product of two large integer counts overflows to NA, which silently
  # drops the shared taxon, and two negative entries would count as shared.
  a <- sum(in_x & in_y, na.rm = TRUE)
  b <- sum(in_x, na.rm = TRUE) - a
  c <- sum(in_y, na.rm = TRUE) - a
  n <- a + b + c
  a * n / ((a + b) * (a + c))
}

# Pairwise Forbes dissimilarity (1 - Forbes similarity) between the columns
# of x.
#
# x: data frame or matrix with one column per sample to compare.
# Returns a "dist" object labelled with the column names of x, suitable for
# hclust().
forbes.dist <- function(x) {
  n_samples <- ncol(x)
  m <- matrix(
    NA_real_,
    nrow = n_samples,
    ncol = n_samples,
    dimnames = list(colnames(x), colnames(x))
  )
  # as.dist() reads only the strict lower triangle, so each pair is computed
  # once; seq_len() keeps both loops empty when there is nothing to iterate.
  for (i in seq_len(n_samples)) {
    for (j in seq_len(i - 1L)) {
      m[i, j] <- forbes(x[, i], x[, j])
    }
  }
  1 - as.dist(m)
}


#STEP 3: PERFORM CLUSTER ANALYSIS AND PLOT DENDROGRAM
# Cluster the columns of trilo_data by average linkage on their Forbes
# dissimilarities, then draw the dendrogram with labels aligned at the
# baseline (hang = -1) and no subtitle or x-axis label.
plot(
  hclust(forbes.dist(trilo_data), method = "average"),
  hang = -1,
  main = "Ordovician Trilobite Localities",
  sub = "",
  xlab = "",
  ylab = "Forbes dissimilarity"
)
#END OF STEP 3