# Clustering lab: K-means and hierarchical clustering on simulated data,
# followed by hierarchical clustering of a city data set read from disk.
#
# The script is meant to be stepped through interactively: bare expressions
# such as `km.out` are there to print their value to the console.

# K-Means Clustering ----

# Simulated data: 50 points in 2 dimensions; the first 25 rows are shifted
# by (+3, -4) so that two groups exist by construction.
set.seed(2)
x <- matrix(rnorm(50 * 2), ncol = 2)
x[1:25, 1] <- x[1:25, 1] + 3
x[1:25, 2] <- x[1:25, 2] - 4
plot(x[, 1], x[, 2])

help(kmeans)

# K = 2, keeping the best of 20 random starts.
km.out <- kmeans(x, 2, nstart = 20)
km.out$cluster
km.out
# `cluster + 1` is used as the colour index for each point.
plot(
  x,
  col = (km.out$cluster + 1),
  main = "K-Means Clustering Results with K=2",
  xlab = "", ylab = "", pch = 20, cex = 2
)

# 3-clustering
set.seed(4)
km.out <- kmeans(x, 3, nstart = 20)
km.out
plot(
  x,
  col = (km.out$cluster + 1),
  main = "K-Means Clustering Results with K=3",
  xlab = "", ylab = "", pch = 20, cex = 2
)

# Repeat several times: without `nstart` a single random start is used, so
# re-running these three statements can give different partitions.
km.out <- kmeans(x, 3)
km.out
plot(
  x,
  col = (km.out$cluster + 1),
  main = "K-Means Clustering Results with K=3",
  xlab = "", ylab = "", pch = 20, cex = 2
)

# Compare the total within-cluster sum of squares for 1 start vs 20 starts.
set.seed(3)
km.out <- kmeans(x, 3, nstart = 1)
km.out$tot.withinss
km.out <- kmeans(x, 3, nstart = 20)
km.out$tot.withinss

# Hierarchical Clustering ----

# Same data, Euclidean distances (the `dist()` default), three linkages.
hc.complete <- hclust(dist(x), method = "complete")
hc.average <- hclust(dist(x), method = "average")
hc.single <- hclust(dist(x), method = "single")

# Draw the three dendrograms side by side, then restore the previous
# graphics settings so that later plots are not squeezed into a 1x3 grid.
old_par <- par(mfrow = c(1, 3))
plot(hc.complete, main = "Complete Linkage", xlab = "", sub = "", cex = .9)
plot(hc.average, main = "Average Linkage", xlab = "", sub = "", cex = .9)
plot(hc.single, main = "Single Linkage", xlab = "", sub = "", cex = .9)
par(old_par)

# Cluster labels obtained by cutting each tree into a given number of groups.
cutree(hc.complete, 2)
cutree(hc.average, 2)
cutree(hc.single, 2)
cutree(hc.single, 4)

# 3-dimensional features: 30 observations, clustered with a
# correlation-based dissimilarity (1 - correlation between rows).
x <- matrix(rnorm(30 * 3), ncol = 3)
dd <- as.dist(1 - cor(t(x)))
plot(
  hclust(dd, method = "complete"),
  main = "Complete Linkage with Correlation-Based Distance",
  xlab = "", sub = ""
)

# Try to change x so that it will exhibit some clusters and repeat the above,
# to make it more interesting.

# City data ----

# Setting the working directory.
# It is assumed that the data file 'cities4.dat' is located in this folder
# (the script also reads 'citiesName.txt' from the same place).
# NOTE(review): this is a machine-specific absolute path; edit `data_dir`
# before running elsewhere. The working directory is still changed here
# because the reads below use relative file names.
data_dir <- "/home/podgorsk/Dropbox/Wszystko/Teaching/TaughtCourses/DataMiningVisualization/Labs/Lab2"
setwd(data_dir)

help(hclust)
help(dist)

Cit <- read.table("cities4.dat")
dm <- dist(Cit)

# Dendrograms of the city data under single, complete and average linkage.
cs <- hclust(dm, method = "single")
plot(cs)
cc <- hclust(dm, method = "complete")
plot(cc)
ca <- hclust(dm, method = "average")
plot(ca)

# '&'-separated table with a header row.
Cities <- read.table("citiesName.txt", header = TRUE, sep = "&")
Cities[, 2:4]

# First column of `Cities` for five hand-picked groups of row indices.
# NOTE(review): presumably these are clusters read off the dendrograms above,
# with column 1 holding the city names -- confirm against the data file.
Cities[c(3, 4, 7, 15, 18, 25, 22, 33, 38, 41, 42), 1]
Cities[c(32, 9, 24, 11, 1, 5, 14, 12, 40), 1]
Cities[c(6, 17, 26, 29, 16, 8, 30), 1]
Cities[c(21, 34, 23, 10, 27, 45, 20, 35, 2, 19), 1]
Cities[c(44, 43, 13, 46, 36, 31, 37, 28, 39), 1]