###################################################################################
## Introduction
###################################################################################
# clab.col and rlab.col are colours used to identify the arrays or genes in the
# dataset. They can be used to check how some suspected factors (such as ER
# status or cell-cycle regulated genes) correlate with the clustered dendrogram.
# key.txt holds the key labels, ordered by increasing value of clab.col. If
# clab.col takes the values {1, 2} representing ER negative and ER positive,
# then perhaps key.txt = c("ER negative", "ER positive").
###################################################################################

library(sma)
library(amap)
source(file = "ecluster.fn.R")

# An artificial example: 100 genes (rows) by 40 arrays/tumours (columns).
fake.data <- matrix(rnorm(4000), ncol = 40)

# Suppose 25 of the tumours are ER negative (-1) and 15 are ER positive (1),
# placed in random order.
ER <- sample(c(rep(-1, 25), rep(1, 15)))
er.pos <- ER == 1
er.neg <- ER == -1

# Shift the ER positive arrays upwards. Note that the 150 offsets are recycled
# over the 100 x 15 block of ER positive values.
fake.data[, er.pos] <- fake.data[, er.pos] + runif(150, -0.5, 3)

# Build the complete vector of array names before attaching it to the matrix.
# Assigning into colnames(fake.data)[...] while the matrix has no column names
# only extends the name vector up to the largest index used, which fails with a
# dimnames length error whenever the last array is not ER positive.
array.names <- character(ncol(fake.data))
array.names[er.pos] <- LETTERS[1:15]  # ER positive arrays have capital letters
array.names[er.neg] <- letters[1:25]  # ER negative arrays have lower-case letters
colnames(fake.data) <- array.names
rownames(fake.data) <- paste0("Gene", seq_len(nrow(fake.data)))

ER.col <- ifelse(ER == -1, 1, 2)  # So that ER positive labels are red
ER.txt <- c("ER negative", "ER positive")

# Usually the data is row z-scored before being input to Cluster 3.0, but
# assume here that fake.data is already z-scored.
# Alternative call using complete linkage (left disabled):
# ecluster.fn(data=fake.data, "pearson", "complete", key.txt=ER.txt, clab.col=ER.col)
ecluster.fn(data=fake.data, "pearson", "average", key.txt=ER.txt, clab.col=ER.col)
# You might have noticed that sometimes the clusters need to be rearranged.

###################################################################################
## Identified problems so far
###################################################################################
# - Row labels get squeezed together when a large number of genes (> 100) is
#   involved.
# - A title argument needs to be included so that it can be used for plot.mat.
# - The array dendrograms may need some rearranging to make the results agree
#   better with the visual output.