##############
## Introduction##
##############

# clab.col and rlab.col are colours used to identify the arrays or genes in the dataset.
# They can be used for checking the correlation between some suspected factors
# (such as ER status or cell-cycle regulated genes) and the clustered dendrogram.

# 'key.txt' holds the key labels, ordered by increasing value of clab.col.
# For example, if clab.col takes the values {1, 2}, representing ER negative and
# ER positive, then perhaps key.txt=c("ER negative", "ER positive").

###################################################################################

library(sma); library(amap)
source(file="ecluster.fn.R")

# An artificial example.

# Simulated expression matrix: 100 genes (rows) by 40 arrays/tumors (columns).
fake.data <- matrix(rnorm(4000), ncol=40)

# ER status per array, in random order: 25 ER negative (-1) and 15 ER positive (1).
ER <- sample(c(rep(-1, 25), rep(1, 15)))

# Shift the ER positive columns so that they differ from the ER negative ones.
# NOTE(review): only 150 offsets are drawn for the 100 x 15 = 1500 ER positive
# cells, so R silently recycles them ten times -- confirm this is intended.
fake.data[, ER == 1] <- fake.data[, ER == 1] + runif(150, -0.5, 3)

# Column labels: capital letters for ER positive arrays, lower case for ER negative.
# The labels are built as a full-length vector first. Assigning into
# colnames(fake.data)[idx] while the column names are still NULL creates a vector
# only as long as the largest index, which fails with "length of 'dimnames' [2]
# not equal to array extent" whenever the last array is not ER positive.
array.labels <- character(ncol(fake.data))
array.labels[ER == 1]  <- LETTERS[seq_len(sum(ER == 1))]
array.labels[ER == -1] <- letters[seq_len(sum(ER == -1))]
colnames(fake.data) <- array.labels
rownames(fake.data) <- paste("Gene", seq_len(nrow(fake.data)), sep="")

# Colour index per array: 1 for ER negative, 2 for ER positive
# (colour 2 is red in R's default palette).
ER.col <- ifelse(ER == -1, 1, 2)
# Key labels, listed in increasing order of the ER.col values.
ER.txt <- c("ER negative", "ER positive")

# Usually the data is row z-scored before being input to Cluster 3.0, but here we
# assume that fake.data is already z-scored.

# Cluster and plot the same data twice; only the third argument differs.
# Presumably "pearson" is the distance measure and "complete"/"average" the
# linkage method -- verify against the definition in ecluster.fn.R.
ecluster.fn(data=fake.data, "pearson", "complete", key.txt=ER.txt, clab.col=ER.col)
ecluster.fn(data=fake.data, "pearson", "average", key.txt=ER.txt, clab.col=ER.col)

# You might have noticed that the clusters sometimes need to be rearranged.

#########################
## Identified problems so far##
#########################

# Row labels get squeezed together when a large number of genes (> 100) is involved.
# A title option needs to be added, which can be used for the plot.mat.
# The array dendrograms may need some rearranging to make the results agree
# better with the visual output.