[BioC] KEGG2heatmap genesymbols as row names?
Saurin D. Jani
jani at musc.edu
Mon Jun 30 14:42:27 CEST 2008
About Pathway to HEATMAP
# Variable explanation:
# esetSub2 = expression matrix
# chiptype = annotation found from GeneChip, here 'hgu95a'
# envPath2Probes = Environment
# envGeneNames <- hgu95aGENENAME;
# envPath2Probes <- hgu95aPATH2PROBE;
# Generate Pathway Heatmap
# Generate one heatmap per distinct KEGG pathway annotated on the probes of
# `esetSub2`.
#
# Expects `esetSub2`, `chiptype` and `envPath2Probes` to exist in the
# workspace (see the variable explanation above).
# NOTE: Pathway2Heatmap() must already be defined when this loop runs;
# source its definition first.
All_DE_Pathway <- aafPathway(geneNames(esetSub2), chiptype)

# Interactive sanity check: distinct numbers of pathways found per probe.
unique(sapply(All_DE_Pathway, length))

# Flatten the per-probe pathway lists into one list of pathway objects.
pathlist <- do.call("c", All_DE_Pathway)
pathL <- length(pathlist)
kegg <- as.list(envPath2Probes)

if (pathL >= 1) {
  # The pathway table is only built when at least one pathway was found, so
  # the guard above also protects the table construction.
  # One row per distinct pathway; the columns used below are "id" and "name".
  pathmatrix <- sapply(pathlist, attributes)
  pathnamesX <- unique(t(pathmatrix))
  pathRow <- as.numeric(nrow(pathnamesX))

  for (i in seq_len(pathRow)) {
    pathID <- as.character(unlist(pathnamesX[i, "id"]))
    pathName <- as.character(unlist(pathnamesX[i, "name"]))
    pathName <- trimWhiteSpace(pathName)

    # Make the name file-system friendly. gsub() replaces EVERY slash, space
    # and hyphen; the previous chain of sub() calls only replaced the first
    # "/", the first four spaces and the first "-".
    pathName <- gsub("[/ -]", "_", pathName)

    # sep = "_" yields "Pathway_<name>_.jpeg"; kept as-is so that existing
    # output file names do not change.
    pathFile <- paste("Pathway", pathName, ".jpeg", sep = "_")

    # The legend text is a single-line literal (the mail-wrapped original
    # embedded a line break in the middle of the string).
    pathSub <- paste(
      pathName,
      "Legend: Red = High, Blue = Low, White = Medium",
      sep = " "
    )
    pathSub <- paste("Date of Analysis:", date(), pathSub, sep = " ")

    Pathway2Heatmap(pathID, pathFile, pathSub)
  }
} # if ends here
#
#
###########################################################
# Draw a row-clustered heatmap (JPEG) of the probes in `esetSub2` that belong
# to one KEGG pathway, labelling the rows with gene names instead of probe IDs.
#
# Arguments:
#   pathway  Pathway ID used as a key into `envPath2Probes`.
#   file     Name of the JPEG file to write.
#   Hsub     Subtitle printed under the heatmap. If omitted, the text
#            "Generated by EXAMPLE" is used.
#
# Reads the workspace objects `esetSub2`, `envPath2Probes`, `envGeneNames`
# and `samples`.
# NOTE(review): `samples` is not defined in this snippet; it presumably holds
# one label per column of `esetSub2` -- confirm against the calling script.
#
# A file is only written when more than one probe of the pathway is present
# in `esetSub2`; otherwise the function returns invisibly without plotting.
Pathway2Heatmap <- function(pathway, file, Hsub) {
  # Previously the caller's subtitle was unconditionally overwritten; now the
  # fixed text is only the fallback for calls that omit `Hsub`.
  if (missing(Hsub)) {
    Hsub <- c("Generated by EXAMPLE")
  }

  kegg <- as.list(envPath2Probes)
  pathProbes <- unique(as.character(kegg[[pathway]])) # probes in the pathway
  esetSub2Genes <- as.character(geneNames(esetSub2))

  # Probes present both in the pathway and in the expression set, in pathway
  # order (same result as the former rbind() + duplicated() idiom).
  sharedProbes <- pathProbes[pathProbes %in% esetSub2Genes]
  if (length(sharedProbes) == 0) {
    return(invisible(NULL))
  }

  exp2 <- esetSub2[sharedProbes]
  exp2X <- exprs(exp2)
  pathDEGenes <- as.character(geneNames(exp2))

  # Look up a gene name for every row. The fallback label is a single-line
  # literal (the mail-wrapped original embedded a line break in it).
  pathDEGeneNames <- mget(
    pathDEGenes,
    envir = envGeneNames,
    ifnotfound = "No Annotation For this Probe"
  )

  # Collapse each lookup result to exactly one string so that the row labels
  # form a plain character vector aligned with the rows of exp2X.
  rownames(exp2X) <- vapply(
    pathDEGeneNames,
    function(nm) paste(nm, collapse = "; "),
    character(1),
    USE.NAMES = FALSE
  )
  colnames(exp2X) <- samples

  if (nrow(exp2X) > 1) {
    jpeg(filename = file, width = 2000, height = 2000)
    # Close the JPEG device even if clustering or plotting fails below.
    jpegDevice <- dev.cur()
    on.exit(dev.off(which = jpegDevice), add = TRUE)

    # Cluster rows on correlation distance between expression profiles.
    row.dist <- as.dist(1 - cor(t(exp2X)))
    gmpalette <- bluered(64)
    heatmap.2(
      exp2X,
      col = gmpalette,
      Colv = FALSE,
      Rowv = as.dendrogram(hclust(row.dist, method = "centroid")),
      scale = "row",
      key = TRUE,
      keysize = 0.60,
      symkey = FALSE,
      density.info = "none",
      trace = "none",
      margins = c(5, 85),
      cexRow = 1,
      cexCol = 1,
      sub = Hsub,
      cex.sub = 1
    )
    print("Pathway Heat map done")
  } # if
} # pathway2heatmap
###########################################################
For more information, or to run a free web-based analysis on your own data, see:
http://proteogenomics.musc.edu/ma/arrayQuest.php?page=home&act=manage&process=Methods_Library_List
Saurin
--
|------------------------------------------------
| Saurin D. Jani
| Bioinformatician
|
| Department of Cell Biology and Anatomy
| Medical University of South Carolina (MUSC)
| 173 Ashley Ave
| Charleston,SC - 29425
|
| Email: jani at musc.edu
| Phone: (843)792-1340
| Website: http://www.musc.edu/~jani
|------------------------------------------------
Quoting Celine Carret <ckc at sanger.ac.uk>:
> Dear BioC users,
>
>
>
> I want to plot the genes in my result-matrix that have an annotation in
> a given KEGG pathway as a heatmap. For that, I use the KEGG2heatmap
> function from the annotate package (see code below).
>
> I was wondering if anyone has an idea about how to change the default
> affy-IDs as row names in a KEGG2heatmap (or GO2heatmap) to have gene
> symbols or gene names for that matter displayed in the heatmap.
>
> Changing the probeset for gene symbols in the matrix doesn't work as the
> function uses the affy ID to query your matrix and find the probesets
> having the KEGG (or GO) annotation and put them in the heatmap output.
>
> I also tried to use labRow as in a normal heatmap but it doesn't work
> either.
>
> I then tried to write the object of the function and recover the order
> of the genes from the rowInd, thinking I could change this post-plot in
> a way, but this relates only to the number of genes found in there,
> so... not working!
>
> If anyone has an idea about how to solve this problem, I would be very
> grateful as I'm suppose to do many of those plots, and modifying them
> one after the other by adding the labRow=c("ICAM1","VCAM"...)
> accordingly for each GO or KEGG after checking for affy-ID to gene
> symbols is going to be long and painful.
>
>
>
> Thanks a lot and best wishes
>
> Celine
>
>
>
> NB: I know I didn't yet upgrade to the latest version of R and BioC, and
> I will, but in the meantime I would appreciate any suggestions to help
> me
>
>
>
>> library(affycoretools)
>
>> library(gplots)
>
>> library(mouse4302)
>
>> load("my.RData")
>
>> ls("package:mouse4302")
>
>> symbols1108_ids <- mget(rownames(matrix_eset_1108),
> envir=mouse4302SYMBOL)
>
>> matrix_eset_1108_symbols <- matrix_eset_1108
>
>> rownames(matrix_eset_1108_symbols) <- symbols1108_ids
>
>> KEGG2heatmap("04514", matrix_eset_1108, data = "mouse4302",
> main="CAMs", xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
> ##gives what I expect, with row names written as probeset Affymetrix Ids
>
>
>
>> matrix_eset_1108_symbols <- cbind(symbols1108_ids,matrix_eset_1108)
>
>> KEGG2heatmap("04514", matrix_eset_1108_symbols, data =
> "mouse4302",main="CAMs", xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
> Error in heatmap(dataM, ...) : 'x' must be a numeric matrix
>
>
>
>> KEGG2heatmap("04514", matrix_eset_1108, data =
> "mouse4302",labRow=symbols1108_ids,main="CAMs",
> xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
> ## gives a heatmap with gene symbols as row names, however, the symbols
> do not correspond to the probeset ids, they do correspond to the
> ordering as in the rowInd:
>
>
>
>> CAMs <- KEGG2heatmap("04514", matrix_eset_1108, data =
> "mouse4302",main="CAMs", xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
>> names(CAMs)
>
> [1] "rowInd" "colInd" "Rowv" "Colv"
>
>> CAMs$rowInd
>
> [1] 9 11 6 7 8 1 5 10 2 12 4 3
>
> ## those indexes relate to the order of the 12 genes found in the CAM
> pathway within my matrix, not across the whole matrix (1108 probesets).
>
> ## I identified this by running a KEGG2heatmap for a different pathway.
> The symbols were the same, only shuffled in a different order.
>
>
>
>> head(matrix_eset_1108_symbols)
>
> symbols1108_ids I_6hA I_6hB I_12hA I_12hB I_18hA I_18hB
> I_24hA I_24hB Non_6hA Non_6hB Non_12hA Non_12hB Non_18hA Non_18hB
> Non_24hA Non_24hB
>
> Psph "Psph" 9.527257 NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL NULL NULL NULL NULL
>
>
> Arfgef1 "Arfgef1" 11.25055 NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL NULL NULL NULL NULL
>
>
> Tbl3 "Tbl3" 8.922073 NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL NULL NULL NULL NULL
>
>
> Snapap "Snapap" 8.899586 NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL NULL NULL NULL NULL
>
>
> Cx3cl1 "Cx3cl1" 8.244229 NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL NULL NULL NULL NULL
>
>
> Gsn "Gsn" 7.061844 NULL NULL NULL NULL NULL NULL
> NULL NULL NULL NULL NULL NULL NULL NULL NULL
>
>
>> head(symbols1108_ids)
>
> $`1415673_at`
>
> [1] "Psph"
>
>
>
> $`1415711_at`
>
> [1] "Arfgef1"
>
>
>
> $`1415750_at`
>
> [1] "Tbl3"
>
>
>
> $`1415756_a_at`
>
> [1] "Snapap"
>
>
>
> $`1415803_at`
>
> [1] "Cx3cl1"
>
>
>
> $`1415812_at`
>
> [1] "Gsn"
>
>
>
>> head(matrix_eset_1108)
>
> I_6hA I_6hB I_12hA I_12hB I_18hA I_18hB
> I_24hA I_24hB Non_6hA Non_6hB Non_12hA Non_12hB Non_18hA
> Non_18hB
>
> 1415673_at 9.527257 9.838667 9.890481 10.020292 9.799559 9.904187
> 9.645748 9.445655 8.950675 8.943936 8.941548 8.823886 8.687042
> 9.178271
>
> 1415711_at 11.250548 11.217327 11.286776 11.223121 11.021241 11.113918
> 10.941252 10.794538 11.665688 11.510708 11.436264 11.455783 11.489206
> 11.264688
>
> 1415750_at 8.922073 8.952717 9.196360 9.139688 9.161736 9.141267
> 8.940151 8.859688 8.939075 8.748251 8.909915 9.044485 8.383893
> 8.578295
>
> 1415756_a_at 8.899586 8.894382 9.012984 9.148159 9.193082 8.982367
> 9.238500 9.427381 9.560140 9.428719 9.245954 9.178162 9.395412
> 9.180315
>
> 1415803_at 8.244229 7.768872 7.399605 7.107893 6.802274 6.212161
> 6.864431 6.767189 8.107881 7.942794 8.205100 8.188111 7.029770
> 7.136613
>
> 1415812_at 7.061844 7.754393 7.717294 7.922748 8.321420 8.339937
> 8.257631 8.064229 7.024239 7.139190 6.834602 7.163571 6.093165
> 6.861289
>
> Non_24hA Non_24hB
>
> 1415673_at 9.129459 8.691872
>
> 1415711_at 11.490485 11.553578
>
> 1415750_at 8.495280 8.442697
>
> 1415756_a_at 9.230993 9.417729
>
> 1415803_at 7.105229 7.201890
>
> 1415812_at 6.961344 7.257823
>
>
>
>
>
>> sessionInfo()
>
> R version 2.6.0 (2007-10-03)
>
> i386-pc-mingw32
>
>
>
> locale:
>
> LC_COLLATE=English_United Kingdom.1252;LC_CTYPE=English_United
> Kingdom.1252;LC_MONETARY=English_United
> Kingdom.1252;LC_NUMERIC=C;LC_TIME=English_United Kingdom.1252
>
>
>
> attached base packages:
>
> [1] splines tools stats graphics grDevices utils datasets
> methods base
>
>
>
> other attached packages:
>
> [1] gplots_2.3.2 gdata_2.3.1 gtools_2.4.0
> mouse4302_2.0.1 affycoretools_1.10.0 annaffy_1.10.0
> KEGG_2.0.0
>
> [8] GO_2.0.0 gcrma_2.10.0 matchprobes_1.10.0
> biomaRt_1.12.0 RCurl_0.8-1 GOstats_2.4.0
> Category_2.4.0
>
> [15] genefilter_1.16.0 survival_2.32 RBGL_1.14.0
> annotate_1.16.0 xtable_1.5-1 GO.db_2.0.0
> AnnotationDbi_1.0.4
>
> [22] RSQLite_0.6-3 DBI_0.2-3 graph_1.16.1
> limma_2.12.0 affy_1.16.0 preprocessCore_1.0.0
> affyio_1.6.0
>
> [29] Biobase_1.16.0
>
>
>
> loaded via a namespace (and not attached):
>
> [1] cluster_1.11.9 XML_1.93-2
>
>>
>
>
>
>
>
>
>
> **************************************
>
> Celine Carret, PhD
>
> Pathogen Microarrays
>
> Wellcome Trust Sanger Institute
>
> Hinxton, Cambridge
>
> CB10 1SA, UK
>
> tel.+44-1223494940
>
> fax.+44-1223494919
>
> **************************************
>
>
>
>
>
>
> --
> The Wellcome Trust Sanger Institute is operated by Genome Research
>
> Limited, a charity registered in England with number 1021457 and a
> compa
> ny registered in England with number 2742969, whose registered
> office is 2
> 15 Euston Road, London, NW1 2BE.
>
>
>
> [[alternative HTML version deleted]]
>
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at stat.math.ethz.ch
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives:
> http://news.gmane.org/gmane.science.biology.informatics.conductor
>
>
More information about the Bioconductor
mailing list