[BioC] KEGG2heatmap genesymbols as row names?

Saurin D. Jani jani at musc.edu
Mon Jun 30 14:42:27 CEST 2008


About Pathway to HEATMAP


# Variable explanation:

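# esetSub2 = expression matrix
# chiptype = annotation found from GeneChip, here 'hgu95a'
# envPath2Probes = environment mapping KEGG pathway ids to probe ids
# envGeneNames = environment mapping probe ids to gene names
# samples = sample labels used as the heatmap column names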
# envGeneNames <- hgu95aGENENAME;
# envPath2Probes <- hgu95aPATH2PROBE;


# Generate Pathway Heatmap
#
# Looks up the KEGG pathways annotated to the probes of esetSub2 and calls
# Pathway2Heatmap() once per distinct pathway, passing the pathway id, an
# output file name and a subtitle that are both derived from the pathway name.
#
# NOTE: Pathway2Heatmap() (defined further down) must already exist when this
# section is run, so source the function definition first.
   All_DE_Pathway <- aafPathway(geneNames(esetSub2), chiptype);
   # Evaluated at top level, so the distinct per-probe pathway counts are printed.
   unique(sapply(All_DE_Pathway, length));
   pathlist <- do.call("c", All_DE_Pathway);
   pathL <- length(pathlist);
   kegg <- as.list(envPath2Probes);
   # Build the pathway table only when at least one pathway was found; the
   # attribute matrix cannot be transposed when pathlist is empty.
   if(pathL >=1)
   {
     # Attributes of every pathway entry, one row per entry after t(), with
     # duplicated rows removed. The columns "id" and "name" are read below.
     pathmatrix <- sapply(pathlist, attributes);
     pathnamesX <- unique(t(pathmatrix));
     pathRow <-  as.numeric(nrow(pathnamesX));

     # seq_len() gives an empty sequence for zero rows, unlike 1:pathRow.
     for(i in seq_len(pathRow))
     {
       pathID <- pathnamesX[,"id"][i];
       pathID <- as.character(unlist(pathID));

       pathName <- as.character(unlist(pathnamesX[,"name"][i]));
       pathName <- trimWhiteSpace(pathName);
       # Replace EVERY slash, blank and dash with an underscore so the name is
       # usable as a file name; sub() would only replace the first match.
       pathName <- gsub("[/ -]", "_", pathName);
       pathFile <- paste("Pathway",pathName,".jpeg",sep = "_");
       pathSub <- paste(pathName,"Legend: Red = High, Blue = Low, White = Medium",sep = "  ");
       pathSub <- paste("Date of Analysis:",date(),pathSub,sep = "  ");

       Pathway2Heatmap(pathID,pathFile,pathSub); # check out the function below
     }

   } # if ends here
#

###########################################################
# Pathway2Heatmap: write a row-clustered heatmap (JPEG) of the probes that
# belong to one KEGG pathway and are present in the expression set, using
# gene names instead of probe ids as row labels.
#
# Arguments:
#   pathway  KEGG pathway id, used as a key into envPath2Probes.
#   file     name of the JPEG file to create.
#   Hsub     subtitle printed below the heatmap; "Generated by EXAMPLE" is
#            used when it is missing or NULL.
#
# Reads the globals esetSub2, envPath2Probes, envGeneNames and samples.
#
# Value: "Pathway Heat map done" (invisibly) when a heatmap was written,
# NULL (invisibly) when fewer than two pathway probes are in esetSub2.
Pathway2Heatmap <- function(pathway,file,Hsub)
{
   # Honour the caller's subtitle; fall back to the fixed text only when no
   # subtitle was supplied.
   if (missing(Hsub) || is.null(Hsub))
   {
     Hsub <- "Generated by EXAMPLE";
   }
   noAnnotation <- "No Annotation For this Probe";

   kegg <- as.list(envPath2Probes);
   # Probes of the pathway; an unknown pathway id yields an empty vector.
   pathProbes <- unique(as.character(unlist(kegg[[pathway]])));
   pathProbes <- pathProbes[!is.na(pathProbes)];
   esetSub2Genes <- as.character(geneNames(esetSub2));

   # Pathway probes that are also in the expression set, in pathway order.
   sharedProbes <- pathProbes[pathProbes %in% esetSub2Genes];
   if (length(sharedProbes) < 2)
   {
     # Nothing to cluster with fewer than two rows.
     return(invisible(NULL));
   }

   exp2 <- esetSub2[sharedProbes];
   exp2X <-  exprs(exp2);

   # Exactly one label per matrix row: look every row's probe up (no unique(),
   # so lengths always agree) and collapse each result to a single string.
   pathDEGenes <- as.character(geneNames(exp2));
   pathDEGeneNames <- mget(pathDEGenes,envir = envGeneNames,ifnotfound = noAnnotation);
   pathDEGeneNames <- unlist(lapply(pathDEGeneNames, function(geneName)
   {
     geneName <- as.character(geneName);
     if (length(geneName) == 0 || is.na(geneName[1])) noAnnotation else geneName[1];
   }), use.names = FALSE);

   row.names(exp2X) <- pathDEGeneNames;
   colnames(exp2X) <- samples;

   jpeg(filename= file,width=2000,height=2000);
   # Close the JPEG device even when clustering or plotting fails.
   jpegDevice <- dev.cur();
   on.exit(dev.off(which = jpegDevice), add = TRUE);

   # Rows are clustered on correlation distance; columns keep their order.
   row.dist <- as.dist(1 - cor(t(exp2X)));
   gmpalette <- bluered(64);

   heatmap.2(exp2X,col=gmpalette,Colv= FALSE,
             Rowv = as.dendrogram(hclust(row.dist,method="centroid")),
             scale="row",key=TRUE,keysize=0.60,symkey=FALSE,
             density.info="none",trace="none",margins=c(5,85),
             cexRow=1,cexCol=1,sub=Hsub,cex.sub=1);

   print("Pathway Heat map done");
}#pathway2heatmap
###########################################################

For more information, or to run a free web-based analysis on your data, see:
http://proteogenomics.musc.edu/ma/arrayQuest.php?page=home&act=manage&process=Methods_Library_List

Saurin
-- 
|------------------------------------------------
| Saurin D. Jani
| Bioinformatician
|
| Department of Cell Biology and Anatomy
| Medical  University of South Carolina (MUSC)
| 173 Ashley Ave
| Charleston,SC - 29425
|
| Email: jani at musc.edu
| Phone: (843)792-1340
| Website: http://www.musc.edu/~jani
|------------------------------------------------


Quoting Celine Carret <ckc at sanger.ac.uk>:

> Dear BioC users,
>
>
>
> I want to plot the genes in my result-matrix that have an annotation in
> a given KEGG pathway as a heatmap. For that, I use the KEGG2heatmap
> function from the annotate package (see code below).
>
> I was wondering if anyone has an idea about how to change the default
> affy-IDs as row names in a KEGG2heatmap (or GO2heatmap) to have gene
> symbols or gene names for that matter displayed in the heatmap.
>
> Changing the probeset for gene symbols in the matrix doesn't work as the
> function uses the affy ID to query your matrix and find the probesets
> having the KEGG (or GO) annotation and put them in the heatmap output.
>
> I also tried to use labRow as in a normal heatmap but it doesn't work
> either.
>
> I then tried to write the object of the function and recover the order
> of the genes from the rowInd, thinking I could change this post-plot in
> a way, but this relates only to the number of genes found in there,
> so... not working!
>
> If anyone has an idea about how to solve this problem, I would be very
> grateful as I'm supposed to do many of those plots, and modifying them
> one after the other by adding the labRow=c("ICAM1","VCAM"...)
> accordingly for each GO or KEGG after checking for affy-ID to gene
> symbols is going to be long and painful.
>
>
>
> Thanks a lot and best wishes
>
> Celine
>
>
>
> NB: I know I didn't yet upgrade to the latest version of R and BioC, and
> I will, but in the meantime I would appreciate any suggestions to help
> me
>
>
>
>> library(affycoretools)
>
>> library(gplots)
>
>> library(mouse4302)
>
>> load("my.RData")
>
>> ls("package:mouse4302")
>
>> symbols1108_ids <- mget(rownames(matrix_eset_1108),
> envir=mouse4302SYMBOL)
>
>> matrix_eset_1108_symbols <- matrix_eset_1108
>
>> rownames(matrix_eset_1108_symbols) <- symbols1108_ids
>
>> KEGG2heatmap("04514", matrix_eset_1108, data = "mouse4302",
> main="CAMs", xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
> ##gives what I expect, with row names written as probeset Affymetrix Ids
>
>
>
>> matrix_eset_1108_symbols <- cbind(symbols1108_ids,matrix_eset_1108)
>
>> KEGG2heatmap("04514", matrix_eset_1108_symbols, data =
> "mouse4302",main="CAMs", xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
> Error in heatmap(dataM, ...) : 'x' must be a numeric matrix
>
>
>
>> KEGG2heatmap("04514", matrix_eset_1108, data =
> "mouse4302",labRow=symbols1108_ids,main="CAMs",
> xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
> ## gives a heatmap with gene symbols as row names, however, the symbols
> do not correspond to the probeset ids, they do correspond to the
> ordering as in the rowInd:
>
>
>
>> CAMs <- KEGG2heatmap("04514", matrix_eset_1108, data =
> "mouse4302",main="CAMs", xlab="Samples",ylab="Genes",margins=c(9,9),
> col=bluered(256),ColSideColors=groups.color2,cexRow=1.2)
>
>> names(CAMs)
>
> [1] "rowInd" "colInd" "Rowv"   "Colv"
>
>> CAMs$rowInd
>
>  [1]  9 11  6  7  8  1  5 10  2 12  4  3
>
> ## those indexes relate to the order of the 12 genes found in the CAM
> pathway within my matrix, not across the whole matrix (1108 probesets).
>
> ## I identified this by running a KEGG2heatmap for a different pathway.
> The symbols were the same, only shuffled in a different order.
>
>
>
>> head(matrix_eset_1108_symbols)
>
>         symbols1108_ids I_6hA    I_6hB I_12hA I_12hB I_18hA I_18hB
> I_24hA I_24hB Non_6hA Non_6hB Non_12hA Non_12hB Non_18hA Non_18hB
> Non_24hA Non_24hB
>
> Psph    "Psph"          9.527257 NULL  NULL   NULL   NULL   NULL   NULL
> NULL   NULL    NULL    NULL     NULL     NULL     NULL     NULL     NULL
>
>
> Arfgef1 "Arfgef1"       11.25055 NULL  NULL   NULL   NULL   NULL   NULL
> NULL   NULL    NULL    NULL     NULL     NULL     NULL     NULL     NULL
>
>
> Tbl3    "Tbl3"          8.922073 NULL  NULL   NULL   NULL   NULL   NULL
> NULL   NULL    NULL    NULL     NULL     NULL     NULL     NULL     NULL
>
>
> Snapap  "Snapap"        8.899586 NULL  NULL   NULL   NULL   NULL   NULL
> NULL   NULL    NULL    NULL     NULL     NULL     NULL     NULL     NULL
>
>
> Cx3cl1  "Cx3cl1"        8.244229 NULL  NULL   NULL   NULL   NULL   NULL
> NULL   NULL    NULL    NULL     NULL     NULL     NULL     NULL     NULL
>
>
> Gsn     "Gsn"           7.061844 NULL  NULL   NULL   NULL   NULL   NULL
> NULL   NULL    NULL    NULL     NULL     NULL     NULL     NULL     NULL
>
>
>> head(symbols1108_ids)
>
> $`1415673_at`
>
> [1] "Psph"
>
>
>
> $`1415711_at`
>
> [1] "Arfgef1"
>
>
>
> $`1415750_at`
>
> [1] "Tbl3"
>
>
>
> $`1415756_a_at`
>
> [1] "Snapap"
>
>
>
> $`1415803_at`
>
> [1] "Cx3cl1"
>
>
>
> $`1415812_at`
>
> [1] "Gsn"
>
>
>
>> head(matrix_eset_1108)
>
>                  I_6hA     I_6hB    I_12hA    I_12hB    I_18hA    I_18hB
> I_24hA    I_24hB   Non_6hA   Non_6hB  Non_12hA  Non_12hB  Non_18hA
> Non_18hB
>
> 1415673_at    9.527257  9.838667  9.890481 10.020292  9.799559  9.904187
> 9.645748  9.445655  8.950675  8.943936  8.941548  8.823886  8.687042
> 9.178271
>
> 1415711_at   11.250548 11.217327 11.286776 11.223121 11.021241 11.113918
> 10.941252 10.794538 11.665688 11.510708 11.436264 11.455783 11.489206
> 11.264688
>
> 1415750_at    8.922073  8.952717  9.196360  9.139688  9.161736  9.141267
> 8.940151  8.859688  8.939075  8.748251  8.909915  9.044485  8.383893
> 8.578295
>
> 1415756_a_at  8.899586  8.894382  9.012984  9.148159  9.193082  8.982367
> 9.238500  9.427381  9.560140  9.428719  9.245954  9.178162  9.395412
> 9.180315
>
> 1415803_at    8.244229  7.768872  7.399605  7.107893  6.802274  6.212161
> 6.864431  6.767189  8.107881  7.942794  8.205100  8.188111  7.029770
> 7.136613
>
> 1415812_at    7.061844  7.754393  7.717294  7.922748  8.321420  8.339937
> 8.257631  8.064229  7.024239  7.139190  6.834602  7.163571  6.093165
> 6.861289
>
>               Non_24hA  Non_24hB
>
> 1415673_at    9.129459  8.691872
>
> 1415711_at   11.490485 11.553578
>
> 1415750_at    8.495280  8.442697
>
> 1415756_a_at  9.230993  9.417729
>
> 1415803_at    7.105229  7.201890
>
> 1415812_at    6.961344  7.257823
>
>
>
>
>
>> sessionInfo()
>
> R version 2.6.0 (2007-10-03)
>
> i386-pc-mingw32
>
>
>
> locale:
>
> LC_COLLATE=English_United Kingdom.1252;LC_CTYPE=English_United
> Kingdom.1252;LC_MONETARY=English_United
> Kingdom.1252;LC_NUMERIC=C;LC_TIME=English_United Kingdom.1252
>
>
>
> attached base packages:
>
> [1] splines   tools     stats     graphics  grDevices utils     datasets
> methods   base
>
>
>
> other attached packages:
>
>  [1] gplots_2.3.2         gdata_2.3.1          gtools_2.4.0
> mouse4302_2.0.1      affycoretools_1.10.0 annaffy_1.10.0
> KEGG_2.0.0
>
>  [8] GO_2.0.0             gcrma_2.10.0         matchprobes_1.10.0
> biomaRt_1.12.0       RCurl_0.8-1          GOstats_2.4.0
> Category_2.4.0
>
> [15] genefilter_1.16.0    survival_2.32        RBGL_1.14.0
> annotate_1.16.0      xtable_1.5-1         GO.db_2.0.0
> AnnotationDbi_1.0.4
>
> [22] RSQLite_0.6-3        DBI_0.2-3            graph_1.16.1
> limma_2.12.0         affy_1.16.0          preprocessCore_1.0.0
> affyio_1.6.0
>
> [29] Biobase_1.16.0
>
>
>
> loaded via a namespace (and not attached):
>
> [1] cluster_1.11.9 XML_1.93-2
>
>>
>
>
>
>
>
>
>
> **************************************
>
> Celine Carret, PhD
>
> Pathogen Microarrays
>
> Wellcome Trust Sanger Institute
>
> Hinxton, Cambridge
>
> CB10 1SA, UK
>
> tel.+44-1223494940
>
> fax.+44-1223494919
>
> **************************************
>
>
>
>
>
>
> --
>  The Wellcome Trust Sanger Institute is operated by Genome Research
>  Limited, a charity registered in England with number 1021457 and a
>  company registered in England with number 2742969, whose registered
>  office is 215 Euston Road, London, NW1 2BE.
>
>
>
> 	[[alternative HTML version deleted]]
>
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at stat.math.ethz.ch
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives: 
> http://news.gmane.org/gmane.science.biology.informatics.conductor
>
>



More information about the Bioconductor mailing list