[BioC] ChIPpeakAnno as.data.frame error

Tue Mar 6 15:49:22 CET 2012

Dear Mark,

I am wondering what happens if you add library(RangedData) after all the
packages have been imported.

Best regards,

Julie

On 3/5/12 10:31 PM, "Mark Cowley" <m.cowley at garvan.org.au> wrote:

> Dear list,
> i've had an odd error for a few days now, which i'm really struggling to
> pinpoint. 
> Essentially, i'm trying to annotate ChIP-Seq peaks using ChIPpeakAnno. after
> doing annotatePeakInBatch, I convert to data.frame & I get this error:
> 
>> peaks.bed <- read.delim(bed.file, header=F)
>> peaks.RangedData <- BED2RangedData(peaks.bed, FALSE)
>> peaks.annot <- annotatePeakInBatch(peaks.RangedData, AnnotationData =
>> annoData, output="both", multiple=TRUE, maxgap=0)
>> peaks.annot <- as.data.frame(peaks.annot)
> Error in as.data.frame.default(peaks.annot) :
>   cannot coerce class 'structure("RangedData", package = "IRanges")' into a
> data.frame
> 
> A combination of debug, getMethod & traceback (see below) prove that my
> peaks.annot object is of class RangedData, the S4 method "as.data.frame" for
> "RangedData" exists, but is not actually called; instead as.data.frame.default
> is called, causing the error.
> Oddly enough, the error only occurs when run inside my function, which i think
> implies that my package NAMESPACE is incorrectly setup. To this end, I put
> this code in its own package, deliberately do not import any code from my
> other [potentially dodgy] packages & I still get this error.
> 
> My code & the debug trace/getMethod/traceback is below & any advice would be
> greatly recommended.
> cheers,
> Mark
> 
> 
> ##############################################################################
> ##
> The function:
> #' annotate MACS peaks using ChIPpeakAnno
> #' 
> #' This compares the BED regions from running MACS to the nearest ENSEMBL
> #' Gene, and to CpG islands. This uses
> \code{\link[ChIPpeakAnno]{annotatePeakInBatch}}
> #' from ChIPpeakAnno, which has a very loose definition of close, ie
> #' within 0.5Mb.
> #' 
> #' This uses pre-built CpG island definitions downloaded from UCSC.
> #'
> #' @param dir the path to a MACS result directory
> #' @param genome one of \sQuote{hg18}, or \sQuote{hg19}
> #' @return nothing. it writes a <prefix>_peaks_annot.xls file. To interpret
> #' the new columns, see \code{\link[ChIPpeakAnno]{annotatePeakInBatch}}
> #' @author Mark Cowley, 2012-03-01
> #' @importFrom ChIPpeakAnno BED2RangedData annotatePeakInBatch
> #' @importFrom plyr join
> #' @export
> #' @examples
> #' \dontrun{
> #' dir <- "./MACS/TAMR.H3k4Me3.vs.MCF7.H3k4Me3/"
> #' macs.ChIPpeakAnno(dir, "hg19")
> #' }
> annotate.macs.output <- function(dir, genome=c("hg19", "hg18", "mm9", "rn4"),
> tss=TRUE, cpg=TRUE) {
> length(dir) == 1 && is.dir(dir) || stop("dir must be the path to a macs result
> directory.")
> SUPPORTED.GENOMES <- c("hg19", "hg18")
> genome <- genome[1]
> genome %in% SUPPORTED.GENOMES || stop("genome is unsupported.")
> ##############################################################################
> ##
> saf <- getOption("stringsAsFactors")
> on.exit(options(stringsAsFactors=saf))
> options(stringsAsFactors=FALSE)
> ##############################################################################
> ##
> 
> ##############################################################################
> ##
> # which genome?
> annoData <- switch(genome,
> hg19={data(TSS.human.GRCh37);  TSS.human.GRCh37},
> hg18={data(TSS.human.NCBI36);  TSS.human.NCBI36},
> mm9={data(TSS.mouse.NCBIM37); TSS.mouse.NCBIM37},
> rn4={data(TSS.rat.RGSC3.4);   TSS.rat.RGSC3.4},
> stop("unsupported genome")
> )
> cpgData <- switch(genome,
> hg19={data(CpG.human.GRCh37);  CpG.human.GRCh37},
> hg18={data(CpG.human.NCBI36);  CpG.human.NCBI36},
> mm9={data(CpG.mouse.NCBIM37); CpG.mouse.NCBIM37},
> rn4={data(CpG.rat.RGSC3.4);   CpG.rat.RGSC3.4},
> stop("unsupported genome")
> )
> ##############################################################################
> ##
> 
> ##############################################################################
> ##
> # setup the input/output file paths
> # bed.file <- "TAMR.vs.MCF7.MBD2_peaks.bed"
> bed.file <- dir(dir, pattern="_peaks.bed", full=TRUE)
> file.exists(bed.file) || stop("bed.file must exist")
> 
> # macs.peaks.file <- "TAMR.vs.MCF7.MBD2_peaks.xls"
> macs.peaks.file <- rev(dir(dir, pattern="_peaks.xls", full=TRUE))[1]
> file.exists(macs.peaks.file) || stop("macs.peaks.file must exist")
> 
> result.file <- sub("_peaks.xls", "_peaks_annot.xls", macs.peaks.file)
> ##############################################################################
> ##
> 
> ##############################################################################
> ##
> # import peaks
> peaks.bed <- read.delim(bed.file, header=F)
> # head(peaks.bed)
> 
> peaks.RangedData <- BED2RangedData(peaks.bed, FALSE)
> # peaks.RangedData
> ##############################################################################
> ##
> 
> #
> # import peaks & join to closest gene and CpG island
> # 
> peaks.stats <- import.macs.peaks.file(macs.peaks.file)
> # head(peaks.stats)
> 
> res <- peaks.stats
> 
> ##############################################################################
> ##
> message("Annotating peaks to nearest ENSG TSS...")
> if( tss ) {
> peaks.annot <- annotatePeakInBatch(peaks.RangedData, AnnotationData =
> annoData, output="both", multiple=TRUE, maxgap=0)
> message("done")
> 
> peaks.annot <- as.data.frame(peaks.annot)
> peaks.annot <- peaks.annot[,-match(c("space", "start", "end", "width",
> "names"), colnames(peaks.annot))]
> library(org.Hs.eg.db)
> peaks.annot$GeneSymbol <- mget.chain(as.character(peaks.annot$feature),
> org.Hs.egENSEMBL2EG, org.Hs.egSYMBOL)
> peaks.annot <- rename.column(peaks.annot, "feature", "Ensembl ID")
> peaks.annot <- rename.column(peaks.annot, "peak", "name")
> peaks.annot <- move.column(peaks.annot, "strand", "insideFeature")
> peaks.annot <- move.column(peaks.annot, "GeneSymbol", "start_position")
> 
> res <- plyr::join(peaks.stats, peaks.annot, by="name", type="full")
> }
> else {
> message("skipping")
> }
> ##############################################################################
> ##
> 
> ##############################################################################
> ##
> message("Annotating peaks to nearest CpG...")
> if( cpg ) {
> peaks.annot.cpg <- annotatePeakInBatch(peaks.RangedData, AnnotationData =
> cpgData, output="both", multiple=T, maxgap=0)
> message("done")
> 
> peaks.annot.cpg <- as.data.frame(peaks.annot.cpg)
> peaks.annot.cpg <- peaks.annot.cpg[,-match(c("space", "start", "end", "width",
> "names"), colnames(peaks.annot.cpg))]
> library(org.Hs.eg.db)
> peaks.annot.cpg <- rename.column(peaks.annot.cpg, "feature", "CpG island")
> peaks.annot.cpg <- rename.column(peaks.annot.cpg, "peak", "name")
> peaks.annot.cpg <- move.column(peaks.annot.cpg, "strand", "insideFeature")
> 
> res <- plyr::join(res, peaks.annot.cpg, by="name", type="full")
> }
> else {
> message("skipping")
> }
> ##############################################################################
> ##
> 
> write.xls(res, result.file) 
> }
> ##############################################################################
> ##
> 
> 
> ##############################################################################
> ##
> Selection from Debug showing that the method is at least visible:
> ...
> Browse[2]> class(peaks.annot)
> [1] "RangedData"
> attr(,"package")
> [1] "IRanges"
> Browse[2]> getMethod("as.data.frame", "RangedData")
> Method Definition:
> 
> function (x, row.names = NULL, optional = FALSE, ...)
> {
>     if (!(is.null(row.names) || is.character(row.names)))
>         stop("'row.names'  must be NULL or a character vector")
>     if (!missing(optional) || length(list(...)))
>         warning("'optional' and arguments in '...' ignored")
>     data.frame(as.data.frame(ranges(x)), as.data.frame(values(x))[-1L],
>         row.names = row.names, stringsAsFactors = FALSE)
> }
> <environment: namespace:IRanges>
> 
> Signatures:
>         x        
> target  "RangedData"
> defined "RangedData"
> Browse[2]> 
> Error in as.data.frame.default(peaks.annot) :
>   cannot coerce class 'structure("RangedData", package = "IRanges")' into a
> data.frame
> traceback()
> 4: stop(gettextf("cannot coerce class '%s' into a data.frame",
> deparse(class(x))),
>        domain = NA)
> 3: as.data.frame.default(peaks.annot)
> 2: as.data.frame(peaks.annot)
> 1: annotate.macs.output("MACS/TAMR.MBD2.vs.MCF7.MBD2")
> ##############################################################################
> ##
> 
> sessionInfo()
> R version 2.13.1 (2011-07-08)
> Platform: x86_64-apple-darwin9.8.0/x86_64 (64-bit)
> 
> locale:
> [1] en_AU.UTF-8/en_AU.UTF-8/C/C/en_AU.UTF-8/en_AU.UTF-8
> 
> attached base packages:
> [1] stats     graphics  grDevices utils     datasets  methods   base
> 
> other attached packages:
>  [1] gdata_2.8.1                         macsR_1.0
>  [3] ChIPpeakAnno_1.9.0                  limma_3.8.3
>  [5] org.Hs.eg.db_2.5.0                  GO.db_2.5.0
>  [7] RSQLite_0.9-4                       DBI_0.2-5
>  [9] AnnotationDbi_1.14.1                BSgenome.Ecoli.NCBI.20080805_1.3.17
> [11] BSgenome_1.20.0                     GenomicRanges_1.4.6
> [13] Biostrings_2.20.2                   IRanges_1.10.5
> [15] multtest_2.8.0                      Biobase_2.12.2
> [17] biomaRt_2.8.1                       plyr_1.6
> 
> loaded via a namespace (and not attached):
> [1] gtools_2.6.2    MASS_7.3-14     RCurl_1.6-7     splines_2.13.1
> [5] survival_2.36-9 tools_2.13.1    XML_3.4-2
> 
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at r-project.org
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives:
> http://news.gmane.org/gmane.science.biology.informatics.conductor