[Bioc-devel] summarizeOverlaps example error

Mon Aug 6 22:01:39 CEST 2012

Hi all,

My apologies in advance if I've done something pin-headed here, but can anyone else produce an error just by doing:

library(GenomicRanges)
example(summarizeOverlaps)

I think I have a set of updated (devel) packages:

> source("http://bioconductor.org/biocLite.R")
BiocInstaller version 1.5.12, ?biocLite for help
> old.packages(repos=biocinstallRepos())
NULL

My R devel is a bit old (2012-06-14 r59564) … maybe I should upgrade?

Error, traceback() and sessionInfo() below …

Help much appreciated.

Best,
Mark

> example(summarizeOverlaps)

smmrzO> reads <- GappedAlignments(
smmrzO+     names = c("a","b","c","d","e","f","g"),
smmrzO+     seqnames = Rle(c(rep(c("chr1", "chr2"), 3), "chr1")),
smmrzO+     pos = as.integer(c(1400, 2700, 3400, 7100, 4000, 3100, 5200)),
smmrzO+     cigar = c("500M", "100M", "300M", "500M", "300M", 
smmrzO+         "50M200N50M", "50M150N50M"),
smmrzO+     strand = strand(rep("+", 7)))

smmrzO> ## ---------------------------------------------------------------------
smmrzO> ## 'features' as GRanges
smmrzO> ##
smmrzO> 
smmrzO> features <- GRanges(
smmrzO+     seqnames = c(rep("chr1", 7), rep("chr2", 4)), strand = "+", 
smmrzO+     ranges = IRanges(c(1000, 3000, 3600, 4000, 4000, 5000, 5400, 2000, 
smmrzO+         3000, 7000, 7500), width = c(500, 500, 300, 500, 900, 500, 500, 
smmrzO+         900, 500, 600, 300)), 
smmrzO+     group_id = c("A", "B", "C", "C", "D", "D", "E", "F", "G", "H", "H"))

smmrzO> ## Each row is a feature the read can 'hit'.
smmrzO> rowsAsFeatures <- data.frame(
smmrzO+     union = assays(summarizeOverlaps(features, reads))$counts, 
smmrzO+     intStrict = assays(summarizeOverlaps(features, reads, 
smmrzO+         mode="IntersectionStrict"))$counts,
smmrzO+     intNotEmpty = assays(summarizeOverlaps(features, reads,
smmrzO+         mode="IntersectionNotEmpty"))$counts,
smmrzO+     countOverlaps = countOverlaps(features, reads))

smmrzO> ## Results from countOverlaps() are included to highlight how
smmrzO> ## summarizeOverlaps() counts a read only once. For these 7
smmrzO> ## reads, 'Union' is the most conservative counting mode, followed 
smmrzO> ## by 'Intersectionstrict' and then 'IntersectionNotEmpty'. 
smmrzO> 
smmrzO> colSums(rowsAsFeatures)
        union     intStrict   intNotEmpty countOverlaps 
            3             4             5            11 

smmrzO> ## ---------------------------------------------------------------------
smmrzO> ## 'features' as GRangesList
smmrzO> ##
smmrzO> 
smmrzO> ## Each highest list-level is a feature the read can 'hit'.
smmrzO> lst <- split(features, values(features)[["group_id"]])

smmrzO> listAsFeatures <- data.frame(
smmrzO+     union = assays(summarizeOverlaps(lst, reads))$counts, 
smmrzO+     intStrict = assays(summarizeOverlaps(lst, reads, 
smmrzO+         mode="IntersectionStrict"))$counts,
smmrzO+     intNotEmpty = assays(summarizeOverlaps(lst, reads,
smmrzO+         mode="IntersectionNotEmpty"))$counts,
smmrzO+     countOverlaps = countOverlaps(lst, reads))

smmrzO> ## ---------------------------------------------------------------------
smmrzO> ## 'reads' as BamFileList 
smmrzO> ##
smmrzO> 
smmrzO> ## Count BAM files and prepare output for DESeq or edgeR analysis
smmrzO> library(Rsamtools)

smmrzO> library(DESeq)

smmrzO> library(edgeR)

smmrzO> fls <- list.files(system.file("extdata",package="GenomicRanges"),
smmrzO+     recursive=TRUE, pattern="*bam$", full=TRUE)

smmrzO> names(fls) <- basename(fls)

smmrzO> bfl <- BamFileList(fls, index=character())

smmrzO> features <- GRanges(
smmrzO+     seqnames = c(rep("chr2L", 4), rep("chr2R", 5), rep("chr3L", 2)),
smmrzO+     ranges = IRanges(c(1000, 3000, 4000, 7000, 2000, 3000, 3600, 4000, 
smmrzO+         7500, 5000, 5400), width=c(rep(500, 3), 600, 900, 500, 300, 900, 
smmrzO+         300, 500, 500)), "-",
smmrzO+     group_id=c(rep("A", 4), rep("B", 5), rep("C", 2)))

smmrzO> solap <- summarizeOverlaps(features, bfl)

smmrzO> deseq <- newCountDataSet(assays(solap)$counts, rownames(colData(solap)))

smmrzO> edger <- DGEList(assays(solap)$counts, group=rownames(colData(solap)))
Calculating library sizes from column totals.

smmrzO> ## ---------------------------------------------------------------------
smmrzO> ## paired-end reads 
smmrzO> ##
smmrzO> 
smmrzO> library("TxDb.Dmelanogaster.UCSC.dm3.ensGene")

smmrzO> exbygene <- exonsBy(TxDb.Dmelanogaster.UCSC.dm3.ensGene, "gene")

smmrzO> fl <- system.file("extdata", "untreated3_chr4.bam", 
smmrzO+     package="pasillaBamSubset") 

smmrzO> ## Paired-end reads are stored in a GappedAlignmentPairs object
smmrzO> reads <- readGappedAlignmentPairs(fl)

smmrzO> res <- summarizeOverlaps(exbygene, reads, "Union")
Warning messages:
1: In is.na(unique(f)) :
  is.na() applied to non-(list or vector) of type 'S4'
2: In IRanges:::newCompressedList("GRangesList", x, splitFactor = f,  :
  data length is not a multiple of split variable
Error in countOverlaps(reads, features, ignore.strand = ignore.strand) : 
  error in evaluating the argument 'query' in selecting a method for function 'countOverlaps': Error in unique.default(x) : unique() applies only to vectors
> traceback()
10: countOverlaps(reads, features, ignore.strand = ignore.strand)
9: mode(reads, features, ignore.strand)
8: .dispatchOverlaps(grglist(reads), features, mode, ignore.strand)
7: summarizeOverlaps(exbygene, reads, "Union")
6: summarizeOverlaps(exbygene, reads, "Union") at Rex84b25fb6afa1#102
5: eval(expr, envir, enclos)
4: eval(ei, envir)
3: withVisible(eval(ei, envir))
2: source(tf, local, echo = echo, prompt.echo = paste0(prompt.prefix, 
       getOption("prompt")), continue.echo = paste0(prompt.prefix, 
       getOption("continue")), verbose = verbose, max.deparse.length = Inf, 
       encoding = "UTF-8", skip.echo = skips, keep.source = TRUE)
1: example(summarizeOverlaps)

> sessionInfo()
R Under development (unstable) (2012-06-14 r59564)
Platform: x86_64-unknown-linux-gnu (64-bit)

locale:
[1] en_CA.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] BiocInstaller_1.5.12                     
 [2] TxDb.Dmelanogaster.UCSC.dm3.ensGene_2.7.1
 [3] GenomicFeatures_1.9.28                   
 [4] AnnotationDbi_1.19.28                    
 [5] edgeR_2.99.4                             
 [6] limma_3.13.16                            
 [7] DESeq_1.9.11                             
 [8] lattice_0.20-6                           
 [9] locfit_1.5-8                             
[10] Biobase_2.17.6                           
[11] Rsamtools_1.9.25                         
[12] Biostrings_2.25.8                        
[13] GenomicRanges_1.9.41                     
[14] IRanges_1.15.25                          
[15] BiocGenerics_0.3.0                       

loaded via a namespace (and not attached):
 [1] annotate_1.35.3     biomaRt_2.13.2      bitops_1.0-4.1     
 [4] BSgenome_1.25.6     DBI_0.2-5           genefilter_1.39.0  
 [7] geneplotter_1.35.1  grid_2.16.0         RColorBrewer_1.0-5 
[10] RCurl_1.91-1        RSQLite_0.11.1      rtracklayer_1.17.15
[13] splines_2.16.0      stats4_2.16.0       survival_2.36-14   
[16] tools_2.16.0        XML_3.9-4           xtable_1.7-0       
[19] zlibbioc_1.3.0     
> 

----------
Prof. Dr. Mark Robinson
Bioinformatics
Institute of Molecular Life Sciences
University of Zurich
Winterthurerstrasse 190
8057 Zurich
Switzerland

v: +41 44 635 4848
f: +41 44 635 6898
e: mark.robinson at imls.uzh.ch
o: Y11-J-16
w: http://tiny.cc/mrobin

----------
http://www.fgcz.ch/Bioconductor2012
http://www.eccb12.org/t5