[BioC] Using readBAM function in segmentSeq
James Perkins
jperkins at biochem.ucl.ac.uk
Tue Jul 3 12:58:03 CEST 2012
Dear Thomas,
I want to use the seqmentSeq package to look for intergenic expression
(lincRNAs etc.) above background.
I have case and control samples, with 3 biological replicates.
I have bam files (produced using bowtie and samtools) that read in
fine with RSamtools.
> xx <- readBamGappedAlignments("../../raw/file.sorted.bam")
However I can't get it to read the files in using readBAM in the
segmentSeq package:
> zz <- readBAM("../../raw/file.sorted.bam", replicates=1, chrs="chr1",chrlens=267910886)
Reading files...Error in .Call2("solve_user_SEW0", start, end, width,
PACKAGE = "IRanges") :
solving row 9146840: range cannot be determined from the supplied
arguments (too many NAs)
Am I missing an argument/doing something else wrong? I hope this is
enough information for you to go on, please let me know if not.
Many thanks!
Jim
> traceback()
8: .Call(.NAME, ..., PACKAGE = PACKAGE)
7: .Call2("solve_user_SEW0", start, end, width, PACKAGE = "IRanges")
6: solveUserSEW0(start = start, end = end, width = width)
5: IRanges(start = as.integer(tags$pos), width = tags$qwidth)
4: FUN(1L[[1L]], ...)
3: lapply(sampleNumbers, function(ii) {
tags <- scanBam(files[ii])[[1]]
if (!is.null(countID)) {
counts <- Rle(scanBam(files[ii], param = ScanBamParam(tag =
countID))[[1]][[1]][[1]])
}
else counts <- Rle(rep(1L, length(tags$seq)))
ir <- IRanges(start = as.integer(tags$pos), width = tags$qwidth)
aln <- GRanges(seqnames = tags$rname, ir, strand = tags$strand,
tag = Rle(as.character(tags$seq)), count = counts)
aln <- aln[as.integer(seqnames(aln)) %in% which(seqlevels(aln) %in%
chrs), ]
if (!is.null(polyLength)) {
polyBaseRemove <- unique(unlist(lapply(polyBase, grep,
as.character(values(aln)$tag))))
if (length(polyBaseRemove) > 0)
aln <- aln[-polyBaseRemove, ]
}
if (is.null(countID)) {
aln <- aln[order(as.factor(seqnames(aln)), as.integer(start(aln)),
as.character(values(aln)$tag)), ]
dupTags <- which(!(as.character(seqnames(aln)) ==
c(as.character(seqnames(aln))[-1],
"!") & start(aln) == c(start(aln)[-1], Inf) & end(aln) ==
c(end(aln)[-1], Inf) & as.character(strand(aln)) ==
c(as.character(strand(aln))[-1], "!") &
as.character(values(aln)$tag) ==
as.character(c(values(aln)$tag[-1], "!"))))
aln <- aln[dupTags, ]
values(aln)$count <- diff(c(0, dupTags))
}
message(".", appendLF = FALSE)
aln
})
2: lapply(sampleNumbers, function(ii) {
tags <- scanBam(files[ii])[[1]]
if (!is.null(countID)) {
counts <- Rle(scanBam(files[ii], param = ScanBamParam(tag =
countID))[[1]][[1]][[1]])
}
else counts <- Rle(rep(1L, length(tags$seq)))
ir <- IRanges(start = as.integer(tags$pos), width = tags$qwidth)
aln <- GRanges(seqnames = tags$rname, ir, strand = tags$strand,
tag = Rle(as.character(tags$seq)), count = counts)
aln <- aln[as.integer(seqnames(aln)) %in% which(seqlevels(aln) %in%
chrs), ]
if (!is.null(polyLength)) {
polyBaseRemove <- unique(unlist(lapply(polyBase, grep,
as.character(values(aln)$tag))))
if (length(polyBaseRemove) > 0)
aln <- aln[-polyBaseRemove, ]
}
if (is.null(countID)) {
aln <- aln[order(as.factor(seqnames(aln)), as.integer(start(aln)),
as.character(values(aln)$tag)), ]
dupTags <- which(!(as.character(seqnames(aln)) ==
c(as.character(seqnames(aln))[-1],
"!") & start(aln) == c(start(aln)[-1], Inf) & end(aln) ==
c(end(aln)[-1], Inf) & as.character(strand(aln)) ==
c(as.character(strand(aln))[-1], "!") &
as.character(values(aln)$tag) ==
as.character(c(values(aln)$tag[-1], "!"))))
aln <- aln[dupTags, ]
values(aln)$count <- diff(c(0, dupTags))
}
message(".", appendLF = FALSE)
aln
})
1: readBAM("../../raw/file.sorted.bam",
replicates = 1, chrs = "chr1", chrlens = 267910886)
> sessionInfo()
R version 2.15.0 (2012-03-30)
Platform: x86_64-unknown-linux-gnu (64-bit)
locale:
[1] LC_CTYPE=en_US LC_NUMERIC=C
[3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
[5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
[7] LC_PAPER=C LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] segmentSeq_1.8.0 ShortRead_1.14.4 latticeExtra_0.6-19
[4] RColorBrewer_1.0-5 Rsamtools_1.8.5 lattice_0.20-6
[7] Biostrings_2.24.1 GenomicRanges_1.8.7 IRanges_1.14.4
[10] BiocGenerics_0.2.0 baySeq_1.10.0
loaded via a namespace (and not attached):
[1] Biobase_2.16.0 bitops_1.0-4.1 grid_2.15.0 hwriter_1.3 stats4_2.15.0
[6] zlibbioc_1.2.0
More information about the Bioconductor
mailing list