[BioC] transport mcols from one GRange object to another
Martin Morgan
mtmorgan at fhcrc.org
Sat Apr 27 15:15:59 CEST 2013
On 04/26/2013 03:33 PM, Wim Kreinen wrote:
> Hello,
>
> I have 2 GRange objects gr1 and gr2 (with the same names but in a different
> order!) and I would like to "transport" the mcols of gr1 to gr2 - of course
> suitable for the names.
I think you're looking to use `mcols` to get the metadata columns (DataFrame)
from gr1, to reorder the rows to match gr2, and to use the "setter" `mcols<-`
to assign the columns to gr2,
idx = match(names(gr2), names(gr1))
mcols(gr2) <- mcols(gr1)[idx,]
Actually, if the GRanges really are the same, you could match on the ranges themselves instead of the names:
idx = match(gr2, gr1)
mcols(gr2) <- mcols(gr1)[idx,]
but doing that for your ranges gives
> match(gr2, gr1)
[1] NA NA NA NA NA
because the ranges don't match, even though the names do! Hopefully that's
something you're expecting...
Martin
>
> Please ignore IRanges data. The problem must be solved only via the names.
>
> gr1
> GRanges with 5 ranges and 2 metadata columns:
> seqnames ranges strand | score data
> <Rle> <IRanges> <Rle> | <factor> <numeric>
> rs3737728 chr1 [1021365, 1021375] * | 0.340955 4
> rs9651273 chr1 [1030515, 1030525] * | 0.438123 5
> rs6687776 chr1 [1031490, 1031500] * | 0.196662 6
> rs4970405 chr1 [1048905, 1048915] * | 0.208463 7
> rs12726255 chr1 [1049900, 1049910] * | 0.433541 8
> ---
> seqlengths:
> chr1 chr10 ... chrX chrY
> NA NA ... NA NA
>> gr2
> GRanges with 5 ranges and 0 metadata columns:
> seqnames ranges strand
> <Rle> <IRanges> <Rle>
> rs3737728 chr1 [1021415, 1021415] *
> rs6687776 chr1 [1030565, 1030565] *
> rs9651273 chr1 [1031540, 1031540] *
> rs4970405 chr1 [1048955, 1048955] *
> rs12726255 chr1 [1049950, 1049950] *
> ---
> seqlengths:
> chr1 chr10 ... chrX chrY
> NA NA ... NA NA
>>
>
> Thanks
> Wim
>
> class (drei)
> [1] "data.frame"
>> dput (gr1)
> new("GRanges"
> , seqnames = new("Rle"
> , values = structure(1L, .Label = c("chr1", "chr10", "chr11", "chr12",
> "chr13",
> "chr14", "chr15", "chr16", "chr17", "chr17_ctg5_hap1", "chr18",
> "chr19", "chr2", "chr20", "chr21", "chr22", "chr3", "chr4",
> "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY"), class = "factor")
> , lengths = 5L
> , elementMetadata = NULL
> , metadata = list()
> )
> , ranges = new("IRanges"
> , start = c(1021365L, 1030515L, 1031490L, 1048905L, 1049900L)
> , width = c(11L, 11L, 11L, 11L, 11L)
> , NAMES = c("rs3737728", "rs9651273", "rs6687776", "rs4970405",
> "rs12726255"
> )
> , elementType = "integer"
> , elementMetadata = NULL
> , metadata = list()
> )
> , strand = new("Rle"
> , values = structure(3L, .Label = c("+", "-", "*"), class = "factor")
> , lengths = 5L
> , elementMetadata = NULL
> , metadata = list()
> )
> , elementMetadata = new("DataFrame"
> , rownames = NULL
> , nrows = 5L
> , listData = structure(list(score = structure(c(3L, 5L, 1L, 2L, 4L),
> .Label = c("0.196662",
> "0.208463", "0.340955", "0.433541", "0.438123"), class = "factor"),
> data = c(4, 5, 6, 7, 8)), .Names = c("score", "data"))
> , elementType = "ANY"
> , elementMetadata = NULL
> , metadata = list()
> )
> , seqinfo = new("Seqinfo"
> , seqnames = c("chr1", "chr10", "chr11", "chr12", "chr13", "chr14",
> "chr15",
> "chr16", "chr17", "chr17_ctg5_hap1", "chr18", "chr19", "chr2",
> "chr20", "chr21", "chr22", "chr3", "chr4", "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY")
> , seqlengths = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_)
> , is_circular = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA)
> , genome = c(NA_character_, NA_character_, NA_character_,
> NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_
> )
> )
> , metadata = list()
> )
>> dput (gr2)
> new("GRanges"
> , seqnames = new("Rle"
> , values = structure(1L, .Label = c("chr1", "chr10", "chr11", "chr12",
> "chr13",
> "chr14", "chr15", "chr16", "chr17", "chr17_ctg5_hap1", "chr18",
> "chr19", "chr2", "chr20", "chr21", "chr22", "chr3", "chr4",
> "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY"), class = "factor")
> , lengths = 5L
> , elementMetadata = NULL
> , metadata = list()
> )
> , ranges = new("IRanges"
> , start = c(1021415L, 1030565L, 1031540L, 1048955L, 1049950L)
> , width = c(1L, 1L, 1L, 1L, 1L)
> , NAMES = c("rs3737728", "rs6687776", "rs9651273", "rs4970405",
> "rs12726255"
> )
> , elementType = "integer"
> , elementMetadata = NULL
> , metadata = list()
> )
> , strand = new("Rle"
> , values = structure(3L, .Label = c("+", "-", "*"), class = "factor")
> , lengths = 5L
> , elementMetadata = NULL
> , metadata = list()
> )
> , elementMetadata = new("DataFrame"
> , rownames = NULL
> , nrows = 5L
> , listData = structure(list(), .Names = character(0))
> , elementType = "ANY"
> , elementMetadata = NULL
> , metadata = list()
> )
> , seqinfo = new("Seqinfo"
> , seqnames = c("chr1", "chr10", "chr11", "chr12", "chr13", "chr14",
> "chr15",
> "chr16", "chr17", "chr17_ctg5_hap1", "chr18", "chr19", "chr2",
> "chr20", "chr21", "chr22", "chr3", "chr4", "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY")
> , seqlengths = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_)
> , is_circular = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA)
> , genome = c(NA_character_, NA_character_, NA_character_,
> NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_
> )
> )
> , metadata = list()
> )
>>
>
> [[alternative HTML version deleted]]
>
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at r-project.org
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives: http://news.gmane.org/gmane.science.biology.informatics.conductor
>
--
Computational Biology / Fred Hutchinson Cancer Research Center
1100 Fairview Ave. N.
PO Box 19024 Seattle, WA 98109
Location: Arnold Building M1 B861
Phone: (206) 667-2793
More information about the Bioconductor
mailing list