[BioC] transport mcols from one GRange object to another

Martin Morgan mtmorgan at fhcrc.org
Sat Apr 27 15:15:59 CEST 2013


On 04/26/2013 03:33 PM, Wim Kreinen wrote:
> Hello,
>
> I have 2 GRange objects gr1 and gr2 (with the same names but in a different
> order!) and I would like to "transport" the mcols of gr1 to gr2 - of course
> suitable for the names.

I think you're looking to use `mcols` to get the metadata columns (DataFrame) 
from gr1, to reorder the rows to match gr2, and to use the "setter" `mcols<-` 
to assign the columns to gr2,

   idx = match(names(gr2), names(gr1))
   mcols(gr2) <- mcols(gr1)[idx,]

actually, if the ranges in the two GRanges really are the same (just in a
different order), you could match on the ranges themselves instead of the names

   idx = match(gr2, gr1)
   mcols(gr2) <- mcols(gr1)[idx,]

but doing that for your ranges gives

 > match(gr2, gr1)
[1] NA NA NA NA NA

because the ranges don't match, even though the names do! Hopefully that's 
something you're expecting...

Martin

>
> Please ignore IRanges data. The problem must be solved only via the names.
>
> gr1
> GRanges with 5 ranges and 2 metadata columns:
>               seqnames             ranges strand |    score      data
>                  <Rle>          <IRanges>  <Rle> | <factor> <numeric>
>     rs3737728     chr1 [1021365, 1021375]      * | 0.340955         4
>     rs9651273     chr1 [1030515, 1030525]      * | 0.438123         5
>     rs6687776     chr1 [1031490, 1031500]      * | 0.196662         6
>     rs4970405     chr1 [1048905, 1048915]      * | 0.208463         7
>    rs12726255     chr1 [1049900, 1049910]      * | 0.433541         8
>    ---
>    seqlengths:
>                chr1           chr10 ...            chrX            chrY
>                  NA              NA ...              NA              NA
>> gr2
> GRanges with 5 ranges and 0 metadata columns:
>               seqnames             ranges strand
>                  <Rle>          <IRanges>  <Rle>
>     rs3737728     chr1 [1021415, 1021415]      *
>     rs6687776     chr1 [1030565, 1030565]      *
>     rs9651273     chr1 [1031540, 1031540]      *
>     rs4970405     chr1 [1048955, 1048955]      *
>    rs12726255     chr1 [1049950, 1049950]      *
>    ---
>    seqlengths:
>                chr1           chr10 ...            chrX            chrY
>                  NA              NA ...              NA              NA
>>
>
> Thanks
> Wim
>
> class (drei)
> [1] "data.frame"
>> dput (gr1)
> new("GRanges"
>      , seqnames = new("Rle"
>      , values = structure(1L, .Label = c("chr1", "chr10", "chr11", "chr12",
> "chr13",
> "chr14", "chr15", "chr16", "chr17", "chr17_ctg5_hap1", "chr18",
> "chr19", "chr2", "chr20", "chr21", "chr22", "chr3", "chr4",
> "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY"), class = "factor")
>      , lengths = 5L
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , ranges = new("IRanges"
>      , start = c(1021365L, 1030515L, 1031490L, 1048905L, 1049900L)
>      , width = c(11L, 11L, 11L, 11L, 11L)
>      , NAMES = c("rs3737728", "rs9651273", "rs6687776", "rs4970405",
> "rs12726255"
> )
>      , elementType = "integer"
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , strand = new("Rle"
>      , values = structure(3L, .Label = c("+", "-", "*"), class = "factor")
>      , lengths = 5L
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , elementMetadata = new("DataFrame"
>      , rownames = NULL
>      , nrows = 5L
>      , listData = structure(list(score = structure(c(3L, 5L, 1L, 2L, 4L),
> .Label = c("0.196662",
> "0.208463", "0.340955", "0.433541", "0.438123"), class = "factor"),
>      data = c(4, 5, 6, 7, 8)), .Names = c("score", "data"))
>      , elementType = "ANY"
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , seqinfo = new("Seqinfo"
>      , seqnames = c("chr1", "chr10", "chr11", "chr12", "chr13", "chr14",
> "chr15",
> "chr16", "chr17", "chr17_ctg5_hap1", "chr18", "chr19", "chr2",
> "chr20", "chr21", "chr22", "chr3", "chr4", "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY")
>      , seqlengths = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_)
>      , is_circular = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA)
>      , genome = c(NA_character_, NA_character_, NA_character_,
> NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_
> )
> )
>      , metadata = list()
> )
>> dput (gr2)
> new("GRanges"
>      , seqnames = new("Rle"
>      , values = structure(1L, .Label = c("chr1", "chr10", "chr11", "chr12",
> "chr13",
> "chr14", "chr15", "chr16", "chr17", "chr17_ctg5_hap1", "chr18",
> "chr19", "chr2", "chr20", "chr21", "chr22", "chr3", "chr4",
> "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY"), class = "factor")
>      , lengths = 5L
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , ranges = new("IRanges"
>      , start = c(1021415L, 1030565L, 1031540L, 1048955L, 1049950L)
>      , width = c(1L, 1L, 1L, 1L, 1L)
>      , NAMES = c("rs3737728", "rs6687776", "rs9651273", "rs4970405",
> "rs12726255"
> )
>      , elementType = "integer"
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , strand = new("Rle"
>      , values = structure(3L, .Label = c("+", "-", "*"), class = "factor")
>      , lengths = 5L
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , elementMetadata = new("DataFrame"
>      , rownames = NULL
>      , nrows = 5L
>      , listData = structure(list(), .Names = character(0))
>      , elementType = "ANY"
>      , elementMetadata = NULL
>      , metadata = list()
> )
>      , seqinfo = new("Seqinfo"
>      , seqnames = c("chr1", "chr10", "chr11", "chr12", "chr13", "chr14",
> "chr15",
> "chr16", "chr17", "chr17_ctg5_hap1", "chr18", "chr19", "chr2",
> "chr20", "chr21", "chr22", "chr3", "chr4", "chr4_ctg9_hap1",
> "chr5", "chr6", "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
> "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6", "chr6_ssto_hap7",
> "chr7", "chr8", "chr9", "chrM", "chrX", "chrY")
>      , seqlengths = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_)
>      , is_circular = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA,
> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, NA)
>      , genome = c(NA_character_, NA_character_, NA_character_,
> NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_
> )
> )
>      , metadata = list()
> )
>>
>
> 	[[alternative HTML version deleted]]
>
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at r-project.org
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives: http://news.gmane.org/gmane.science.biology.informatics.conductor
>


-- 
Computational Biology / Fred Hutchinson Cancer Research Center
1100 Fairview Ave. N.
PO Box 19024 Seattle, WA 98109

Location: Arnold Building M1 B861
Phone: (206) 667-2793



More information about the Bioconductor mailing list