[BioC] package xps: export.filter issues
cstrato
cstrato at aon.at
Thu Aug 30 20:25:09 CEST 2012
Dear Steven,
Since I cannot reproduce your problem (see below), could you please supply:
- sessionInfo()
- version of ROOT
- version of Affymetrix annotation file
- your complete code
Here is what I have just done without experiencing any problems:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Tissues from Affymetrix Exon Array Dataset for HG-U133_Plus_2
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### create ROOT scheme files for ivt expression arrays ###
### start from a new R session and attach xps
library(xps)

### locations used in this step
# Affymetrix library files are read from here
libdir <- "/Volumes/GigaDrive/Affy/libraryfiles"
# Affymetrix annotation files are read from here
anndir <- "/Volumes/GigaDrive/Affy/Annotation"
# ROOT scheme files are stored below this directory
scmdir <- "/Volumes/GigaDrive/CRAN/Workspaces/Schemes"

# HG-U133_Plus_2: build the scheme from the CDF, probe and annotation
# files; the result is written to the "na32" subdirectory of scmdir
scheme.hgu133plus2.na32 <- import.expr.scheme(
  "hgu133plus2",
  filedir    = file.path(scmdir, "na32"),
  schemefile = file.path(libdir, "HG-U133_Plus_2.CDF"),
  probefile  = file.path(libdir, "HG-U133-PLUS_probe.tab"),
  annotfile  = file.path(
    anndir, "Version11Jul", "HG-U133_Plus_2.na32.annot.csv"
  )
)
### HG-U133_Plus_2 data: import raw data ###
### start from a new R session and attach xps
library(xps)

### locations used in this step
# ROOT scheme files are read from here
scmdir <- "/Volumes/GigaDrive/CRAN/Workspaces/Schemes/na32"
# Tissues CEL files are read from here
celdir <- "/Volumes/GigaDrive/ChipData/Exon/HuMixture"
# ROOT raw data files are stored here
datdir <- "/Volumes/GigaDrive/CRAN/Workspaces/BreastProstate"

# the ROOT scheme file has to be loaded before any data can be imported
scheme.u133p2 <- root.scheme(file.path(scmdir, "hgu133plus2.root"))

# only this subset of the CEL files is imported
celfiles <- c(
  "u1332plus_ivt_breast_A.CEL",
  "u1332plus_ivt_breast_B.CEL",
  "u1332plus_ivt_breast_C.CEL",
  "u1332plus_ivt_prostate_A.CEL",
  "u1332plus_ivt_prostate_B.CEL",
  "u1332plus_ivt_prostate_C.CEL"
)

# short names for the CEL files, listed in the same order as celfiles
celnames <- c(
  "BreastA", "BreastB", "BreastC",
  "ProstateA", "ProstateB", "ProstateC"
)

# import the CEL files into a ROOT data file stored in datdir
data.mix.u133p2 <- import.data(
  scheme.u133p2,
  "BrPrU133P2",
  filedir  = datdir,
  celdir   = celdir,
  celfiles = celfiles,
  celnames = celnames
)
### preprocess raw data ###
### start from a new R session and attach xps
library(xps)

### first, load ROOT scheme file and ROOT data file
scmdir <- "/Volumes/GigaDrive/CRAN/Workspaces/Schemes/na32"
scheme.u133p2 <- root.scheme(file.path(scmdir, "hgu133plus2.root"))

# Must be the directory the import step wrote "BrPrU133P2_cel.root" to
# (its filedir); the posted script pointed at a different volume here,
# so the data file created above would not have been found.
datdir <- "/Volumes/GigaDrive/CRAN/Workspaces/BreastProstate"
data.u133p2 <- root.data(
  scheme.u133p2,
  file.path(datdir, "BrPrU133P2_cel.root")
)

### RMA
data.rma <- rma(
  data.u133p2,
  "BrPrU133P2RMA",
  tmpdir     = "",
  background = "pmonly",
  normalize  = TRUE
)

# get data.frames
expr.rma <- validData(data.rma)

# export expression data to a tab-separated text file
export.expr(
  data.rma,
  treename     = "*",
  treetype     = "mdp",
  varlist      = "*",
  outfile      = "BreastProstateRMAU133P2.txt",
  sep          = "\t",
  as.dataframe = FALSE,
  verbose      = TRUE
)
### apply univariate filters ###
### start from a new R session and attach xps
library(xps)
# NOTE(review): data.rma is used below but is not created in this section;
# it has to be available from the preprocessing step -- confirm how it is
# meant to be restored in a new session.

# set up the UniFilter with its unitest settings
unifltr <- UniFilter(
  unitest = c("t.test", "two.sided", "BH", 0, 0.0, FALSE, 0.95, TRUE)
)

# run the unifilter on the RMA data: first three samples form GrpA,
# last three form GrpB; getwd() is passed as the third argument
rma.ufr <- unifilter(
  data.rma,
  "BrPrU133P2Unifilter",
  getwd(),
  unifltr,
  group = c("GrpA", "GrpA", "GrpA", "GrpB", "GrpB", "GrpB")
)

# variant 1: write the filter results to a tab-separated text file
export.filter(
  rma.ufr,
  treename     = "*",
  treetype     = "stt",
  varlist      = "fUnitName:fName:fSymbol:fc:pval:flag",
  outfile      = "UniFltr.txt",
  sep          = "\t",
  as.dataframe = FALSE,
  verbose      = TRUE
)

# variant 2: fetch the valid data directly
tmp <- validData(rma.ufr, which = "UnitName")

# variant 3: same varlist as variant 1, but returned as a data.frame
# (this overwrites tmp from variant 2)
tmp <- export.filter(
  rma.ufr,
  treename     = "*",
  treetype     = "stt",
  varlist      = "fUnitName:fName:fSymbol:fc:pval:flag",
  as.dataframe = TRUE,
  verbose      = TRUE
)
Best regards,
Christian
_._._._._._._._._._._._._._._._._._
C.h.r.i.s.t.i.a.n S.t.r.a.t.o.w.a
V.i.e.n.n.a A.u.s.t.r.i.a
e.m.a.i.l: cstrato at aon.at
_._._._._._._._._._._._._._._._._._
On 8/16/12 12:25 PM, steven wink wrote:
> Dear xps user/ developer,
>
> My goal is to obtain a dataframe with all the statistics (fold changes
> p-values etc) and annotation data (gene symbols etc). I need this dataframe
> to select a subset hereof of 311 genes based on gene symbols.
>
> The issue I have is as follows ( I use xps.pdf vignette var names to make
> it easier to follow)
>
> 1) root scheme loaded for hgu133plus2
> 2) cel files imported (just 4 of them: 2 vehicle, 2 treated)
> 3) rma probe set normalized (data.rma)
> 4) filter operation: unifltr<-UniFilter and: uniTest(unifltr) <-
> c("t.test","two.sided","BH",0,0.0,FALSE,0.95,TRUE) for Mult Testing
> correction (adjp)
> 5) this dataframe is fine: tmp <- validData(rma.ufr)
>
> adding phenotype data is the problem:
> 6)
>
> tmp <- export.filter(rma.ufr, treetype="stt",
> +
> varlist="fUnitName:fName:fSymbol:fc:pval:flag",
> +
> as.dataframe=TRUE, verbose=FALSE)
>
> the resulting data.frame, both in R memory and when I write it to disk, is
> messed up in certain rows (it clumps a lot of rows into some rows in the 4th
> column) and then clumps columns into one column at the end of the file.
> Bit hard to explain exactly as I don't see the logic atm.
>
>
> So then I tried removing "fUnitName" from the varlist because it seems
> like unit_ID and unit_name are kind of redundant, and I get a crash
> (reproduced it 3 times with underlying code): I don't know if this is
> relevant to the messed up data.frame, but added it just in case.
>
> tmp<-export.filter(rma.ufr,treetype="stt",varlist="fName:fSymbol:fc:pval:flag",as.dataframe=TRUE,verbose=FALSE)
>
> *** Break *** segmentation violation
>
>
>
>
>
> thanks in advance
> Steven Wink
>
> [[alternative HTML version deleted]]
>
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at r-project.org
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives: http://news.gmane.org/gmane.science.biology.informatics.conductor
>
More information about the Bioconductor
mailing list