[BioC] package xps: export.filter issues

Thu Aug 30 20:25:09 CEST 2012

Dear Steven,

Since I cannot reproduce your problem (see below) could you please supply:
- sessionInfo()
- version of ROOT
- version of Affymetrix annotation file
- your complete code

Here is what I have just done w/o experiencing any problems:

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# Tissues from Affymetrix Exon Array Dataset for HG-U133_Plus_2
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

### create ROOT scheme files for ivt expression arrays ###

### start from a fresh R session and attach xps
library(xps)

### input/output locations
# Affymetrix library files (CDF and probe.tab)
libdir <- "/Volumes/GigaDrive/Affy/libraryfiles"
# Affymetrix annotation files
anndir <- "/Volumes/GigaDrive/Affy/Annotation"
# destination for the generated ROOT scheme files
scmdir <- "/Volumes/GigaDrive/CRAN/Workspaces/Schemes"

# HG-U133_Plus_2: build the scheme named "hgu133plus2" in <scmdir>/na32 from
# the CDF, probe and na32 annotation files (one argument per line; the mail
# client had wrapped the original call)
scheme.hgu133plus2.na32 <- import.expr.scheme(
  "hgu133plus2",
  filedir    = file.path(scmdir, "na32"),
  schemefile = file.path(libdir, "HG-U133_Plus_2.CDF"),
  probefile  = file.path(libdir, "HG-U133-PLUS_probe.tab"),
  annotfile  = file.path(anndir, "Version11Jul", "HG-U133_Plus_2.na32.annot.csv")
)

### HG-U133_Plus_2 data: import raw data ###

### start from a fresh R session and attach xps
library(xps)

### input/output locations
# ROOT scheme files (na32 subdirectory)
scmdir <- "/Volumes/GigaDrive/CRAN/Workspaces/Schemes/na32"
# Tissues CEL files
celdir <- "/Volumes/GigaDrive/ChipData/Exon/HuMixture"
# destination for the ROOT raw data files
datdir <- "/Volumes/GigaDrive/CRAN/Workspaces/BreastProstate"

# load the ROOT scheme file before importing any CEL data
scheme.u133p2 <- root.scheme(file.path(scmdir, "hgu133plus2.root"))

# the six CEL files to import: three breast, three prostate
celfiles <- c(
  "u1332plus_ivt_breast_A.CEL",
  "u1332plus_ivt_breast_B.CEL",
  "u1332plus_ivt_breast_C.CEL",
  "u1332plus_ivt_prostate_A.CEL",
  "u1332plus_ivt_prostate_B.CEL",
  "u1332plus_ivt_prostate_C.CEL"
)
# short names for the CEL files, in the same order as celfiles
celnames <- c(
  "BreastA", "BreastB", "BreastC",
  "ProstateA", "ProstateB", "ProstateC"
)

# import the CEL files under the name "BrPrU133P2"
data.mix.u133p2 <- import.data(
  scheme.u133p2,
  "BrPrU133P2",
  filedir  = datdir,
  celdir   = celdir,
  celfiles = celfiles,
  celnames = celnames
)

### preprocess raw data ###

### start from a fresh R session and attach xps
library(xps)

### reload the ROOT scheme file, then the ROOT data file
scmdir <- "/Volumes/GigaDrive/CRAN/Workspaces/Schemes/na32"
scheme.u133p2 <- root.scheme(file.path(scmdir, "hgu133plus2.root"))

# NOTE(review): the import section stored its raw data under
# /Volumes/GigaDrive/...; this path points at /Volumes/MitziData/... --
# confirm that BrPrU133P2_cel.root is really located on this volume.
datdir <- "/Volumes/MitziData/CRAN/Workspaces/BreastProstate"
data.u133p2 <- root.data(
  scheme.u133p2,
  file.path(datdir, "BrPrU133P2_cel.root")
)

### RMA
data.rma <- rma(
  data.u133p2,
  "BrPrU133P2RMA",
  tmpdir     = "",
  background = "pmonly",
  normalize  = TRUE
)

# pull the expression values out of the RMA result as a data.frame
expr.rma <- validData(data.rma)

# write the expression data to a tab-separated text file
export.expr(
  data.rma,
  treename     = "*",
  treetype     = "mdp",
  varlist      = "*",
  outfile      = "BreastProstateRMAU133P2.txt",
  sep          = "\t",
  as.dataframe = FALSE,
  verbose      = TRUE
)

### apply univariate filters ###

### start from a fresh R session and attach xps
library(xps)
# NOTE(review): data.rma is used below but is not recreated in this section;
# in a truly fresh session it has to be loaded again first.

# set up the UniFilter (t.test, two.sided, BH adjustment)
unifltr <- UniFilter(
  unitest = c("t.test", "two.sided", "BH", 0, 0.0, FALSE, 0.95, TRUE)
)

# run the filter on the RMA data; the first three samples form GrpA and
# the last three GrpB
rma.ufr <- unifilter(
  data.rma,
  "BrPrU133P2Unifilter",
  getwd(),
  unifltr,
  group = c("GrpA", "GrpA", "GrpA", "GrpB", "GrpB", "GrpB")
)

# write the filter results plus annotation columns to a tab-separated file
export.filter(
  rma.ufr,
  treename     = "*",
  treetype     = "stt",
  varlist      = "fUnitName:fName:fSymbol:fc:pval:flag",
  outfile      = "UniFltr.txt",
  sep          = "\t",
  as.dataframe = FALSE,
  verbose      = TRUE
)

# fetch the results into R instead; both calls assign to tmp, so the second
# result replaces the first
tmp <- validData(rma.ufr, which = "UnitName")
tmp <- export.filter(
  rma.ufr,
  treename     = "*",
  treetype     = "stt",
  varlist      = "fUnitName:fName:fSymbol:fc:pval:flag",
  as.dataframe = TRUE,
  verbose      = TRUE
)

Best regards,
Christian
_._._._._._._._._._._._._._._._._._
C.h.r.i.s.t.i.a.n   S.t.r.a.t.o.w.a
V.i.e.n.n.a           A.u.s.t.r.i.a
e.m.a.i.l:        cstrato at aon.at
_._._._._._._._._._._._._._._._._._

On 8/16/12 12:25 PM, steven wink wrote:
> Dear xps user/ developer,
>
> My goal is to obtain a dataframe with all the statistics (fold changes
> p-values etc) and annotation data (gene symbols etc). I need this dataframe
> to select a subset hereof of 311 genes based on gene symbols.
>
> The issue I have is as follows ( I use xps.pdf vignette var names to make
> it easier to follow)
>
> 1) root scheme loaded for hgu133plus2
> 2) cel files imported (just 4 of them: 2 vehicle, 2 treated)
> 3) rma probe set normalized (data.rma)
> 4) filter operation: unifltr<-UniFilter  and: uniTest(unifltr) <-
> c("t.test","two.sided","BH",0,0.0,FALSE,0.95,TRUE) for Mult Testing
> correction (adjp)
> 5) this dataframe is fine:  tmp <- validData(rma.ufr)
>
> adding phenotype data is the problem:
> 6)
>
> tmp <- export.filter(rma.ufr, treetype="stt",
> +
> varlist="fUnitName:fName:fSymbol:fc:pval:flag",
> +
> as.dataframe=TRUE, verbose=FALSE)
>
> the resulting data.frame, both in R memory and when I write it to disk, is
> messed up in certain rows (it clumps a lot of rows into some rows of the 4th
> column) and then clumps the columns into one column at the end of the file.
> It is a bit hard to explain exactly, as I don't see the logic at the moment.
>
>
> So then I tried removing "fUnitName" from the varlist, because it seems
> like unit_ID and unit_name are somewhat redundant, and I get a crash
> (reproduced 3 times with the code below). I don't know if this is
> relevant to the messed-up data.frame, but I added it just in case.
>
> tmp<-export.filter(rma.ufr,treetype="stt",varlist="fName:fSymbol:fc:pval:flag",as.dataframe=TRUE,verbose=FALSE)
>
>   *** Break *** segmentation violation
>
>
>
>
>
> thanks in advance
> Steven Wink
>
> 	[[alternative HTML version deleted]]
>
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at r-project.org
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives: http://news.gmane.org/gmane.science.biology.informatics.conductor
>