[BioC] remove probes - code
Ariel Chernomoretz
ariel.chernomoretz at crchul.ulaval.ca
Thu Apr 21 17:02:06 CEST 2005
I am sorry, I think that the attachments didn't make it.
Here is the R script with the ResetEnvir and RemoveProbes function
implementations.
The first function just load the cdf and probe packages, and if
necessary, detaches previously installed ones.
The second one does the job. As it modifies the cdf and probe
environment used by affy to access probe level info it should work
with any algorithm implemented in affy + gcrma + affyPLM packages (I
tested it on mas, rma, rmaPLM, and gcrma).
Please be aware of the behavior of different versions of GCRMA
reported in my previous post to the list.
Regards,
A./
#
# ResetEnvir
# goal:Detach, if necessary, cdf and probe environments,
# and (re)load them.
# in :
# cdfpackagename (e.g. "hgu95av2cdf")
# probepackagename (e.g. "hgu95av2probe")
# out : ---
#
ResetEnvir<-function(cdfpackagename,probepackagename){
ll<-search()
cdfpackagepos <- grep(cdfpackagename,ll)
if(length(cdfpackagepos)>0) detach(pos=cdfpackagepos)
ll<-search()
probepackagepos <- grep(probepackagename,ll)
if(length(probepackagepos)>0) detach(pos=probepackagepos)
library(cdfpackagename,character.only=T)
library(probepackagename,character.only=T)
}
#
# RemoveProbes
# goal: Modification of CDF and PROBE environments
# in :
# listOutProbes: list of probes to be removed from cdf and probe
# environments (e.g. c("1001_at1","1032_at6").
# If NULL no probe will be taken out.
# listOutProbeSets: list of probesets to be removed from cdf and
probe
# environments (e.g. c("1006_at","1032_f_at").
# If NULL no probeset will be removed.
# cdfpackagename: (e.g. "hgu95av2cdf")
# probepackagename: (e.g. "hgu95av2probe")
# destructive: unimplemented option, see NOTE
#
# out : ---
#
# NOTE 1: After a call to RemoveProbes the probenames reported by
# pm and mm accessing functions implemented in the affy package,
will
# be in general differents from the original ones in probesets
where
# probes have been removed. This happens as in this functions the
probe names
# are always assigned sequentially.
# RemoveProbes modifies the specified CDF and PROBE environments
# in a consistent BUT destructive way. Take this in consideartion
if
# your code relays on absolute references to probe names.
#
# A chunk of code to illustrate this
#
# library(affy)
# library(affydata)
# source("removeProbes.R")
#
# data(Dilution)
# Dilution at cdfName<-"hgu95av2" # fix cdf name
#
# cleancdf <-
cleancdfname(Dilution at cdfName,addcdf=FALSE)
# cdfpackagename <- paste(cleancdf,"cdf",sep="")
# probepackagename <- paste(cleancdf,"probe",sep="")
#
# ResetEnvir(cdfpackagename,probepackagename)
# pm(Dilution,"1000_at")
# as.data.frame(get(probepackagename))[1:16,1:4]
#
#
RemoveProbes(c("1000_at2"),NULL,cdfpackagename,probepackagename)
# pm(Dilution,"1000_at")
# as.data.frame(get(probepackagename))[1:15,1:4]
#
# NOTE2 : See my April 20 post to BioC mailing list (and eventually its
# continuation) regarding differences reported
# between GCRMA 1.1.0 and GCRMA 1.1.3
#
RemoveProbes<-function(listOutProbes=NULL,
listOutProbeSets=NULL,
cdfpackagename,probepackagename,destructive=TRUE){
#default probe dataset values
probe.env.orig <- get(probepackagename)
if(!is.null(listOutProbes)){
# taking probes out from CDF env
probes<- unlist(lapply(listOutProbes,function(x){
a<-strsplit(x,"at")
aux1<-paste(a[[1]][1],"at",sep="")
aux2<-as.integer(a[[1]][2])
c(aux1,aux2)
}))
n1<-as.character(probes[seq(1,(length(probes)/2))*2-1])
n2<-as.integer(probes[seq(1,(length(probes)/2))*2])
probes<-data.frame(I(n1),n2)
probes[,1]<-as.character(probes[,1])
probes[,2]<-as.integer(probes[,2])
pset<-unique(probes[,1])
for(i in seq(along=pset)){
ii <-grep(pset[i],probes[,1])
iout<-probes[ii,2]
a<-get(pset[i],env=get(cdfpackagename))
a<-a[-iout,]
assign(pset[i],a,env=get(cdfpackagename))
}
}
# taking probesets out from CDF env
if(!is.null(listOutProbeSets)){
rm(list=listOutProbeSets,envir=get(cdfpackagename))
}
# setting the PROBE env accordingly (idea from gcrma
compute.affinities.R)
tmp <- get("xy2i",paste("package:",cdfpackagename,sep=""))
newAB <- new("AffyBatch",cdfName=cleancdf)
pmIndex <- unlist(indexProbes(newAB,"pm"))
subIndex<- match(tmp(probe.env.orig$x,probe.env.orig$y),pmIndex)
rm(newAB)
iNA <- which(is.na(subIndex))
if(length(iNA)>0){
ipos<-grep(probepackagename,search())
assign(probepackagename,probe.env.orig[-iNA,],pos=ipos)
}
}
Ariel Chernomoretz, Ph.D.
Centre de recherche du CHUL
2705 Blv Laurier, bloc T-367
Sainte-Foy, Qc
G1V 4G2
(418)-525-4444 ext 46339
More information about the Bioconductor
mailing list