[BioC] remove probes - code

Ariel Chernomoretz ariel.chernomoretz at crchul.ulaval.ca
Thu Apr 21 17:02:06 CEST 2005


I am sorry, I think that the attachments didn't make it.
Here is the R script with the ResetEnvir and RemoveProbes function 
implementations.
The first function just load the cdf and probe packages, and if 
necessary, detaches previously installed ones.
The second one does the job. As it modifies the cdf and probe 
environment used by  affy to access probe level info it should work 
with any algorithm implemented in affy + gcrma  + affyPLM packages (I 
tested it on mas, rma, rmaPLM, and gcrma).
Please  be aware of the  behavior of different versions of GCRMA 
reported in my previous post to the list.

Regards,
A./


#
# ResetEnvir
# goal:Detach, if necessary, cdf and probe environments,
#      and (re)load them.
# in  :
#      cdfpackagename   (e.g. "hgu95av2cdf")
#      probepackagename (e.g. "hgu95av2probe")
# out : ---
#
ResetEnvir<-function(cdfpackagename,probepackagename){
  ll<-search()
  cdfpackagepos <- grep(cdfpackagename,ll)
  if(length(cdfpackagepos)>0) detach(pos=cdfpackagepos)
  ll<-search()
  probepackagepos <- grep(probepackagename,ll)
  if(length(probepackagepos)>0) detach(pos=probepackagepos)
  library(cdfpackagename,character.only=T)
  library(probepackagename,character.only=T)
}

#
# RemoveProbes
# goal: Modification of CDF and PROBE  environments
# in  :
#       listOutProbes: list of probes to be removed from cdf and probe
#                      environments (e.g. c("1001_at1","1032_at6").
#                      If NULL no probe will be taken out.
#       listOutProbeSets: list of probesets to be removed from cdf and 
probe
#                         environments (e.g. c("1006_at","1032_f_at").
#                         If NULL no probeset will be removed.
#       cdfpackagename:   (e.g. "hgu95av2cdf")
#       probepackagename: (e.g. "hgu95av2probe")
#       destructive: unimplemented option, see NOTE
#
# out : ---
#
# NOTE 1: After a call to RemoveProbes the probenames reported by
#       pm and mm accessing functions implemented in the affy package, 
will
#       be in general differents from the original ones in probesets 
where
#       probes have been removed. This happens as in this functions the 
probe names
#       are always assigned sequentially.
#       RemoveProbes modifies the specified CDF and PROBE environments
#       in a consistent BUT destructive way. Take this in consideartion 
if
#       your code relays on absolute references to probe names.
#
#       A chunk of code to illustrate this
#
#         library(affy)
#         library(affydata)
#         source("removeProbes.R")
#
#         data(Dilution)
#         Dilution at cdfName<-"hgu95av2" # fix cdf name
#
#         cleancdf         <- 
cleancdfname(Dilution at cdfName,addcdf=FALSE)
#         cdfpackagename   <- paste(cleancdf,"cdf",sep="")
#         probepackagename <- paste(cleancdf,"probe",sep="")
#
#         ResetEnvir(cdfpackagename,probepackagename)
#         pm(Dilution,"1000_at")
#         as.data.frame(get(probepackagename))[1:16,1:4]
#
#         
RemoveProbes(c("1000_at2"),NULL,cdfpackagename,probepackagename)
#         pm(Dilution,"1000_at")
#         as.data.frame(get(probepackagename))[1:15,1:4]
#
# NOTE2 : See my April 20 post to BioC mailing list  (and eventually its
#         continuation) regarding differences reported
#         between GCRMA 1.1.0 and GCRMA 1.1.3
#
RemoveProbes<-function(listOutProbes=NULL,
                        listOutProbeSets=NULL,
                        
cdfpackagename,probepackagename,destructive=TRUE){

  #default probe dataset values
  probe.env.orig <- get(probepackagename)

  if(!is.null(listOutProbes)){
   # taking probes out from CDF env
   probes<- unlist(lapply(listOutProbes,function(x){
			   a<-strsplit(x,"at")
                            aux1<-paste(a[[1]][1],"at",sep="")
                            aux2<-as.integer(a[[1]][2])
                            c(aux1,aux2)
		          }))
   n1<-as.character(probes[seq(1,(length(probes)/2))*2-1])
   n2<-as.integer(probes[seq(1,(length(probes)/2))*2])
   probes<-data.frame(I(n1),n2)
   probes[,1]<-as.character(probes[,1])
   probes[,2]<-as.integer(probes[,2])
   pset<-unique(probes[,1])
   for(i in seq(along=pset)){
    ii  <-grep(pset[i],probes[,1])
    iout<-probes[ii,2]
    a<-get(pset[i],env=get(cdfpackagename))
    a<-a[-iout,]
    assign(pset[i],a,env=get(cdfpackagename))
   }
  }

  # taking probesets out from CDF env
  if(!is.null(listOutProbeSets)){
   rm(list=listOutProbeSets,envir=get(cdfpackagename))
  }

  # setting the PROBE env accordingly (idea from gcrma 
compute.affinities.R)
  tmp <- get("xy2i",paste("package:",cdfpackagename,sep=""))
  newAB   <- new("AffyBatch",cdfName=cleancdf)
  pmIndex <-  unlist(indexProbes(newAB,"pm"))
  subIndex<- match(tmp(probe.env.orig$x,probe.env.orig$y),pmIndex)
  rm(newAB)
  iNA     <- which(is.na(subIndex))

  if(length(iNA)>0){
   ipos<-grep(probepackagename,search())
   assign(probepackagename,probe.env.orig[-iNA,],pos=ipos)
  }
}












Ariel Chernomoretz, Ph.D.
Centre de recherche du CHUL
2705 Blv Laurier, bloc T-367
Sainte-Foy, Qc
G1V 4G2
(418)-525-4444 ext 46339



More information about the Bioconductor mailing list