[BioC] cdf vs probe package & Linux vs PC

Justin Borevitz borevitz at salk.edu
Mon May 10 20:50:12 CEST 2004


I've noticed that the order of probes in the 2 packages does not agree, at
least for barley1 and ath1121501.  Also, the way probes are ordered in Linux
and Rgui (PC) does not agree.  It could be something to do with the
alphabetizing of probeset names in the 2 versions.  It's possible this is
true for the probe package coming from Affymetrix as well, which doesn't
match either Linux or PC ordering. Lesson: never assume ordering...

Maybe everyone knows this already and that is the purpose of matchprobes?
Any help with simple calls to avoid this problem is appreciated.

# in Linux
# Console transcript: read one CEL file (the second one listed in the working
# directory) and keep the row names of its PM matrix as the Linux ordering.
barley.object <- read.affybatch(filenames = list.celfiles()[2])
Warning message:
Incompatible phenoData object. Created a new one.
 in: read.affybatch(filenames = list.celfiles()[2])
pnL <- rownames(pm(barley.object))
# Store the Linux ordering in a file so it can be compared on the PC.
save(pnL,file = "pnL.RData",compress=T)
## then download from linux to PC

# On PC
# Same read on the PC; pnPC holds the PM row names as ordered there.
barley.object <- read.affybatch(filenames = list.celfiles()[2])
Warning message:
Incompatible phenoData object. Created a new one.
 in: read.affybatch(filenames = list.celfiles()[2])
pnPC <- rownames(pm(barley.object))

# Restores pnL, the vector saved on the Linux machine.
load("D:/barley/pnL.RData")

# Position-by-position comparison of the two orderings; the counts printed
# below are the output of the original session.
table(pnPC == pnL)
 FALSE   TRUE 
172752  78685



#Observation and rough fix for probe package ordering to PC ordering

# Compare, position by position, the probe-set names behind the PM matrix of
# the barley CEL files with the Probe.Set.Name column of barley1probe.
setwd("d:/barley")
library(affy)
barley.object <- read.affybatch(filenames = list.celfiles())
probesets <- rownames(pm(barley.object))
length(probesets)

library(barley1probe)
# The column is `Probe.Set.Name` (singular). `$Probe.Set.Names` does not
# partially match it and yields NULL, so its length would always print 0.
length(barley1probe$Probe.Set.Name)

# The PM row names are assumed to be "<probe set>_at<probe number>". Strip the
# trailing number in one anchored pass: this handles any number of digits and
# cannot touch an "_at<digit>" sequence in the middle of a name.
psn <- sub("_at[0-9]+$", "_at", probesets)
table(psn == barley1probe$Probe.Set.Name)
# Output recorded in the original session:
# FALSE   TRUE 
#249955   1482

# Same position-by-position comparison for the ATH1 array: probe-set names
# behind the PM matrix versus the Probe.Set.Name column of ath1121501probe.
setwd("d:/ath1")
ath1.obj <- read.affybatch(filenames = list.celfiles()[1])
aprobesets <- rownames(pm(ath1.obj))
# The PM row names are assumed to be "<probe set>_at<probe number>". One
# anchored pass removes the whole trailing number, however many digits it has,
# instead of removing a single digit per gsub() call.
apsn <- sub("_at[0-9]+$", "_at", aprobesets)
library(ath1121501probe)
table(apsn == ath1121501probe$Probe.Set.Name)
# Output recorded in the original session:
# FALSE   TRUE 
#   439 250639

Using the x and y coords, I've reordered the probe package as follows...

# Reorder the rows of barley1probe so that they follow the order of the PM
# probes in the AffyBatch, matching probes by their x/y chip coordinates, and
# save the reordered table to barley1probe.RData.
setwd("d:/barley")
library(affy)
barley.object <- read.affybatch(filenames = list.celfiles())
pm.i <- indexProbes(barley.object, which = "pm") # all genes
pm1 <- unlist(pm.i)
pm.i.xy <- matrix(indices2xy(pm1, abatch = barley.object), ncol = 2)
length(pm1)
dim(pm.i.xy)
# NOTE(review): this shift assumes indices2xy() returns 1-based coordinates
# while the probe package stores 0-based x/y -- confirm for the affy version
# in use.
pm.i.xy <- pm.i.xy - 1 # for affy units starting at 0.
probesets <- rownames(pm(barley.object))
length(probesets)
# now match with xy in barley1probe..
cdfxy <- paste(pm.i.xy[, 1], pm.i.xy[, 2])

library(barley1probe)
names(barley1probe)
probexy <- paste(barley1probe$x, barley1probe$y)
ordcdf <- match(cdfxy, probexy)
# An unmatched coordinate gives NA here, and indexing with NA below would
# write an all-NA row into the saved table. Stop instead of saving bad data.
stopifnot(!any(is.na(ordcdf)))

# The PM row names are assumed to be "<probe set>_at<probe number>"; strip the
# trailing number in one anchored pass, whatever its number of digits.
psn <- sub("_at[0-9]+$", "_at", probesets)
# Agreement before reordering (output recorded in the original session):
table(psn == barley1probe$Probe.Set.Name)
# FALSE   TRUE 
#250100   1337 
# Agreement after reordering (output recorded in the original session):
table(psn == barley1probe$Probe.Set.Name[ordcdf])
#  TRUE 
#251437 

barley1probe <- barley1probe[ordcdf, ]
save(barley1probe, file = "barley1probe.RData", compress = TRUE)
 



# Reorder the rows of ath1121501probe so that they follow the order of the PM
# probes in the AffyBatch, matching probes by their x/y chip coordinates, and
# save the reordered table to ath1121501probe.RData.
setwd("d:/ath1")
ath1.obj <- read.affybatch(filenames = list.celfiles()[1])
aprobesets <- rownames(pm(ath1.obj))
# The PM row names are assumed to be "<probe set>_at<probe number>". One
# anchored pass removes the whole trailing number, however many digits it has,
# instead of removing a single digit per gsub() call.
apsn <- sub("_at[0-9]+$", "_at", aprobesets)
pm.i <- indexProbes(ath1.obj, which = "pm") # all genes
pm1 <- unlist(pm.i)
pm.i.xy <- matrix(indices2xy(pm1, abatch = ath1.obj), ncol = 2)
length(pm1)
dim(pm.i.xy)
# NOTE(review): this shift assumes indices2xy() returns 1-based coordinates
# while the probe package stores 0-based x/y -- confirm for the affy version
# in use.
pm.i.xy <- pm.i.xy - 1 # for affy units starting at 0.
# now match with xy in ath1121501probe..
cdfxy <- paste(pm.i.xy[, 1], pm.i.xy[, 2])

library(ath1121501probe)
probexy <- paste(ath1121501probe$x, ath1121501probe$y)
ordcdf <- match(cdfxy, probexy)
# An unmatched coordinate gives NA here, and indexing with NA below would
# write an all-NA row into the saved table. Stop instead of saving bad data.
stopifnot(!any(is.na(ordcdf)))

# Agreement before reordering (output recorded in the original session):
table(apsn == ath1121501probe$Probe.Set.Name)
# FALSE   TRUE 
#   439 250639
# Agreement after reordering (output recorded in the original session):
table(apsn == ath1121501probe$Probe.Set.Name[ordcdf])
# TRUE 
#251078
ath1121501probe <- ath1121501probe[ordcdf, ]
save(ath1121501probe, file = "ath1121501probe.RData", compress = TRUE)




---
Justin Borevitz

Plant Biology
Salk Institute
10010 N. Torrey Pines Rd.
La Jolla CA, 92037
USA
ph. 858 453-4100X1796
fax 858 452-4315
mailto:borevitz at salk.edu
http://naturalvariation.org



More information about the Bioconductor mailing list