[BioC] cdf vs probe package & Linux vs PC
Justin Borevitz
borevitz at salk.edu
Mon May 10 20:50:12 CEST 2004
I've noticed that the order of probes in the 2 packages does not agree. At
least for barley1 and ath1121501. Also the way probes are ordered in Linux
and Rgui (PC) does not agree. It could be something with the alphabetizing
of probeset names in the 2 versions. It's possible this is true for the
probe package coming from Affymetrix as well, which doesn't match either
Linux or PC ordering. Lesson: never assume ordering...
Maybe everyone knows this already and that is the purpose of matchprobes??
Any help with simple calls to avoid this problem is appreciated.
# in Linux
# Read only the second file returned by list.celfiles().
barley.object <- read.affybatch(filenames = list.celfiles()[2])
Warning message:
Incompatible phenoData object. Created a new one.
in: read.affybatch(filenames = list.celfiles()[2])
# Keep the row names of pm(barley.object) in the order produced on this
# platform, and save them so they can be compared on the other machine.
pnL <- rownames(pm(barley.object))
save(pnL,file = "pnL.RData",compress=T)
## then download from linux to PC
# On PC
# Same steps as on Linux; the row names are stored as pnPC this time.
barley.object <- read.affybatch(filenames = list.celfiles()[2])
Warning message:
Incompatible phenoData object. Created a new one.
in: read.affybatch(filenames = list.celfiles()[2])
pnPC <- rownames(pm(barley.object))
# Restores pnL as it was saved on Linux.
load("D:/barley/pnL.RData")
# Position-by-position comparison of the two orderings; the counts below
# show that most positions hold a different row name.
table(pnPC == pnL)
FALSE TRUE
172752 78685
#Observation and rough fix for probe package ordering to PC ordering
# Compare the CDF-derived probe order (row names of the PM matrix) with the
# row order of the barley1probe package, position by position.
setwd("d:/barley")
library(affy)
barley.object <- read.affybatch(filenames = list.celfiles())
probesets <- rownames(pm(barley.object))
length(probesets)
library(barley1probe)
# The column is Probe.Set.Name (singular). `$Probe.Set.Names` matches no
# column, returns NULL, and would report a length of 0.
length(barley1probe$Probe.Set.Name)
# The pm() row names appear to be the probe-set name followed by a probe
# number. Strip that trailing number in a single anchored pass, however many
# digits it has, instead of removing one digit per gsub() call.
psn <- sub("_at[0-9]+$", "_at", probesets)
table(psn == barley1probe$Probe.Set.Name)
# FALSE TRUE
#249955 1482
# Same order check for the ATH1 chip: compare the PM row names of the first
# file returned by list.celfiles() with the ath1121501probe row order.
setwd("d:/ath1")
ath1.obj <- read.affybatch(filenames = list.celfiles()[1])
aprobesets <- rownames(pm(ath1.obj))
# Strip the trailing probe number in one anchored pass, whatever its digit
# count, rather than a fixed number of one-digit gsub() passes.
apsn <- sub("_at[0-9]+$", "_at", aprobesets)
library(ath1121501probe)
table(apsn == ath1121501probe$Probe.Set.Name)
# FALSE TRUE
# 439 250639
Using the x and y coords, I've reordered the probe package as follows...
# Reorder barley1probe so that its rows follow the CDF probe order, matching
# probes on their chip (x, y) coordinates, then save the reordered table.
setwd("d:/barley")
library(affy)
barley.object <- read.affybatch(filenames = list.celfiles())
pm.i <- indexProbes(barley.object, which="pm") # all genes
pm1 <- unlist(pm.i)
# `ncol` is spelled out; `nc = 2` only worked through partial argument
# matching.
pm.i.xy <- matrix(indices2xy(pm1, abatch = barley.object), ncol = 2)
length(pm1)
dim(pm.i.xy)
# NOTE(review): assumes indices2xy() returns 1-based coordinates while the
# probe package stores 0-based ones -- confirm for the installed affy version.
pm.i.xy <- pm.i.xy - 1 # for affy units starting at 0.
probesets <- rownames(pm(barley.object))
length(probesets)
# now match with xy in barley1probe..
cdfxy <- paste(pm.i.xy[,1],pm.i.xy[,2])
library(barley1probe)
names(barley1probe)
probexy <- paste(barley1probe$x,barley1probe$y)
ordcdf <- match(cdfxy,probexy)
# match() yields NA for any coordinate pair absent from the probe package.
# table() drops NA comparisons silently, so the check below could still show
# only TRUE while `[ordcdf, ]` writes all-NA rows. Fail loudly instead.
stopifnot(!any(is.na(ordcdf)))
# Strip the trailing probe number in one anchored pass, whatever its length.
psn <- sub("_at[0-9]+$", "_at", probesets)
# Before reordering: most positions disagree.
table(psn == barley1probe$Probe.Set.Name)
# FALSE TRUE
#250100 1337
# After reordering by coordinates: every position agrees.
table(psn == barley1probe$Probe.Set.Name[ordcdf])
# TRUE
#251437
barley1probe <- barley1probe[ordcdf, ]
save(barley1probe,file = "barley1probe.RData", compress=TRUE)
# Reorder ath1121501probe so that its rows follow the CDF probe order,
# matching probes on their chip (x, y) coordinates, then save the result.
setwd("d:/ath1")
ath1.obj <- read.affybatch(filenames = list.celfiles()[1])
aprobesets <- rownames(pm(ath1.obj))
# Strip the trailing probe number in one anchored pass, whatever its digit
# count, instead of three successive one-digit gsub() passes.
apsn <- sub("_at[0-9]+$", "_at", aprobesets)
pm.i <- indexProbes(ath1.obj, which="pm") # all genes
pm1 <- unlist(pm.i)
# `ncol` is spelled out; `nc = 2` only worked through partial argument
# matching.
pm.i.xy <- matrix(indices2xy(pm1, abatch = ath1.obj), ncol = 2)
length(pm1)
dim(pm.i.xy)
# NOTE(review): assumes indices2xy() returns 1-based coordinates while the
# probe package stores 0-based ones -- confirm for the installed affy version.
pm.i.xy <- pm.i.xy - 1 # for affy units starting at 0.
# now match with xy in ath1121501probe..
cdfxy <- paste(pm.i.xy[,1],pm.i.xy[,2])
library(ath1121501probe)
probexy <- paste(ath1121501probe$x,ath1121501probe$y)
ordcdf <- match(cdfxy,probexy)
# match() yields NA for any coordinate pair absent from the probe package.
# table() drops NA comparisons silently, so the check below could still show
# only TRUE while `[ordcdf, ]` writes all-NA rows. Fail loudly instead.
stopifnot(!any(is.na(ordcdf)))
# Before reordering: a few hundred positions disagree.
table(apsn == ath1121501probe$Probe.Set.Name)
# FALSE TRUE
# 439 250639
# After reordering by coordinates: every position agrees.
table(apsn == ath1121501probe$Probe.Set.Name[ordcdf])
# TRUE
#251078
ath1121501probe <- ath1121501probe[ordcdf, ]
save(ath1121501probe,file = "ath1121501probe.RData", compress=TRUE)
---
Justin Borevitz
Plant Biology
Salk Institute
10010 N. Torrey Pines Rd.
La Jolla CA, 92037
USA
ph. 858 453-4100X1796
fax 858 452-4315
mailto:borevitz at salk.edu
http://naturalvariation.org
More information about the Bioconductor
mailing list