[BioC] Does combineAffyBatch of package matchprobes eliminate the wrong probe sets?

Christian.Stratowa@vie.boehringer-ingelheim.com Christian.Stratowa at vie.boehringer-ingelheim.com
Thu Mar 9 15:03:27 CET 2006


Maybe I am doing something wrong but when combining U95A and U95Av2 files
using function combineAffyBatch() 
it seems that 197 probe sets from U95A and from U95Av2 are deleted although
there are only 51 different probe sets. 
Furthermore, it seems that probe sets which are available on both U95A and
U95Av2 are deleted and not the different 
probe sets. 

Here is my code, which supports my statements: 
# call libraries 
>library(matchprobes) 
>library(affy) 
>library(hgu95acdf) 
>library(hgu95aprobe) 
>library(hgu95av2cdf) 
>library(hgu95av2probe) 

# load CEL files from folders HG-U95A and HG-U95Av2 
>x95A <- ReadAffy(celfile.path="HG-U95A") 
>x95Av2 <- ReadAffy(celfile.path="HG-U95Av2") 

# combine data 
>res <- combineAffyBatch(list(x95A,x95Av2),
c("hgu95aprobe","hgu95av2probe"), newcdf="comb95") 
>comb95 <- res$cdf 

# rma normalization for combined data 
>dat.rma <- rma(res$dat) 

# rma normalization for U95A and U95Av2 separately 
>x95A.rma <- rma(x95A) 
>x95Av2.rma <- rma(x95Av2) 

# store log2 of expression data as matrix, sort for AffyIDs, and export 
>dat.r2 <- exprs(dat.rma) 
>d <- dimnames(dat.r2)[[1]] 
>dat.r2 <- dat.r2[order(d),] 
>write.table(dat.r2,file="dat_rma.txt",sep="\t",quote=F,col.names=NA) 

>x95A.r2 <- exprs(x95A.rma) 
>d <- dimnames(x95A.r2)[[1]] 
>x95A.r2 <- x95A.r2[order(d),] 
>write.table(x95A.r2,file="x95A_rma.txt",sep="\t",quote=F,col.names=NA) 

>x95Av2.r2 <- exprs(x95Av2.rma) 
>d <- dimnames(x95Av2.r2)[[1]] 
>x95Av2.r2 <- x95Av2.r2[order(d),] 
>write.table(x95Av2.r2,file="x95Av2_rma.txt",sep="\t",quote=F,col.names=NA) 

The following 25 probe sets are on U95Av2 but NOT on U95A: 
>unlist(setdiff(dimnames(x95Av2.r2)[[1]],dimnames(x95A.r2)[[1]])) 
 [1] "160020_at"   "160021_r_at" "160022_at"   "160023_at"   "160024_at"  
 [6] "160025_at"   "160026_at"   "160027_s_at" "160028_s_at" "160029_at"  
[11] "160030_at"   "160031_at"   "160032_at"   "160033_s_at" "160034_s_at" 
[16] "160035_at"   "160036_at"   "160037_at"   "160038_s_at" "160039_at"  
[21] "160040_at"   "160041_at"   "160042_s_at" "160043_at"   "160044_g_at" 

The following 26 probe sets are on U95A but NOT on U95Av2: 
>unlist(setdiff(dimnames(x95A.r2)[[1]],dimnames(x95Av2.r2)[[1]])) 
 [1] "119_at"    "1215_at"   "1216_at"   "124_i_at"  "125_r_at"  "127_at"   
 [7] "1301_s_at" "1302_s_at" "132_at"    "1429_at"   "1502_s_at" "1829_at"  
[13] "1864_at"   "1889_s_at" "1982_s_at" "36969_at"  "383_at"    "397_at"   
[19] "412_s_at"  "426_at"    "439_at"    "787_at"    "788_s_at"  "972_s_at" 
[25] "985_s_at"  "997_at" 

This is corrrect, since I checked this also manually! 

Thus alltogether 51 probe sets which are not on both chips, should be
eliminated by function combineAffyBatch(). 
Is this correct? 

However, these are the differences between the combined table and U95Av2: 
>unlist(setdiff(dimnames(x95Av2.r2)[[1]],dimnames(dat.r2)[[1]])) 
  [1] "1142_at"     "1143_s_at"   "1144_at"     "1145_g_at"   "1146_at"    
  [6] "1147_at"     "1148_s_at"   "1149_at"     "1150_at"     "1151_at"    
 [11] "1162_g_at"   "1163_at"     "1164_at"     "1170_at"     "1171_s_at"  
 [16] "1172_at"     "1173_g_at"   "1174_at"     "1175_s_at"   "1176_at"    
 [21] "1177_at"     "1178_at"     "1179_at"     "1180_g_at"   "1181_at"    
 [26] "1278_at"     "1279_s_at"   "1280_i_at"   "1281_f_at"   "1282_s_at"  
 [31] "1283_at"     "1284_at"     "1285_at"     "1286_s_at"   "1428_at"    
 [36] "1513_at"     "1514_g_at"   "1515_at"     "1516_g_at"   "160020_at"  
 [41] "160021_r_at" "160022_at"   "160023_at"   "160024_at"   "160025_at"  
 [46] "160026_at"   "160027_s_at" "160028_s_at" "160029_at"   "160030_at"  
 [51] "160031_at"   "160032_at"   "160033_s_at" "160034_s_at" "160035_at"  
 [56] "160036_at"   "160037_at"   "160038_s_at" "160039_at"   "160040_at"  
 [61] "160041_at"   "160042_s_at" "160043_at"   "160044_g_at" "1608_at"    
 [66] "1609_g_at"   "1623_s_at"   "1624_at"     "1625_at"     "1626_at"    
 [71] "1627_at"     "1628_at"     "1629_s_at"   "1630_s_at"   "1631_at"    
 [76] "1632_at"     "1661_i_at"   "1662_r_at"   "1663_at"     "1664_at"    
 [81] "1665_s_at"   "1725_s_at"   "1726_at"     "1743_s_at"   "1744_at"    
 [86] "1745_at"     "1746_s_at"   "1790_s_at"   "1812_s_at"   "1813_at"    
 [91] "1818_at"     "1819_at"     "1820_g_at"   "1821_at"     "1822_at"    
 [96] "1823_g_at"   "1837_at"     "1838_g_at"   "1839_at"     "1840_g_at"  
[101] "1841_s_at"   "1842_at"     "1843_at"     "1876_at"     "1877_g_at"  
[106] "1881_at"     "1882_g_at"   "1883_s_at"   "1892_s_at"   "1893_s_at"  
[111] "1894_f_at"   "1903_at"     "1905_s_at"   "1906_at"     "1921_at"    
[116] "1922_g_at"   "1936_s_at"   "1937_at"     "292_s_at"    "293_at"     
[121] "294_s_at"    "295_s_at"    "296_at"      "297_g_at"    "298_at"     
[126] "299_i_at"    "300_f_at"    "301_at"      "302_at"      "303_at"     
[131] "304_at"      "305_g_at"    "311_s_at"    "312_s_at"    "313_at"     
[136] "323_at"      "324_f_at"    "325_s_at"    "326_i_at"    "327_f_at"   
[141] "328_at"      "329_s_at"    "330_s_at"    "331_at"      "332_at"     
[146] "333_s_at"    "334_s_at"    "335_r_at"    "693_g_at"    "694_at"     
[151] "695_at"      "696_at"      "697_f_at"    "698_f_at"    "699_s_at"   
[156] "700_s_at"    "701_s_at"    "702_f_at"    "703_at"      "704_at"     
[161] "705_at"      "706_at"      "707_s_at"    "711_at"      "712_s_at"   
[166] "713_at"      "714_at"      "723_s_at"    "724_at"      "725_i_at"   
[171] "726_f_at"    "727_at"      "728_at"      "729_i_at"    "730_r_at"   
[176] "731_f_at"    "732_f_at"    "733_at"      "734_at"      "735_s_at"   
[181] "918_at"      "919_at"      "920_at"      "921_s_at"    "936_s_at"   
[186] "937_at"      "938_at"      "939_at"      "952_at"      "953_g_at"   
[191] "954_s_at"    "955_at"      "956_at"      "957_at"      "958_s_at"   
[196] "959_at"      "960_g_at" 
  
and these are the differences between the combined table and U95A: 
>unlist(setdiff(dimnames(x95A.r2)[[1]],dimnames(dat.r2)[[1]])) 
  [1] "1142_at"   "1143_s_at" "1144_at"   "1145_g_at" "1146_at"   "1147_at"

  [7] "1148_s_at" "1149_at"   "1150_at"   "1151_at"   "1162_g_at" "1163_at"

 [13] "1164_at"   "1170_at"   "1171_s_at" "1172_at"   "1173_g_at" "1174_at"

 [19] "1175_s_at" "1176_at"   "1177_at"   "1178_at"   "1179_at"
"1180_g_at" 
 [25] "1181_at"   "119_at"    "1215_at"   "1216_at"   "124_i_at"  "125_r_at"

 [31] "1278_at"   "1279_s_at" "127_at"    "1280_i_at" "1281_f_at"
"1282_s_at" 
 [37] "1283_at"   "1284_at"   "1285_at"   "1286_s_at" "1301_s_at"
"1302_s_at" 
 [43] "132_at"    "1428_at"   "1429_at"   "1502_s_at" "1513_at"
"1514_g_at" 
 [49] "1515_at"   "1516_g_at" "1608_at"   "1609_g_at" "1623_s_at" "1624_at"

 [55] "1625_at"   "1626_at"   "1627_at"   "1628_at"   "1629_s_at"
"1630_s_at" 
 [61] "1631_at"   "1632_at"   "1661_i_at" "1662_r_at" "1663_at"   "1664_at"

 [67] "1665_s_at" "1725_s_at" "1726_at"   "1743_s_at" "1744_at"   "1745_at"

 [73] "1746_s_at" "1790_s_at" "1812_s_at" "1813_at"   "1818_at"   "1819_at"

 [79] "1820_g_at" "1821_at"   "1822_at"   "1823_g_at" "1829_at"   "1837_at"

 [85] "1838_g_at" "1839_at"   "1840_g_at" "1841_s_at" "1842_at"   "1843_at"

 [91] "1864_at"   "1876_at"   "1877_g_at" "1881_at"   "1882_g_at"
"1883_s_at" 
 [97] "1889_s_at" "1892_s_at" "1893_s_at" "1894_f_at" "1903_at"
"1905_s_at" 
[103] "1906_at"   "1921_at"   "1922_g_at" "1936_s_at" "1937_at"
"1982_s_at" 
[109] "292_s_at"  "293_at"    "294_s_at"  "295_s_at"  "296_at"    "297_g_at"

[115] "298_at"    "299_i_at"  "300_f_at"  "301_at"    "302_at"    "303_at"

[121] "304_at"    "305_g_at"  "311_s_at"  "312_s_at"  "313_at"    "323_at"

[127] "324_f_at"  "325_s_at"  "326_i_at"  "327_f_at"  "328_at"    "329_s_at"

[133] "330_s_at"  "331_at"    "332_at"    "333_s_at"  "334_s_at"  "335_r_at"

[139] "36969_at"  "383_at"    "397_at"    "426_at"    "439_at"    "693_g_at"

[145] "694_at"    "695_at"    "696_at"    "697_f_at"  "698_f_at"  "699_s_at"

[151] "700_s_at"  "701_s_at"  "702_f_at"  "703_at"    "704_at"    "705_at"

[157] "706_at"    "707_s_at"  "711_at"    "712_s_at"  "713_at"    "714_at"

[163] "723_s_at"  "724_at"    "725_i_at"  "726_f_at"  "727_at"    "728_at"

[169] "729_i_at"  "730_r_at"  "731_f_at"  "732_f_at"  "733_at"    "734_at"

[175] "735_s_at"  "787_at"    "788_s_at"  "918_at"    "919_at"    "920_at"

[181] "921_s_at"  "936_s_at"  "937_at"    "938_at"    "939_at"    "952_at"

[187] "953_g_at"  "954_s_at"  "955_at"    "956_at"    "957_at"    "958_s_at"

[193] "959_at"    "960_g_at"  "972_s_at"  "985_s_at"  "997_at"   

Can anybody tell me if there is a mistake in my code or why function
combineAffyBatch() deletes the wrong probe sets? 

Best regards 
Christian 

============================================== 
Christian Stratowa, PhD 
Boehringer Ingelheim Austria 
Dept NCE Lead Discovery - Bioinformatics 
Dr. Boehringergasse 5-11 
A-1121 Vienna, Austria 
Tel.: ++43-1-80105-2470 
Fax: ++43-1-80105-2782 
email: christian.stratowa at vie.boehringer-ingelheim.com



More information about the Bioconductor mailing list