[BioC] normalize.quantil, different results, affy_1.14.1 and affy_1.16.0

Ben Bolstad bmb at bmbolstad.com
Fri Feb 15 16:31:05 CET 2008


A priori I don't expect there to be any major differences. At one point
I made a minor modification to how it dealt with ties. I also changed
the C code that is used when you access quantile normalization code
using normalize.quantiles() which is what
normalize.AffyBatch.quantiles() is calling. This second change was made
so that the function would handle NA values.

rma() etc still call the older standard code, though this has also been
adjusted slightly for the ties behavior meaning that there may be small
differences in RMA expression values etc.

The ties behavior affects situations where the number of values tied is
even (odd numbers of ties were and remain handled in the same manner
that they always have been). In the raw CEL file values you will often
find large numbers of ties, thus this is what I suspect is driving your
result.

Best,

Ben

Short examples 

####(using R-2.7 devel etc)
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
     [,1] [,2]
[1,] 1.00  1.0
[2,] 2.00  2.0
[3,] 3.75  3.0
[4,] 3.75  3.5
[5,] 3.75  4.0
[6,] 3.75  4.5
[7,] 7.00  7.0
[8,] 8.00  8.0
> 
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
     [,1] [,2]
[1,]  1.0  1.0
[2,]  2.0  2.0
[3,]  3.5  3.0
[4,]  3.5  3.5
[5,]  3.5  4.0
[6,]  6.0  6.0
[7,]  7.0  7.0
[8,]  8.0  8.0
> 


####(using R-2.5.0  etc)
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
     [,1] [,2]
[1,]  1.0  1.0
[2,]  2.0  2.0
[3,]  3.5  3.0
[4,]  3.5  3.5
[5,]  3.5  4.0
[6,]  3.5  4.5
[7,]  7.0  7.0
[8,]  8.0  8.0
> 
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
     [,1] [,2]
[1,]  1.0  1.0
[2,]  2.0  2.0
[3,]  3.5  3.0
[4,]  3.5  3.5
[5,]  3.5  4.0
[6,]  6.0  6.0
[7,]  7.0  7.0
[8,]  8.0  8.0



Everything below can safely be ignored.

Code I used for testing R-2.5.X series set of code:

library(affy)
set.seed(123456789)
Data <- matrix(rnorm(10^7),ncol=10)
Data.norm <- normalize.quantiles(Data)
save(Data,Data.norm,file="old.Rda")
sessionInfo()
library(affydata)
data(Dilution)
Dilution.norm <- normalize.AffyBatch.quantiles(Dilution, type="pmonly")
Dilution.old <- Dilution
Dilution.pm.old <- pm(Dilution.old)
Dilution.pm.norm.old <- normalize.quantiles(Dilution.pm.old)
save(Dilution.norm, Dilution.old, Dilution.pm.old, Dilution.pm.norm.old,
file="Dil-old.Rda")

Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
normalize.quantiles(Toy.matrix)

Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
normalize.quantiles(Toy.matrix2)
 
eset.old <- rma(Dilution)
exprvals.old <- exprs(eset.old)
save(exprvals.old,file="ev_old.Rda")


Output from this code:
> library(affy)
Loading required package: Biobase
Loading required package: tools

Welcome to Bioconductor

  Vignettes contain introductory material. To view, type
  'openVignette()'. To cite Bioconductor, see
  'citation("Biobase")' and for packages 'citation(pkgname)'.

Loading required package: affyio
> set.seed(123456789)
> Data <- matrix(rnorm(10^7),ncol=10)
> Data.norm <- normalize.quantiles(Data)
> save(Data,Data.norm,file="old.Rda")
> sessionInfo()
R version 2.5.0 (2007-04-23) 
x86_64-unknown-linux-gnu 

locale:
LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8;LC_IDENTIFICATION=C

attached base packages:
[1] "tools"     "stats"     "graphics"  "grDevices" "utils"
"datasets" 
[7] "methods"   "base"     

other attached packages:
    affy   affyio  Biobase 
"1.14.1"  "1.4.1" "1.14.0" 
> library(affydata)
> data(Dilution)
> Dilution.norm <- normalize.AffyBatch.quantiles(Dilution,
type="pmonly")
> Dilution.old <- Dilution
> Dilution.pm.old <- pm(Dilution.old)
> Dilution.pm.norm.old <- normalize.quantiles(Dilution.pm.old)
> save(Dilution.norm, Dilution.old, Dilution.pm.old,
Dilution.pm.norm.old, file="Dil-old.Rda")
> 
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
     [,1] [,2]
[1,]  1.0  1.0
[2,]  2.0  2.0
[3,]  3.5  3.0
[4,]  3.5  3.5
[5,]  3.5  4.0
[6,]  3.5  4.5
[7,]  7.0  7.0
[8,]  8.0  8.0
> 
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
     [,1] [,2]
[1,]  1.0  1.0
[2,]  2.0  2.0
[3,]  3.5  3.0
[4,]  3.5  3.5
[5,]  3.5  4.0
[6,]  6.0  6.0
[7,]  7.0  7.0
[8,]  8.0  8.0
> 
> eset.old <- rma(Dilution)
Background correcting
Normalizing
Calculating Expression
> exprvals.old <- exprs(eset.old)
> save(exprvals.old,file="ev_old.Rda")


Code I used for testing R-2.7 series set of code:

library(affy)
set.seed(123456789)
Data.new <- matrix(rnorm(10^7),ncol=10)
Data.norm.new <- normalize.quantiles(Data.new)
load("old.Rda")
sessionInfo()
all.equal(Data.new,Data)
all.equal(Data.norm.new,Data.norm)
library(affydata)
data(Dilution)

Dilution.norm.new <- normalize.AffyBatch.quantiles(Dilution,
type="pmonly")
Dilution.new <- Dilution
Dilution.pm.new <- pm(Dilution.new)
Dilution.pm.norm.new <- normalize.quantiles(Dilution.pm.new)



load("Dil-old.Rda")  
all.equal(exprs(Dilution.norm),exprs(Dilution.norm.new))
sum(exprs(Dilution.norm) != exprs(Dilution.norm.new))
all.equal(Dilution.pm.old,Dilution.pm.new)
all.equal(Dilution.pm.norm.old,Dilution.pm.norm.new)

length(Dilution.pm.new[,1])
length(unique(Dilution.pm.new[,1]))

### looking at the relative differences
plot(1/2*log2(Dilution.pm.norm.new*Dilution.pm.norm.old),abs(log2(Dilution.pm.norm.new/Dilution.pm.norm.old)))

### visually seeing just how many ties there really are
par(mfrow=c(2,2))
barplot(table(Dilution.pm.new[,1]))
barplot(table(Dilution.pm.new[,2]))
barplot(table(Dilution.pm.new[,3]))
barplot(table(Dilution.pm.new[,4]))


Toy.matrix <-	cbind(c(1,2,3,3,3,3,7,8),1:8)
normalize.quantiles(Toy.matrix)

Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
normalize.quantiles(Toy.matrix2)

eset.new <- rma(Dilution)
exprvals.new <- exprs(eset.new)
load("ev_old.Rda")
max(exprvals.old - exprvals.new)



### output
> library(affy)
Loading required package: Biobase
Loading required package: tools

Welcome to Bioconductor

  Vignettes contain introductory material. To view, type
  'openVignette()'. To cite Bioconductor, see
  'citation("Biobase")' and for packages 'citation(pkgname)'.

Loading required package: affyio
Loading required package: preprocessCore
> set.seed(123456789)
> Data.new <- matrix(rnorm(10^7),ncol=10)
> Data.norm.new <- normalize.quantiles(Data.new)
> load("old.Rda")
> sessionInfo()
R version 2.7.0 Under development (unstable) (2008-02-02 r44303) 
x86_64-unknown-linux-gnu 

locale:
LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8;LC_IDENTIFICATION=C

attached base packages:
[1] tools     stats     graphics  grDevices utils     datasets
methods  
[8] base     

other attached packages:
[1] affy_1.17.3          preprocessCore_1.1.5 affyio_1.7.11       
[4] Biobase_1.17.9      
> all.equal(Data.new,Data)
[1] TRUE
> all.equal(Data.norm.new,Data.norm)
[1] TRUE
> library(affydata)
> data(Dilution)
> 
> Dilution.norm.new <- normalize.AffyBatch.quantiles(Dilution,
type="pmonly")
> Dilution.new <- Dilution
> Dilution.pm.new <- pm(Dilution.new)
> Dilution.pm.norm.new <- normalize.quantiles(Dilution.pm.new)
> 
> 
> 
> load("Dil-old.Rda")  
> all.equal(exprs(Dilution.norm),exprs(Dilution.norm.new))
[1] "Mean relative  difference: 7.334068e-05"
> sum(exprs(Dilution.norm) != exprs(Dilution.norm.new))
[1] 36770
> all.equal(Dilution.pm.old,Dilution.pm.new)
[1] TRUE
> all.equal(Dilution.pm.norm.old,Dilution.pm.norm.new)
[1] "Mean relative  difference: 7.334068e-05"
> 
> length(Dilution.pm.new[,1])
[1] 201800
> length(unique(Dilution.pm.new[,1]))
[1] 12578
> 
> ### looking at the relative differences
>
plot(1/2*log2(Dilution.pm.norm.new*Dilution.pm.norm.old),abs(log2(Dilution.pm.norm.new/Dilution.pm.norm.old)))
> 
> ### visually seeing just how many ties there really are
> par(mfrow=c(2,2))
> barplot(table(Dilution.pm.new[,1]))
> barplot(table(Dilution.pm.new[,2]))
> barplot(table(Dilution.pm.new[,3]))
> barplot(table(Dilution.pm.new[,4]))
> 
> 
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
     [,1] [,2]
[1,] 1.00  1.0
[2,] 2.00  2.0
[3,] 3.75  3.0
[4,] 3.75  3.5
[5,] 3.75  4.0
[6,] 3.75  4.5
[7,] 7.00  7.0
[8,] 8.00  8.0
> 
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
     [,1] [,2]
[1,]  1.0  1.0
[2,]  2.0  2.0
[3,]  3.5  3.0
[4,]  3.5  3.5
[5,]  3.5  4.0
[6,]  6.0  6.0
[7,]  7.0  7.0
[8,]  8.0  8.0
> 
> eset.new <- rma(Dilution)
Background correcting
Normalizing
Calculating Expression
> exprvals.new <- exprs(eset.new)
> load("ev_old.Rda")
> max(exprvals.old - exprvals.new)
[1] 0.01004166






On Fri, 2008-02-15 at 12:11 +0100, Markus Schmidberger wrote:
> Hello,
> 
> using "normalize.AffyBatch.quantiles" on R-2.5.0 with affy_1.14.1 and on 
> R-2.6.0 with affy_1.16.0 generates different results! Is this correct?
> I think there were only some changes in the package structure 
> (normalize.quantile moved to preprocessCore). So there should be no 
> differences in the results.
> 
> For testing I used the code below.
> 
> Best regards
> Markus Schmidberger
> 
> 
> #####
> library(affy)
> sessionInfo()
> 
>     affy   affyio  Biobase
> "1.14.1"  "1.4.0" "1.14.0"
> 
> pathCELFiles <- "/home/schmidb/tmp/E-TABM-185"
> celFile <- list.celfiles(path=pathCELFiles,full.names=TRUE)[1:5];
> affyBatch <- ReadAffy(filenames=celFile);
> affyBatchALT <- normalize.AffyBatch.quantiles(affyBatch, type="pmonly")
> 
> save(affyBatchALT, file="alt")
> 
> #####
> library(affy)
> sessionInfo()
> 
> [1] affy_1.16.0          preprocessCore_1.0.0 affyio_1.6.1
> [4] Biobase_1.16.1
> 
> pathCELFiles <- "/home/schmidb/tmp/E-TABM-185"
> celFile <- list.celfiles(path=pathCELFiles,full.names=TRUE)[1:5];
> affyBatch <- ReadAffy(filenames=celFile);
> affyBatchNEU <- normalize.AffyBatch.quantiles(affyBatch, type="pmonly")
> 
> save(affyBatchNEU, file="neu")
> 
> #####
> load("alt")
> load("neu")
> 
> all.equal(exprs(affyBatchALT),exprs(affyBatchNEU))
> "Mean relative  difference: 0.0001107451"
> 
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at stat.math.ethz.ch
> https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives: http://news.gmane.org/gmane.science.biology.informatics.conductor



More information about the Bioconductor mailing list