[BioC] affymetrix in maanova, code

Tue Jan 3 09:27:52 CET 2006

Hello everyone,
Im just fowarding a mail I got from the MAANOVA maling list before 
christmas. I hope Jason and Yong dont mind. Contains code for maanova 
anlysis of affymetrix chips.. thought someone here may be interested..
best
morten

####################################################################
# affyprocessing.R - v1.2 7/14/2004                               
#                                                                 
# Template for pre-processing of Affymetrix CEL data using        
# R/affy (Bioconductor) and formatting for R/maanova analysis     
# [ref: http://www.bioconductor.org]                              
#                                                                 
# Jason Affourtit (jason.affourtit at jax.org)               
# originally developed by Yong Woo (yhw at jax.org)          
#                                                                 
####################################################################

#read in R/affy (rma) library
library(affy)

#read in all CEL files in R working directory to create AffyBatch object
CELData = ReadAffy()

#process CEL data using rma (default options)
rma.CELData = rma(CELData)

#export rma data into dataframe object
rma.expr = exprs(rma.CELData)

#transform to put row name (Probe ID) into the first column
rma.expr.df = data.frame(ProbeID=row.names(rma.expr),rma.expr)

#save as a tab-delimited text file with no row names
write.table(rma.expr.df,"rma.expr.dat",sep="\t",row=F,quote=F)

#check order and names of samples to set up design matrix
pData(CELData)

#create design matrix for R/maanova analysis
design.matrix=data.frame(Array=row.names(pData(rma.CELData)),Strain=c("wt","wt","wt","mut","mut","mut"),Sample=c(1,2,3,4,5,6),Dye=c(1,1,1,1,1,1))

#export design matrix to tab-delimited text file
write.table(design.matrix,"design.dat",sep="\t",row=F,quote=F)

#save R environment file
save.image("ProjectName.Rdata")

#############################################################
# affymaanova.R - v1.3 11/01/2004                          
#                                                          
# Template for R/maanova analysis of data pre-processed    
# using rma (affyprocessing.R)                             
# [ref: #http://www.jax.org/staff/churchill/labsite/]      
#                                                          
# Jason Affourtit (jason.affourtit at jax.org)             
# originally developed by Yong Woo (yhw at jax.org)       
#                                                          
#############################################################

#load the R/maanova library
library(maanova)

#read in the rma-processed experimental data and design file
raw.data = 
read.madata("rma.expr.dat",designfile="design.dat",cloneid=1,pmt=2,spot=F)

#convert the raw data from all arrays into an madata object
data = createData(raw.data,n.rep=1,log.trans=F)

#make the model based on the design
model.full.fix = makeModel(data=data,formula=~Strain)

#fit fixed model ANOVA
anova.full.fix = fitmaanova(data,model.full.fix)

#verify correct arrays are being used in analysis prior to ftest
model.full.fix$design

#fit permutation ftest and plot histogram of Fs p-values
ftest.full.fix = 
matest(data,model.full.fix,term="Strain",n.perm=500,shuffle.method="resid", 
pval.pool=TRUE)
hist(ftest.full.fix$Fs$Pvalperm, main ="Fs Pvalue histogram - 
ProjectName", breaks=100)

#output R workspace image in a file following f-test
save.image("ProjectName.Rdata")

#determine ANOVA object column order to set up output below
anova.full.fix$Strain.level

#assign column names and calculate fold change and ratios
#FoldChange and ratios default to [tester - control] or fold change of 
tester
#be sure that you have these set correctly depending upon column order 
determined above
#relative to control

tester= 2 #column 2= mutant
control=1 #column 1= wild type

RelativeFoldchange=sign(anova.full.fix$Strain[,tester] - 
anova.full.fix$Strain[,control])*2^(abs((anova.full.fix$Strain[,tester]-anova.full.fix$Strain[,control])))
RelativeFoldchangeName=paste("relativefoldchange",anova.full.fix$Strain.level[tester],"relative 
to",anova.full.fix$Strain.level[control])

#load qvalue library and generate qvalues
#[ref:http://faculty.washington.edu/~jstorey/qvalue/]
library(qvalue)

#calculate q-value based upon permuted p-value and summarize results
ftest.full.fix.Fs.qobj=qvalue(ftest.full.fix$Fs$Pvalperm)
qsummary(ftest.full.fix.Fs.qobj)

#test if the order of p-value is scrambled - should yield TRUE for all rows
table(ftest.full.fix.Fs.qobj$pval==ftest.full.fix$Fs$Pvalperm)

#concatenate them together
result.df=data.frame(data$cloneid, RelativeFoldchange, 
ftest.full.fix$Fs$Pvalperm, ftest.full.fix.Fs.qobj$qval)

#assign names to each column
names(result.df)=c("cloneid",RelativeFoldchangeName, "FsPvalue", 
"Qvalue.FDR")

#create subset where qvalue <0.05
index.Fs=ftest.full.fix.Fs.qobj$qval<0.05

#write two files - all genes and top hits
write.table(result.df,"all.genes.result.dat",sep="\t",row=F, quote=F)
write.table(result.df[index.Fs,],"top.hits.result.dat",sep="\t",row=F, 
quote=F)

#generate volcano plots
jpeg("volcanoPlot.jpeg")
plot(anova.full.fix$Strain[,tester]-anova.full.fix$Strain[,control], 
-log10(ftest.full.fix$F1$Ptab),main="volcano plot - ProjectName 
(q<0.05)", xlab=paste("log((",anova.full.fix$Strain.level[tester],") - 
", anova.full.fix$Strain.level[control],")",sep=""),ylab="-log10(F1 
Tabulated P-Value)",cex=.3,pch=3,col="blue")

points(anova.full.fix$Strain[index.Fs,tester]-anova.full.fix$Strain[index.Fs,control], 
-log10(ftest.full.fix$F1$Ptab[index.Fs]),pch=1,col="red")
dev.off()

#output R workspace image in a file
save.image("ProjectName.Rdata")