[BioC] affymetrix in maanova, code
Morten Mattingsdal
morten.mattingsdal at student.uib.no
Tue Jan 3 09:27:52 CET 2006
Hello everyone,
Im just fowarding a mail I got from the MAANOVA maling list before
christmas. I hope Jason and Yong dont mind. Contains code for maanova
anlysis of affymetrix chips.. thought someone here may be interested..
best
morten
####################################################################
# affyprocessing.R - v1.2 7/14/2004
#
# Template for pre-processing of Affymetrix CEL data using
# R/affy (Bioconductor) and formatting for R/maanova analysis
# [ref: http://www.bioconductor.org]
#
# Jason Affourtit (jason.affourtit at jax.org)
# originally developed by Yong Woo (yhw at jax.org)
#
####################################################################
#read in R/affy (rma) library
library(affy)
#read in all CEL files in R working directory to create AffyBatch object
CELData = ReadAffy()
#process CEL data using rma (default options)
rma.CELData = rma(CELData)
#export rma data into dataframe object
rma.expr = exprs(rma.CELData)
#transform to put row name (Probe ID) into the first column
rma.expr.df = data.frame(ProbeID=row.names(rma.expr),rma.expr)
#save as a tab-delimited text file with no row names
write.table(rma.expr.df,"rma.expr.dat",sep="\t",row=F,quote=F)
#check order and names of samples to set up design matrix
pData(CELData)
#create design matrix for R/maanova analysis
design.matrix=data.frame(Array=row.names(pData(rma.CELData)),Strain=c("wt","wt","wt","mut","mut","mut"),Sample=c(1,2,3,4,5,6),Dye=c(1,1,1,1,1,1))
#export design matrix to tab-delimited text file
write.table(design.matrix,"design.dat",sep="\t",row=F,quote=F)
#save R environment file
save.image("ProjectName.Rdata")
#############################################################
# affymaanova.R - v1.3 11/01/2004
#
# Template for R/maanova analysis of data pre-processed
# using rma (affyprocessing.R)
# [ref: #http://www.jax.org/staff/churchill/labsite/]
#
# Jason Affourtit (jason.affourtit at jax.org)
# originally developed by Yong Woo (yhw at jax.org)
#
#############################################################
#load the R/maanova library
library(maanova)
#read in the rma-processed experimental data and design file
raw.data =
read.madata("rma.expr.dat",designfile="design.dat",cloneid=1,pmt=2,spot=F)
#convert the raw data from all arrays into an madata object
data = createData(raw.data,n.rep=1,log.trans=F)
#make the model based on the design
model.full.fix = makeModel(data=data,formula=~Strain)
#fit fixed model ANOVA
anova.full.fix = fitmaanova(data,model.full.fix)
#verify correct arrays are being used in analysis prior to ftest
model.full.fix$design
#fit permutation ftest and plot histogram of Fs p-values
ftest.full.fix =
matest(data,model.full.fix,term="Strain",n.perm=500,shuffle.method="resid",
pval.pool=TRUE)
hist(ftest.full.fix$Fs$Pvalperm, main ="Fs Pvalue histogram -
ProjectName", breaks=100)
#output R workspace image in a file following f-test
save.image("ProjectName.Rdata")
#determine ANOVA object column order to set up output below
anova.full.fix$Strain.level
#assign column names and calculate fold change and ratios
#FoldChange and ratios default to [tester - control] or fold change of
tester
#be sure that you have these set correctly depending upon column order
determined above
#relative to control
tester= 2 #column 2= mutant
control=1 #column 1= wild type
RelativeFoldchange=sign(anova.full.fix$Strain[,tester] -
anova.full.fix$Strain[,control])*2^(abs((anova.full.fix$Strain[,tester]-anova.full.fix$Strain[,control])))
RelativeFoldchangeName=paste("relativefoldchange",anova.full.fix$Strain.level[tester],"relative
to",anova.full.fix$Strain.level[control])
#load qvalue library and generate qvalues
#[ref:http://faculty.washington.edu/~jstorey/qvalue/]
library(qvalue)
#calculate q-value based upon permuted p-value and summarize results
ftest.full.fix.Fs.qobj=qvalue(ftest.full.fix$Fs$Pvalperm)
qsummary(ftest.full.fix.Fs.qobj)
#test if the order of p-value is scrambled - should yield TRUE for all rows
table(ftest.full.fix.Fs.qobj$pval==ftest.full.fix$Fs$Pvalperm)
#concatenate them together
result.df=data.frame(data$cloneid, RelativeFoldchange,
ftest.full.fix$Fs$Pvalperm, ftest.full.fix.Fs.qobj$qval)
#assign names to each column
names(result.df)=c("cloneid",RelativeFoldchangeName, "FsPvalue",
"Qvalue.FDR")
#create subset where qvalue <0.05
index.Fs=ftest.full.fix.Fs.qobj$qval<0.05
#write two files - all genes and top hits
write.table(result.df,"all.genes.result.dat",sep="\t",row=F, quote=F)
write.table(result.df[index.Fs,],"top.hits.result.dat",sep="\t",row=F,
quote=F)
#generate volcano plots
jpeg("volcanoPlot.jpeg")
plot(anova.full.fix$Strain[,tester]-anova.full.fix$Strain[,control],
-log10(ftest.full.fix$F1$Ptab),main="volcano plot - ProjectName
(q<0.05)", xlab=paste("log((",anova.full.fix$Strain.level[tester],") -
", anova.full.fix$Strain.level[control],")",sep=""),ylab="-log10(F1
Tabulated P-Value)",cex=.3,pch=3,col="blue")
points(anova.full.fix$Strain[index.Fs,tester]-anova.full.fix$Strain[index.Fs,control],
-log10(ftest.full.fix$F1$Ptab[index.Fs]),pch=1,col="red")
dev.off()
#output R workspace image in a file
save.image("ProjectName.Rdata")
More information about the Bioconductor
mailing list