[BioC] DEXseq: Making exonCountSet object

Sun Feb 12 00:52:29 CET 2012

Dear Simon,

Thanks for your reply, even on a Saturday. =)

You were right about the incorrect file path- that was problem. (To anybody else out there struggling with general issues getting their files into R, my code is below. Hope I save you some time).

Another issue I was hoping you might help me with: I have no differentially-expressed exons (p-value = NA for all exons) at the end of the analysis!! I followed your other recommendations, and have spent some time trying to figure this out, but am still unsure. The fact that there are no integers in the p-value column has me worried.

Maybe the problem is that the column names in counts(ecs) do not word-for-word match up with my rownames in the samples dataframe. I didn't know how to change the colnames in counts(ecs), so I left them as they were, noting that the sample identity seemed to have been preserved.

Any advice on how to change the count(ecs) headers, or other advice about my non-existent p-values would be appreciated! =)

Many thanks,
Elena

P.S. The graphics w/ gene models in DEXseq are very useful and convenient!

______________________________

library(DEXSeq)
options(digits=3)
setwd("C:\\Rdata\\DEXseq")
library(DEXSeq)
rm(list=ls())

# this GFF file is an output from Simon's script dexseq_prepare_annotations.py
annotationfile = file.path("/Rdata/DEXseq/realFioles/DEXSeq_annotations.gff")
annotationfile
[1] "/Rdata/DEXseq/realFioles/DEXSeq_annotations.gff"

>  samples = data.frame(
+   condition = c("drug","drug","vehicle","vehicle"),
+   replicate = c(1:2,1:2),
+   row.names = c("drug1","drug2","veh1","veh2"),
+   stringsAsFactors = TRUE,
+   check.names = FALSE
+ )

>  samples
       condition replicate
drug1      drug         1
drug2      drug         2
veh1    vehicle         1
veh2    vehicle         2
>  fullFilenames<- list.files("C:/Rdata/DEXseq/realFioles/",full.names=TRUE,pattern="counts.txt")
>  fullFilenames
[1] "C:/Rdata/DEXseq/realFioles/drug1_counts.txt" "C:/Rdata/DEXseq/realFioles/drug2_counts.txt" "C:/Rdata/DEXseq/realFioles/veh1_counts.txt"
[4] "C:/Rdata/DEXseq/realFioles/veh2_counts.txt"
>  ecs<- read.HTSeqCounts(countfiles = fullFilenames,design = samples,flattenedfile = annotationfile)
>  head(counts(ecs))
            C:/Rdata/DEXseq/realFioles/drug1_counts.txt C:/Rdata/DEXseq/realFioles/drug2_counts.txt C:/Rdata/DEXseq/realFioles/veh1_counts.txt
2L52.1:001                                           0                                           2                                          2
2L52.1:002                                           4                                          12                                         13
2L52.1:003                                           7                                           8                                          7
2L52.1:004                                           6                                           4                                          4
2L52.1:005                                           9                                           6                                         16
2L52.1:006                                           6                                           4                                         13
            C:/Rdata/DEXseq/realFioles/veh2_counts.txt
2L52.1:001                                          4
2L52.1:002                                         20
2L52.1:003                                          8
2L52.1:004                                          7
2L52.1:005                                         14
2L52.1:006                                         12
>  head(fData(ecs))
       geneID exonID testable dispBeforeSharing dispFitted dispersion pvalue padjust   chr start  end strand transcripts
2L52.1:001 2L52.1   E001    FALSE                NA     0.5145     0.5145     NA      NA chrII  1867 1911      +      2L52.1
2L52.1:002 2L52.1   E002     TRUE          1.39e-01     0.0899     0.1391     NA      NA chrII  2506 2694      +      2L52.1
2L52.1:003 2L52.1   E003     TRUE          9.56e-10     0.1436     0.1436     NA      NA chrII  2738 2888      +      2L52.1
2L52.1:004 2L52.1   E004     TRUE          2.52e-10     0.2022     0.2022     NA      NA chrII  2931 3036      +      2L52.1
2L52.1:005 2L52.1   E005     TRUE          2.16e-09     0.0979     0.0979     NA      NA chrII  3406 3552      +      2L52.1
2L52.1:006 2L52.1   E006     TRUE          2.46e-09     0.1238     0.1238     NA      NA chrII  3802 3984      +      2L52.1

# Size factors
>  ecs<- estimateSizeFactors(ecs)
sizeFactors(ecs)
# Dispersion
>  ecs<- estimateDispersions(ecs)
# Fit Dispersion
>  ecs<- fitDispersionFunction(ecs)
# Plot Individual exons via mean expression
>  meanvalues<- rowMeans(counts(ecs))
>  plot(meanvalues, fData(ecs)$dispBeforeSharing, log="xy", main="mean vs CR dispersion")
>  x<- 0.01:max(meanvalues)
>  y<- ecs at dispFitCoefs[1] + ecs at dispFitCoefs[2] / x
>  lines(x, y, col="red")
# Plot looks good
# Test for Expression Difference
>  test<- testForDEU(ecs)
# Seems to work

>  ecs<- estimatelog2FoldChanges(ecs)
>  res1<- DEUresultTable(ecs)
head(res1)
            geneID exonID dispersion pvalue padjust meanBase log2fold(drug/vehicle)
2L52.1:001 2L52.1   E001     0.5145     NA      NA     2.01                 -0.844
2L52.1:002 2L52.1   E002     0.1391     NA      NA    12.25                 -0.304
2L52.1:003 2L52.1   E003     0.1436     NA      NA     7.45                  0.738
2L52.1:004 2L52.1   E004     0.2022     NA      NA     5.22                  0.595
2L52.1:005 2L52.1   E005     0.0979     NA      NA    11.18                 -0.264
2L52.1:006 2L52.1   E006     0.1238     NA      NA     8.71                 -0.586

>  table(res1$padjust<  0.1)
character(0)

# Are there any p-values?
>  table(res1$pvalue<  1)
character(0)