[BioC] bluefuse export and limma read.maimages
Gregory Lefebvre
gl2 at sanger.ac.uk
Wed Jul 6 16:22:11 CEST 2005
Hi all,
here are some hacks added to read.maimages() function in LIMMA package,
in order to read BlueFuse export files.
It works fine for me.
Let me know.
Greg
#############################
# read bluefuse export files headers #
#############################
my.readBlueFuseHeader <- function(file){
con <- file(file, "r")
on.exit( close( con ) )
error <- 0
i<-0
# First line contains BlueFuse version
i <- i+1
if( length( grep( "BlueFuse", readLines(con, n = 1) ) ) == 0 ){
cat( "Humm...I didnt find any BlueFuse expression in the fisrt
line !\n" )
error <- error + 1
}
# Second line is a blank line
i <- i+1
if( ( nbsp <- readLines(con, n = 1) ) != "" ){
cat( "file format error founded in line 2 !\n" )
error <- error + 1
}
if( error == 2 )
stop( "There is almost likely a problem with your file ; I am
stopping now !" )
# From the Third line begin headers until a new blank line from
which begin data
out <- list()
while( ( line <- readLines(con, n = 1) ) != "" ){
i<-i+1
txt <- strsplit( sub( "\"$", "", sub( "^\"", "", line ) ), split
= ": ")[[1]]
out[[i]] <- txt[2]
names(out)[i] <- txt[1]
}
out$NHeaderRecords <- i + 1
out
}
################
# bluefuse enabled #
################
my.read.maimages <- function (files, source = "spot", path = NULL, ext =
NULL, names = NULL,
columns = NULL, other.columns = NULL, annotation = NULL,
wt.fun = NULL, verbose = TRUE, sep = "\t", quote = "\"",
...)
{
if (missing(files)) {
if (missing(ext))
stop("Must specify input files")
else {
extregex <- paste("\\.", ext, "$", sep = "")
files <- dir(path = ifelse(is.null(path), ".", path),
pattern = extregex)
files <- sub(extregex, "", files)
}
}
## modif ##
source <- match.arg(source, c( "bluefuse", "agilent", "arrayvision",
"genepix",
"imagene", "quantarray", "smd.old", "smd", "spot",
"spot.close.open"))
if (source == "imagene")
return(read.imagene(files = files, path = path, ext = ext,
names = names, columns = columns, wt.fun = wt.fun,
verbose = verbose, sep = sep, quote = quote, ...))
slides <- as.vector(as.character(files))
if (!is.null(ext))
slides <- paste(slides, ext, sep = ".")
nslides <- length(slides)
if (is.null(names))
names <- removeExt(files)
if (is.null(columns))
columns <- switch(source,
agilent = list(Gf = "gMeanSignal", Gb = "gBGMedianSignal", Rf
= "rMeanSignal", Rb = "rBGMedianSignal"),
smd.old = list(Gf = "CH1I_MEAN", Gb = "CH1B_MEDIAN", Rf =
"CH2I_MEAN", Rb = "CH2B_MEDIAN"),
smd = list(Gf = "Ch1 Intensity (Mean)", Gb = "Ch1 Background
(Median)", Rf = "Ch2 Intensity (Mean)",
Rb = "Ch2 Background (Median)"),
spot = list(Rf = "Rmean", Gf = "Gmean", Rb = "morphR", Gb =
"morphG"),
spot.close.open = list(Rf = "Rmean", Gf = "Gmean", Rb =
"morphR.close.open", Gb = "morphG.close.open"),
genepix = list(Rf = "F635 Mean", Gf = "F532 Mean", Rb = "B635
Median", Gb = "B532 Median"),
quantarray = list(Rf = "ch2 Intensity", Gf = "ch1 Intensity", Rb
= "ch2 Background", Gb = "ch1 Background"),
# modif #
bluefuse = list( Rf="AMPCH2", Gf="AMPCH1", Rb="AMPCH2",
Gb="AMPCH1" )
)
fullname <- slides[1]
if (!is.null(path))
fullname <- file.path(path, fullname)
switch(source, quantarray = {
firstfield <- scan(fullname, what = "", sep = "\t", flush = TRUE,
quiet = TRUE, blank.lines.skip = FALSE, multi.line = FALSE)
skip <- grep("Begin Data", firstfield)
if (length(skip) == 0)
stop("Cannot find \"Begin Data\" in image output file")
nspots <- grep("End Data", firstfield) - skip - 2
obj <- read.table(fullname, skip = skip, header = TRUE,
sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
comment.char = "", nrows = nspots, ...)
}, arrayvision = {
skip <- 1
cn <- scan(fullname, what = "", sep = sep, quote = quote,
skip = 1, nlines = 1, quiet = TRUE)
fg <- grep(" Dens - ", cn)
if (length(fg) != 2)
stop(paste("Cannot find foreground columns in", fullname))
bg <- grep("Bkgd", cn)
if (length(fg) != 2)
stop(paste("Cannot find background columns in", fullname))
columns <- list(Rf = fg[1], Rb = bg[1], Gf = fg[2], Gb = bg[2])
obj <- read.table(fullname, skip = skip, header = TRUE,
sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
comment.char = "", ...)
nspots <- nrow(obj)
}, genepix = {
skip <- readGPRHeader(fullname)$NHeaderRecords
obj <- read.table(fullname, skip = skip, header = TRUE,
sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
comment.char = "", fill = TRUE, ...)
nspots <- nrow(obj)
}, smd.old = {
skip <- readSMDHeader(fullname)$NHeaderRecords
obj <- read.table(fullname, skip = skip, header = TRUE,
sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
comment.char = "", fill = TRUE, ...)
nspots <- nrow(obj)
}, smd = {
skip <- readSMDHeader(fullname)$NHeaderRecords
obj <- read.table(fullname, skip = skip, header = TRUE,
sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
comment.char = "", fill = TRUE, ...)
nspots <- nrow(obj)
}, bluefuse = {
# MODIF #
skip <- my.readBlueFuseHeader( fullname )$NHeaderRecords
obj <<- read.table(fullname, skip = skip, header = TRUE,
sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
comment.char = "", fill = TRUE, ...)
nspots <- nrow(obj)
},{
skip <- grep(protectMetachar(columns$Rf), readLines(fullname,
n = 80)) - 1
if (length(skip) == 0)
stop("Cannot find column heading in image output file")
else skip <- skip[1]
obj <- read.table(fullname, skip = skip, header = TRUE,
sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
comment.char = "", fill = TRUE, ...)
nspots <- nrow(obj)
})
Y <- matrix(0, nspots, nslides)
colnames(Y) <- names
RG <- list(R = Y, G = Y, Rb = Y, Gb = Y)
if (!is.null(wt.fun))
RG$weights <- Y
RG$targets <- data.frame(FileName = I(files), row.names = names)
if (is.null(annotation))
annotation <- switch(source,
agilent = c("Row", "Col",
"Start", "Sequence", "SwissProt", "GenBank", "Primate",
"GenPept", "ProbeUID", "ControlType", "ProbeName",
"GeneName", "SystematicName", "Description"),
genepix = c("Block",
"Row", "Column", "ID", "Name"),
smd = c("Spot", "Clone ID",
"Gene Symbol", "Gene Name", "Cluster ID", "Accession",
"Preferred name", "Locuslink ID", "Name", "Sequence Type",
"X Grid Coordinate (within sector)", "Y Grid Coordinate
(within sector)",
"Sector", "Failed", "Plate Number", "Plate Row",
"Plate Column", "Clone Source", "Is Verified", "Is
Contaminated",
"Luid"),
smd.old = c("SPOT", "NAME", "Clone ID",
"Gene Symbol", "Gene Name", "Cluster ID", "Accession",
"Preferred name", "SUID"),
# modif #
bluefuse = c( "BLOCK", "ID", "SUBGRIDROW", "SUBGRIDCOL", "NAME",
"FLAG" ),
NULL)
if (!is.null(annotation)) {
j <- match(annotation, colnames(obj), 0)
objGlob <<- obj
# modif #
if( source == "bluefuse" )
names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column", "Block",
"Name", "ID", "Flags")
if (any(j > 0))
RG$genes <- data.frame(obj[, j, drop = FALSE])
}
if (source == "agilent") {
if (!is.null(RG$genes$Row) && !is.null(RG$genes$Col)) {
nr <- length(unique(RG$genes$Row))
nc <- length(unique(RG$genes$Col))
if (nspots == nr * nc)
RG$printer <- list(ngrid.r = 1, ngrid.c = 1,
nspot.r = nr, nspot.c = nc)
}
}
if (!is.null(other.columns)) {
other.columns <- as.character(other.columns)
j <- match(other.columns, colnames(obj), 0)
if (any(j > 0)) {
other.columns <- colnames(obj)[j]
RG$other <- list()
for (j in other.columns) RG$other[[j]] <- Y
}
else {
other.columns <- NULL
}
}
for (i in 1:nslides) {
if (i > 1) {
fullname <- slides[i]
if (!is.null(path))
fullname <- file.path(path, fullname)
if (source == "genepix")
skip <- readGPRHeader(fullname)$NHeaderRecords
# modif #
if ( source == "bluefuse" ){
skip <- my.readBlueFuseHeader(fullname)$NHeaderRecords
obj <- read.table(fullname, skip = skip, header = TRUE,
sep = sep, as.is = TRUE, quote = quote,
check.names = FALSE,
comment.char = "", fill = TRUE, nrows = nspots,
...)
names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column",
"Block", "Name", "ID", "Flags")
}
}
RG$R[, i] <- obj[, columns$Rf]
RG$G[, i] <- obj[, columns$Gf]
RG$Rb[, i] <- obj[, columns$Rb]
RG$Gb[, i] <- obj[, columns$Gb]
if (!is.null(wt.fun))
RG$weights[, i] <- wt.fun(obj)
if (!is.null(other.columns))
for (j in other.columns){
RG$other[[j]][, i] <- obj[, j]
}
if (verbose)
cat(paste("Read", fullname, "\n"))
}
new("RGList", RG)
}
More information about the Bioconductor
mailing list