[BioC] loading SMD...
Mayte Suarez-Farinas
mayte at babel.rockefeller.edu
Thu Jun 3 16:36:36 CEST 2004
Hi Ian,
I have been working with SDM database and I have to say that I found a lot
of problems.
marray package have a function called read.SMD that you can use and it
skips the metainformation in the header. There is also a function
read.SMD2 that somebody (sorry, I do not remember who) posted here and
it is more flexible (it is attach to this message). One way is to
download the gal file form SMD and the use this functions.
But you should be careful when you use SDM data. It happened
to me that the information contained in the xls data do not match the gal
file because some spot were not printed, and both read.smd and read.smd2
assumes a continuity in the layout (they generates the layout using only
the dimensions of the chips, so you can not have spot (1, 26) and then
(2,1) ). So I modified the read.SMD2 by myself to math the layout information and the xls data by ID, including missing
rows in the matrix and update the weights matrix. But if you not have this
problem download the gal file and using read.smd should be enough.
best
Mayte
On Thu, 3 Jun 2004 bioconductor-request at stat.math.ethz.ch wrote:
> Hi All
> I am interested in downloading the Breast cancer data from Zhao et al from
> SMD. I have used the following two commands before to download SMD data
> before
>
> Library(limma)
>
> G = read.maimages(dir(pattern='*.xls'), source='smd', fill=TRUE,sep='\t')
>
>
> RG = read.maimages((dir(pattern='*.xls')),columns=(list(Rf=Ch1 Intensity
> (Mean),Gf=Ch2 Intensity (Mean),Rb=Ch1 Intensity (Median),Gb=Ch2 Intensity
> (Median) )),fill=TRUE)
>
> but when applied to this data I Get the error message
>
> Cannot find column heading in image output file
>
> I have tried specifying the column headings and using the skip command to
> rectify the problem but I only get the same error message
>
> Has anyone who has had a similar problem downloading data from SMD share
> their experience and suggest a suitable command.
>
>
> Thanks,
> Ian
>
> Ps Have attached a reduced version of the file type I am having difficulty
> with.
> Abstract and data for the paper can be found at
> http://genome-www5.stanford.edu/cgi-bin/publication/viewPublication.pl?pub_n
> o=322
> -------------- next part --------------
> A non-text attachment was scrubbed...
> Name: 18195.xls
> Type: application/vnd.ms-excel
> Size: 20992 bytes
> Desc: not available
> Url : https://www.stat.math.ethz.ch/pipermail/bioconductor/attachments/20040602/9200fc76/18195-0001.xls
>
> ------------------------------
-------------- next part --------------
read.SMD2 <- function(fnames = NULL, path = ".", name.Gf = "CH1I_MEAN", name.Gb = "CH1B_MEDIAN", name.Rf = "CH2I_MEAN", name.Rb = "CH2B_MEDIAN", name.W = NULL, layout = NULL, gnames = NULL,targets = NULL, notes = NULL, skip = 0, sep = "\t", quote = "", ...) {
if (is.null(fnames))
fnames <- dir(path = path, pattern = paste("*", "xls", sep = "."))
if (is.null(path))
fullfnames <- fnames
else
fullfnames <- file.path(path, fnames)
print(fullfnames)
y <- readLines(fullfnames[1], n = 100)
skip <- grep(name.Gf, y,extended=FALSE)[1] - 1
print(skip)
smdTable <- NULL
if (is.null(layout)) {
cat("Generating layout from ", fnames[1], "\n", sep="")
smdTable <- read.table(fullfnames[1], header=TRUE, sep="\t", quote = "", skip = skip, comment.char = "")
print(names(smdTable))
#names(smdTable)<-NewNamesTable
numSectors <- max(smdTable$SECTOR)
print(numSectors)
xsize <- max(smdTable$X.COORD) - min(smdTable$X.COORD)
ysize <- max(smdTable$Y.COORD) - min(smdTable$Y.COORD)
maNgr <- round(sqrt(numSectors*ysize/xsize))
maNgc <- round(sqrt(numSectors*xsize/ysize))
if (is.na(maNgr)) {
row <- grep("Exptid", y)[1]
exptid <- strsplit(y[row], "=")[[1]][2]
cat("Image:http://genome-www5.stanford.edu/cgi-bin/SMD/clickable.pl?exptid=", exptid, "\n", sep = "")
options(warn = getOption("warn")-1)
repeat {
cat("Enter number of vertical sectors (", numSectors," total sectors): ", sep = "")
maNgr <- as.integer(readLines(n = 1))
if (!is.na(maNgr) && maNgr > 0 && maNgr < numSectors &&
numSectors/maNgr == as.integer(numSectors/maNgr))
break
}
options(warn = getOption("warn")+1)
maNgc <- numSectors / maNgr
}
row <- grep("Rows per Sector", y)[1]
maNsr <- as.integer(strsplit(y[row], "=")[[1]][2])
row <- grep("Columns per Sector", y)[1]
maNsc <- as.integer(strsplit(y[row], "=")[[1]][2])
maNspots <- maNgr * maNgc * maNsr * maNsc
maSub <- rep(FALSE, maNspots)
maSub[smdTable$SPOT] <- TRUE
row <- grep("Printname", y)[1]
printname <- strsplit(y[row], "=")[[1]][2]
row <- grep("Tip Configuration", y)[1]
tipconfig <- strsplit(y[row], "=")[[1]][2]
maNotes <- paste("Print Name: ", printname, "\nTip Configuration: ", tipconfig, sep = "")
layout <- new("marrayLayout", maNgr = maNgr, maNgc = maNgc,
maNsr = maNsr, maNsc = maNsc, maNspots = maNspots,
maSub = maSub, maNotes = maNotes)
}
if (is.null(gnames)) {
cat("Generating probe sequence info from ", fnames[1], "\n", sep="")
if (is.null(smdTable))
smdTable <- read.table(fullfnames[1], header=TRUE, sep="\t",quote = "", skip = skip, comment.char = "")
maLabels <- as.character(smdTable$SUID)
cols <- 2:(match("SUID", colnames(smdTable))-1)
maInfo <- smdTable[,cols]
gnames <- new("marrayInfo", maLabels = maLabels, maInfo = maInfo)
}
if (is.null(targets)) {
cat("Generating target sample info from all files\n")
maLabels <- character(0)
maInfo <- data.frame()
for (i in 1:length(fnames)) {
z <- readLines(fullfnames[i], n = skip)
row <- grep("Exptid", z)[1]
maLabels <- c(maLabels, strsplit(z[row], "=")[[1]][2])
row <- grep("Experiment Name", z)[1]
Experiment <- strsplit(z[row], "=")[[1]][2]
row <- grep("Channel 1 Description", z)[1]
Cy3 <- strsplit(z[row], "=")[[1]][2]
row <- grep("Channel 2 Description", z)[1]
Cy5 <- strsplit(z[row], "=")[[1]][2]
row <- grep("SlideName", z)[1]
SlideName <- strsplit(z[row], "=")[[1]][2]
maInfo <- rbind(maInfo, data.frame(Experiment = Experiment, Cy3 = Cy3, Cy5 = Cy5, SlideName = SlideName))
}
rownames(maInfo) <- 1:dim(maInfo)[1]
targets <- new("marrayInfo", maLabels = maLabels, maInfo = maInfo)
}
if (is.null(notes)) {
cat("Generating notes from ", fnames[1], "\n", sep="")
row <- grep("Organism", y)[1]
organism <- strsplit(y[row], "=")[[1]][2]
row <- grep("Category", y)[1]
category <- strsplit(y[row], "=")[[1]][2]
row <- grep("Subcategory", y)[1]
subcategory <- strsplit(y[row], "=")[[1]][2]
row <- grep("Description", y)[1]
description <- strsplit(y[row], "=")[[1]][2]
row <- grep("Experimenter", y)[1]
experimenter <- strsplit(y[row], "=")[[1]][2]
row <- grep("Contact email", y)[1]
email <- strsplit(y[row], "=")[[1]][2]
row <- grep("Scanning Software", y)[1]
software <- strsplit(y[row], "=")[[1]][2]
row <- grep("Software version", y)[1]
version <- strsplit(y[row], "=")[[1]][2]
row <- grep("Scanning parameters", y)[1]
parameters <- strsplit(y[row], "=")[[1]]
if (length(parameters) > 1)
parameters <- paste(parameters[2:length(parameters)], collapse = ",")
else
parameters <- NA
notes <- paste("Organism: ", organism,
"\nCategory: ", category,
"\nSubcategory: ", subcategory,
"\nDescription: ", description,
"\nExperimenter: ", experimenter,
"\nE-Mail: ", email,
"\nScanning Software: ", software, " ", version,
"\nScanning Parameters: ", parameters, sep = "")
}
mraw <- read.marrayRaw(fnames = fnames, path = path, name.Gf = name.Gf,
name.Gb = name.Gb, name.Rf = name.Rf, name.Rb = name.Rb,
name.W = name.W, layout = layout, gnames = gnames, targets = targets,
notes = notes, skip = skip, sep = sep, quote = quote,
...)
return(mraw)
}
More information about the Bioconductor
mailing list