[BioC] Reading SMD datasets

Jean Yee Hwa Yang jean at biostat.ucsf.edu
Thu Apr 15 17:43:14 CEST 2004


Hi Stanley,

Colin Smith wrote this updated function for reading SMD files a
little while ago.  I am working towards putting it into marray at the
moment, but you might like to try it first.

Simply save the attached file into your R working directory, run
source("read.SMD2.R") 
and try your command again.

test <- read.SMD2("5912.xls")

If there are still problems, please e-mail the file to me off-line and I
will help you with it.

Cheers

Jean

> 
> I've been trying to learn R and Bioconductor and to read SMD datasets, 
> but I'm having trouble getting it to work. I can't even get it to work 
> with a small dataset (5912.xls)
> 
> I'm hoping that someone can help me get past the following error:
> 
>  > read.SMD(fnames = NULL, path = "e:/", name.Gf = "Ch 1 Intensity (Mean)",
> + name.Gb = "Ch 1 Background (Mean)", name.Rf = "Ch 2 Intensity (Mean)",
> + name.Rb = "Ch 2 Background (Median)", name.W = NULL, layout = NULL,
> + gnames = NULL, targets = NULL,notes = NULL, skip = 21, sep = " ", 
> quote = "",nmax = 24192)
> [1] "Reading e://5912.xls"
> Error in readLines(con, n, ok) : invalid value for `n'
>  >
> 
> I just don't know where to go next to get the dataset read in.  As you 
> can see, I tried to fix the number of records and should probably fix 
> the line format ('what' parameter to scan), but I suspect it should be 
> simpler than I'm making it out to be.
> 
> Thanks, in advance,
> Stan
> 
> -- 
> Stanley M Dunn, Ph.D.
> Paul S and Mary W Monroe Faculty Scholar
> Associate Dean for Graduate Education and Research
> School of Engineering
> Rutgers University
> 98 Brett Road
> Piscataway, NJ 08854
> 
> (732) 445 - 4462
> (732) 445 - 3224 (Center for Packaging)
> (732) 445 - 7067 (Fax)
> smd at occlusal.rutgers.edu
> 
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at stat.math.ethz.ch
> https://www.stat.math.ethz.ch/mailman/listinfo/bioconductor
> 
-------------- next part --------------
#' Read SMD data files and hand them to read.marrayRaw
#'
#' Locates the data header in the first file, derives any of `layout`,
#' `gnames`, `targets` and `notes` that were not supplied from the file
#' contents, and then forwards everything to `read.marrayRaw()`.
#'
#' @param fnames Character vector of file names. When `NULL`, every file in
#'   `path` whose name ends in ".xls" is used.
#' @param path Directory containing the files, or `NULL` if `fnames` already
#'   holds usable paths.
#' @param name.Gf,name.Gb,name.Rf,name.Rb,name.W Column names forwarded to
#'   `read.marrayRaw()`. `name.Gf` is additionally used (as a literal string)
#'   to locate the column-header line within the first 100 lines of the
#'   first file.
#' @param layout An "marrayLayout" object; generated from the first file
#'   when `NULL`. If the sector grid cannot be inferred from the spot
#'   coordinates, the user is prompted on standard input.
#' @param gnames An "marrayInfo" object describing the probes; generated
#'   from the first file when `NULL`.
#' @param targets An "marrayInfo" object describing the samples; generated
#'   from the header lines of all files when `NULL`.
#' @param notes Character string; generated from the header of the first
#'   file when `NULL`.
#' @param skip Ignored: the number of lines to skip is always recomputed
#'   from the position of the `name.Gf` header line in the first file. The
#'   argument is kept for interface compatibility.
#' @param sep,quote,... Forwarded to `read.marrayRaw()`.
#' @return Whatever `read.marrayRaw()` returns (presumably an "marrayRaw"
#'   object -- confirm against the marray documentation).
read.SMD2 <- function(fnames = NULL, path = ".", name.Gf = "CH1I_MEAN",
    name.Gb = "CH1B_MEDIAN", name.Rf = "CH2I_MEAN", name.Rb = "CH2B_MEDIAN",
    name.W = NULL, layout = NULL, gnames = NULL, targets = NULL, notes = NULL,
    skip = 0, sep = "\t", quote = "", ...) {

    # Value of the first "key=value" line matching `key`: the text between
    # the first and second "=". Yields NA when no line matches or the line
    # has nothing after "=".
    header_value <- function(lines, key) {
        row <- grep(key, lines)[1]
        strsplit(lines[row], "=")[[1]][2]
    }

    # Parse the data table of the first file (header line at skip + 1).
    read_first_table <- function() {
        read.table(fullfnames[1], header = TRUE, sep = "\t",
                   quote = "", skip = skip, comment.char = "")
    }

    if (is.null(fnames)) {
        # `pattern` is a regular expression, not a shell glob.
        fnames <- dir(path = path, pattern = "\\.xls$")
    }
    if (length(fnames) == 0) {
        stop("No SMD files to read: 'fnames' is empty and no '.xls' files ",
             "were found in 'path'", call. = FALSE)
    }
    if (is.null(path)) {
        fullfnames <- fnames
    } else {
        fullfnames <- file.path(path, fnames)
    }

    # The column-header line is the first line containing name.Gf. Match it
    # literally so that names such as "Ch 1 Intensity (Mean)" are not
    # misread as regular expressions.
    y <- readLines(fullfnames[1], n = 100)
    skip <- grep(name.Gf, y, fixed = TRUE)[1] - 1
    if (is.na(skip)) {
        stop("Column header '", name.Gf, "' not found in the first 100 ",
             "lines of ", fullfnames[1], call. = FALSE)
    }
    smdTable <- NULL

    if (is.null(layout)) {
        cat("Generating layout from ", fnames[1], "\n", sep="")
        smdTable <- read_first_table()

        numSectors <- max(smdTable$SECTOR)
        xsize <- max(smdTable$X.COORD) - min(smdTable$X.COORD)
        ysize <- max(smdTable$Y.COORD) - min(smdTable$Y.COORD)

        # Estimate the sector grid from the aspect ratio of the spot
        # coordinates.
        maNgr <- round(sqrt(numSectors * ysize / xsize))
        maNgc <- round(sqrt(numSectors * xsize / ysize))
        if (is.na(maNgr)) {
            # No usable coordinates: ask the user for the number of sector
            # rows, pointing them at the array image for reference.
            exptid <- header_value(y, "Exptid")
            cat("Image: http://genome-www5.stanford.edu/cgi-bin/SMD/clickable.pl?exptid=",
                exptid, "\n", sep = "")
            repeat {
                cat("Enter number of vertical sectors (", numSectors,
                    " total sectors): ", sep = "")
                answer <- readLines(n = 1)
                if (length(answer) == 0) {
                    # End of input (e.g. a non-interactive session); without
                    # this check the loop could never terminate cleanly.
                    stop("Cannot determine the sector layout: no input ",
                         "available; supply 'layout' explicitly",
                         call. = FALSE)
                }
                # Non-numeric input becomes NA and simply re-prompts; only
                # the coercion warning is silenced, without touching the
                # global 'warn' option.
                maNgr <- suppressWarnings(as.integer(answer))
                if (!is.na(maNgr) && maNgr > 0 && maNgr < numSectors &&
                    numSectors %% maNgr == 0)
                    break
            }
            maNgc <- numSectors / maNgr
        }

        maNsr <- as.integer(header_value(y, "Rows per Sector"))
        maNsc <- as.integer(header_value(y, "Columns per Sector"))

        maNspots <- maNgr * maNgc * maNsr * maNsc

        # Mark which spots of the full layout are present in the file.
        maSub <- rep(FALSE, maNspots)
        maSub[smdTable$SPOT] <- TRUE

        printname <- header_value(y, "Printname")
        tipconfig <- header_value(y, "Tip Configuration")
        maNotes <- paste("Print Name: ", printname,
                         "\nTip Configuration: ", tipconfig, sep = "")

        layout <- new("marrayLayout", maNgr = maNgr, maNgc = maNgc,
                      maNsr = maNsr, maNsc = maNsc, maNspots = maNspots,
                      maSub = maSub, maNotes = maNotes)
    }

    if (is.null(gnames)) {
        cat("Generating probe sequence info from ", fnames[1], "\n", sep="")
        if (is.null(smdTable))
            smdTable <- read_first_table()

        suid_col <- match("SUID", colnames(smdTable))
        if (is.na(suid_col)) {
            stop("No 'SUID' column found in ", fullfnames[1], call. = FALSE)
        }
        maLabels <- as.character(smdTable[[suid_col]])
        # Annotation columns are those between the first column and SUID.
        # seq_len(...)[-1] is empty (rather than counting backwards) when
        # SUID is the second column; drop = FALSE keeps a data frame even
        # when only one column is selected.
        cols <- seq_len(suid_col - 1)[-1]
        maInfo <- smdTable[, cols, drop = FALSE]

        gnames <- new("marrayInfo", maLabels = maLabels, maInfo = maInfo)
    }

    if (is.null(targets)) {
        cat("Generating target sample info from all files\n")
        maLabels <- character(length(fullfnames))
        target_rows <- vector("list", length(fullfnames))
        for (i in seq_along(fullfnames)) {
            # Only the header lines above the data table are needed. The
            # header length of the first file is assumed for every file.
            z <- readLines(fullfnames[i], n = skip)
            maLabels[i] <- header_value(z, "Exptid")
            target_rows[[i]] <- data.frame(
                Experiment = header_value(z, "Experiment Name"),
                Cy3 = header_value(z, "Channel 1 Description"),
                Cy5 = header_value(z, "Channel 2 Description"),
                SlideName = header_value(z, "SlideName"))
        }
        maInfo <- do.call(rbind, target_rows)
        rownames(maInfo) <- seq_len(nrow(maInfo))

        targets <- new("marrayInfo", maLabels = maLabels, maInfo = maInfo)
    }

    if (is.null(notes)) {
        cat("Generating notes from ", fnames[1], "\n", sep="")
        # NOTE(review): these keys are matched as substrings of the first
        # matching line, so e.g. "Category" could also hit a "Subcategory"
        # line and "Description" a "Channel 1 Description" line if those
        # come first in the header -- confirm against the SMD file format.
        organism <- header_value(y, "Organism")
        category <- header_value(y, "Category")
        subcategory <- header_value(y, "Subcategory")
        description <- header_value(y, "Description")
        experimenter <- header_value(y, "Experimenter")
        email <- header_value(y, "Contact email")
        software <- header_value(y, "Scanning Software")
        version <- header_value(y, "Software version")

        # The scanning parameters may themselves contain "=", so keep every
        # piece after the key and join them with ", ".
        row <- grep("Scanning parameters", y)[1]
        parameters <- strsplit(y[row], "=")[[1]]
        if (length(parameters) > 1) {
            parameters <- paste(parameters[-1], collapse = ", ")
        } else {
            parameters <- NA
        }

        notes <- paste("Organism: ", organism,
                       "\nCategory: ", category,
                       "\nSubcategory: ", subcategory,
                       "\nDescription: ", description,
                       "\nExperimenter: ", experimenter,
                       "\nE-Mail: ", email,
                       "\nScanning Software: ", software, " ", version,
                       "\nScanning Parameters: ", parameters, sep = "")
    }

    read.marrayRaw(fnames = fnames, path = path, name.Gf = name.Gf,
        name.Gb = name.Gb, name.Rf = name.Rf, name.Rb = name.Rb,
        name.W = name.W, layout = layout, gnames = gnames, targets = targets,
        notes = notes, skip = skip, sep = sep, quote = quote,
        ...)
}



More information about the Bioconductor mailing list