[BioC] bluefuse export and limma read.maimages

Wed Jul 6 16:22:11 CEST 2005

Hi all,

here are some hacks added to read.maimages() function in LIMMA package, 
in order to read BlueFuse export files.
It works fine for me.

Let me know.
Greg

#############################
# read bluefuse export files headers #
#############################
my.readBlueFuseHeader <- function(file){
    con <- file(file, "r")
    on.exit( close( con ) )
        error <- 0
        i<-0

        # First line contains BlueFuse version    
    i <- i+1
    if( length( grep( "BlueFuse", readLines(con, n = 1) ) ) == 0 ){
        cat( "Humm...I didnt find any BlueFuse expression in the fisrt 
line !\n" )
        error <- error + 1
    }

    # Second line is a blank line
    i <- i+1
    if( ( nbsp <- readLines(con, n = 1) ) != "" ){
        cat( "file format error founded in line 2 !\n" )
        error <- error + 1
    }

    if( error == 2 )
        stop( "There is almost likely a problem with your file ; I am 
stopping now !" )

        # From the Third line begin headers until a new blank line from 
which begin data
        out <- list()
        while( ( line <- readLines(con, n = 1) ) != "" ){
            i<-i+1
        txt <- strsplit( sub( "\"$", "", sub( "^\"", "", line ) ), split 
= ": ")[[1]]
          out[[i]] <- txt[2]
          names(out)[i] <- txt[1]
    }
        out$NHeaderRecords <- i + 1
        out
}

################
# bluefuse enabled #
################
my.read.maimages <- function (files, source = "spot", path = NULL, ext = 
NULL, names = NULL,
    columns = NULL, other.columns = NULL, annotation = NULL,
    wt.fun = NULL, verbose = TRUE, sep = "\t", quote = "\"",
    ...)
{
    if (missing(files)) {
        if (missing(ext))
            stop("Must specify input files")
        else {
            extregex <- paste("\\.", ext, "$", sep = "")
            files <- dir(path = ifelse(is.null(path), ".", path),
                pattern = extregex)
            files <- sub(extregex, "", files)
        }
    }
    ## modif ##
    source <- match.arg(source, c( "bluefuse", "agilent", "arrayvision", 
"genepix",
        "imagene", "quantarray", "smd.old", "smd", "spot", 
"spot.close.open"))
    if (source == "imagene")
        return(read.imagene(files = files, path = path, ext = ext,
            names = names, columns = columns, wt.fun = wt.fun,
            verbose = verbose, sep = sep, quote = quote, ...))
    slides <- as.vector(as.character(files))
    if (!is.null(ext))
        slides <- paste(slides, ext, sep = ".")
    nslides <- length(slides)
    if (is.null(names))
        names <- removeExt(files)
    if (is.null(columns))
        columns <- switch(source,
          agilent = list(Gf = "gMeanSignal", Gb = "gBGMedianSignal", Rf 
= "rMeanSignal", Rb = "rBGMedianSignal"),

        smd.old = list(Gf = "CH1I_MEAN", Gb = "CH1B_MEDIAN", Rf = 
"CH2I_MEAN", Rb = "CH2B_MEDIAN"),

        smd = list(Gf = "Ch1 Intensity (Mean)", Gb = "Ch1 Background 
(Median)", Rf = "Ch2 Intensity (Mean)",
                    Rb = "Ch2 Background (Median)"),
        spot = list(Rf = "Rmean", Gf = "Gmean", Rb = "morphR", Gb = 
"morphG"),

        spot.close.open = list(Rf = "Rmean", Gf = "Gmean", Rb = 
"morphR.close.open", Gb = "morphG.close.open"),

        genepix = list(Rf = "F635 Mean", Gf = "F532 Mean", Rb = "B635 
Median", Gb = "B532 Median"),

        quantarray = list(Rf = "ch2 Intensity", Gf = "ch1 Intensity", Rb 
= "ch2 Background", Gb = "ch1 Background"),

    # modif #   
        bluefuse = list( Rf="AMPCH2", Gf="AMPCH1", Rb="AMPCH2", 
Gb="AMPCH1" )
            )

    fullname <- slides[1]
    if (!is.null(path))
        fullname <- file.path(path, fullname)
    switch(source, quantarray = {
        firstfield <- scan(fullname, what = "", sep = "\t", flush = TRUE,
            quiet = TRUE, blank.lines.skip = FALSE, multi.line = FALSE)
        skip <- grep("Begin Data", firstfield)
        if (length(skip) == 0)
            stop("Cannot find \"Begin Data\" in image output file")
        nspots <- grep("End Data", firstfield) - skip - 2
        obj <- read.table(fullname, skip = skip, header = TRUE,
            sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
            comment.char = "", nrows = nspots, ...)
    }, arrayvision = {
        skip <- 1
        cn <- scan(fullname, what = "", sep = sep, quote = quote,
            skip = 1, nlines = 1, quiet = TRUE)
        fg <- grep(" Dens - ", cn)
        if (length(fg) != 2)
            stop(paste("Cannot find foreground columns in", fullname))
        bg <- grep("Bkgd", cn)
        if (length(fg) != 2)
            stop(paste("Cannot find background columns in", fullname))
        columns <- list(Rf = fg[1], Rb = bg[1], Gf = fg[2], Gb = bg[2])
        obj <- read.table(fullname, skip = skip, header = TRUE,
            sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
            comment.char = "", ...)
        nspots <- nrow(obj)
    }, genepix = {
        skip <- readGPRHeader(fullname)$NHeaderRecords
        obj <- read.table(fullname, skip = skip, header = TRUE,
            sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
            comment.char = "", fill = TRUE, ...)
        nspots <- nrow(obj)
    }, smd.old = {
        skip <- readSMDHeader(fullname)$NHeaderRecords
        obj <- read.table(fullname, skip = skip, header = TRUE,
            sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
            comment.char = "", fill = TRUE, ...)
        nspots <- nrow(obj)
    }, smd = {
        skip <- readSMDHeader(fullname)$NHeaderRecords
        obj <- read.table(fullname, skip = skip, header = TRUE,
            sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
            comment.char = "", fill = TRUE, ...)
        nspots <- nrow(obj)
    }, bluefuse = {
    # MODIF #
        skip <- my.readBlueFuseHeader( fullname )$NHeaderRecords
    obj <<-  read.table(fullname, skip = skip, header = TRUE,
            sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
            comment.char = "", fill = TRUE, ...)
           nspots <- nrow(obj)
    },{
        skip <- grep(protectMetachar(columns$Rf), readLines(fullname,
            n = 80)) - 1
        if (length(skip) == 0)
            stop("Cannot find column heading in image output file")
        else skip <- skip[1]
        obj <- read.table(fullname, skip = skip, header = TRUE,
            sep = sep, quote = quote, as.is = TRUE, check.names = FALSE,
            comment.char = "", fill = TRUE, ...)
        nspots <- nrow(obj)
    })
    Y <- matrix(0, nspots, nslides)
    colnames(Y) <- names
    RG <- list(R = Y, G = Y, Rb = Y, Gb = Y)
    if (!is.null(wt.fun))
        RG$weights <- Y
    RG$targets <- data.frame(FileName = I(files), row.names = names)

    if (is.null(annotation))
        annotation <- switch(source,
            agilent = c("Row", "Col",
            "Start", "Sequence", "SwissProt", "GenBank", "Primate",
            "GenPept", "ProbeUID", "ControlType", "ProbeName",
            "GeneName", "SystematicName", "Description"),

        genepix = c("Block",
            "Row", "Column", "ID", "Name"),

        smd = c("Spot", "Clone ID",
            "Gene Symbol", "Gene Name", "Cluster ID", "Accession",
            "Preferred name", "Locuslink ID", "Name", "Sequence Type",
            "X Grid Coordinate (within sector)", "Y Grid Coordinate 
(within sector)",
            "Sector", "Failed", "Plate Number", "Plate Row",
            "Plate Column", "Clone Source", "Is Verified", "Is 
Contaminated",
            "Luid"),

        smd.old = c("SPOT", "NAME", "Clone ID",
            "Gene Symbol", "Gene Name", "Cluster ID", "Accession",
            "Preferred name", "SUID"),
    # modif #
        bluefuse = c( "BLOCK", "ID", "SUBGRIDROW", "SUBGRIDCOL", "NAME", 
"FLAG" ),   

        NULL)

    if (!is.null(annotation)) {
        j <- match(annotation, colnames(obj), 0)

     objGlob <<- obj

      # modif #
      if( source == "bluefuse" )
          names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column", "Block", 
"Name", "ID", "Flags")

      if (any(j > 0))
           RG$genes <- data.frame(obj[, j, drop = FALSE])
    }

    if (source == "agilent") {
        if (!is.null(RG$genes$Row) && !is.null(RG$genes$Col)) {
            nr <- length(unique(RG$genes$Row))
            nc <- length(unique(RG$genes$Col))
            if (nspots == nr * nc)
                RG$printer <- list(ngrid.r = 1, ngrid.c = 1,
                  nspot.r = nr, nspot.c = nc)
        }
    }

    if (!is.null(other.columns)) {
        other.columns <- as.character(other.columns)
        j <- match(other.columns, colnames(obj), 0)
        if (any(j > 0)) {
            other.columns <- colnames(obj)[j]
            RG$other <- list()
            for (j in other.columns) RG$other[[j]] <- Y
        }
        else {
            other.columns <- NULL
        }
    }

    for (i in 1:nslides) {

          if (i > 1) {
                fullname <- slides[i]
                if (!is.null(path))
                    fullname <- file.path(path, fullname)
                if (source == "genepix")
                    skip <- readGPRHeader(fullname)$NHeaderRecords

            # modif #
            if ( source == "bluefuse" ){
                skip <- my.readBlueFuseHeader(fullname)$NHeaderRecords
                    obj <- read.table(fullname, skip = skip, header = TRUE,
                        sep = sep, as.is = TRUE, quote = quote, 
check.names = FALSE,
                            comment.char = "", fill = TRUE, nrows = nspots,
                            ...)
                names(obj)[c(3,4,6,7,8,10)] <- c( "Row", "Column", 
"Block", "Name", "ID", "Flags")
            }

            }

        RG$R[, i] <- obj[, columns$Rf]
        RG$G[, i] <- obj[, columns$Gf]
        RG$Rb[, i] <- obj[, columns$Rb]
        RG$Gb[, i] <- obj[, columns$Gb]

        if (!is.null(wt.fun))
                RG$weights[, i] <- wt.fun(obj)

        if (!is.null(other.columns))
              for (j in other.columns){
                    RG$other[[j]][, i] <- obj[, j]
                }
            if (verbose)
                cat(paste("Read", fullname, "\n"))
    }
    new("RGList", RG)
}