[R] Question on accessing foreign files

William Dunlap wdunlap at tibco.com
Tue Apr 18 18:31:54 CEST 2017


I've attached data.restore4.txt, containing the function
data.restore4(), which has the same argument list as
foreign::data.restore() and is mean to be called by the latter if the
first line of the file is "## Dump S Version 4 Dump".   It can read
version 4 of the 'S data dump' format, which for which S+ uses the
file extension ".sdd".  It stores the objects it reads in the
environment specified by the 'env' argument/.

I think it works pretty well; please report any issues to me.

Bill Dunlap
TIBCO Software
wdunlap tibco.com


On Mon, Apr 17, 2017 at 3:20 PM, Daniel Molinari <d.a.molinari at gmail.com> wrote:
> Hi all,
>
> I have several data files provided in mtw format (Minitab) and sdd format
> (S-Plus) and I need to read them in R.
>
> I do not have access either to Minitab or to S-Plus.
>
> How can I accomplish this task ?
>
> Thank you,
> Daniel
>
>         [[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
-------------- next part --------------
data.restore4 <- function(file, print = FALSE, verbose = FALSE, env = .GlobalEnv)
{
    # Like foreign::data.restore, but for S Version 4 data.dump format
    # TODO: when creating functions within functions or expressions, make the inner
    #    ones calls to function(), not already-created functions.  Splus does
    #    not have lexical scoping so this should not affect behavior, but makes
    #    the new  function more R-like.
    #    Dumping the function to a file and sourcing it back in would have the same
    #    effect.
    # TODO: deal with stored Splus objects that have an implicit class
    #    but no "class" attribute.  Except for "matrix" and "array" I don't
    #    think Splus creates such objects, but they exist in the 'data' package
    #    and they depend on getOldClass() to map the class field in the data.dump
    #    to a class vector in the object.  E.g., get("wafer", where="data") has
    #    class 'design' which should become attribute class=c("design", "data.frame").
    #    Some 'ordered' objects are analogous - no class attribute and class field is 'ordered' so you
    #    have to know that 'ordered' means class=c("ordered","factor").
    #    "factor" and "ordered" may also be stored without a named ".Label" attribute.
    #    (I have dealt with the factor the matrix/array objects are commonly stored without
    #    named attributes - I assume that the structure has length 3 and the attributes
    #    are ".Dims" and ".Dimnames".)
    origFile <- file
    if (!inherits(file, "connection")) {
        file <- file(file, "r")
        on.exit(close(file))
    }
    lineNo <- 0
    nextLine <- function(n = 1) {
        lineNo <<- lineNo + n
        readLines(file, n = n)
    }
    Verbosely <- function(...) {
        if (verbose) {
            message(simpleMessage(paste("(object ", objName, ", line ", lineNo, ") ", paste(..., collapse = " ", sep = ""), sep = ""), sys.call(-1)))
        }
    }
    Stop <- function(...) {
        stop(simpleError(paste(paste(..., collapse = " ", sep = ""), sep = "",
            " (object ", objName, ", file ", deparse(summary(file)$description), ", line ", lineNo, ")"), sys.call(-1)))
    }
    Recurse <- function(length) {
        # Never call 'blah <- .data.restore4()' directly as it may return a missing
        # argument object, which will break '<-' but not lapply.
        lapply(seq_len(length), function(i) { .data.restore4() })
    }
    constructMissingArgument <- function() formals(function(x)NULL)$x
    txt <- nextLine()
    objName <- "<none yet>"
    if (length(txt) != 1) {
        Stop("File is empty")
    }
    if (txt != "## Dump S Version 4 Dump ##") {
        Stop("File does not start with '## Dump S Version 4 Dump', so this is not a SV4 data.dump file")
    }
    .data.restore4 <- function()
    {
        class <- nextLine()
        mode <- nextLine()
        length <- as.numeric(tmp <- nextLine())
        if (is.na(length) || length%%1 != 0 || length < 0) {
            Stop("Expected nonnegative integer 'length' at line ", lineNo, " but got ", deparse(tmp))
        }
        if (mode == "character") {
            ret <- nextLine(length)
            # convert \\n to newline, \\t to tab, etc. by using parse()
            vapply(ret, function(string)parse(text=paste0("\"", string, "\""))[[1]], FUN.VALUE="", USE.NAMES=FALSE)
        } else if (mode == "logical") {
            txt <- nextLine(length)
            lglVector <- rep(NA, length)
            lglVector[txt != "N"] <- as.logical(as.integer(txt[txt != "N"]))
            lglVector
        } else if (mode %in% c("integer", "single", "numeric")) {
            txt <- nextLine(length)
            txt[txt == "M"] <- "NaN"
            txt[txt == "I"] <- "Inf"
            txt[txt == "J"] <- "-Inf"
            if (mode == "single") {
                mode <- "numeric"
            }
            atomicVector <- rep(as(NA, mode), length)
            atomicVector[txt != "N"] <- as(txt[txt != "N"], mode)
            atomicVector
        } else if (mode == "complex") {
            txt <- nextLine(length)
            txt <- gsub("M", "NaN", txt)
            txt <- gsub("\\<I\\>", "Inf", txt)
            txt <- gsub("\\<J\\>", "-Inf", txt)
            atomicVector <- rep(as(NA, mode), length)
            atomicVector[txt != "N"] <- as(txt[txt != "N"], mode)
            atomicVector
        } else if (mode == "list") {
            vectors <- Recurse(length)
            vectors
        } else if (mode == "NULL") {
            NULL
        } else if (mode == "structure") {
            vectors <- Recurse(length)
            if (class == ".named_I" || class == "named") {
                if (length != 2) {
                    Stop("expected length of '.named_I' component is 2, but got ", length)
                } else if (!is.character(vectors[[2]])) {
                    Stop("expected second component of '.named_I' to be character, but got ", deparse(mode(vectors[[2]])))
                }
                vector <- vectors[[1]]
                names <- vectors[[2]]
                if (is.call(vector) && identical(vector[[1]], as.name("for"))) {
                    if (length(names) != 3 || !all(names[2:3]  == "")) {
                        Stop("expected only first entry of 'names' for 'for' to be non-blank, but got ", deparse(names))
                    }
                    vector[[2]] <- as.name(names[1])
                    vector
                } else if (is.call(vector) && identical(vector[[1]], as.name(".Call"))) {
                    if (length(vector) - 1 != length(names)) {
                        Stop("expected lengths of names and .Call to be the same, but got ", length(vector) - 1, " and ", length(names))
                    }
                    vector[[2]] <- names[1]
                    names[1] <- ""
                    if (any(names != "")) {
                        names(vector) <- c("", names)
                    }
                    vector
                } else if (is.call(vector) && identical(vector[[1]], as.name(".Internal"))) {
                    if (length(vector) - 1 != length(names)) {
                        Stop("expected lengths of names and .Internal to be the same, but got ", length(vector) - 1, " and ", length(names))
                    }
                    Verbosely("Splus call to '.Internal' will not work in R (or TERR)\n")
                    vector[[3]] <- names[2]
                    vector
                } else if (is.call(vector) && identical(vector[[1]], as.name("function"))) {
                    if (length(vector) - 1 != length(names)) {
                        Stop("expected lengths of argument names and function to be the same, but got ", length(vector) - 1, " and ", length(names))
                    }
                    func <- function()NULL
                    formals(func) <- as.pairlist( structure(as.list(vector)[-c(1,length(vector))], names=names[-length(names)]) )
                    body(func) <- vector[[length(vector)]]
                    environment(func) <- env
                    func
                } else if (is.call(vector) && identical(vector[[1]], as.name("return"))) {
                    # In Splus, names are added to return(x,y) when return has more than one argument
                    Verbosely("Multi-argument returns will fail in R (or TERR): changing return(...) to return(list(...))\n")
                    if (length(vector)-1  != length(names)) {
                        Stop("expected number of returned items length of their name to be the same, but got ", length(vector)-1, " and ", length(names))
                    }
                    if (any(names != "")) {
                        names(vector) <- c("", names)
                    }
                    vector[[1]] <- as.name("list")
                    call("return", vector)
                } else {
                    # finally, attributes
                    if (length(vector) != length(names)) {
                        Stop("expected lengths of '.named_I' components to be the same, but got ", length(vector), " and ", length(names))
                    }
                    names(vector) <- names
                    if (identical(names[1], ".Data")) { # a hack - really want to know if vector had mode "structure" or not
                        if (".Tsp" %in% names) {
                            # ancient Splus objects have dates in .Tsp rounded to 6 significant digits
                            i <- which(".Tsp" == names)
                            if (length(i) != 1) {
                                Stop("Multiple '.Tsp' attributes on object")
                            }
                            tsp <- vector[[i]]
                            if (length(tsp) != 3 || !is.numeric(tsp)) {
                                Stop("'.Tsp' attribute should contain 3 numbers, but got ", deparse(tsp))
                            }
                            n <- round( (tsp[2] - tsp[1]) * tsp[3] + 1)
                            vector[[i]] <- c(tsp[1], tsp[1] + (n-1) / tsp[3], tsp[3])
                            if ( abs(tsp[2] - vector[[i]][2])/abs(tsp[2]) > 1e-8 ) {
                                Verbosely("Fixed up rounded '.Tsp' from ", deparse(tsp), " to ", deparse(vector[[i]]))
                            }
                        }
                        do.call(structure, vector, quote = TRUE)
                    } else {
                        vector
                    }
                }
            } else if (class %in% c("matrix", "array")) {
                if (length != 3) {
                    Stop("Expected 'matrix' or 'array' structures to have length 3, but got ", length)
                }
                array(vectors[[1]], dim=vectors[[2]], dimnames=vectors[[3]])
            } else {
                vectors # TODO: this is ok within a .Named_I/structure object, but otherwise means we omitted a known class (like 'factor' or 'ordered')
            }
        } else if (mode == "name") {
            if (length != 1) {
                Stop("expected length of 'name' objects is 1, but got", length)
            }
            name <- as.name(nextLine())
            # NULL is the NULL object itself in R, but a name bound to it in Splus
            if (identical(name, as.name("NULL"))) {
                NULL
            } else {
                name
            }
        } else if (mode == "call") {
            callList <- Recurse(length)
            as.call(callList)
        } else if (mode == "expression") {
            exprList <- Recurse(length)
            as.expression(exprList)
        } else if (mode %in% c("<-", "=", "<<-", "if", "{", "while", "repeat", "break", "next", "return")) {
            if (mode == "<<-") {
                Verbosely("The '<<-' operator acts differently in R (or TERR) and Splus")
            }
            as.call(c(list(as.name(mode)), Recurse(length)))
        } else if (mode == "for") {
            # Splus: list(loopVar = NULL, quote(sequenceCall), quote(bodyCall))
            # R: list(as.name("for"), as.name("loopVar"), quote(sequenceCall), quote(bodyCall))
            # In Splus, the loopVar is a name for the list, which gets added later by .named_I
            as.call(c(list(as.name(mode)), Recurse(length)))
        } else if (mode == "function") {
            # As with "for", this will be further processed by .named_I (if it has any arguments)
            if (length > 1) {
                as.call(c(list(as.name(mode)), Recurse(length)))
            } else {
                func <- function()NULL
                # body(func) <- .data.restore4()
                body(func) <- Recurse(length)[[1]]
                environment(func) <- env
                func
            }
        } else if (mode == ".Call") {
            # again, must finish processing via .named_I (the C function name will be in names(call))
            as.call(c(list(as.name(mode)), Recurse(length)))
        } else if (mode == "internal") {
            # again, must finish processing via .named_I (the C function name will be in names(call))
            as.call(c(list(as.name(".Internal")), Recurse(length)))
        } else if (mode == "missing") {
            constructMissingArgument()
        } else if (mode == "call with ...") {
            if (length != 1) {
                Stop("Expected length of 'call with ...' item to be 1, but it was ", length)
            }
            # call <- .data.restore4()
            call <- Recurse(length)[[1]]
            if (!is.call(call)) {
                Stop("Expected child to 'call with ...' to be a call, but it is a ", mode(call), "\n")
            }
            call
        } else if (mode == "comment expression") {
            if (length != 2) {
                Stop("Expected length of 'comment expression' is 2, but it was ", length)
            }
            commExprList <- Recurse(length)
            if (!is.character(commExprList[[1]])) {
                Stop("Expected first component of 'comment expression' to be character, but it was ", mode(commExprList[[1]]))
            }
            commExprList[[2]]
        } else if (mode == "(") {
            callExpr <- Recurse(length)
            as.call(callExpr)
        } else {
            # What else did I miss?
            Stop("Unimplemented mode: ", deparse(mode))
        }
    }
    while (length(objName <- nextLine()) == 1) {
        if (print) {
            cat(deparse(objName), ":\n", sep="")
        }
        Verbosely("Starting to read\n")
        obj <- .data.restore4()
        Verbosely("  class=", deparse(class(obj)), ", size=", object.size(obj), "\n")
        assign(objName, obj, envir=env)
        if (print) {
            cat("    ", class(obj), "\n", sep="")
        }
    }
    origFile
}


More information about the R-help mailing list