[Rd] RFC: "loop connections"

dhinds@sonic.net dhinds at sonic.net
Thu Sep 1 00:34:08 CEST 2005


Martin Maechler <maechler at stat.math.ethz.ch> wrote:

> I think the main point of David's proposal is still worth
> consideration:  One way to see text connections is as a way to
> treat some kind of R objects as "generalized files" i.e., connections.

To summarize the motivation for the proposal, again:

- There are two modes of connections: text and binary.  The operations
  supported on text and binary connections are mostly disjoint.  Most
  connection classes (socket, file, etc) support both modes.

- textConnection() binds a character vector to a text connection.
  There is no equivalent for a binary connection.  there are
  workarounds (i.e. anonymous connections, equivalent to temporary
  files), but these have substantial performance penalties.

- Both connection modes have useful applications.  textConnection() is
  useful, or it would not exist.  Orthogonality is good, special cases
  are bad.

- Only about 50 lines of code are required to implement a binary form
  of textConnection() in the R core.  Implementing this functionality
  in a separate package requires substantially more code.

- I need it, and in at least one case, another R package developer has
  implemented it using temporary files (caTools).  I also just noticed
  that Duncon Murdoch recently proposed the EXACT SAME feature on
  r-help:

  https://stat.ethz.ch/pipermail/r-help/2005-April/067651.html

I think that just about sums it up.  I've attached a smaller patch
that makes fewer changes to R source, doesn't change any existing
function names, etc.  The feature adds 400 bytes to the size of R.dll.

-- Dave



--- src/main/connections.c.orig	2005-06-17 19:05:02.000000000 -0700
+++ src/main/connections.c	2005-08-31 15:26:19.947195100 -0700
@@ -1644,7 +1644,7 @@
     return ans;
 }
 
-/* ------------------- text connections --------------------- */
+/* ------------------- text and raw connections --------------------- */
 
 /* read a R character vector into a buffer */
 static void text_init(Rconnection con, SEXP text)
@@ -1668,6 +1668,22 @@
     this->cur = this->save = 0;
 }
 
+/* read a R raw vector into a buffer */
+static void raw_init(Rconnection con, SEXP raw)
+{
+    int nbytes = length(raw);
+    Rtextconn this = (Rtextconn)con->private;
+
+    this->data = (char *) malloc(nbytes);
+    if(!this->data) {
+	free(this); free(con->description); free(con->class); free(con);
+	error(_("cannot allocate memory for raw connection"));
+    }
+    memcpy(this->data, RAW(raw), nbytes);
+    this->nchars = nbytes;
+    this->cur = this->save = 0;
+}
+
 static Rboolean text_open(Rconnection con)
 {
     con->save = -1000;
@@ -1702,41 +1718,60 @@
 
 static double text_seek(Rconnection con, double where, int origin, int rw)
 {
-    if(where >= 0) error(_("seek is not relevant for text connection"));
+    if(where >= 0) error(_("seek is not relevant for this connection"));
     return 0; /* if just asking, always at the beginning */
 }
 
-static Rconnection newtext(char *description, SEXP text)
+static size_t raw_read(void *ptr, size_t size, size_t nitems,
+		       Rconnection con)
+{
+    Rtextconn this = (Rtextconn)con->private;
+    if (this->cur + size*nitems > this->nchars) {
+	nitems = (this->nchars - this->cur)/size;
+	memcpy(ptr, this->data+this->cur, size*nitems);
+	this->cur = this->nchars;
+    } else {
+	memcpy(ptr, this->data+this->cur, size*nitems);
+	this->cur += size*nitems;
+    }
+    return nitems;
+}
+
+static Rconnection newtext(char *description, SEXP data)
 {
     Rconnection new;
+    int isText = isString(data);
     new = (Rconnection) malloc(sizeof(struct Rconn));
-    if(!new) error(_("allocation of text connection failed"));
-    new->class = (char *) malloc(strlen("textConnection") + 1);
-    if(!new->class) {
-	free(new);
-	error(_("allocation of text connection failed"));
-    }
-    strcpy(new->class, "textConnection");
+    if(!new) goto f1;
+    new->class = (char *) malloc(strlen("xxxxConnection") + 1);
+    if(!new->class) goto f2;
+    sprintf(new->class, "%sConnection", isText ? "text" : "raw");
     new->description = (char *) malloc(strlen(description) + 1);
-    if(!new->description) {
-	free(new->class); free(new);
-	error(_("allocation of text connection failed"));
-    }
+    if(!new->description) goto f3;
     init_con(new, description, "r");
     new->isopen = TRUE;
     new->canwrite = FALSE;
     new->open = &text_open;
     new->close = &text_close;
     new->destroy = &text_destroy;
-    new->fgetc = &text_fgetc;
     new->seek = &text_seek;
     new->private = (void*) malloc(sizeof(struct textconn));
-    if(!new->private) {
-	free(new->description); free(new->class); free(new);
-	error(_("allocation of text connection failed"));
+    if(!new->private) goto f4;
+    new->text = isText;
+    if (new->text) {
+	new->fgetc = &text_fgetc;
+	text_init(new, data);
+    } else {
+	new->read = &raw_read;
+	raw_init(new, data);
     }
-    text_init(new, text);
     return new;
+
+f4: free(new->description);
+f3: free(new->class);
+f2: free(new);
+f1: error(_("allocation of %s connection failed"),
+	  isText ? "text" : "raw");
 }
 
 static void outtext_close(Rconnection con)
@@ -1830,24 +1865,42 @@
     return res;
 }
 
+static size_t raw_write(const void *ptr, size_t size, size_t nitems,
+			Rconnection con)
+{
+    Routtextconn this = (Routtextconn)con->private;
+    SEXP tmp;
+    int idx = ConnIndex(con);
+
+    PROTECT(tmp = lengthgets(this->data, this->len + size*nitems));
+    memcpy(RAW(tmp)+this->len, ptr, size*nitems);
+    this->len += size*nitems;
+    defineVar(this->namesymbol, tmp, VECTOR_ELT(OutTextData, idx));
+    this->data = tmp;
+    UNPROTECT(1);
+    return nitems;
+}
+
 static void outtext_init(Rconnection con, char *mode, int idx)
 {
     Routtextconn this = (Routtextconn)con->private;
+    int st = (con->text ? STRSXP : RAWSXP);
     SEXP val;
 
     this->namesymbol = install(con->description);
-    if(strcmp(mode, "w") == 0) {
+    if(strncmp(mode, "w", 1) == 0) {
 	/* create variable pointed to by con->description */
-	PROTECT(val = allocVector(STRSXP, 0));
+	PROTECT(val = allocVector(st, 0));
 	defineVar(this->namesymbol, val, VECTOR_ELT(OutTextData, idx));
 	UNPROTECT(1);
     } else {
 	/* take over existing variable */
 	val = findVar1(this->namesymbol, VECTOR_ELT(OutTextData, idx),
-		       STRSXP, FALSE);
+		       st, FALSE);
 	if(val == R_UnboundValue) {
-	    warning(_("text connection: appending to a non-existent char vector"));
-	    PROTECT(val = allocVector(STRSXP, 0));
+	    warning(_("%s connection: appending to a non-existent vector"),
+		    con->text ? "text" : "raw");
+	    PROTECT(val = allocVector(st, 0));
 	    defineVar(this->namesymbol, val, VECTOR_ELT(OutTextData, idx));
 	    UNPROTECT(1);
 	}
@@ -1862,43 +1915,43 @@
 static Rconnection newouttext(char *description, SEXP sfile, char *mode,
 			      int idx)
 {
+    int isText = (mode[1] != 'b');
     Rconnection new;
     void *tmp;
 
     new = (Rconnection) malloc(sizeof(struct Rconn));
-    if(!new) error(_("allocation of text connection failed"));
-    new->class = (char *) malloc(strlen("textConnection") + 1);
-    if(!new->class) {
-	free(new);
-	error(_("allocation of text connection failed"));
-    }
-    strcpy(new->class, "textConnection");
+    if(!new) goto f1;
+    new->class = (char *) malloc(strlen("xxxxConnection") + 1);
+    if(!new->class) goto f2;
+    sprintf(new->class, "%sConnection", isText ? "text" : "raw");
     new->description = (char *) malloc(strlen(description) + 1);
-    if(!new->description) {
-	free(new->class); free(new);
-	error(_("allocation of text connection failed"));
-    }
+    if(!new->description) goto f3;
     init_con(new, description, mode);
+    new->text = isText;
     new->isopen = TRUE;
     new->canread = FALSE;
     new->open = &text_open;
     new->close = &outtext_close;
     new->destroy = &outtext_destroy;
-    new->vfprintf = &text_vfprintf;
     new->seek = &text_seek;
     new->private = (void*) malloc(sizeof(struct outtextconn));
-    if(!new->private) {
-	free(new->description); free(new->class); free(new);
-	error(_("allocation of text connection failed"));
-    }
+    if(!new->private) goto f4;
     ((Routtextconn)new->private)->lastline = tmp = malloc(LAST_LINE_LEN);
-    if(!tmp) {
-	free(new->private);
-	free(new->description); free(new->class); free(new);
-	error(_("allocation of text connection failed"));
+    if(!tmp) goto f5;
+    if (isText) {
+	new->vfprintf = &text_vfprintf;
+    } else {
+	new->write = &raw_write;
     }
     outtext_init(new, mode, idx);
     return new;
+
+f5: free(new->private);
+f4: free(new->description);
+f3: free(new->class);
+f2: free(new);
+f1: error(_("allocation of %s connection failed"),
+	  isText ? "text" : "raw");
 }
 
 SEXP do_textconnection(SEXP call, SEXP op, SEXP args, SEXP env)
@@ -1914,8 +1967,6 @@
 	error(_("invalid 'description' argument"));
     desc = CHAR(STRING_ELT(sfile, 0));
     stext = CADR(args);
-    if(!isString(stext))
-	error(_("invalid 'text' argument"));
     sopen = CADDR(args);
     if(!isString(sopen) || length(sopen) != 1)
     error(_("invalid 'open' argument"));
@@ -1924,9 +1975,13 @@
     if (!isEnvironment(venv) && venv != R_NilValue)
 	error(_("invalid 'environment' argument"));
     ncon = NextConnection();
-    if(!strlen(open) || strncmp(open, "r", 1) == 0)
+    if(!strlen(open) || (open[0] == 'r')) {
+	if(!isString(stext) && (TYPEOF(stext) != RAWSXP))
+	    error(_("invalid 'object' argument"));
 	con = Connections[ncon] = newtext(desc, stext);
-    else if (strncmp(open, "w", 1) == 0 || strncmp(open, "a", 1) == 0) {
+    } else if ((open[0] == 'w') || (open[0] == 'a')) {
+	if(!isString(stext))
+	    error(_("invalid 'object' argument"));
 	if (OutTextData == NULL) {
 	    OutTextData = allocVector(VECSXP, NCONNECTIONS);
 	    R_PreserveObject(OutTextData);
@@ -1942,7 +1997,7 @@
     PROTECT(ans = allocVector(INTSXP, 1));
     INTEGER(ans)[0] = ncon;
     PROTECT(class = allocVector(STRSXP, 2));
-    SET_STRING_ELT(class, 0, mkChar("textConnection"));
+    SET_STRING_ELT(class, 0, mkChar(con->class));
     SET_STRING_ELT(class, 1, mkChar("connection"));
     classgets(ans, class);
     UNPROTECT(2);



More information about the R-devel mailing list