[Rd] [External] On PRINTNAME() encoding, EncodeChar(), and being painted into a corner

Ivan Krylov kry|ov@r00t @end|ng |rom gm@||@com
Tue Oct 3 15:20:41 CEST 2023


Dear Luke Tierney,

Thank you for the reply and apologies for not getting back to you
earlier.

On Fri, 22 Sep 2023 16:14:58 -0500 (CDT)
luke-tierney using uiowa.edu wrote:

> I think it would be best to modify errorcall so errorcall_cpy is not
> necessary. As things are now it is just too easy to forget that
> sometimes errorcall_cpy should be used (and this has lead to some bugs
> recently).

At the end of this e-mail is a large patch that makes errorcall() and
warningcall() safer by processing the format arguments before calling
any R APIs. It's fairly invasive because it rewires all error() /
warning() / errorcall() / warningcall() processing into two common
subroutines that call R_vsnprintf() as soon as possible and keep a
single message buffer afterwards. It passes make check-devel. I am open
to other, less invasive ways to implement this change.

Additionally, I fixed a problem detected by a static analyser: given
unsigned msg_len, max(msg_len - strlen(head), 0) can only be 0 if
msg_len == strlen(head). That's because both msg_len and size_t end up
being promoted to unsigned in order to compute the subtraction. I
couldn't come up with a sufficiently laconic alternative, so I left the
ternary operator in for now.

>> The only solution to the latter problem is an EncodeChar() variant
>> that allocates its memory dynamically. Would R_alloc() be
>> acceptable in this context? With errors, the allocation stack would
>> be quickly reset (except when withCallingHandlers() is in effect?),
>> but with warnings, the code would have to restore it manually every
>> time.  
> 
> Or allow/require a buffer to be provided. So replacing the calls like
> 
>     CHAR(PRINTNAME(sym))
> 
> with
> 
>     EncodeSymbol(sym, buf, buf_size)

I can also implement this. Does this mean replacing every occasion of
EncodeChar(PRINTNAME(sym)) with EncodeSymbol(sym, <temporary>, sizeof
<temporary>)?

Index: src/main/envir.c
===================================================================
--- src/main/envir.c	(revision 85251)
+++ src/main/envir.c	(working copy)
@@ -1582,7 +1582,7 @@
 	}
 	rho = ENCLOS(rho);
     }
-    errorcall_cpy(call,
+    errorcall(call,
                   _("could not find function \"%s\""),
                   EncodeChar(PRINTNAME(symbol)));
     /* NOT REACHED */
@@ -3924,7 +3924,7 @@
     SEXP nsname = PROTECT(callR1(R_getNamespaceNameSymbol, ns));
     if (TYPEOF(nsname) != STRSXP || LENGTH(nsname) != 1)
 	errorcall(call, "bad value returned by `getNamespaceName'");
-    errorcall_cpy(call,
+    errorcall(call,
 		  _("'%s' is not an exported object from 'namespace:%s'"),
 		  EncodeChar(PRINTNAME(name)),
 		  CHAR(STRING_ELT(nsname, 0)));
Index: src/main/errors.c
===================================================================
--- src/main/errors.c	(revision 85251)
+++ src/main/errors.c	(working copy)
@@ -58,7 +58,7 @@
 /*
 Different values of inError are used to indicate different places
 in the error handling:
-inError = 1: In internal error handling, e.g. `verrorcall_dflt`, others.
+inError = 1: In internal error handling, e.g. `errorcall_dflt`, others.
 inError = 2: Writing traceback
 inError = 3: In user error handler (i.e. options(error=handler))
 */
@@ -387,32 +387,45 @@
 	return c ? c->call : R_NilValue;
 }
 
-void warning(const char *format, ...)
+/* declarations for internal condition handling */
+
+static void signalError(SEXP call, const char *msg);
+static void signalWarning(SEXP call, const char *msg);
+NORET static void invokeRestart(SEXP, SEXP);
+
+static void impl_vwarning(SEXP call, Rboolean immediate, const char *format, va_list ap)
 {
     char buf[BUFSIZE], *p;
 
-    va_list(ap);
-    va_start(ap, format);
     size_t psize;
     int pval;
 
     psize = min(BUFSIZE, R_WarnLength+1);
     pval = Rvsnprintf_mbcs(buf, psize, format, ap);
-    va_end(ap);
     p = buf + strlen(buf) - 1;
     if(strlen(buf) > 0 && *p == '\n') *p = '\0';
     RprintTrunc(buf, pval >= psize);
-    SEXP call = PROTECT(getCurrentCall());
-    warningcall(call, "%s", buf);
-    UNPROTECT(1);
+
+    // must not call into R before stringifying the format arguments
+    int nprotect = 0;
+    if (!call) {
+	call = PROTECT(getCurrentCall());
+	++nprotect;
+    }
+    if (immediate) immediateWarning = 1;
+    signalWarning(call, buf);
+    if (immediate) immediateWarning = 0;
+    UNPROTECT(nprotect);
 }
 
-/* declarations for internal condition handling */
+void warning(const char *format, ...)
+{
+    va_list(ap);
+    va_start(ap, format);
+    impl_vwarning(NULL, FALSE, format, ap);
+    va_end(ap);
+}
 
-static void vsignalError(SEXP call, const char *format, va_list ap);
-static void vsignalWarning(SEXP call, const char *format, va_list ap);
-NORET static void invokeRestart(SEXP, SEXP);
-
 static void reset_inWarning(void *data)
 {
     inWarning = 0;
@@ -437,12 +450,11 @@
     return nc;
 }
 
-static void vwarningcall_dflt(SEXP call, const char *format, va_list ap)
+static void warningcall_dflt(SEXP call, const char *msg)
 {
     int w;
     SEXP names, s;
     const char *dcall;
-    char buf[BUFSIZE];
     RCNTXT *cptr;
     RCNTXT cntxt;
     size_t psize;
@@ -480,11 +492,8 @@
     inWarning = 1;
 
     if(w >= 2) { /* make it an error */
-	psize = min(BUFSIZE, R_WarnLength+1);
-	pval = Rvsnprintf_mbcs(buf, psize, format, ap);
-	RprintTrunc(buf, pval >= psize);
 	inWarning = 0; /* PR#1570 */
-	errorcall(call, _("(converted from warning) %s"), buf);
+	errorcall(call, _("(converted from warning) %s"), msg);
     }
     else if(w == 1) {	/* print as they happen */
 	char *tr;
@@ -492,18 +501,16 @@
 	    dcall = CHAR(STRING_ELT(deparse1s(call), 0));
 	} else dcall = "";
 	psize = min(BUFSIZE, R_WarnLength+1);
-	pval = Rvsnprintf_mbcs(buf, psize, format, ap);
-	RprintTrunc(buf, pval >= psize);
 
 	if(dcall[0] == '\0') REprintf(_("Warning:"));
 	else {
 	    REprintf(_("Warning in %s :"), dcall);
 	    if(!(noBreakWarning ||
-		 ( mbcslocale && 18 + wd(dcall) + wd(buf) <= LONGWARN) ||
-		 (!mbcslocale && 18 + strlen(dcall) + strlen(buf) <= LONGWARN)))
+		 ( mbcslocale && 18 + wd(dcall) + wd(msg) <= LONGWARN) ||
+		 (!mbcslocale && 18 + strlen(dcall) + strlen(msg) <= LONGWARN)))
 		REprintf("\n ");
 	}
-	REprintf(" %s\n", buf);
+	REprintf(" %s\n", msg);
 	if(R_ShowWarnCalls && call != R_NilValue) {
 	    tr = R_ConciseTraceback(call, 0);
 	    if (strlen(tr)) {REprintf(_("Calls:")); REprintf(" %s\n", tr);}
@@ -512,10 +519,11 @@
     else if(w == 0) {	/* collect them */
 	if(!R_CollectWarnings) setupwarnings();
 	if(R_CollectWarnings < R_nwarnings) {
+	    char buf[BUFSIZE];
 	    SET_VECTOR_ELT(R_Warnings, R_CollectWarnings, call);
 	    psize = min(BUFSIZE, R_WarnLength+1);
-	    pval = Rvsnprintf_mbcs(buf, psize, format, ap);
-	    RprintTrunc(buf, pval >= psize);
+	    Rstrncpy(buf, msg, psize);
+	    RprintTrunc(buf, strlen(msg) >= psize);
 	    if(R_ShowWarnCalls && call != R_NilValue) {
 		char *tr =  R_ConciseTraceback(call, 0);
 		size_t nc = strlen(tr);
@@ -535,20 +543,12 @@
     inWarning = 0;
 }
 
-static void warningcall_dflt(SEXP call, const char *format,...)
-{
-    va_list(ap);
 
-    va_start(ap, format);
-    vwarningcall_dflt(call, format, ap);
-    va_end(ap);
-}
-
 void warningcall(SEXP call, const char *format, ...)
 {
     va_list(ap);
     va_start(ap, format);
-    vsignalWarning(call, format, ap);
+    impl_vwarning(call, FALSE, format, ap);
     va_end(ap);
 }
 
@@ -555,12 +555,9 @@
 void warningcall_immediate(SEXP call, const char *format, ...)
 {
     va_list(ap);
-
-    immediateWarning = 1;
     va_start(ap, format);
-    vsignalWarning(call, format, ap);
+    impl_vwarning(call, TRUE, format, ap);
     va_end(ap);
-    immediateWarning = 0;
 }
 
 static void cleanup_PrintWarnings(void *data)
@@ -741,7 +738,7 @@
 /* Construct newline terminated error message, write it to global errbuf, and
    possibly display with REprintf. */
 NORET static void
-verrorcall_dflt(SEXP call, const char *format, va_list ap)
+errorcall_dflt(SEXP call, const char *msg)
 {
     if (allowedConstsChecks > 0) {
 	allowedConstsChecks--;
@@ -758,7 +755,7 @@
 	    REprintf(_("Error during wrapup: "));
 	    /* this does NOT try to print the call since that could
 	       cause a cascade of error calls */
-	    Rvsnprintf_mbcs(errbuf, sizeof(errbuf), format, ap);
+	    Rstrncpy(errbuf, msg, sizeof(errbuf));
 	    REprintf("%s\n", errbuf);
 	}
 	if (R_Warnings != R_NilValue) {
@@ -801,7 +798,7 @@
 	}
 
 	const char *dcall = CHAR(STRING_ELT(deparse1s(call), 0));
-	Rsnprintf_mbcs(tmp2, BUFSIZE,  "%s", head);
+	Rstrncpy(tmp2, head, BUFSIZE);
 	if (skip != NA_INTEGER) {
 	    PROTECT(srcloc = GetSrcLoc(R_GetCurrentSrcref(skip)));
 	    protected++;
@@ -811,7 +808,7 @@
 			       dcall, CHAR(STRING_ELT(srcloc, 0)));
 	}
 
-	Rvsnprintf_mbcs(tmp, max(msg_len - strlen(head), 0), format, ap);
+	Rstrncpy(tmp, msg, msg_len > strlen(head) ? msg_len - strlen(head) : 0);
 	if (strlen(tmp2) + strlen(tail) + strlen(tmp) < BUFSIZE) {
 	    if(len) Rsnprintf_mbcs(errbuf, BUFSIZE,
 				   _("Error in %s (from %s) : "),
@@ -839,15 +836,15 @@
 	    }
 	    ERRBUFCAT(tmp);
 	} else {
-	    Rsnprintf_mbcs(errbuf, BUFSIZE, _("Error: "));
+	    Rstrncpy(errbuf, _("Error: "), BUFSIZE);
 	    ERRBUFCAT(tmp);
 	}
 	UNPROTECT(protected);
     }
     else {
-	Rsnprintf_mbcs(errbuf, BUFSIZE, _("Error: "));
+	Rstrncpy(errbuf, _("Error: "), BUFSIZE);
 	p = errbuf + strlen(errbuf);
-	Rvsnprintf_mbcs(p, max(msg_len - strlen(errbuf), 0), format, ap);
+	Rstrncpy(p, msg, msg_len > strlen(errbuf) ? msg_len - strlen(errbuf) : 0);
     }
     /* Approximate truncation detection, may produce false positives.  Assumes
        R_MB_CUR_MAX > 0. Note: approximation is fine, as the string may include
@@ -892,55 +889,44 @@
     inError = oldInError;
 }
 
-NORET static void errorcall_dflt(SEXP call, const char *format,...)
-{
-    va_list(ap);
+NORET static void do_verrorcall(SEXP call, const char *format, va_list ap) {
+    // must be careful to process the format arguments before calling into R
+    char buf[BUFSIZE];
+    Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap);
 
-    va_start(ap, format);
-    verrorcall_dflt(call, format, ap);
-    va_end(ap);
-}
-
-NORET void errorcall(SEXP call, const char *format,...)
-{
-    va_list(ap);
-
     if (call == R_CurrentExpression)
 	/* behave like error( */
 	call = getCurrentCall();
 
-    va_start(ap, format);
-    vsignalError(call, format, ap);
-    va_end(ap);
+    signalError(call, buf);
 
     if (R_ErrorHook != NULL) {
-	char buf[BUFSIZE];
 	void (*hook)(SEXP, char *) = R_ErrorHook;
 	R_ErrorHook = NULL; /* to avoid recursion */
-	va_start(ap, format);
-	Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap);
-	va_end(ap);
 	hook(call, buf);
     }
 
+    errorcall_dflt(call, buf);
+}
+
+NORET void errorcall(SEXP call, const char *format,...)
+{
+    va_list(ap);
     va_start(ap, format);
-    verrorcall_dflt(call, format, ap);
+    do_verrorcall(call, format, ap);
     va_end(ap);
 }
 
-/* Like errorcall, but copies all data for the error message into a buffer
-   before doing anything else. */
-attribute_hidden
-NORET void errorcall_cpy(SEXP call, const char *format, ...)
+void error(const char *format, ...)
 {
-    char buf[BUFSIZE];
-
     va_list(ap);
     va_start(ap, format);
-    Rvsnprintf_mbcs(buf, BUFSIZE, format, ap);
+    /* R_CurrentExpression will be processed as if it was getCurrentCall(), but
+       not before stringifying the format arguments. It's important to process
+       the format arguments first because calling into R may invalidate some of
+       the pointers (e.g. those returned by EncodeChar()). */
+    do_verrorcall(R_CurrentExpression, format, ap);
     va_end(ap);
-
-    errorcall(call, "%s", buf);
 }
 
 // geterrmessage(): Return (the global) 'errbuf' as R string
@@ -953,17 +939,6 @@
     return res;
 }
 
-void error(const char *format, ...)
-{
-    char buf[BUFSIZE];
-
-    va_list(ap);
-    va_start(ap, format);
-    Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap);
-    va_end(ap);
-    errorcall(getCurrentCall(), "%s", buf);
-}
-
 static void try_jump_to_restart(void)
 {
     SEXP list;
@@ -1834,9 +1809,8 @@
     return R_NilValue;
 }
 
-static void vsignalWarning(SEXP call, const char *format, va_list ap)
+static void signalWarning(SEXP call, const char *msg)
 {
-    char buf[BUFSIZE];
     SEXP hooksym, hcall, qcall, qfun;
 
     hooksym = install(".signalSimpleWarning");
@@ -1846,13 +1820,12 @@
 	PROTECT(qfun);
 	PROTECT(qcall = LCONS(qfun, LCONS(call, R_NilValue)));
 	PROTECT(hcall = LCONS(qcall, R_NilValue));
-	Rvsnprintf_mbcs(buf, BUFSIZE - 1, format, ap);
-	hcall = LCONS(mkString(buf), hcall);
+	hcall = LCONS(mkString(msg), hcall);
 	PROTECT(hcall = LCONS(hooksym, hcall));
 	evalKeepVis(hcall, R_GlobalEnv);
 	UNPROTECT(4);
     }
-    else vwarningcall_dflt(call, format, ap);
+    else warningcall_dflt(call, msg);
 }
 
 NORET static void gotoExitingHandler(SEXP cond, SEXP call, SEXP entry)
@@ -1865,20 +1838,18 @@
     findcontext(CTXT_FUNCTION, rho, result);
 }
 
-static void vsignalError(SEXP call, const char *format, va_list ap)
+static void signalError(SEXP call, const char *msg)
 {
     /* This function does not protect or restore the old handler
        stack. On return R_HandlerStack will be R_NilValue (unless
        R_RestartToken is encountered). */
-    char localbuf[BUFSIZE];
     SEXP list;
 
-    Rvsnprintf_mbcs(localbuf, BUFSIZE - 1, format, ap);
     while ((list = findSimpleErrorHandler()) != R_NilValue) {
 	char *buf = errbuf;
 	SEXP entry = CAR(list);
 	R_HandlerStack = CDR(list);
-	Rstrncpy(buf, localbuf, BUFSIZE);
+	Rstrncpy(buf, msg, BUFSIZE);
 	/*	Rvsnprintf(buf, BUFSIZE - 1, format, ap);*/
 	if (IS_CALLING_ENTRY(entry)) {
 	    if (ENTRY_HANDLER(entry) == R_RestartToken) {
@@ -1950,7 +1921,7 @@
 		if (TYPEOF(msg) == STRSXP && LENGTH(msg) > 0)
 		    msgstr = translateChar(STRING_ELT(msg, 0));
 		else error(_("error message not a string"));
-		errorcall_dflt(ecall, "%s", msgstr);
+		errorcall_dflt(ecall, msgstr);
 	    }
 	    else {
 		SEXP hcall = LCONS(h, LCONS(cond, R_NilValue));
@@ -2075,7 +2046,7 @@
     const char *msg = translateChar(STRING_ELT(CAR(args), 0));
     SEXP ecall = CADR(args);
 
-    warningcall_dflt(ecall, "%s", msg);
+    warningcall_dflt(ecall, msg);
     return R_NilValue;
 }
 
@@ -2088,7 +2059,7 @@
     const char *msg = translateChar(STRING_ELT(CAR(args), 0));
     SEXP ecall = CADR(args);
 
-    errorcall_dflt(ecall, "%s", msg);
+    errorcall_dflt(ecall, msg);
 }
 
 
Index: src/main/eval.c
===================================================================
--- src/main/eval.c	(revision 85251)
+++ src/main/eval.c	(working copy)
@@ -1153,7 +1153,7 @@
 	else
 	    tmp = findVar(e, rho);
 	if (tmp == R_UnboundValue)
-	    errorcall_cpy(getLexicalCall(rho),
+	    errorcall(getLexicalCall(rho),
 			  _("object '%s' not found"),
 			  EncodeChar(PRINTNAME(e)));
 	/* if ..d is missing then ddfindVar will signal */
@@ -3473,7 +3473,7 @@
 	       code more consistent. */
 	} else if (isSymbol(CAR(el)) && R_isMissing(CAR(el), rho)) {
 	    /* It was missing */
-	    errorcall_cpy(call,
+	    errorcall(call,
 	                  _("'%s' is missing"),
 	                  EncodeChar(PRINTNAME(CAR(el))));
 #endif
@@ -5574,7 +5574,7 @@
 
 NORET static void UNBOUND_VARIABLE_ERROR(SEXP symbol, SEXP rho)
 {
-    errorcall_cpy(getLexicalCall(rho),
+    errorcall(getLexicalCall(rho),
 		  _("object '%s' not found"),
 		  EncodeChar(PRINTNAME(symbol)));
 }
Index: src/main/printutils.c
===================================================================
--- src/main/printutils.c	(revision 85251)
+++ src/main/printutils.c	(working copy)
@@ -879,9 +879,7 @@
    The pointer returned by EncodeChar points into an internal buffer
    which is overwritten by subsequent calls to EncodeChar/EncodeString.
    It is the responsibility of the caller to copy the result before
-   any subsequent call to EncodeChar/EncodeString may happen. Note that
-   particularly it is NOT safe to pass the result of EncodeChar as 3rd
-   argument to errorcall (errorcall_cpy can be used instead). */
+   any subsequent call to EncodeChar/EncodeString may happen. */
 //attribute_hidden
 const char *EncodeChar(SEXP x)
 {


-- 
Best regards,
Ivan



More information about the R-devel mailing list