[Rd] [External] On PRINTNAME() encoding, EncodeChar(), and being painted into a corner
Ivan Krylov
kry|ov@r00t @end|ng |rom gm@||@com
Tue Oct 3 15:20:41 CEST 2023
Dear Luke Tierney,
Thank you for the reply and apologies for not getting back to you
earlier.
On Fri, 22 Sep 2023 16:14:58 -0500 (CDT)
luke-tierney using uiowa.edu wrote:
> I think it would be best to modify errorcall so errorcall_cpy is not
> necessary. As things are now it is just too easy to forget that
> sometimes errorcall_cpy should be used (and this has lead to some bugs
> recently).
At the end of this e-mail is a large patch that makes errorcall() and
warningcall() safer by processing the format arguments before calling
any R APIs. It's fairly invasive because it rewires all error() /
warning() / errorcall() / warningcall() processing into two common
subroutines that call R_vsnprintf() as soon as possible and keep a
single message buffer afterwards. It passes make check-devel. I am open
to other, less invasive ways to implement this change.
Additionally, I fixed a problem detected by a static analyser: given
unsigned msg_len, max(msg_len - strlen(head), 0) can only be 0 if
msg_len == strlen(head). That's because both msg_len and size_t end up
being promoted to unsigned in order to compute the subtraction. I
couldn't come up with a sufficiently laconic alternative, so I left the
ternary operator in for now.
>> The only solution to the latter problem is an EncodeChar() variant
>> that allocates its memory dynamically. Would R_alloc() be
>> acceptable in this context? With errors, the allocation stack would
>> be quickly reset (except when withCallingHandlers() is in effect?),
>> but with warnings, the code would have to restore it manually every
>> time.
>
> Or allow/require a buffer to be provided. So replacing the calls like
>
> CHAR(PRINTNAME(sym))
>
> with
>
> EncodeSymbol(sym, buf, buf_size)
I can also implement this. Does this mean replacing every occasion of
EncodeChar(PRINTNAME(sym)) with EncodeSymbol(sym, <temporary>, sizeof
<temporary>)?
Index: src/main/envir.c
===================================================================
--- src/main/envir.c (revision 85251)
+++ src/main/envir.c (working copy)
@@ -1582,7 +1582,7 @@
}
rho = ENCLOS(rho);
}
- errorcall_cpy(call,
+ errorcall(call,
_("could not find function \"%s\""),
EncodeChar(PRINTNAME(symbol)));
/* NOT REACHED */
@@ -3924,7 +3924,7 @@
SEXP nsname = PROTECT(callR1(R_getNamespaceNameSymbol, ns));
if (TYPEOF(nsname) != STRSXP || LENGTH(nsname) != 1)
errorcall(call, "bad value returned by `getNamespaceName'");
- errorcall_cpy(call,
+ errorcall(call,
_("'%s' is not an exported object from 'namespace:%s'"),
EncodeChar(PRINTNAME(name)),
CHAR(STRING_ELT(nsname, 0)));
Index: src/main/errors.c
===================================================================
--- src/main/errors.c (revision 85251)
+++ src/main/errors.c (working copy)
@@ -58,7 +58,7 @@
/*
Different values of inError are used to indicate different places
in the error handling:
-inError = 1: In internal error handling, e.g. `verrorcall_dflt`, others.
+inError = 1: In internal error handling, e.g. `errorcall_dflt`, others.
inError = 2: Writing traceback
inError = 3: In user error handler (i.e. options(error=handler))
*/
@@ -387,32 +387,45 @@
return c ? c->call : R_NilValue;
}
-void warning(const char *format, ...)
+/* declarations for internal condition handling */
+
+static void signalError(SEXP call, const char *msg);
+static void signalWarning(SEXP call, const char *msg);
+NORET static void invokeRestart(SEXP, SEXP);
+
+static void impl_vwarning(SEXP call, Rboolean immediate, const char *format, va_list ap)
{
char buf[BUFSIZE], *p;
- va_list(ap);
- va_start(ap, format);
size_t psize;
int pval;
psize = min(BUFSIZE, R_WarnLength+1);
pval = Rvsnprintf_mbcs(buf, psize, format, ap);
- va_end(ap);
p = buf + strlen(buf) - 1;
if(strlen(buf) > 0 && *p == '\n') *p = '\0';
RprintTrunc(buf, pval >= psize);
- SEXP call = PROTECT(getCurrentCall());
- warningcall(call, "%s", buf);
- UNPROTECT(1);
+
+ // must not call into R before stringifying the format arguments
+ int nprotect = 0;
+ if (!call) {
+ call = PROTECT(getCurrentCall());
+ ++nprotect;
+ }
+ if (immediate) immediateWarning = 1;
+ signalWarning(call, buf);
+ if (immediate) immediateWarning = 0;
+ UNPROTECT(nprotect);
}
-/* declarations for internal condition handling */
+void warning(const char *format, ...)
+{
+ va_list(ap);
+ va_start(ap, format);
+ impl_vwarning(NULL, FALSE, format, ap);
+ va_end(ap);
+}
-static void vsignalError(SEXP call, const char *format, va_list ap);
-static void vsignalWarning(SEXP call, const char *format, va_list ap);
-NORET static void invokeRestart(SEXP, SEXP);
-
static void reset_inWarning(void *data)
{
inWarning = 0;
@@ -437,12 +450,11 @@
return nc;
}
-static void vwarningcall_dflt(SEXP call, const char *format, va_list ap)
+static void warningcall_dflt(SEXP call, const char *msg)
{
int w;
SEXP names, s;
const char *dcall;
- char buf[BUFSIZE];
RCNTXT *cptr;
RCNTXT cntxt;
size_t psize;
@@ -480,11 +492,8 @@
inWarning = 1;
if(w >= 2) { /* make it an error */
- psize = min(BUFSIZE, R_WarnLength+1);
- pval = Rvsnprintf_mbcs(buf, psize, format, ap);
- RprintTrunc(buf, pval >= psize);
inWarning = 0; /* PR#1570 */
- errorcall(call, _("(converted from warning) %s"), buf);
+ errorcall(call, _("(converted from warning) %s"), msg);
}
else if(w == 1) { /* print as they happen */
char *tr;
@@ -492,18 +501,16 @@
dcall = CHAR(STRING_ELT(deparse1s(call), 0));
} else dcall = "";
psize = min(BUFSIZE, R_WarnLength+1);
- pval = Rvsnprintf_mbcs(buf, psize, format, ap);
- RprintTrunc(buf, pval >= psize);
if(dcall[0] == '\0') REprintf(_("Warning:"));
else {
REprintf(_("Warning in %s :"), dcall);
if(!(noBreakWarning ||
- ( mbcslocale && 18 + wd(dcall) + wd(buf) <= LONGWARN) ||
- (!mbcslocale && 18 + strlen(dcall) + strlen(buf) <= LONGWARN)))
+ ( mbcslocale && 18 + wd(dcall) + wd(msg) <= LONGWARN) ||
+ (!mbcslocale && 18 + strlen(dcall) + strlen(msg) <= LONGWARN)))
REprintf("\n ");
}
- REprintf(" %s\n", buf);
+ REprintf(" %s\n", msg);
if(R_ShowWarnCalls && call != R_NilValue) {
tr = R_ConciseTraceback(call, 0);
if (strlen(tr)) {REprintf(_("Calls:")); REprintf(" %s\n", tr);}
@@ -512,10 +519,11 @@
else if(w == 0) { /* collect them */
if(!R_CollectWarnings) setupwarnings();
if(R_CollectWarnings < R_nwarnings) {
+ char buf[BUFSIZE];
SET_VECTOR_ELT(R_Warnings, R_CollectWarnings, call);
psize = min(BUFSIZE, R_WarnLength+1);
- pval = Rvsnprintf_mbcs(buf, psize, format, ap);
- RprintTrunc(buf, pval >= psize);
+ Rstrncpy(buf, msg, psize);
+ RprintTrunc(buf, strlen(msg) >= psize);
if(R_ShowWarnCalls && call != R_NilValue) {
char *tr = R_ConciseTraceback(call, 0);
size_t nc = strlen(tr);
@@ -535,20 +543,12 @@
inWarning = 0;
}
-static void warningcall_dflt(SEXP call, const char *format,...)
-{
- va_list(ap);
- va_start(ap, format);
- vwarningcall_dflt(call, format, ap);
- va_end(ap);
-}
-
void warningcall(SEXP call, const char *format, ...)
{
va_list(ap);
va_start(ap, format);
- vsignalWarning(call, format, ap);
+ impl_vwarning(call, FALSE, format, ap);
va_end(ap);
}
@@ -555,12 +555,9 @@
void warningcall_immediate(SEXP call, const char *format, ...)
{
va_list(ap);
-
- immediateWarning = 1;
va_start(ap, format);
- vsignalWarning(call, format, ap);
+ impl_vwarning(call, TRUE, format, ap);
va_end(ap);
- immediateWarning = 0;
}
static void cleanup_PrintWarnings(void *data)
@@ -741,7 +738,7 @@
/* Construct newline terminated error message, write it to global errbuf, and
possibly display with REprintf. */
NORET static void
-verrorcall_dflt(SEXP call, const char *format, va_list ap)
+errorcall_dflt(SEXP call, const char *msg)
{
if (allowedConstsChecks > 0) {
allowedConstsChecks--;
@@ -758,7 +755,7 @@
REprintf(_("Error during wrapup: "));
/* this does NOT try to print the call since that could
cause a cascade of error calls */
- Rvsnprintf_mbcs(errbuf, sizeof(errbuf), format, ap);
+ Rstrncpy(errbuf, msg, sizeof(errbuf));
REprintf("%s\n", errbuf);
}
if (R_Warnings != R_NilValue) {
@@ -801,7 +798,7 @@
}
const char *dcall = CHAR(STRING_ELT(deparse1s(call), 0));
- Rsnprintf_mbcs(tmp2, BUFSIZE, "%s", head);
+ Rstrncpy(tmp2, head, BUFSIZE);
if (skip != NA_INTEGER) {
PROTECT(srcloc = GetSrcLoc(R_GetCurrentSrcref(skip)));
protected++;
@@ -811,7 +808,7 @@
dcall, CHAR(STRING_ELT(srcloc, 0)));
}
- Rvsnprintf_mbcs(tmp, max(msg_len - strlen(head), 0), format, ap);
+ Rstrncpy(tmp, msg, msg_len > strlen(head) ? msg_len - strlen(head) : 0);
if (strlen(tmp2) + strlen(tail) + strlen(tmp) < BUFSIZE) {
if(len) Rsnprintf_mbcs(errbuf, BUFSIZE,
_("Error in %s (from %s) : "),
@@ -839,15 +836,15 @@
}
ERRBUFCAT(tmp);
} else {
- Rsnprintf_mbcs(errbuf, BUFSIZE, _("Error: "));
+ Rstrncpy(errbuf, _("Error: "), BUFSIZE);
ERRBUFCAT(tmp);
}
UNPROTECT(protected);
}
else {
- Rsnprintf_mbcs(errbuf, BUFSIZE, _("Error: "));
+ Rstrncpy(errbuf, _("Error: "), BUFSIZE);
p = errbuf + strlen(errbuf);
- Rvsnprintf_mbcs(p, max(msg_len - strlen(errbuf), 0), format, ap);
+ Rstrncpy(p, msg, msg_len > strlen(errbuf) ? msg_len - strlen(errbuf) : 0);
}
/* Approximate truncation detection, may produce false positives. Assumes
R_MB_CUR_MAX > 0. Note: approximation is fine, as the string may include
@@ -892,55 +889,44 @@
inError = oldInError;
}
-NORET static void errorcall_dflt(SEXP call, const char *format,...)
-{
- va_list(ap);
+NORET static void do_verrorcall(SEXP call, const char *format, va_list ap) {
+ // must be careful to process the format arguments before calling into R
+ char buf[BUFSIZE];
+ Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap);
- va_start(ap, format);
- verrorcall_dflt(call, format, ap);
- va_end(ap);
-}
-
-NORET void errorcall(SEXP call, const char *format,...)
-{
- va_list(ap);
-
if (call == R_CurrentExpression)
/* behave like error( */
call = getCurrentCall();
- va_start(ap, format);
- vsignalError(call, format, ap);
- va_end(ap);
+ signalError(call, buf);
if (R_ErrorHook != NULL) {
- char buf[BUFSIZE];
void (*hook)(SEXP, char *) = R_ErrorHook;
R_ErrorHook = NULL; /* to avoid recursion */
- va_start(ap, format);
- Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap);
- va_end(ap);
hook(call, buf);
}
+ errorcall_dflt(call, buf);
+}
+
+NORET void errorcall(SEXP call, const char *format,...)
+{
+ va_list(ap);
va_start(ap, format);
- verrorcall_dflt(call, format, ap);
+ do_verrorcall(call, format, ap);
va_end(ap);
}
-/* Like errorcall, but copies all data for the error message into a buffer
- before doing anything else. */
-attribute_hidden
-NORET void errorcall_cpy(SEXP call, const char *format, ...)
+void error(const char *format, ...)
{
- char buf[BUFSIZE];
-
va_list(ap);
va_start(ap, format);
- Rvsnprintf_mbcs(buf, BUFSIZE, format, ap);
+ /* R_CurrentExpression will be processed as if it was getCurrentCall(), but
+ not before stringifying the format arguments. It's important to process
+ the format arguments first because calling into R may invalidate some of
+ the pointers (e.g. those returned by EncodeChar()). */
+ do_verrorcall(R_CurrentExpression, format, ap);
va_end(ap);
-
- errorcall(call, "%s", buf);
}
// geterrmessage(): Return (the global) 'errbuf' as R string
@@ -953,17 +939,6 @@
return res;
}
-void error(const char *format, ...)
-{
- char buf[BUFSIZE];
-
- va_list(ap);
- va_start(ap, format);
- Rvsnprintf_mbcs(buf, min(BUFSIZE, R_WarnLength), format, ap);
- va_end(ap);
- errorcall(getCurrentCall(), "%s", buf);
-}
-
static void try_jump_to_restart(void)
{
SEXP list;
@@ -1834,9 +1809,8 @@
return R_NilValue;
}
-static void vsignalWarning(SEXP call, const char *format, va_list ap)
+static void signalWarning(SEXP call, const char *msg)
{
- char buf[BUFSIZE];
SEXP hooksym, hcall, qcall, qfun;
hooksym = install(".signalSimpleWarning");
@@ -1846,13 +1820,12 @@
PROTECT(qfun);
PROTECT(qcall = LCONS(qfun, LCONS(call, R_NilValue)));
PROTECT(hcall = LCONS(qcall, R_NilValue));
- Rvsnprintf_mbcs(buf, BUFSIZE - 1, format, ap);
- hcall = LCONS(mkString(buf), hcall);
+ hcall = LCONS(mkString(msg), hcall);
PROTECT(hcall = LCONS(hooksym, hcall));
evalKeepVis(hcall, R_GlobalEnv);
UNPROTECT(4);
}
- else vwarningcall_dflt(call, format, ap);
+ else warningcall_dflt(call, msg);
}
NORET static void gotoExitingHandler(SEXP cond, SEXP call, SEXP entry)
@@ -1865,20 +1838,18 @@
findcontext(CTXT_FUNCTION, rho, result);
}
-static void vsignalError(SEXP call, const char *format, va_list ap)
+static void signalError(SEXP call, const char *msg)
{
/* This function does not protect or restore the old handler
stack. On return R_HandlerStack will be R_NilValue (unless
R_RestartToken is encountered). */
- char localbuf[BUFSIZE];
SEXP list;
- Rvsnprintf_mbcs(localbuf, BUFSIZE - 1, format, ap);
while ((list = findSimpleErrorHandler()) != R_NilValue) {
char *buf = errbuf;
SEXP entry = CAR(list);
R_HandlerStack = CDR(list);
- Rstrncpy(buf, localbuf, BUFSIZE);
+ Rstrncpy(buf, msg, BUFSIZE);
/* Rvsnprintf(buf, BUFSIZE - 1, format, ap);*/
if (IS_CALLING_ENTRY(entry)) {
if (ENTRY_HANDLER(entry) == R_RestartToken) {
@@ -1950,7 +1921,7 @@
if (TYPEOF(msg) == STRSXP && LENGTH(msg) > 0)
msgstr = translateChar(STRING_ELT(msg, 0));
else error(_("error message not a string"));
- errorcall_dflt(ecall, "%s", msgstr);
+ errorcall_dflt(ecall, msgstr);
}
else {
SEXP hcall = LCONS(h, LCONS(cond, R_NilValue));
@@ -2075,7 +2046,7 @@
const char *msg = translateChar(STRING_ELT(CAR(args), 0));
SEXP ecall = CADR(args);
- warningcall_dflt(ecall, "%s", msg);
+ warningcall_dflt(ecall, msg);
return R_NilValue;
}
@@ -2088,7 +2059,7 @@
const char *msg = translateChar(STRING_ELT(CAR(args), 0));
SEXP ecall = CADR(args);
- errorcall_dflt(ecall, "%s", msg);
+ errorcall_dflt(ecall, msg);
}
Index: src/main/eval.c
===================================================================
--- src/main/eval.c (revision 85251)
+++ src/main/eval.c (working copy)
@@ -1153,7 +1153,7 @@
else
tmp = findVar(e, rho);
if (tmp == R_UnboundValue)
- errorcall_cpy(getLexicalCall(rho),
+ errorcall(getLexicalCall(rho),
_("object '%s' not found"),
EncodeChar(PRINTNAME(e)));
/* if ..d is missing then ddfindVar will signal */
@@ -3473,7 +3473,7 @@
code more consistent. */
} else if (isSymbol(CAR(el)) && R_isMissing(CAR(el), rho)) {
/* It was missing */
- errorcall_cpy(call,
+ errorcall(call,
_("'%s' is missing"),
EncodeChar(PRINTNAME(CAR(el))));
#endif
@@ -5574,7 +5574,7 @@
NORET static void UNBOUND_VARIABLE_ERROR(SEXP symbol, SEXP rho)
{
- errorcall_cpy(getLexicalCall(rho),
+ errorcall(getLexicalCall(rho),
_("object '%s' not found"),
EncodeChar(PRINTNAME(symbol)));
}
Index: src/main/printutils.c
===================================================================
--- src/main/printutils.c (revision 85251)
+++ src/main/printutils.c (working copy)
@@ -879,9 +879,7 @@
The pointer returned by EncodeChar points into an internal buffer
which is overwritten by subsequent calls to EncodeChar/EncodeString.
It is the responsibility of the caller to copy the result before
- any subsequent call to EncodeChar/EncodeString may happen. Note that
- particularly it is NOT safe to pass the result of EncodeChar as 3rd
- argument to errorcall (errorcall_cpy can be used instead). */
+ any subsequent call to EncodeChar/EncodeString may happen. */
//attribute_hidden
const char *EncodeChar(SEXP x)
{
--
Best regards,
Ivan
More information about the R-devel
mailing list