From 8d1fb21fe111eba2b54579d5115017f40d0d949c Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Fri, 28 Jul 2023 00:00:12 -0400 Subject: [PATCH] libcdecl: Work around GNU libc snprintf bug. Apparently, with GNU libc (even contemporary versions), you cannot simply pass an arbitrarily-large length. It appears that glibc internally adds the provided length to the provided destination pointer, and if that calculation overflows, then the actual output is silently truncated. I believe this to be an error in glibc; the only thing the specification for snprintf says about the length n is that output characters beyond the "n-1st" are discarded, which doesn't imply that it is invalid for a program to pass large length values. But it doesn't really matter; we just have to deal with this behaviour since it exists. This only affects the parser syntax error path, since 41ff7ec97691 ("libcdecl: Simplify Bison error message reporting.") changed that to use cdecl__strlcpy with a large size (INT_MAX). The normal output path always passes an actual buffer length. The bison-generated syntax error code does not track buffer lengths, but it does ensure the buffer is large enough so we can use strlen to infer a suitable length. A new test case is added to sort of directly test the issue; without the fix it reliably fails for me on 32-bit x86, as the stack is above 2G. The invalid character test also fails because of this issue, but for a subtly different reason. --- src/parse.y | 8 +++++++- tests/general.at | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/parse.y b/src/parse.y index bf22718..14d593e 100644 --- a/src/parse.y +++ b/src/parse.y @@ -109,7 +109,13 @@ * name strings can be used directly in error messages and there is no * need for any string processing. */ -#define yytnamerr(a, b) cdecl__strlcpy(a, b, (a) ? INT_MAX : 0) +#define yytnamerr(a, b) ( (a) ? 
yytnamerr_copy(a, b) \ + : strlen(b) ) + +static size_t yytnamerr_copy(char *dst, const char *src) +{ + return cdecl__strlcpy(dst, src, strlen(src)+1); /* size is the exact length plus NUL, never an oversized bound */ +} %} %code requires { diff --git a/tests/general.at b/tests/general.at index bc22e44..21261ed 100644 --- a/tests/general.at +++ b/tests/general.at @@ -150,6 +150,41 @@ AT_CHECK([$AWK '{ print $NF; }' stderr], [0], AT_CLEANUP +dnl Ensure that parse error messages for misplaced keywords correctly +dnl include the keyword itself. +AT_SETUP([cdecl99 unexpected keyword error messages]) + +# We use the English syntax to reliably force a syntax error where we want +# it, as the "declare" form takes an identifier and not any other token. +AT_DATA([test.dat], +[[declare signed as int +declare typedef as int +declare volatile as int +declare inline as int +]]) + +AT_DATA([test.awk], +[[{ + for (i = 1; i <= NF; i++) { + if ($i == "unexpected") { + sub(/,$/, "", $(i+1)); + print $(i+1); + break; + } + } +} +]]) + +AT_CHECK([LC_ALL=C cdecl99 -f test.dat || exit 42], [42], [], [stderr]) +AT_CHECK([$AWK -f test.awk stderr], [0], +[[signed +typedef +volatile +inline +]]) + +AT_CLEANUP + AT_SETUP([cdecl99 interactive mode]) AT_DATA([test.dat], -- 2.43.2