From a2c3dad92b2bf55488174c203563224880380c9b Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Wed, 10 Jan 2024 22:28:01 -0500 Subject: [PATCH] libcdecl: Fix parsing of very long declarator lists. The way the toplevel declaration parser is currently arranged requires shifting every declarator symbol before any part of the full declarator list is reduced. Thus, very long declarations (with about 5000 or more full declarators) can lead to a parse error since the symbol stack is exhausted (even if there would otherwise be enough memory to allocate this many declarator items). Technically this kind of failure is permitted by the C language, as implementations are not required to support declarations of this size. However, it is better to avoid arbitrary limits like this and it is not a big problem to do so. Simply collecting the declarators in reverse order allows for the list elements to be reduced as they are encountered, which avoids excessive use of the symbol stack. However in this case the order matters, so the final list must be reversed before it is returned from the parser. Add a new test case to cover this behaviour. --- src/parse.y | 28 +++++++++++++++++++++++----- tests/stress.at | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/src/parse.y b/src/parse.y index 3a17b99..0a12f3f 100644 --- a/src/parse.y +++ b/src/parse.y @@ -226,11 +226,29 @@ static struct cdecl_declspec *join_specs3(struct cdecl_declspec *a, return b; } +/* + * Reverse the order of a "struct cdecl" list, and return the new first + * element of the list (i.e., the last element of the original list). 
+ */ +static struct cdecl *reverse_decls(struct cdecl *decl) +{ + struct cdecl *prev, *next; + + for (prev = NULL; decl; decl = next) { + next = decl->next; + decl->next = prev; + prev = decl; + } + + return prev; +} + /* * Alter an abstract declarator (type name) to declare an identifier instead, * used by the English parser rules to reduce "identifier as type" sequences. */ -static struct cdecl *insert_identifier(struct cdecl *decl, struct parse_item *ident) +static struct cdecl * +insert_identifier(struct cdecl *decl, struct parse_item *ident) { struct cdecl_declarator *d, **p = &decl->declarators; @@ -343,7 +361,7 @@ cdecl: english | declaration semi: | T_SEMICOLON declaration: declspecs declarators semi { - $$ = $2; + $$ = reverse_decls($2); $$->specifiers = $1; }; @@ -382,9 +400,9 @@ qualifiers: { $$ = NULL; } | qualifiers qualifier { $$->next = $1; } -declarators: declarator_wrap | declarator_wrap T_COMMA declarators { - $$ = $1; - $$->next = $3; +declarators: declarator_wrap | declarators T_COMMA declarator_wrap { + $$ = $3; + $$->next = $1; } declarator_wrap: declarator { diff --git a/tests/stress.at b/tests/stress.at index 40e8812..605a69a 100644 --- a/tests/stress.at +++ b/tests/stress.at @@ -125,3 +125,44 @@ inline int f() ]]) AT_CLEANUP + +# Check that we can parse declarations with more than 10000 declarators. +AT_SETUP([Excessive declarators]) + +AT_DATA([check.awk], +[[# We don't need any field splitting, so choose a character that does not +# appear in C code to avoid tripping over 199-field limit in HP-UX 11 awk. 
+BEGIN { FS = "@"; runstart = 0; } +END { finish_run(NR); } + +$0 != lastline { + finish_run(NR-1); + lastline = $0; + runstart = NR; + print; +} + +function finish_run(nr) { + count = nr - runstart; + if (count > 0) + print "[repeated " count " more times]"; +} +]]) + +a="a" +for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do + AS_VAR_APPEND([a], [",$a"]) +done + +cat >test.dat <<EOF +explain int $a; +EOF + +AT_CHECK([cdecl99 --filename=test.dat >test.out; status=$?; +$AWK -f check.awk test.out +exit $status], [0], +[[declare a as int +[repeated 16383 more times] +]]) + +AT_CLEANUP -- 2.43.2