From a118e977106144aa753045c59e9f5278facc48d7 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Wed, 10 Jan 2024 20:09:46 -0500 Subject: [PATCH] libcdecl: Fix parsing of very long specifier lists. Most of the current parser rules that deal with specifiers are arranged such that all the relevant specifier symbols are shifted before any part of the specifier list is reduced. Thus, very long specifier lists (with about 10000 or more specifiers) can lead to a parse error as the symbol stack is exhausted (even if there would otherwise be enough memory to allocate this many specifier items). Since the C language allows certain specifiers to be repeated any number of times, there are actually valid declarations with so many specifiers. Simply collecting the specifiers in reverse order allows for the list elements to be reduced as they are encountered, which avoids excessive use of the symbol stack. The order in which specifiers are parsed does not matter and we can return these lists in any order. Add a new test case to cover this behaviour. --- src/parse.y | 52 +++++++++++++++++++++++++++++++------------------ tests/stress.at | 37 +++++++++++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 21 deletions(-) diff --git a/src/parse.y b/src/parse.y index 6d349f7..3a17b99 100644 --- a/src/parse.y +++ b/src/parse.y @@ -211,6 +211,21 @@ static void join_specs(struct cdecl_declspec *a, struct cdecl_declspec *b) a->next = b; } +/* + * Join three specifier lists into a single list, and returns the head of + * the new list. + * + * The list "b" is assumed to be a singleton list. + */ +static struct cdecl_declspec *join_specs3(struct cdecl_declspec *a, + struct cdecl_declspec *b, + struct cdecl_declspec *c) +{ + b->next = c; + join_specs(b, a); + return b; +} + /* * Alter an abstract declarator (type name) to declare an identifier instead, * used by the English parser rules to reduce "identifier as type" sequences. 
@@ -305,7 +320,7 @@ static struct cdecl_declarator *nulldecl(void) %type typespec_simple typespec_tagged %type declspec_notype declspec_noid typespec_noid typespec %type qualifier qualifiers -%type declspecs declspecs_noid +%type declspecs declspecs_notype declspecs_noid %type direct_declarator declarator pointer array parens postfix %type direct_declarator_ish declarator_ish parameter_type_list %type cdecl declaration declarators declarator_wrap parameter @@ -348,17 +363,18 @@ declaration: declspecs declarators semi { * unexpected parses; libcdecl applies a simplification step to the resulting * parse tree afterwards. */ -declspecs: declspec_notype declspecs { - $$ = $1; - $$->next = $2; -} | typespec declspecs_noid { - $$ = $1; - $$->next = $2; +declspecs: declspecs_notype typespec declspecs_noid { + $$ = join_specs3($1, $2, $3); } -declspecs_noid: { $$ = NULL; } | declspec_noid declspecs_noid { - $$ = $1; - $$->next = $2; +declspecs_notype: { $$ = NULL; } | declspecs_notype declspec_notype { + $$ = $2; + $$->next = $1; +} + +declspecs_noid: { $$ = NULL; } | declspecs_noid declspec_noid { + $$ = $2; + $$->next = $1; } qualifiers: { $$ = NULL; } | qualifiers qualifier { @@ -490,16 +506,16 @@ english: T_DECLARE T_IDENT T_AS english_declaration { * over reducing this empty rule; see below. */ storage_func_specs: %prec T_TYPE { $$ = NULL; } -storage_func_specs: declspec_simple storage_func_specs { - ALLOC_DECLSPEC($$, $1); - $$->next = $2; +storage_func_specs: storage_func_specs declspec_simple { + ALLOC_DECLSPEC($$, $2); + $$->next = $1; } type_qual_spec: typespec_noid | qualifier -type_qual_specs: { $$ = NULL; } | type_qual_spec type_qual_specs { - $$ = $1; - $$->next = $2; +type_qual_specs: { $$ = NULL; } | type_qual_specs type_qual_spec { + $$ = $2; + $$->next = $1; } /* @@ -508,9 +524,7 @@ type_qual_specs: { $$ = NULL; } | type_qual_spec type_qual_specs { * together three different specifiers lists. 
*/ post_specs: qualifiers typespec type_qual_specs { - $2->next = $3; - join_specs($2, $1); - $$ = $2; + $$ = join_specs3($1, $2, $3); } english_declaration: storage_func_specs english_declarator post_specs { diff --git a/tests/stress.at b/tests/stress.at index fbdb368..40e8812 100644 --- a/tests/stress.at +++ b/tests/stress.at @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -AT_BANNER([Randomized tests]) +AT_BANNER([Stress tests]) dnl Verify the RNG implementation TEST_TAP_SIMPLE([xoshiro256p sanity], [rng-test], @@ -83,7 +83,7 @@ AT_CHECK([$AWK -f sanity.awk decls | LC_ALL=C sort], [0], [expout]) AT_CLEANUP -AT_SETUP([random cross-parse]) +AT_SETUP([Random crossparse]) TEST_NEED_PROGRAM([randomdecl]) TEST_NEED_PROGRAM([crossparse]) @@ -92,3 +92,36 @@ AS_ECHO(["Using seed $random_seed"]) >&AS_MESSAGE_LOG_FD AT_CHECK([randomdecl -n "$random_iter" -s "$random_seed"],, [stdout-nolog]) AT_CHECK([crossparse -f stdout]) AT_CLEANUP + +# Check that we can parse declarations with more than 10000 specifiers. +AT_SETUP([Excessive specifiers]) + +s="const" +for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do + AS_VAR_APPEND([s], [" $s"]) +done + +cat >test.dat <>test.dat <