From: Nick Bowler Date: Thu, 11 Jan 2024 04:08:14 +0000 (-0500) Subject: libcdecl: Fix parsing of very long parameter lists. X-Git-Tag: v1.3~37 X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/commitdiff_plain/a41aa156f28753613f38924fc856d3dc992cfc56 libcdecl: Fix parsing of very long parameter lists. The way the parameter parser rules are currently arranged requires shifting every parameter symbol before any part of the parameter list is reduced. Thus, functions with a lot of parameters (about 5000 or more) can lead to a parse error since the symbol stack is exhausted (even if there would otherwise be enough memory to allocate this many parameter items). Technically this kind of failure is permitted by the C language, as implementations do not need to support functions with more than 127 parameters. However, it is better to avoid arbitrary limits like this and it is not a big problem to do so. Collecting the parameters in reverse order allows the list elements to be reduced as they are encountered, which avoids excessive use of the symbol stack. The final list then has to be reversed again prior to returning from the parser. This is actually how the prior release 1.2 worked so this is technically a regression fix. Expand some test cases to cover this behaviour. We actually don't have any tests targeting the parameter ordering (although randomdecl and crossparse quickly catch it) so add some of those too. 
--- diff --git a/src/parse.y b/src/parse.y index 0a12f3f..8abc21a 100644 --- a/src/parse.y +++ b/src/parse.y @@ -66,10 +66,14 @@ #define ALLOC_ITEM_DECLSPEC(ptr) ALLOC_ITEM(ptr, declspec, next) #define ALLOC_ITEM_DECL(ptr) ALLOC_ITEM(ptr, decl, next) -#define ALLOC_FUNCTION(ptr, parameters_, variadic_) do { \ +#define ALLOC_FUNCTION_(ptr, parameters_) do { \ ALLOC_ITEM_DECLARATOR(ptr); \ (ptr)->type = CDECL_DECL_FUNCTION; \ (ptr)->u.function.parameters = parameters_; \ +} while (0) + +#define ALLOC_FUNCTION(ptr, parameters_, variadic_) do { \ + ALLOC_FUNCTION_(ptr, parameters_); \ (ptr)->u.function.variadic = variadic_; \ } while (0) @@ -470,21 +474,23 @@ parameter: declspecs declarator { varargs: { $$ = false; } | T_COMMA T_ELLIPSIS { $$ = true; } -parameter_type_list: parameter varargs { - ALLOC_FUNCTION($$, $1, $2); -} | parameter T_COMMA parameter_type_list { - $$ = $3; - $1->next = $$->u.function.parameters; - $$->u.function.parameters = $1; +parameter_type_list: parameter { + ALLOC_FUNCTION_($$, $1); +} | parameter_type_list T_COMMA parameter { + $$ = $1; + $3->next = $$->u.function.parameters; + $$->u.function.parameters = $3; } -parens: T_LPAREN parameter_type_list T_RPAREN { +parens: T_LPAREN parameter_type_list varargs T_RPAREN { + $2->u.function.parameters = reverse_decls($2->u.function.parameters); + $2->u.function.variadic = $3; $$ = $2; } | T_LPAREN declarator_ish T_RPAREN { struct cdecl *fake_params; ALLOC_DECL(fake_params, NULL, $2); - ALLOC_FUNCTION($$, fake_params, false); + ALLOC_FUNCTION_($$, fake_params); } pointer: T_ASTERISK qualifiers direct_declarator { @@ -564,16 +570,18 @@ english_declarator: { english_function: T_FUNCTION T_RETURNING { ALLOC_FUNCTION($$, NULL, false); -} | T_FUNCTION T_LPAREN english_parameter_list T_RPAREN T_RETURNING { +} | T_FUNCTION T_LPAREN english_parameter_list varargs T_RPAREN T_RETURNING { + $3->u.function.parameters = reverse_decls($3->u.function.parameters); + $3->u.function.variadic = $4; $$ = $3; } 
-english_parameter_list: english_parameter varargs { - ALLOC_FUNCTION($$, $1, $2); -} | english_parameter T_COMMA english_parameter_list { - $$ = $3; - $1->next = $$->u.function.parameters; - $$->u.function.parameters = $1; +english_parameter_list: english_parameter { + ALLOC_FUNCTION_($$, $1); +} | english_parameter_list T_COMMA english_parameter { + $$ = $1; + $3->next = $$->u.function.parameters; + $$->u.function.parameters = $3; } typedef_name_qual: T_IDENT qualifiers { diff --git a/tests/decl-good.at b/tests/decl-good.at index df570c8..bb3c168 100644 --- a/tests/decl-good.at +++ b/tests/decl-good.at @@ -1,4 +1,4 @@ -# Copyright © 2020-2021, 2023 Nick Bowler +# Copyright © 2020-2021, 2023-2024 Nick Bowler # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -120,6 +120,22 @@ SIMPLE_DECLS( [[int @<:@n@:>@], [type variable-length array n of int]], [[int @<:@*@:>@], [type variable-length array of int]]) +SIMPLE_DECLS( + [[int f(a, b)], [declare f as function (a, b) returning int]], + [[int f(int, b)], [declare f as function (int, b) returning int]], + [[int f(a, int)], [declare f as function (a, int) returning int]], + [[int (a, b)], [type function (a, b) returning int]], + [[int (int, b)], [type function (int, b) returning int]], + [[int (a, int)], [type function (a, int) returning int]]) + +SIMPLE_DECLS( + [[int f(a, b, ...)], [declare f as function (a, b, ...) returning int]], + [[int f(int, b, ...)], [declare f as function (int, b, ...) returning int]], + [[int f(a, int, ...)], [declare f as function (a, int, ...) returning int]], + [[int (a, b, ...)], [type function (a, b, ...) returning int]], + [[int (int, b, ...)], [type function (int, b, ...) returning int]], + [[int (a, int, ...)], [type function (a, int, ...) 
returning int]]) + SIMPLE_DECLS_EXPLAIN( [[int ((int))], [type function (int) returning int], [int (int)]], [[int (x(int))], [declare x as function (int) returning int], [int x(int)]], diff --git a/tests/stress.at b/tests/stress.at index 605a69a..3ee72d9 100644 --- a/tests/stress.at +++ b/tests/stress.at @@ -135,6 +135,14 @@ AT_DATA([check.awk], BEGIN { FS = "@"; runstart = 0; } END { finish_run(NR); } +gsub(/,[^,)]*/, "~") { + while (match($0, /~+/) > 0) { + l = substr($0, 1, RSTART-1); + r = substr($0, RSTART+RLENGTH); + $0 = l ", [plus " RLENGTH " more parameters]" r; + } +} + $0 != lastline { finish_run(NR-1); lastline = $0; @@ -156,6 +164,8 @@ done cat >test.dat <test.out; status=$?; @@ -163,6 +173,8 @@ $AWK -f check.awk test.out exit $status], [0], [[declare a as int [repeated 16383 more times] +type function (a, [plus 16383 more parameters]) returning int +int (a, [plus 16383 more parameters]) ]]) AT_CLEANUP