3 * Parser for C declarations.
4 * Copyright © 2011-2012, 2021, 2023 Nick Bowler
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 %name-prefix "cdecl__yy"
22 %parse-param {void *scanner}
23 %parse-param {struct cdecl **out}
24 %lex-param {yyscan_t scanner}
36 #include "cdecl-internal.h"
39 #define FAIL(msg) do { \
40 yyerror(&yylloc, NULL, NULL, msg); \
44 #define ALLOC(ptr, size) do { \
45 (ptr) = malloc(size); \
47 cdecl__errmsg(CDECL__ENOMEM); \
52 #define ALLOC_FUNCTION(ptr, parameters_, variadic_) do { \
53 ALLOC(ptr, sizeof (struct cdecl_declarator)); \
54 (ptr)->type = CDECL_DECL_FUNCTION; \
55 (ptr)->u.function.parameters = parameters_; \
56 (ptr)->u.function.variadic = variadic_; \
59 #define ALLOC_ARRAY(ptr, length_) do { \
60 ALLOC(ptr, sizeof (struct cdecl_declarator)); \
61 (ptr)->type = CDECL_DECL_ARRAY; \
62 (ptr)->u.array.vla = NULL; \
63 (ptr)->u.array.length = length_; \
66 #define ALLOC_POINTER(ptr, qualifiers_, child_) do { \
67 ALLOC(ptr, sizeof (struct cdecl_declarator)); \
68 (ptr)->child = child_; \
69 (ptr)->type = CDECL_DECL_POINTER; \
70 (ptr)->u.pointer.qualifiers = qualifiers_; \
73 #define ALLOC_DECLSPEC(ptr, type_) do { \
74 ALLOC(ptr, sizeof (struct cdecl_declspec)); \
75 (ptr)->type = type_; \
76 (ptr)->ident = NULL; \
79 #define ALLOC_DECL(ptr, specifiers_, declarators_) do { \
80 ALLOC(ptr, sizeof (struct cdecl)); \
81 (ptr)->specifiers = specifiers_; \
82 (ptr)->declarators = declarators_; \
87 * With the postprocessing performed by fix-yytname.awk, all the symbol
88 * name strings can be used directly in error messages and there is no
89 * need for any string processing.
/*
 * Replacement for Bison's yytnamerr hook.  Bison calls it with a null
 * destination to measure a symbol name and with a real buffer to copy it;
 * here a null destination gets a size limit of 0 and any other destination
 * gets a limit of INT_MAX.
 * NOTE(review): this relies on cdecl__strlcpy returning the source length
 * and writing nothing when the limit is 0 -- confirm against its definition.
 */
91 #define yytnamerr(a, b) cdecl__strlcpy(a, b, (a) ? INT_MAX : 0)
99 void cdecl__free(struct cdecl *);
100 int cdecl__yyparse(void *scanner, struct cdecl **out);
101 const char *cdecl__token_name(unsigned token);
108 struct cdecl_declspec *declspec;
109 struct cdecl_declarator *declarator;
111 struct parse_item *item;
115 static void yyerror(YYLTYPE *, yyscan_t, struct cdecl **, const char *);
116 static void free_decl(struct cdecl *);
118 static void free_declspec(struct cdecl_declspec *x)
120 struct cdecl_declspec *p;
128 static void free_declarator(struct cdecl_declarator *x)
130 struct cdecl_declarator *p;
136 case CDECL_DECL_NULL:
138 case CDECL_DECL_IDENT:
139 case CDECL_DECL_ARRAY:
141 case CDECL_DECL_POINTER:
142 free_declspec(x->u.pointer.qualifiers);
144 case CDECL_DECL_FUNCTION:
145 free_decl(x->u.function.parameters);
156 static void free_decl(struct cdecl *x)
163 /* The specifiers may be shared by an entire chain. */
164 if (!p || p->specifiers != x->specifiers)
165 free_declspec(x->specifiers);
167 free_declarator(x->declarators);
173 void cdecl__free(struct cdecl *decl)
179 * Join two declaration specifier lists into a single list, with "a" being the
180 * head of the new list.
182 * The list "a" is assumed to be nonempty.
184 static void join_specs(struct cdecl_declspec *a, struct cdecl_declspec *b)
192 * Alter an abstract declarator (type name) to declare an identifier instead,
193 * used by the English parser rules to reduce "identifier as type" sequences.
195 static struct cdecl *insert_identifier(struct cdecl *decl, struct parse_item *ident)
197 struct cdecl_declarator *d, **p = &decl->declarators;
199 while ((d = *p)->child)
202 *p = &ident->u.declarator;
206 static struct cdecl_declarator *nulldecl(void)
208 static const struct cdecl_declarator nulldecl = {0};
209 return (void *)&nulldecl;
211 #define NULLDECL (nulldecl())
215 %destructor { free($$); } <item>
216 %destructor { free_declspec($$); } <declspec>
217 %destructor { free_declarator($$); } <declarator>
218 %destructor { free_decl($$); } <decl>
223 %token <item> T_IDENT "identifier"
224 %token <uintval> T_UINT "integer constant"
226 %token T_SEMICOLON ";"
227 %token T_ASTERISK "*"
230 %token T_LBRACKET "["
231 %token T_RBRACKET "]"
233 %token T_ELLIPSIS "..."
235 %token <spectype> T_TYPEDEF "typedef"
236 %token <spectype> T_EXTERN "extern"
237 %token <spectype> T_STATIC "static"
238 %token <spectype> T_AUTO "auto"
239 %token <spectype> T_REGISTER "register"
241 %token <spectype> T_INLINE "inline"
243 %token <spectype> T_RESTRICT "restrict"
244 %token <spectype> T_VOLATILE "volatile"
245 %token <spectype> T_CONST "const"
247 %token <spectype> T_VOID "void"
248 %token <spectype> T_CHAR "char"
249 %token <spectype> T_SHORT "short"
250 %token <spectype> T_INT "int"
251 %token <spectype> T_LONG "long"
252 %token <spectype> T_FLOAT "float"
253 %token <spectype> T_DOUBLE "double"
254 %token <spectype> T_SIGNED "signed"
255 %token <spectype> T_UNSIGNED "unsigned"
256 %token <spectype> T_BOOL "_Bool"
257 %token <spectype> T_COMPLEX "_Complex"
258 %token <spectype> T_IMAGINARY "_Imaginary"
260 %token <spectype> T_STRUCT "struct"
261 %token <spectype> T_UNION "union"
262 %token <spectype> T_ENUM "enum"
268 %token T_DECLARE "declare"
269 %token T_POINTER "pointer"
270 %token T_FUNCTION "function"
271 %token T_RETURNING "returning"
272 %token T_ARRAY "array"
276 %token T_VLA "variable-length"
278 %type <item> vla_ident
279 %type <uintval> array_length
280 %type <boolval> varargs
281 %type <spectype> declspec_simple qualifier_simple
282 %type <spectype> typespec_simple typespec_tagged
283 %type <declspec> declspec_notype declspec_noid typespec_noid typespec
284 %type <declspec> qualifier qualifiers
285 %type <declspec> declspecs declspecs_noid
286 %type <declarator> direct_declarator declarator pointer array parens postfix
287 %type <declarator> direct_declarator_ish declarator_ish parameter_type_list
288 %type <decl> cdecl declaration declarators declarator_wrap parameter
290 %type <item> english_vla
291 %type <declspec> storage_func_specs post_specs
292 %type <declspec> type_qual_spec type_qual_specs typedef_name_qual
293 %type <declarator> english_declarator english_array english_function
294 %type <declarator> english_parameter_list null_decl
295 %type <decl> english english_declaration english_parameter
297 /* Precedence declaration to avoid conflict in english_parameter; see below. */
/*
 * Hand the value of the cdecl nonterminal back to the caller through the
 * "out" parse parameter (declared with %parse-param as struct cdecl **).
 */
303 input: cdecl { *out = $1; }
304 cdecl: english | declaration
307 declaration: declspecs declarators semi {
313 * We support parsing declarations using arbitrary identifiers as type
314 * specifiers (a la C typedef). To avoid confusion with identifiers that
315 * may also be used as declarators, note the following:
317 * (a) Every valid C declaration must have at least one type specifier, and
318 * (b) Valid declarations with typedef names have exactly one type specifier.
320 * So the rule applied when parsing specifiers is: an identifier is a type
321 * specifier only if we have not yet seen any type specifiers whatsoever
322 * (within one declaration specifier list).
324 * Treating identifiers as type specifiers by default can lead to strange and
325 * unexpected parses; libcdecl applies a simplification step to the resulting
326 * parse tree afterwards.
328 declspecs: declspec_notype declspecs {
331 } | typespec declspecs_noid {
336 declspecs_noid: { $$ = NULL; } | declspec_noid declspecs_noid {
341 qualifiers: { $$ = NULL; } | qualifiers qualifier {
346 declarators: declarator_wrap | declarator_wrap T_COMMA declarators {
351 declarator_wrap: declarator {
352 ALLOC_DECL($$, NULL, $1);
355 declspec_simple: T_AUTO
362 typespec_simple: T_VOID
375 typespec_tagged: T_STRUCT | T_UNION | T_ENUM | { $$ = CDECL_TYPE_IDENT; }
377 qualifier_simple: T_CONST
/*
 * Wrap the bare specifier codes produced by the *_simple rules in newly
 * allocated struct cdecl_declspec nodes via ALLOC_DECLSPEC.
 *
 * In declspec_notype the action is attached to the declspec_simple
 * alternative only; the qualifier alternative has no explicit action, so
 * its value is passed through unchanged.
 */
381 declspec_notype: qualifier | declspec_simple { ALLOC_DECLSPEC($$, $1); }
382 typespec_noid: typespec_simple { ALLOC_DECLSPEC($$, $1); }
383 qualifier: qualifier_simple { ALLOC_DECLSPEC($$, $1); }
385 typespec: typespec_noid | typespec_tagged T_IDENT {
386 /* Compiler should be able to elide this assignment. */
387 $2->u.declspec.ident = $2->u.declarator.u.ident;
389 $$ = &$2->u.declspec;
393 declspec_noid: declspec_notype | typespec_noid
395 vla_ident: T_IDENT | T_ASTERISK {
396 if (!($$ = cdecl__alloc_item(1)))
401 array: T_LBRACKET array_length T_RBRACKET {
403 } | T_LBRACKET vla_ident T_RBRACKET {
404 $$ = &$2->u.declarator;
405 $$->type = CDECL_DECL_ARRAY;
406 $$->u.array.vla = $$->u.ident;
407 $$->u.array.length = 0;
410 parameter: declspecs declarator {
411 ALLOC_DECL($$, $1, $2);
/*
 * Optional ", ..." suffix of a parameter list: false when absent, true when
 * present.  The parameter list rules pass this value to ALLOC_FUNCTION as
 * the function declarator's variadic flag.
 */
414 varargs: { $$ = false; } | T_COMMA T_ELLIPSIS { $$ = true; }
416 parameter_type_list: parameter varargs {
417 ALLOC_FUNCTION($$, $1, $2);
418 } | parameter T_COMMA parameter_type_list {
420 $1->next = $$->u.function.parameters;
421 $$->u.function.parameters = $1;
424 parens: T_LPAREN parameter_type_list T_RPAREN {
426 } | T_LPAREN declarator_ish T_RPAREN {
427 struct cdecl *fake_params;
429 ALLOC_DECL(fake_params, NULL, $2);
430 ALLOC_FUNCTION($$, fake_params, false);
433 pointer: T_ASTERISK qualifiers direct_declarator {
434 ALLOC_POINTER($$, $2, $3);
435 } | T_ASTERISK qualifiers pointer {
436 ALLOC_POINTER($$, $2, $3);
/*
 * declarator:     either a direct declarator or a pointer declarator.
 * declarator_ish: the same choice, but built on direct_declarator_ish,
 *                 whose first alternative is empty.
 * postfix:        an array suffix or a parenthesized suffix.
 */
439 declarator: direct_declarator | pointer
440 declarator_ish: direct_declarator_ish | pointer
441 postfix: array | parens
443 direct_declarator_ish: {
445 } | direct_declarator_ish postfix {
453 $$ = &$1->u.declarator;
454 } | direct_declarator postfix {
459 english: T_DECLARE T_IDENT T_AS english_declaration {
460 $$ = insert_identifier($4, $2);
461 } | T_TYPE english_declaration {
466 * We use a precedence declaration to prefer shifting an identifier
467 * over reducing this empty rule; see below.
469 storage_func_specs: %prec T_TYPE { $$ = NULL; }
470 storage_func_specs: declspec_simple storage_func_specs {
471 ALLOC_DECLSPEC($$, $1);
475 type_qual_spec: typespec_noid | qualifier
477 type_qual_specs: { $$ = NULL; } | type_qual_spec type_qual_specs {
483 * The "qualifiers" nonterminal needs to be used here to avoid shift/reduce
484 * conflicts with pointer declarators. So we end up needing to stitch
485 * together three different specifiers lists.
487 post_specs: qualifiers typespec type_qual_specs {
493 english_declaration: storage_func_specs english_declarator post_specs {
495 ALLOC_DECL($$, $3, $2);
498 english_declarator: {
500 } | english_declarator qualifiers T_POINTER T_TO {
501 ALLOC_POINTER($$, $2, $1);
502 } | english_declarator english_array {
505 } | english_declarator english_function {
510 english_function: T_FUNCTION T_RETURNING {
511 ALLOC_FUNCTION($$, NULL, false);
512 } | T_FUNCTION T_LPAREN english_parameter_list T_RPAREN T_RETURNING {
516 english_parameter_list: english_parameter varargs {
517 ALLOC_FUNCTION($$, $1, $2);
518 } | english_parameter T_COMMA english_parameter_list {
520 $1->next = $$->u.function.parameters;
521 $$->u.function.parameters = $1;
524 typedef_name_qual: T_IDENT qualifiers {
525 /* Compiler should be able to elide this assignment. */
526 $1->u.declspec.ident = $1->u.declarator.u.ident;
528 $$ = &$1->u.declspec;
529 $$->type = CDECL_TYPE_IDENT;
538 * There is a shift/reduce conflict here when an identifier appears as the
539 * first token. The conflict is between shifting T_IDENT, or reducing the
540 * empty production for storage_func_specs (cf. english_declaration).
542 * - In either case, if we reduce, we won't match T_IDENT T_AS since the
543 * stack now has the extra storage_func_specs nonterminal symbol.
544 * - And if we shift, we won't match english_declaration since it is
545 * too late to add storage_func_specs to the stack.
547 * The only valid input affected by the conflict is a simple type name,
548 * possibly followed by qualifiers. So the conflict is adequately resolved
549 * by shifting, so long as we have a special-case reduction to handle this.
551 english_parameter: english_declaration | typedef_name_qual null_decl {
552 ALLOC_DECL($$, $1, $2);
553 } | T_IDENT T_AS english_declaration {
554 $$ = insert_identifier($3, $1);
557 english_array: T_VLA T_ARRAY english_vla T_OF {
558 $$ = &$3->u.declarator;
559 $$->type = CDECL_DECL_ARRAY;
560 $$->u.array.vla = $$->u.ident;
561 $$->u.array.length = 0;
562 } | T_ARRAY array_length T_OF {
/*
 * Empty alternative: an omitted array length yields 0.  This nonterminal is
 * used both between "[" and "]" and between "array" and "of".
 */
566 array_length: { $$ = 0; }
567 array_length: T_UINT {
569 FAIL(_("array length must be positive"));
572 english_vla: T_IDENT | {
573 if (!($$ = cdecl__alloc_item(1)))
581 * Expose the token string table to the rest of the library, in order to
582 * produce strings that match parser keywords.
584 * In order for this to work properly, the Bison output must be postprocessed
585 * by fix-yytname.awk to remove pointless quotation marks from the keyword
588 const char *cdecl__token_name(unsigned token)
590 return yytname[YYTRANSLATE(token)];
594 yyerror(YYLTYPE *loc, yyscan_t scanner, struct cdecl **out, const char *err)
596 if (strstr(err, yytname[YYTRANSLATE(T_LEX_ERROR)]))
599 cdecl__err(CDECL_ENOPARSE, "%s", err);