2 * Parse and validate C declarations.
3 * Copyright © 2011-2012, 2020-2021 Nick Bowler
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <https://www.gnu.org/licenses/>.
25 #include "cdecl-internal.h"
30 * We can represent type specifiers as a bitmap, which gives us a finite
31 * list of acceptable bitmap values according to the C standard. However,
32 * the "long" specifier is allowed to occur more than once, but only at most
33 * 2 times. Treat it as a special case, assigning an unused bit to represent
36 #define MAP_LLONG_BIT 31
37 #define MAP_LONG_BIT (CDECL_TYPE_LONG-CDECL_SPEC_TYPE)
38 #define CDECL_TYPE_LLONG (CDECL_SPEC_TYPE+MAP_LLONG_BIT)
43 * Convert the declaration specifiers to a bitmap with each bit
44 * corresponding to one specific type specifier.
46 static int valid_typespec(struct cdecl_declspec *s)
48 unsigned long map = 0;
50 for (struct cdecl_declspec *c = s; c; c = c->next) {
53 if (cdecl_spec_kind(c) != CDECL_SPEC_TYPE)
56 bit = c->type - CDECL_SPEC_TYPE;
57 assert(bit < MAP_LLONG_BIT);
60 /* "long" special case */
61 if ((map & bit) == 1ul << MAP_LONG_BIT)
62 bit = 1ul << MAP_LLONG_BIT;
65 if (bit == 1ul << MAP_LLONG_BIT)
66 cdecl__err(CDECL_EBADTYPE, _("too many long specifiers"));
68 cdecl__err(CDECL_EBADTYPE, _("duplicate type specifier"));
74 if (typemap_is_valid(map))
78 cdecl__err(CDECL_EBADTYPE, _("no type specified"));
80 cdecl__err(CDECL_EBADTYPE, _("invalid type specified"));
86 * Verify the declaration specifiers of a declaration. If top is true, treat
87 * this as a top-level declaration. Otherwise, treat this as a function
88 * parameter (which carries additional constraints).
90 static bool valid_declspecs(struct cdecl *decl, bool top)
92 struct cdecl_declspec *specs = decl->specifiers;
93 struct cdecl_declarator *d = decl->declarators;
94 bool abstract = cdecl_is_abstract(d);
95 unsigned num_storage = 0;
97 if (!valid_typespec(specs))
100 for (struct cdecl_declspec *c = specs; c; c = c->next) {
101 switch (cdecl_spec_kind(c)) {
102 case CDECL_SPEC_TYPE:
103 if (c->type == CDECL_TYPE_VOID &&
104 (d->type == CDECL_DECL_IDENT
105 || d->type == CDECL_DECL_ARRAY)) {
106 cdecl__err(CDECL_EBADTYPE, _("invalid declaration of type void"));
110 case CDECL_SPEC_STOR:
111 if (top && abstract) {
112 cdecl__err(CDECL_EBADSTOR, _("type names cannot have storage-class specifiers"));
116 if (!top && c->type != CDECL_STOR_REGISTER) {
117 cdecl__err(CDECL_EBADSTOR, _("function parameters may only have register storage"));
121 if (++num_storage > 1) {
122 cdecl__err(CDECL_EBADSTOR, _("too many storage-class specifiers"));
126 case CDECL_SPEC_QUAL:
128 * Restrict qualifiers are only valid in the
129 * pointer qualifier list, which isn't checked here.
131 if (c->type == CDECL_QUAL_RESTRICT) {
132 cdecl__err(CDECL_EBADQUAL, _("only pointer types can be restrict-qualified"));
136 case CDECL_SPEC_FUNC:
138 cdecl__err(CDECL_ENOTFUNC, _("type names cannot have function specifiers"));
142 if (!top || d->type != CDECL_DECL_FUNCTION) {
143 cdecl__err(CDECL_ENOTFUNC, _("only function declarations can have function specifiers"));
156 * The C grammar leaves ambiguous some cases where parentheses represent a
157 * function declarator or just parentheses. The language uses additional
158 * context (whether or not a typedef is in scope, etc.) to resolve these
159 * ambiguities, but we don't have access to that kind of information.
161 * The cdecl99 parser uses an unambiguous grammar which treats almost
162 * everything as a function, and thus considers things like 'int (x)' to
163 * be a function type with a single parameter of type 'x' (a typedef name),
164 * returning int. This can result in very complicated types for simple
165 * declarations. Ideally, cdecl99 should try and find the "simplest"
166 * explanation for a given declaration.
168 * Whether or not it achieves the simplest explanation, we apply a simple rule:
169 * if a declarator could be interpreted as something other than a function,
172 * Since cdecl99 supports things like [*] in any context (in C, such constructs
173 * are only valid in function parameter lists), we don't treat them specially
177 static struct cdecl_declarator *reduce_function(struct cdecl *param)
179 struct cdecl_declspec *spec = param->specifiers;
180 struct cdecl_declarator *decl = param->declarators;
181 struct cdecl_declarator *last;
183 for (last = decl; last && last->type != CDECL_DECL_NULL;)
189 last->type = CDECL_DECL_IDENT;
190 last->u.ident = spec->ident;
197 static bool function_is_reducible(struct cdecl_declarator *d)
199 if (d->type != CDECL_DECL_FUNCTION)
201 if (d->child->type != CDECL_DECL_NULL)
202 return false; /* e.g., int (*)(x) */
204 if (!d->u.function.parameters)
205 return false; /* e.g., int f() */
206 if (d->u.function.parameters->next)
207 return false; /* e.g., int (x, y) */
208 if (d->u.function.variadic)
209 return false; /* e.g., int (x, ...) */
211 if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT)
212 return false; /* e.g. int (int) */
213 if (d->u.function.parameters->specifiers->next)
214 return false; /* e.g. int (size_t const) */
215 if (d->u.function.parameters->declarators->type == CDECL_DECL_POINTER)
216 return false; /* e.g. int (x *) */
222 simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d)
224 struct cdecl_declarator *new;
226 if (!function_is_reducible(d))
229 new = reduce_function(d->u.function.parameters);
231 return 0; /* e.g. int (foo bar) */
240 * The parser's bias towards considering things as functions whenever possible
241 * makes nested parentheses tricky. (x) is considered to be part of a function
242 * declarator until simplify_functions converts it. The problem is that
243 * (((x))) is not valid as part of a function declarator, but it *is* valid
244 * as an identifier enclosed 3 times in parentheses. This is complicated by
245 * the fact that things like (((int))) are not valid anywhere.
247 * To avoid ambiguities, the parser actually emits a "function" declarator for
248 * every pair of parentheses. The ones that can't reasonably be functions
249 * consist of a single "parameter" with no declaration specifiers (note that
250 * every valid function parameter will have at least one type specifier).
252 * This pass is to remove these fake functions from the parse tree. We take
253 * care to avoid turning invalid things like ((int)) into valid things like
254 * (int) by observing that the only valid function declarators that appear
255 * in these "fake" parentheses are those that have a non-null child declarator
256 * (for instance, int ((*)(int)) *or* those that will be eliminated by the
257 * simplify_functions pass.
261 reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d)
265 if (d->type != CDECL_DECL_FUNCTION)
268 param = d->u.function.parameters;
269 if (param && param->specifiers == NULL) {
270 struct cdecl_declarator *decl;
272 assert(!param->next);
274 decl = param->declarators;
275 if (decl->type == CDECL_DECL_NULL) {
278 d->u.function.parameters = NULL;
282 if (d->child->type != CDECL_DECL_NULL) {
283 cdecl__err(CDECL_EBADPARAMS, _("invalid function parameter"));
293 * We may have replaced d with another fake function which
294 * also needs to be eliminated.
296 if (reduce_parentheses(p, decl) < 0)
300 * If the remaining declarator is a function, make sure it's
301 * valid by checking its reducibility.
304 if (decl->type == CDECL_DECL_FUNCTION
305 && decl->child->type == CDECL_DECL_NULL
306 && !function_is_reducible(decl)) {
307 cdecl__err(CDECL_EBADPARAMS, _("too many parentheses in function"));
318 * Function parameters and return types have a few restrictions that are
319 * really easy to check in comparison to the above absurdity.
322 check_parameters(struct cdecl_declarator **p, struct cdecl_declarator *d)
324 struct cdecl_declspec *spec;
327 if (d->type != CDECL_DECL_FUNCTION)
330 for (param = d->u.function.parameters; param; param = param->next) {
331 if (!valid_declspecs(param, false))
334 /* Check for "void" function parameters as a special case. */
335 for (spec = param->specifiers; spec; spec = spec->next) {
336 if (param->declarators->type != CDECL_DECL_NULL)
338 if (spec->type != CDECL_TYPE_VOID)
341 if (spec != param->specifiers || spec->next != NULL) {
342 cdecl__err(CDECL_EVOIDPARAM, _("void parameter cannot have extra specifiers"));
344 } else if (d->u.function.parameters->next) {
345 cdecl__err(CDECL_EVOIDPARAM, _("void parameter must stand alone"));
347 } else if (d->u.function.variadic) {
348 cdecl__err(CDECL_EVOIDPARAM, _("variadic function cannot have void parameter"));
358 * Functions cannot return arrays or functions. Since the parse tree is
359 * "inside-out", we need to look for functions as the child declarator.
362 check_rettypes(struct cdecl_declarator **p, struct cdecl_declarator *d)
364 if (!d->child || d->child->type != CDECL_DECL_FUNCTION)
368 case CDECL_DECL_FUNCTION:
369 cdecl__err(CDECL_EBADRETURN, _("functions cannot return functions"));
371 case CDECL_DECL_ARRAY:
372 cdecl__err(CDECL_EBADRETURN, _("functions cannot return arrays"));
380 check_arrays(struct cdecl_declarator **p, struct cdecl_declarator *d)
382 if (!d->child || d->child->type != CDECL_DECL_ARRAY)
386 case CDECL_DECL_FUNCTION:
387 cdecl__err(CDECL_EBADARRAY, _("array members cannot be functions"));
395 normalize_specs(struct cdecl_declarator **p, struct cdecl_declarator *d)
397 struct cdecl_function *func;
398 struct cdecl_pointer *ptr;
401 case CDECL_DECL_POINTER:
403 ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers);
405 case CDECL_DECL_FUNCTION:
406 func = &d->u.function;
407 for (struct cdecl *i = func->parameters; i; i = i->next)
408 i->specifiers = cdecl__normalize_specs(i->specifiers);
416 check_qualifiers(struct cdecl_declarator **p, struct cdecl_declarator *d)
418 struct cdecl_declspec *spec;
419 struct cdecl_pointer *ptr;
421 if (!d->child || d->child->type != CDECL_DECL_POINTER)
424 ptr = &d->child->u.pointer;
425 for (spec = ptr->qualifiers; spec; spec = spec->next) {
426 if (spec->type == CDECL_QUAL_RESTRICT
427 && d->type == CDECL_DECL_FUNCTION) {
428 cdecl__err(CDECL_EBADPOINTER, _("function pointers cannot be restrict-qualified"));
437 * Traverse the parse tree, calling a function on every declarator in a
438 * depth-first preorder traversal. The function is given a pointer to the
439 * declarator as well as to the pointer which was used to reach that
440 * declarator: this can be used to rewrite entire subtrees.
442 static bool forall_declarators(struct cdecl *decl,
443 int f(struct cdecl_declarator **, struct cdecl_declarator *))
445 struct cdecl_declarator *d, **p;
447 for (p = &decl->declarators, d = *p; d; p = &d->child, d = *p) {
460 if (d->type == CDECL_DECL_FUNCTION) {
463 for (i = d->u.function.parameters; i; i = i->next) {
464 if (!forall_declarators(i, f))
473 struct cdecl *cdecl_parse_decl(const char *declstr)
475 struct cdecl_declspec *norm_specs;
476 YY_BUFFER_STATE state;
483 rc = cdecl__yylex_init(&scanner);
487 state = cdecl__yy_scan_string(declstr, scanner);
488 rc = cdecl__yyparse(scanner, &decl);
489 cdecl__yy_delete_buffer(state, scanner);
490 cdecl__yylex_destroy(scanner);
496 * Since the top-level specifiers are shared between each top-level
497 * declarator, we need to normalize them once and then propagate the
498 * new specifier list.
500 norm_specs = cdecl__normalize_specs(decl->specifiers);
501 for (struct cdecl *i = decl; i; i = i->next) {
502 i->specifiers = norm_specs;
505 /* Now perform checks and simplifications on each declarator. */
506 for (struct cdecl *i = decl; i; i = i->next) {
507 if (!forall_declarators(i, reduce_parentheses))
509 if (!forall_declarators(i, simplify_functions))
511 if (!forall_declarators(i, check_parameters))
513 if (!forall_declarators(i, check_rettypes))
515 if (!forall_declarators(i, check_arrays))
517 if (!forall_declarators(i, normalize_specs))
519 if (!forall_declarators(i, check_qualifiers))
522 if (!valid_declspecs(i, true))
525 if (cdecl_is_abstract(i->declarators)
526 && (i != decl || i->next)) {
527 cdecl__err(CDECL_EBADDECL, _("mixing type names and declarations is not allowed"));
538 struct cdecl *cdecl_parse_english(const char *english)
540 YY_BUFFER_STATE state;
547 rc = cdecl__yylex_init_extra(true, &scanner);
551 state = cdecl__yy_scan_string(english, scanner);
552 rc = cdecl__yyparse(scanner, &decl);
553 cdecl__yy_delete_buffer(state, scanner);
554 cdecl__yylex_destroy(scanner);
559 for (struct cdecl *i = decl; i; i = i->next) {
560 i->specifiers = cdecl__normalize_specs(i->specifiers);
562 if (!forall_declarators(i, check_parameters))
564 if (!forall_declarators(i, check_rettypes))
566 if (!forall_declarators(i, check_arrays))
568 if (!forall_declarators(i, normalize_specs))
570 if (!forall_declarators(i, check_qualifiers))
573 if (!valid_declspecs(i, true))
583 void cdecl_free(struct cdecl *decl)