/* * Parse and validate C declarations. * Copyright © 2011-2012, 2020-2021, 2023-2024 Nick Bowler * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include "cdecl.h" #include "cdecl-internal.h" #include "parse.h" #include "scan.h" #include "errmsg.h" static struct cdecl *fake_function_param(struct cdecl_declarator *); /* * Allocate a "parse item", which is a union of several parse tree * structure types, together with a string buffer. The s_sz argument * specifies the size of the string (including its terminator), which * may be zero. * * The union's declarator member is pre-initialized to a valid "identifier" * declarator, which shares several interesting offsets with the "declspec" * structure for an "identifier" type specifier. */ struct parse_item *cdecl__alloc_item(size_t s_sz) { struct parse_item *ret; ret = malloc(offsetof(struct parse_item, s) + s_sz); if (!ret) { cdecl__errmsg(CDECL__ENOMEM); return NULL; } ret->u.declarator.child = NULL; ret->u.declarator.type = CDECL_DECL_IDENT; ret->u.declarator.u.ident = ret->s; return ret; } /* * We can represent type specifiers as a bitmap, which gives us a finite * list of acceptable bitmap values according to the C standard. However, * the "long" specifier is allowed to occur more than once, but only at most * 2 times. Treat it as a special case, assigning an unused bit to represent * the second long. */ #define MAP_LLONG_BIT 31 #define MAP_LONG_BIT (CDECL_TYPE_LONG-CDECL_SPEC_TYPE) #define CDECL_TYPE_LLONG (CDECL_SPEC_TYPE+MAP_LLONG_BIT) #include "typemap.h" /* * Convert the declaration specifiers to a bitmap with each bit * corresponding to one specific type specifier. */ static int valid_typespec(struct cdecl_declspec *s) { struct cdecl_declspec *c; unsigned long map = 0; for (c = s; c; c = c->next) { unsigned long bit; if (cdecl_spec_kind(c) != CDECL_SPEC_TYPE) continue; bit = c->type - CDECL_SPEC_TYPE; assert(bit < MAP_LLONG_BIT); bit = 1ul << bit; /* "long" special case */ if ((map & bit) == 1ul << MAP_LONG_BIT) bit = 1ul << MAP_LLONG_BIT; if (map & bit) { if (bit == 1ul << MAP_LLONG_BIT) cdecl__errmsg(CDECL__ETOOLONG); else cdecl__errmsg(CDECL__EDUPTYPE); return false; } map |= bit; } if (typemap_is_valid(map)) return true; if (map == 0) cdecl__errmsg(CDECL__ENOTYPE); else cdecl__errmsg(CDECL__EBADTYPE); return false; } /* * Verify the declaration specifiers of a declaration. If top is true, treat * this as a top-level declaration. Otherwise, treat this as a function * parameter (which carries additional constraints). */ static bool valid_declspecs(struct cdecl *decl, bool top) { struct cdecl_declspec *c, *specs = decl->specifiers; struct cdecl_declarator *d = decl->declarators; bool abstract = cdecl_is_abstract(d); unsigned num_storage = 0; if (!valid_typespec(specs)) return false; for (c = specs; c; c = c->next) { switch (cdecl_spec_kind(c)) { case CDECL_SPEC_TYPE: if (c->type == CDECL_TYPE_VOID && (d->type == CDECL_DECL_IDENT || d->type == CDECL_DECL_ARRAY)) { cdecl__errmsg(CDECL__EBADVOID); return false; } continue; case CDECL_SPEC_STOR: if (top && abstract) { cdecl__errmsg(CDECL__ETYPESTOR); return false; } if (!top && c->type != CDECL_STOR_REGISTER) { cdecl__errmsg(CDECL__EFUNCSTOR); return false; } if (++num_storage > 1) { cdecl__errmsg(CDECL__EMANYSTOR); return false; } break; case CDECL_SPEC_QUAL: /* * Restrict qualifiers are only valid in the * pointer qualifier list, which isn't checked here. */ if (c->type == CDECL_QUAL_RESTRICT) { cdecl__errmsg(CDECL__EBADQUAL); return false; } break; case CDECL_SPEC_FUNC: if (abstract || !top || d->type != CDECL_DECL_FUNCTION) { cdecl__errmsg(CDECL__ENOTFUNC); return false; } break; default: assert(0); } } return true; } /* * Find the tree pointer which leads to the parameter's leaf node. * * Return a null pointer if the traversal locates a syntactic element which * prevents function reduction. This occurs if the leaf node declares an * identifier, or for nontrivial fake function parameters (see below). */ static struct cdecl_declarator **leaf_pointer(struct cdecl *param) { struct cdecl_declarator *d, **p = ¶m->declarators; if ((param = fake_function_param(param->declarators))) { if (param->declarators->type != CDECL_DECL_NULL) return NULL; /* e.g. int (x (*)) */ } while ((d = *p)->child) { p = &d->child; if (fake_function_param(d->child)) return NULL; /* e.g. int (x (*)[][1]) */ } if (d->type != CDECL_DECL_NULL) return NULL; /* e.g. int (x y) */ return p; } /* * The C grammar leaves ambiguous some cases where parentheses represent a * function declarator or just parentheses. The language uses additional * context (whether or not a typedef is in scope, etc.) to resolve these * ambiguities, but we don't have access to that kind of information. * * The cdecl99 parser uses an unambiguous grammar which treats almost * everything as a function, and thus considers things like 'int (x)' to * be a function type with a single parameter of type 'x' (a typedef name), * returning int. This can result in very complicated types for simple * declarations. Ideally, cdecl99 should try and find the "simplest" * explanation for a given declaration. * * Whether or not it achieves the simplest explanation, we apply a simple rule: * if a declarator could be interpreted as something other than a function, * do that. * * Since cdecl99 supports things like [*] in any context (in C, such constructs * are only valid in function parameter lists), we don't treat them specially * here. */ static struct cdecl_declarator *reduce_function(struct cdecl *param) { struct parse_item *spec = (void *)param->specifiers; struct cdecl_declarator *d, **p; if (!(p = leaf_pointer(param))) return NULL; /* * The child and u.ident members of cdecl_declarator are expected * to be located at identical offsets as, respectively, the next * and ident members within cdecl_declspec, so the expectation is * that the compiler can elide both assignments. */ spec->u.declarator.child = (void *)spec->u.declspec.next; spec->u.declarator.u.ident = spec->u.declspec.ident; spec->u.declarator.type = CDECL_DECL_IDENT; *p = &spec->u.declarator; d = param->declarators; free(param); return d; } static bool function_is_reducible(struct cdecl_declarator *d) { if (d->type != CDECL_DECL_FUNCTION) return false; if (d->child->type != CDECL_DECL_NULL) return false; /* e.g., int (*)(x) */ if (!d->u.function.parameters) return false; /* e.g., int f() */ if (d->u.function.parameters->next) return false; /* e.g., int (x, y) */ if (d->u.function.variadic) return false; /* e.g., int (x, ...) */ if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT) return false; /* e.g. int (int) */ if (d->u.function.parameters->specifiers->next) return false; /* e.g. int (size_t const) */ if (d->u.function.parameters->declarators->type == CDECL_DECL_POINTER) return false; /* e.g. int (x *) */ return true; } static int simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl_declarator *new; if (!function_is_reducible(d)) return 0; new = reduce_function(d->u.function.parameters); if (!new) return 0; *p = new; free(d); return 1; } /* * The main parser's bias towards considering things as functions whenever * possible makes nested parentheses tricky. "(x)" is considered to be part * of a function declarator until simplify_functions converts it. The problem * is that "(((x)))" is not valid as part of a function declarator, but it _is_ * valid as either an identifier enclosed thrice in parentheses, or an abstract * function declarator enclosed twice in parentheses. * * To avoid ambiguities, the main parser actually returns a function declarator * for every pair of parentheses. The ones we need to look at consist of a * single parameter with an empty specifier list (noting that every real * function parameter will have at least one type specifier). * * There are two cases: * * - For (), the parser emits a parameter with a lone null declarator. * This fake parameter simply gets deleted, leaving us with a normal * function declarator with an empty identifier list. * * - Otherwise, the parameter's outermost declarator is not null. The * function itself is deleted, replaced in the parse tree with the * fake parameter's declarator. * * Repeating until there no fake parameters, this reduction transforms, for * example, "(((x)))" into "(x)", an abstract function declarator. The result * is then subject to the function simplification step, which will turn "(x)" * into x (declaring an identifier). * * The whole process is repeated until no more changes are made to the parse * tree, or a syntax error is detected. */ static struct cdecl *fake_function_param(struct cdecl_declarator *d) { struct cdecl *param; if (d->type != CDECL_DECL_FUNCTION) return NULL; param = d->u.function.parameters; if (!param || param->specifiers) return NULL; assert(!param->next); return param; } static int reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl *param; do { d = *p; while ((param = fake_function_param(d))) { struct cdecl_declarator *decl = param->declarators; d->u.function.parameters = NULL; if (decl->type != CDECL_DECL_NULL) { if (d->child->type != CDECL_DECL_NULL) { /* Fake parameter on real function. */ d->u.function.parameters = param; cdecl__errmsg(CDECL__EBADPARAM); return -1; } param->declarators = d; *p = d = decl; } cdecl__free(param); } } while (simplify_functions(p, d)); return 0; } /* * Returns nonzero iff the given specifier list contains a specifier * of the indicated type. */ static int have_specifier(struct cdecl_declspec *s, unsigned type) { for (; s; s = s->next) if (s->type == type) return 1; return 0; } /* * Check syntax restrictions on a function declarator's child declarator. * That is, "pointer to function", "array of function" and "function * returning function". * * Returns -1 if the declaration is invalid, or 0 otherwise. */ static int check_function_child(struct cdecl_declarator *d) { struct cdecl_pointer *ptr; switch (d->type) { case CDECL_DECL_POINTER: ptr = &d->u.pointer; if (have_specifier(ptr->qualifiers, CDECL_QUAL_RESTRICT)) { /* pointer to function cannot be restrict qualified. */ cdecl__errmsg(CDECL__ERESTRICTFUNC); return -1; } return 0; case CDECL_DECL_FUNCTION: /* function returning function is never allowed. */ cdecl__errmsg(CDECL__ERETFUNC); return -1; case CDECL_DECL_ARRAY: /* array of function is never allowed. */ cdecl__errmsg(CDECL__EFUNCARRAY); return -1; } return 0; } /* * Check a function parameter declaration for validity, which means it has a * valid combination of declaration specifiers and, if it is a void parameter, * that it is the one special case where this is allowed. * * Returns -1 if the declaration is invalid, or 0 otherwise. */ static int check_function_param(struct cdecl_function *f, struct cdecl *param) { if (!valid_declspecs(param, false)) return -1; /* Check for "void" function parameters as a special case. */ if (param->declarators->type == CDECL_DECL_NULL && have_specifier(param->specifiers, CDECL_TYPE_VOID)) { struct cdecl *fp = f->parameters; if (f->variadic || fp->next || fp->specifiers->next) { cdecl__errmsg(CDECL__EVOIDPARAM); return -1; } } return 0; } /* * Normalize the specifier lists for function parameters, and then check the * function declarator for validity. * * Returns -1 if the declaration is invalid, or 0 otherwise. */ static int postproc_function(struct cdecl_declarator *d) { struct cdecl_function *func = &d->u.function; struct cdecl *param; int rc; for (param = func->parameters; param; param = param->next) { param->specifiers = cdecl__normalize_specs(param->specifiers); if ((rc = check_function_param(func, param)) < 0) return rc; } return check_function_child(d->child); } static int postproc_common(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl_pointer *ptr; switch (d->type) { case CDECL_DECL_POINTER: ptr = &d->u.pointer; ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers); return 0; case CDECL_DECL_FUNCTION: return postproc_function(d); case CDECL_DECL_ARRAY: if (d->child && d->child->type == CDECL_DECL_FUNCTION) { /* function returning array is never allowed. */ cdecl__errmsg(CDECL__ERETARRAY); return -1; } return 0; } return 0; } /* * Traverse the parse tree, calling a function on every declarator in a * depth-first preorder traversal. The function is given a pointer to the * declarator as well as to the pointer which was used to reach that * declarator: this can be used to rewrite entire subtrees. * * The called function may return a negative value to indicate an error * which terminates traversal. * * Returns 0 on success, or a negative value on failure. */ static int forall_declarators(struct cdecl *decl, int f(struct cdecl_declarator **, struct cdecl_declarator *)) { struct cdecl_declarator *d, **p; for (p = &decl->declarators; *p; p = &d->child) { int rc; rc = f(p, *p); if (rc < 0) return rc; d = *p; if (d->type == CDECL_DECL_FUNCTION) { struct cdecl *i; for (i = d->u.function.parameters; i; i = i->next) { rc = forall_declarators(i, f); if (rc < 0) return rc; } } } return 0; } static struct cdecl *do_parse(const char *str, int english_mode) { struct cdecl *decl = NULL; YY_BUFFER_STATE state; yyscan_t scanner; #if YYDEBUG extern int cdecl__yydebug; cdecl__yydebug = 1; #endif cdecl__init_i18n(); if (cdecl__yylex_init_extra(english_mode, &scanner) != 0) return NULL; state = cdecl__yy_scan_string(str, scanner); if (cdecl__yyparse(scanner, &decl) != 0) { /* * If the input consists of a complete, valid declaration * followed by some garbage, that parsed declaration will * be output by the parser and we need to free it here. */ cdecl__free(decl); decl = NULL; } cdecl__yy_delete_buffer(state, scanner); cdecl__yylex_destroy(scanner); return decl; } static int do_postprocess(struct cdecl *decl, int english_mode) { struct cdecl_declspec *norm_specs; struct cdecl *i; /* * For a C declaration with more than one full declarator, the * specifier list is common to all of them. Normalize it once, * then propagate that to all the linked cdecl structures. * * In english mode, the cdecl structure list always has exactly * one entry so we don't need to do anything differently. */ norm_specs = cdecl__normalize_specs(decl->specifiers); for (i = decl; i; i = i->next) i->specifiers = norm_specs; for (i = decl; i; i = i->next) { if (!english_mode) { if (forall_declarators(i, reduce_parentheses) < 0) return 0; } if (forall_declarators(i, postproc_common) < 0) return 0; if (!valid_declspecs(i, true)) return 0; if (decl->next && cdecl_is_abstract(i->declarators)) { /* Abstract full declarators: there can only be one. */ cdecl__errmsg(CDECL__EDECLTYPE); return 0; } } return 1; } static struct cdecl *parse_common(const char *str, int english_mode) { struct cdecl *decl; if (!(decl = do_parse(str, english_mode))) return NULL; if (!do_postprocess(decl, english_mode)) { cdecl__free(decl); return NULL; } return decl; } struct cdecl *cdecl_parse_decl(const char *declstr) { return parse_common(declstr, false); } struct cdecl *cdecl_parse_english(const char *english) { return parse_common(english, true); } void cdecl_free(struct cdecl *decl) { cdecl__free(decl); }