/* * Parse and validate C declarations. * Copyright © 2011-2012 Nick Bowler * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include "cdecl.h" #include "typemap.h" #include "parse.h" #include "scan.h" #include "i18n.h" #include "normalize.h" /* * Verify the declaration specifiers of a declaration. If top is true, treat * this as a top-level declaration. Otherwise, treat this as a function * parameter (which carries additional constraints). */ static bool valid_declspecs(struct cdecl *decl, bool top) { struct cdecl_declspec *specs = decl->specifiers; struct cdecl_declarator *d = decl->declarators; bool abstract = cdecl_is_abstract(d); unsigned num_storage = 0; unsigned long typemap; typemap = cdecl__build_typemap(specs); if (typemap == -1) return false; for (struct cdecl_declspec *c = specs; c; c = c->next) { switch (cdecl_spec_kind(c)) { case CDECL_SPEC_TYPE: if (c->type == CDECL_TYPE_VOID && (d->type == CDECL_DECL_IDENT || d->type == CDECL_DECL_ARRAY)) { fprintf(stderr, "invalid declaration of type void\n"); return false; } continue; case CDECL_SPEC_STOR: if (top && abstract) { fprintf(stderr, "type names cannot have storage-class specifiers\n"); return false; } if (!top && c->type != CDECL_STOR_REGISTER) { fprintf(stderr, "function parameters may only have register storage\n"); return false; } if (++num_storage > 1) { fprintf(stderr, "too many storage-class specifiers\n"); return false; } break; case CDECL_SPEC_QUAL: /* * Restrict qualifiers are only valid in the * pointer qualifier list, which isn't checked here. */ if (c->type == CDECL_QUAL_RESTRICT) { fprintf(stderr, "only pointer types can be restrict-qualified.\n"); return false; } break; case CDECL_SPEC_FUNC: if (abstract) { fprintf(stderr, "type names cannot have function specifiers\n"); return false; } if (!top || d->type != CDECL_DECL_FUNCTION) { fprintf(stderr, "only function declarations may have function specifiers.\n"); return false; } break; default: assert(0); } } return true; } /* * The C grammar leaves ambiguous some cases where parentheses represent a * function declarator or just parentheses. The language uses additional * context (whether or not a typedef is in scope, etc.) to resolve these * ambiguities, but we don't have access to that kind of information. * * The cdecl99 parser uses an unambiguous grammar which treats almost * everything as a function, and thus considers things like 'int (x)' to * be a function type with a single parameter of type 'x' (a typedef name), * returning int. This can result in very complicated types for simple * declarations. Ideally, cdecl99 should try and find the "simplest" * explanation for a given declaration. * * Whether or not it achieves the simplest explanation, we apply a simple rule: * if a declarator could be interpreted as something other than a function, * do that. * * - The function declarator has a null child declarator. * - The function declarator has exactly one parameter, and is not variadic. * - The function parameter has a type specifier, and it is a typedef name. * - The function parameter has no other declaration specifiers. * - The function parameter does not declare an identifier. * * Since cdecl99 supports things like [*] in any context (in C, such constructs * are only valid in function parameter lists), we don't treat them specially * here. */ static struct cdecl_declarator *reduce_function(struct cdecl *param) { struct cdecl_declspec *spec = param->specifiers; struct cdecl_declarator *decl = param->declarators; struct cdecl_declarator *last; for (last = decl; last && last->type != CDECL_DECL_NULL;) last = last->child; if (!last) return NULL; last->type = CDECL_DECL_IDENT; last->u.ident = spec->ident; free(param); free(spec); return decl; } static bool function_is_reducible(struct cdecl_declarator *d) { if (d->type != CDECL_DECL_FUNCTION) return false; if (d->child->type != CDECL_DECL_NULL) return false; /* e.g., int (*)(x) */ if (!d->u.function.parameters) return false; /* e.g., int f() */ if (d->u.function.parameters->next) return false; /* e.g., int (x, y) */ if (d->u.function.variadic) return false; /* e.g., int (x, ...) */ if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT) return false; /* e.g. int (int) */ if (d->u.function.parameters->specifiers->next) return false; /* e.g. int (size_t const) */ return true; } static int simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl_declarator *new; if (!function_is_reducible(d)) return 0; new = reduce_function(d->u.function.parameters); if (!new) return 0; /* e.g. int (foo bar) */ *p = new; free(d->child); free(d); return 1; } /* * The parser's bias towards considering things as functions whenever possible * makes nested parentheses tricky. (x) is considered to be part of a function * declarator until simplify_functions converts it. The problem is that * (((x))) is not valid as part of a function declarator, but it *is* valid * as an identifier enclosed 3 times in parentheses. This is complicated by * the fact that things like (((int))) are not valid anywhere. * * To avoid ambiguities, the parser actually emits a "function" declarator for * every pair of parentheses. The ones that can't reasonably be functions * consist of a single "parameter" with no declaration specifiers (note that * every valid function parameter will have at least one type specifier). * * This pass is to remove these fake functions from the parse tree. We take * care to avoid turning invalid things like ((int)) into valid things like * (int) by observing that the only valid function declarators that appear * in these "fake" parentheses are those that have a non-null child declarator * (for instance, int ((*)(int)) *or* those that will be eliminated by the * simplify_functions pass. */ static int reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl *param; if (d->type != CDECL_DECL_FUNCTION) return 0; param = d->u.function.parameters; if (param && param->specifiers == NULL) { struct cdecl_declarator *decl; assert(!param->next); decl = param->declarators; if (decl->type == CDECL_DECL_NULL) { free(decl); free(param); d->u.function.parameters = NULL; return 0; } if (d->child->type != CDECL_DECL_NULL) { fprintf(stderr, "invalid function parameter\n"); return -1; } free(d->child); free(param); free(d); *p = decl; /* * We may have replaced d with another fake function which * also needs to be eliminated. */ if (reduce_parentheses(p, decl) < 0) return -1; /* * If the remaining declarator is a function, make sure it's * valid by checking its reducibility. */ decl = *p; if (decl->type == CDECL_DECL_FUNCTION && decl->child->type == CDECL_DECL_NULL && !function_is_reducible(decl)) { fprintf(stderr, "too many parentheses in function\n"); return -1; } return 1; } return 0; } /* * Function parameters and return types have a few restrictions that are * really easy to check in comparison to the above absurdity. */ static int check_parameters(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl_declspec *spec; struct cdecl *param; bool has_void = false; if (d->type != CDECL_DECL_FUNCTION) return 0; for (param = d->u.function.parameters; param; param = param->next) { if (!valid_declspecs(param, false)) return -1; /* Check for "void" function parameters as a special case. */ for (spec = param->specifiers; spec; spec = spec->next) { if (param->declarators->type != CDECL_DECL_NULL) continue; if (spec->type != CDECL_TYPE_VOID) continue; if (spec != param->specifiers || spec->next != NULL) { fprintf(stderr, "void parameter must not have extra specifiers\n"); return -1; } else if (d->u.function.parameters->next) { fprintf(stderr, "a void parameter must stand alone\n"); return -1; } else if (d->u.function.variadic) { fprintf(stderr, "variadic functions cannot have a void parameter\n"); return -1; } } } return 0; } /* * Functions cannot return arrays or functions. Since the parse tree is * "inside-out", we need to look for functions as the child declarator. */ static int check_rettypes(struct cdecl_declarator **p, struct cdecl_declarator *d) { if (!d->child || d->child->type != CDECL_DECL_FUNCTION) return 0; switch (d->type) { case CDECL_DECL_FUNCTION: fprintf(stderr, "functions cannot return functions\n"); return -1; case CDECL_DECL_ARRAY: fprintf(stderr, "functions cannot return arrays\n"); return -1; } return 0; } static int check_arrays(struct cdecl_declarator **p, struct cdecl_declarator *d) { if (!d->child || d->child->type != CDECL_DECL_ARRAY) return 0; switch (d->type) { case CDECL_DECL_FUNCTION: fprintf(stderr, "array members cannot be functions\n"); return -1; } return 0; } static int normalize_specs(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl_function *func; struct cdecl_pointer *ptr; switch (d->type) { case CDECL_DECL_POINTER: ptr = &d->u.pointer; ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers); break; case CDECL_DECL_FUNCTION: func = &d->u.function; for (struct cdecl *i = func->parameters; i; i = i->next) i->specifiers = cdecl__normalize_specs(i->specifiers); break; } return 0; } static int check_qualifiers(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl_declspec *spec; struct cdecl_pointer *ptr; if (!d->child || d->child->type != CDECL_DECL_POINTER) return 0; ptr = &d->child->u.pointer; for (spec = ptr->qualifiers; spec; spec = spec->next) { if (spec->type == CDECL_QUAL_RESTRICT && d->type == CDECL_DECL_FUNCTION) { fprintf(stderr, "function pointers cannot be restrict-qualified\n"); return -1; } } return 0; } /* * Traverse the parse tree, calling a function on every declarator in a * depth-first preorder traversal. The function is given a pointer to the * declarator as well as to the pointer which was used to reach that * declarator: this can be used to rewrite entire subtrees. */ static bool forall_declarators(struct cdecl *decl, int f(struct cdecl_declarator **, struct cdecl_declarator *)) { struct cdecl_declarator *d, **p; for (p = &decl->declarators, d = *p; d; p = &d->child, d = *p) { switch (f(p, d)) { case 0: break; case 1: d = *p; break; case -1: return false; default: assert(0); } if (d->type == CDECL_DECL_FUNCTION) { struct cdecl *i; for (i = d->u.function.parameters; i; i = i->next) { if (!forall_declarators(i, f)) return false; } } } return true; } struct cdecl *cdecl_parse_decl(const char *declstr) { struct cdecl_declspec *norm_specs; YY_BUFFER_STATE state; yyscan_t scanner; struct cdecl *decl; int rc; cdecl__init_i18n(); rc = cdecl__yylex_init(&scanner); if (rc != 0) return NULL; state = cdecl__yy_scan_string(declstr, scanner); rc = cdecl__yyparse(scanner, &decl); cdecl__yy_delete_buffer(state, scanner); cdecl__yylex_destroy(scanner); if (rc != 0) return NULL; /* * Since the top-level specifiers are shared between each top-level * declarator, we need to normalize them once and then propagate the * new specifier list. */ norm_specs = cdecl__normalize_specs(decl->specifiers); for (struct cdecl *i = decl; i; i = i->next) { i->specifiers = norm_specs; if (!forall_declarators(i, reduce_parentheses)) goto err; if (!forall_declarators(i, simplify_functions)) goto err; if (!forall_declarators(i, check_parameters)) goto err; if (!forall_declarators(i, check_rettypes)) goto err; if (!forall_declarators(i, check_arrays)) goto err; if (!forall_declarators(i, normalize_specs)) goto err; if (!forall_declarators(i, check_qualifiers)) goto err; if (!valid_declspecs(i, true)) goto err; if (cdecl_is_abstract(i->declarators) && (i != decl || i->next)) { fprintf(stderr, "mixing type names and declarations is not allowed\n"); goto err; } } return decl; err: cdecl__free(decl); return NULL; } struct cdecl *cdecl_parse_english(const char *english) { YY_BUFFER_STATE state; yyscan_t scanner; struct cdecl *decl; int rc; cdecl__init_i18n(); rc = cdecl__yylex_init_extra(true, &scanner); if (rc != 0) return NULL; state = cdecl__yy_scan_string(english, scanner); rc = cdecl__yyparse(scanner, &decl); cdecl__yy_delete_buffer(state, scanner); cdecl__yylex_destroy(scanner); if (rc != 0) return NULL; for (struct cdecl *i = decl; i; i = i->next) { i->specifiers = cdecl__normalize_specs(i->specifiers); if (!forall_declarators(i, check_parameters)) goto err; if (!forall_declarators(i, check_rettypes)) goto err; if (!forall_declarators(i, check_arrays)) goto err; if (!forall_declarators(i, normalize_specs)) goto err; if (!forall_declarators(i, check_qualifiers)) goto err; if (!valid_declspecs(i, true)) goto err; } return decl; err: cdecl__free(decl); return NULL; } void cdecl_free(struct cdecl *decl) { cdecl__free(decl); }