X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/2ca0617e4dd5192c029fa800e00250aaf6248c79..0c61f9637a469ac7a28b5a329551b03e6ad14d62:/src/parse-decl.c diff --git a/src/parse-decl.c b/src/parse-decl.c index c06080b..2757703 100644 --- a/src/parse-decl.c +++ b/src/parse-decl.c @@ -1,175 +1,533 @@ +/* + * Parse and validate C declarations. + * Copyright © 2011-2012, 2020-2021 Nick Bowler + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include #include #include +#include #include "cdecl.h" +#include "cdecl-internal.h" #include "parse.h" #include "scan.h" -#define PASTE(a, b) a ## b -#define PASTE2(a, b) PASTE(a, b) +/* + * Verify the declaration specifiers of a declaration. If top is true, treat + * this as a top-level declaration. Otherwise, treat this as a function + * parameter (which carries additional constraints). + */ +static bool valid_declspecs(struct cdecl *decl, bool top) +{ + struct cdecl_declspec *specs = decl->specifiers; + struct cdecl_declarator *d = decl->declarators; + bool abstract = cdecl_is_abstract(d); + unsigned num_storage = 0; + unsigned long typemap; + + typemap = cdecl__build_typemap(specs); + if (typemap == -1) + return false; -#define BIT1(a) ((1ul<<(CDECL_TYPE_ ## a))) -#define BIT2(a, b) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))) -#define BIT3(a, b, c) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c))) -#define BIT4(a, b, c, d) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c))|(1ul<<(CDECL_TYPE_ ## d))) + for (struct cdecl_declspec *c = specs; c; c = c->next) { + switch (cdecl_spec_kind(c)) { + case CDECL_SPEC_TYPE: + if (c->type == CDECL_TYPE_VOID && + (d->type == CDECL_DECL_IDENT + || d->type == CDECL_DECL_ARRAY)) { + fprintf(stderr, "invalid declaration of type void\n"); + return false; + } + continue; + case CDECL_SPEC_STOR: + if (top && abstract) { + fprintf(stderr, "type names cannot have storage-class specifiers\n"); + return false; + } + + if (!top && c->type != CDECL_STOR_REGISTER) { + fprintf(stderr, "function parameters may only have register storage\n"); + return false; + } + + if (++num_storage > 1) { + fprintf(stderr, "too many storage-class specifiers\n"); + return false; + } + break; + case CDECL_SPEC_QUAL: + /* + * Restrict qualifiers are only valid in the + * pointer qualifier list, which isn't checked here. + */ + if (c->type == CDECL_QUAL_RESTRICT) { + fprintf(stderr, "only pointer types can be restrict-qualified.\n"); + return false; + } + break; + case CDECL_SPEC_FUNC: + if (abstract) { + fprintf(stderr, "type names cannot have function specifiers\n"); + return false; + } -#define NARG_(_4, _3, _2, _1, n, ...) n -#define NARG(...) NARG_(__VA_ARGS__, 4, 3, 2, 1) + if (!top || d->type != CDECL_DECL_FUNCTION) { + fprintf(stderr, "only function declarations may have function specifiers.\n"); + return false; + } + break; + default: + assert(0); + } + } -#define BITS(...) PASTE2(BIT, NARG(__VA_ARGS__))(__VA_ARGS__) + return true; +} /* - * We can represent type specifiers as a bitmap, which gives us a finite - * list of acceptable bitmap values according to the C standard. However, - * the "long" specifier is allowed to occur more than once, but only at most - * 2 times. Treat it as a special case, assigning an unused bit to represent - * the second long. + * The C grammar leaves ambiguous some cases where parentheses represent a + * function declarator or just parentheses. The language uses additional + * context (whether or not a typedef is in scope, etc.) to resolve these + * ambiguities, but we don't have access to that kind of information. + * + * The cdecl99 parser uses an unambiguous grammar which treats almost + * everything as a function, and thus considers things like 'int (x)' to + * be a function type with a single parameter of type 'x' (a typedef name), + * returning int. This can result in very complicated types for simple + * declarations. Ideally, cdecl99 should try and find the "simplest" + * explanation for a given declaration. + * + * Whether or not it achieves the simplest explanation, we apply a simple rule: + * if a declarator could be interpreted as something other than a function, + * do that. + * + * Since cdecl99 supports things like [*] in any context (in C, such constructs + * are only valid in function parameter lists), we don't treat them specially + * here. */ -#define CDECL_TYPE_LLONG 32 -static int typemap_verify(unsigned long map) +static struct cdecl_declarator *reduce_function(struct cdecl *param) { - /* - * This is the complete list of valid type specifiers from C99§6.7.2#2 - */ + struct cdecl_declspec *spec = param->specifiers; + struct cdecl_declarator *decl = param->declarators; + struct cdecl_declarator *last; + + for (last = decl; last && last->type != CDECL_DECL_NULL;) + last = last->child; + + if (!last) + return NULL; + + last->type = CDECL_DECL_IDENT; + last->u.ident = spec->ident; + free(param); + free(spec); + + return decl; +} + +static bool function_is_reducible(struct cdecl_declarator *d) +{ + if (d->type != CDECL_DECL_FUNCTION) + return false; + if (d->child->type != CDECL_DECL_NULL) + return false; /* e.g., int (*)(x) */ - switch (map) { - case BITS(VOID): - case BITS(CHAR): - case BITS(SIGNED, CHAR): - case BITS(UNSIGNED, CHAR): - case BITS(SHORT): - case BITS(SIGNED, SHORT): - case BITS(SHORT, INT): - case BITS(SIGNED, SHORT, INT): - case BITS(UNSIGNED, SHORT): - case BITS(UNSIGNED, SHORT, INT): - case BITS(INT): - case BITS(SIGNED): - case BITS(SIGNED, INT): - case BITS(UNSIGNED): - case BITS(UNSIGNED, INT): - case BITS(LONG): - case BITS(SIGNED, LONG): - case BITS(SIGNED, LONG, INT): - case BITS(UNSIGNED, LONG): - case BITS(UNSIGNED, LONG, INT): - case BITS(LLONG, LONG): - case BITS(SIGNED, LLONG, LONG): - case BITS(SIGNED, LLONG, LONG, INT): - case BITS(UNSIGNED, LLONG, LONG): - case BITS(UNSIGNED, LLONG, LONG, INT): - case BITS(BOOL): - case BITS(FLOAT): - case BITS(DOUBLE): - case BITS(LONG, DOUBLE): - case BITS(FLOAT, COMPLEX): - case BITS(DOUBLE, COMPLEX): - case BITS(LONG, DOUBLE, COMPLEX): - case BITS(STRUCT): - case BITS(UNION): - case BITS(ENUM): - case BITS(IDENT): + if (!d->u.function.parameters) + return false; /* e.g., int f() */ + if (d->u.function.parameters->next) + return false; /* e.g., int (x, y) */ + if (d->u.function.variadic) + return false; /* e.g., int (x, ...) */ + + if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT) + return false; /* e.g. int (int) */ + if (d->u.function.parameters->specifiers->next) + return false; /* e.g. int (size_t const) */ + if (d->u.function.parameters->declarators->type == CDECL_DECL_POINTER) + return false; /* e.g. int (x *) */ + + return true; +} + +static int +simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + struct cdecl_declarator *new; + + if (!function_is_reducible(d)) return 0; - } - return -1; + new = reduce_function(d->u.function.parameters); + if (!new) + return 0; /* e.g. int (foo bar) */ + *p = new; + free(d->child); + free(d); + + return 1; } -static unsigned long -typemap_add_typespec(unsigned long map, struct cdecl_declspec *s) +/* + * The parser's bias towards considering things as functions whenever possible + * makes nested parentheses tricky. (x) is considered to be part of a function + * declarator until simplify_functions converts it. The problem is that + * (((x))) is not valid as part of a function declarator, but it *is* valid + * as an identifier enclosed 3 times in parentheses. This is complicated by + * the fact that things like (((int))) are not valid anywhere. + * + * To avoid ambiguities, the parser actually emits a "function" declarator for + * every pair of parentheses. The ones that can't reasonably be functions + * consist of a single "parameter" with no declaration specifiers (note that + * every valid function parameter will have at least one type specifier). + * + * This pass is to remove these fake functions from the parse tree. We take + * care to avoid turning invalid things like ((int)) into valid things like + * (int) by observing that the only valid function declarators that appear + * in these "fake" parentheses are those that have a non-null child declarator + * (for instance, int ((*)(int)) *or* those that will be eliminated by the + * simplify_functions pass. + */ + +static int +reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) { - assert(s->type < CDECL_TYPE_LLONG); + struct cdecl *param; - if (s->type == CDECL_TYPE_LONG) { - if (map & BITS(LLONG)) { - fprintf(stderr, "too many long specifiers\n"); + if (d->type != CDECL_DECL_FUNCTION) + return 0; + + param = d->u.function.parameters; + if (param && param->specifiers == NULL) { + struct cdecl_declarator *decl; + + assert(!param->next); + + decl = param->declarators; + if (decl->type == CDECL_DECL_NULL) { + free(decl); + free(param); + d->u.function.parameters = NULL; + return 0; + } + + if (d->child->type != CDECL_DECL_NULL) { + fprintf(stderr, "invalid function parameter\n"); return -1; - } else if (map & BITS(LONG)) { - return map | BITS(LLONG); } - } - if (map & (1ul<type)) { - fprintf(stderr, "duplicate type specifier\n"); - return -1; + free(d->child); + free(param); + free(d); + *p = decl; + + /* + * We may have replaced d with another fake function which + * also needs to be eliminated. + */ + if (reduce_parentheses(p, decl) < 0) + return -1; + + /* + * If the remaining declarator is a function, make sure it's + * valid by checking its reducibility. + */ + decl = *p; + if (decl->type == CDECL_DECL_FUNCTION + && decl->child->type == CDECL_DECL_NULL + && !function_is_reducible(decl)) { + fprintf(stderr, "too many parentheses in function\n"); + return -1; + } + + return 1; } - return map | (1<type); + return 0; } -static int verify_specs(struct cdecl_declspec *s) +/* + * Function parameters and return types have a few restrictions that are + * really easy to check in comparison to the above absurdity. + */ +static int +check_parameters(struct cdecl_declarator **p, struct cdecl_declarator *d) { - unsigned long typemap = 0; - unsigned num_storage = 0; + struct cdecl_declspec *spec; + struct cdecl *param; + bool has_void = false; - for (struct cdecl_declspec *c = s; c; c = c->next) { - switch (cdecl_spec_kind(c)) { - case CDECL_SPEC_TYPE: - typemap = typemap_add_typespec(typemap, c); - if (typemap == -1) { + if (d->type != CDECL_DECL_FUNCTION) + return 0; + + for (param = d->u.function.parameters; param; param = param->next) { + if (!valid_declspecs(param, false)) + return -1; + + /* Check for "void" function parameters as a special case. */ + for (spec = param->specifiers; spec; spec = spec->next) { + if (param->declarators->type != CDECL_DECL_NULL) + continue; + if (spec->type != CDECL_TYPE_VOID) + continue; + + if (spec != param->specifiers || spec->next != NULL) { + fprintf(stderr, "void parameter must not have extra specifiers\n"); return -1; - } - break; - case CDECL_SPEC_STOR: - if (++num_storage > 1) { - fprintf(stderr, "too many storage-class specifiers\n"); + } else if (d->u.function.parameters->next) { + fprintf(stderr, "a void parameter must stand alone\n"); return -1; - } - break; - case CDECL_SPEC_QUAL: - /* - * Since we don't support pointer types yet, all - * restrict qualifiers are invalid. Other qualifiers - * are always valid. - */ - if (c->type == CDECL_QUAL_RESTRICT) { - fprintf(stderr, "only pointer types can be restrict-qualified.\n"); + } else if (d->u.function.variadic) { + fprintf(stderr, "variadic functions cannot have a void parameter\n"); return -1; } - break; - case CDECL_SPEC_FUNC: - /* - * Likewise for function specifiers. - */ - fprintf(stderr, "only function declarations may have function specifiers.\n"); - return -1; - default: - abort(); } } - if (typemap_verify(typemap) == -1) { - fprintf(stderr, "conflicting type specifiers\n"); + return 0; +} + +/* + * Functions cannot return arrays or functions. Since the parse tree is + * "inside-out", we need to look for functions as the child declarator. + */ +static int +check_rettypes(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + if (!d->child || d->child->type != CDECL_DECL_FUNCTION) + return 0; + + switch (d->type) { + case CDECL_DECL_FUNCTION: + fprintf(stderr, "functions cannot return functions\n"); + return -1; + case CDECL_DECL_ARRAY: + fprintf(stderr, "functions cannot return arrays\n"); + return -1; + } + + return 0; +} + +static int +check_arrays(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + if (!d->child || d->child->type != CDECL_DECL_ARRAY) + return 0; + + switch (d->type) { + case CDECL_DECL_FUNCTION: + fprintf(stderr, "array members cannot be functions\n"); return -1; } return 0; } -static int verify_decl(struct cdecl *decl) +static int +normalize_specs(struct cdecl_declarator **p, struct cdecl_declarator *d) { - return verify_specs(decl->specifiers); + struct cdecl_function *func; + struct cdecl_pointer *ptr; + + switch (d->type) { + case CDECL_DECL_POINTER: + ptr = &d->u.pointer; + ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers); + break; + case CDECL_DECL_FUNCTION: + func = &d->u.function; + for (struct cdecl *i = func->parameters; i; i = i->next) + i->specifiers = cdecl__normalize_specs(i->specifiers); + break; + } + + return 0; +} + +static int +check_qualifiers(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + struct cdecl_declspec *spec; + struct cdecl_pointer *ptr; + + if (!d->child || d->child->type != CDECL_DECL_POINTER) + return 0; + + ptr = &d->child->u.pointer; + for (spec = ptr->qualifiers; spec; spec = spec->next) { + if (spec->type == CDECL_QUAL_RESTRICT + && d->type == CDECL_DECL_FUNCTION) { + fprintf(stderr, "function pointers cannot be restrict-qualified\n"); + return -1; + } + } + + return 0; +} + +/* + * Traverse the parse tree, calling a function on every declarator in a + * depth-first preorder traversal. The function is given a pointer to the + * declarator as well as to the pointer which was used to reach that + * declarator: this can be used to rewrite entire subtrees. + */ +static bool forall_declarators(struct cdecl *decl, + int f(struct cdecl_declarator **, struct cdecl_declarator *)) +{ + struct cdecl_declarator *d, **p; + + for (p = &decl->declarators, d = *p; d; p = &d->child, d = *p) { + switch (f(p, d)) { + case 0: + break; + case 1: + d = *p; + break; + case -1: + return false; + default: + assert(0); + } + + if (d->type == CDECL_DECL_FUNCTION) { + struct cdecl *i; + + for (i = d->u.function.parameters; i; i = i->next) { + if (!forall_declarators(i, f)) + return false; + } + } + } + + return true; } struct cdecl *cdecl_parse_decl(const char *declstr) { + struct cdecl_declspec *norm_specs; YY_BUFFER_STATE state; + yyscan_t scanner; struct cdecl *decl; int rc; - state = yy_scan_string(declstr); - rc = yyparse(&decl); - yy_delete_buffer(state); + cdecl__init_i18n(); + rc = cdecl__yylex_init(&scanner); if (rc != 0) return NULL; - rc = verify_decl(decl); - if (rc != 0) { - cdecl_free(decl); + state = cdecl__yy_scan_string(declstr, scanner); + rc = cdecl__yyparse(scanner, &decl); + cdecl__yy_delete_buffer(state, scanner); + cdecl__yylex_destroy(scanner); + + if (rc != 0) return NULL; + + /* + * Since the top-level specifiers are shared between each top-level + * declarator, we need to normalize them once and then propagate the + * new specifier list. + */ + norm_specs = cdecl__normalize_specs(decl->specifiers); + for (struct cdecl *i = decl; i; i = i->next) { + i->specifiers = norm_specs; + } + + /* Now perform checks and simplifications on each declarator. */ + for (struct cdecl *i = decl; i; i = i->next) { + if (!forall_declarators(i, reduce_parentheses)) + goto err; + if (!forall_declarators(i, simplify_functions)) + goto err; + if (!forall_declarators(i, check_parameters)) + goto err; + if (!forall_declarators(i, check_rettypes)) + goto err; + if (!forall_declarators(i, check_arrays)) + goto err; + if (!forall_declarators(i, normalize_specs)) + goto err; + if (!forall_declarators(i, check_qualifiers)) + goto err; + + if (!valid_declspecs(i, true)) + goto err; + + if (cdecl_is_abstract(i->declarators) + && (i != decl || i->next)) { + fprintf(stderr, "mixing type names and declarations is not allowed\n"); + goto err; + } } return decl; +err: + cdecl__free(decl); + return NULL; +} + +struct cdecl *cdecl_parse_english(const char *english) +{ + YY_BUFFER_STATE state; + yyscan_t scanner; + struct cdecl *decl; + int rc; + + cdecl__init_i18n(); + + rc = cdecl__yylex_init_extra(true, &scanner); + if (rc != 0) + return NULL; + + state = cdecl__yy_scan_string(english, scanner); + rc = cdecl__yyparse(scanner, &decl); + cdecl__yy_delete_buffer(state, scanner); + cdecl__yylex_destroy(scanner); + + if (rc != 0) + return NULL; + + for (struct cdecl *i = decl; i; i = i->next) { + i->specifiers = cdecl__normalize_specs(i->specifiers); + + if (!forall_declarators(i, check_parameters)) + goto err; + if (!forall_declarators(i, check_rettypes)) + goto err; + if (!forall_declarators(i, check_arrays)) + goto err; + if (!forall_declarators(i, normalize_specs)) + goto err; + if (!forall_declarators(i, check_qualifiers)) + goto err; + + if (!valid_declspecs(i, true)) + goto err; + } + + return decl; +err: + cdecl__free(decl); + return NULL; +} + +void cdecl_free(struct cdecl *decl) +{ + cdecl__free(decl); }