X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/2ca0617e4dd5192c029fa800e00250aaf6248c79..4423b55b49e402ca3ac13a6430c23a6629da9bf4:/src/parse-decl.c diff --git a/src/parse-decl.c b/src/parse-decl.c index c06080b..8f0bd38 100644 --- a/src/parse-decl.c +++ b/src/parse-decl.c @@ -1,22 +1,58 @@ +/* + * Parse and validate C declarations. + * Copyright © 2011-2012, 2020-2021, 2023 Nick Bowler + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include #include #include +#include #include "cdecl.h" +#include "cdecl-internal.h" #include "parse.h" #include "scan.h" +#include "errmsg.h" -#define PASTE(a, b) a ## b -#define PASTE2(a, b) PASTE(a, b) +/* + * Allocate a "parse item", which is a union of several parse tree + * structure types, together with a string buffer. The s_sz argument + * specifies the size of the string (including its terminator), which + * may be zero. + * + * The union's declarator member is pre-initialized to a valid "identifier" + * declarator, which shares several interesting offsets with the "declspec" + * structure for an "identifier" type specifier. + */ +struct parse_item *cdecl__alloc_item(size_t s_sz) +{ + struct parse_item *ret; -#define BIT1(a) ((1ul<<(CDECL_TYPE_ ## a))) -#define BIT2(a, b) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))) -#define BIT3(a, b, c) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c))) -#define BIT4(a, b, c, d) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c))|(1ul<<(CDECL_TYPE_ ## d))) + ret = malloc(offsetof(struct parse_item, s) + s_sz); + if (!ret) { + cdecl__errmsg(CDECL__ENOMEM); + return NULL; + } -#define NARG_(_4, _3, _2, _1, n, ...) n -#define NARG(...) NARG_(__VA_ARGS__, 4, 3, 2, 1) + ret->u.declarator.child = NULL; + ret->u.declarator.type = CDECL_DECL_IDENT; + ret->u.declarator.u.ident = ret->s; -#define BITS(...) PASTE2(BIT, NARG(__VA_ARGS__))(__VA_ARGS__) + return ret; +} /* * We can represent type specifiers as a bitmap, which gives us a finite @@ -25,151 +61,540 @@ * 2 times. Treat it as a special case, assigning an unused bit to represent * the second long. */ -#define CDECL_TYPE_LLONG 32 +#define MAP_LLONG_BIT 31 +#define MAP_LONG_BIT (CDECL_TYPE_LONG-CDECL_SPEC_TYPE) +#define CDECL_TYPE_LLONG (CDECL_SPEC_TYPE+MAP_LLONG_BIT) -static int typemap_verify(unsigned long map) +#include "typemap.h" + +/* + * Convert the declaration specifiers to a bitmap with each bit + * corresponding to one specific type specifier. + */ +static int valid_typespec(struct cdecl_declspec *s) { - /* - * This is the complete list of valid type specifiers from C99§6.7.2#2 - */ + struct cdecl_declspec *c; + unsigned long map = 0; - switch (map) { - case BITS(VOID): - case BITS(CHAR): - case BITS(SIGNED, CHAR): - case BITS(UNSIGNED, CHAR): - case BITS(SHORT): - case BITS(SIGNED, SHORT): - case BITS(SHORT, INT): - case BITS(SIGNED, SHORT, INT): - case BITS(UNSIGNED, SHORT): - case BITS(UNSIGNED, SHORT, INT): - case BITS(INT): - case BITS(SIGNED): - case BITS(SIGNED, INT): - case BITS(UNSIGNED): - case BITS(UNSIGNED, INT): - case BITS(LONG): - case BITS(SIGNED, LONG): - case BITS(SIGNED, LONG, INT): - case BITS(UNSIGNED, LONG): - case BITS(UNSIGNED, LONG, INT): - case BITS(LLONG, LONG): - case BITS(SIGNED, LLONG, LONG): - case BITS(SIGNED, LLONG, LONG, INT): - case BITS(UNSIGNED, LLONG, LONG): - case BITS(UNSIGNED, LLONG, LONG, INT): - case BITS(BOOL): - case BITS(FLOAT): - case BITS(DOUBLE): - case BITS(LONG, DOUBLE): - case BITS(FLOAT, COMPLEX): - case BITS(DOUBLE, COMPLEX): - case BITS(LONG, DOUBLE, COMPLEX): - case BITS(STRUCT): - case BITS(UNION): - case BITS(ENUM): - case BITS(IDENT): - return 0; - } + for (c = s; c; c = c->next) { + unsigned long bit; - return -1; -} + if (cdecl_spec_kind(c) != CDECL_SPEC_TYPE) + continue; -static unsigned long -typemap_add_typespec(unsigned long map, struct cdecl_declspec *s) -{ - assert(s->type < CDECL_TYPE_LLONG); + bit = c->type - CDECL_SPEC_TYPE; + assert(bit < MAP_LLONG_BIT); + bit = 1ul << bit; - if (s->type == CDECL_TYPE_LONG) { - if (map & BITS(LLONG)) { - fprintf(stderr, "too many long specifiers\n"); - return -1; - } else if (map & BITS(LONG)) { - return map | BITS(LLONG); + /* "long" special case */ + if ((map & bit) == 1ul << MAP_LONG_BIT) + bit = 1ul << MAP_LLONG_BIT; + + if (map & bit) { + if (bit == 1ul << MAP_LLONG_BIT) + cdecl__errmsg(CDECL__ETOOLONG); + else + cdecl__errmsg(CDECL__EDUPTYPE); + return false; } + map |= bit; } - if (map & (1ul<type)) { - fprintf(stderr, "duplicate type specifier\n"); - return -1; - } + if (typemap_is_valid(map)) + return true; + + if (map == 0) + cdecl__errmsg(CDECL__ENOTYPE); + else + cdecl__errmsg(CDECL__EBADTYPE); - return map | (1<type); + return false; } -static int verify_specs(struct cdecl_declspec *s) +/* + * Verify the declaration specifiers of a declaration. If top is true, treat + * this as a top-level declaration. Otherwise, treat this as a function + * parameter (which carries additional constraints). + */ +static bool valid_declspecs(struct cdecl *decl, bool top) { - unsigned long typemap = 0; + struct cdecl_declspec *c, *specs = decl->specifiers; + struct cdecl_declarator *d = decl->declarators; + bool abstract = cdecl_is_abstract(d); unsigned num_storage = 0; - for (struct cdecl_declspec *c = s; c; c = c->next) { + if (!valid_typespec(specs)) + return false; + + for (c = specs; c; c = c->next) { switch (cdecl_spec_kind(c)) { case CDECL_SPEC_TYPE: - typemap = typemap_add_typespec(typemap, c); - if (typemap == -1) { - return -1; + if (c->type == CDECL_TYPE_VOID && + (d->type == CDECL_DECL_IDENT + || d->type == CDECL_DECL_ARRAY)) { + cdecl__errmsg(CDECL__EBADVOID); + return false; } - break; + continue; case CDECL_SPEC_STOR: + if (top && abstract) { + cdecl__errmsg(CDECL__ETYPESTOR); + return false; + } + + if (!top && c->type != CDECL_STOR_REGISTER) { + cdecl__errmsg(CDECL__EFUNCSTOR); + return false; + } + if (++num_storage > 1) { - fprintf(stderr, "too many storage-class specifiers\n"); - return -1; + cdecl__errmsg(CDECL__EMANYSTOR); + return false; } break; case CDECL_SPEC_QUAL: /* - * Since we don't support pointer types yet, all - * restrict qualifiers are invalid. Other qualifiers - * are always valid. + * Restrict qualifiers are only valid in the + * pointer qualifier list, which isn't checked here. */ if (c->type == CDECL_QUAL_RESTRICT) { - fprintf(stderr, "only pointer types can be restrict-qualified.\n"); - return -1; + cdecl__errmsg(CDECL__EBADQUAL); + return false; } break; case CDECL_SPEC_FUNC: - /* - * Likewise for function specifiers. - */ - fprintf(stderr, "only function declarations may have function specifiers.\n"); - return -1; + if (abstract || !top || d->type != CDECL_DECL_FUNCTION) { + cdecl__errmsg(CDECL__ENOTFUNC); + return false; + } + + break; default: - abort(); + assert(0); + } + } + + return true; +} + +/* + * The C grammar leaves ambiguous some cases where parentheses represent a + * function declarator or just parentheses. The language uses additional + * context (whether or not a typedef is in scope, etc.) to resolve these + * ambiguities, but we don't have access to that kind of information. + * + * The cdecl99 parser uses an unambiguous grammar which treats almost + * everything as a function, and thus considers things like 'int (x)' to + * be a function type with a single parameter of type 'x' (a typedef name), + * returning int. This can result in very complicated types for simple + * declarations. Ideally, cdecl99 should try and find the "simplest" + * explanation for a given declaration. + * + * Whether or not it achieves the simplest explanation, we apply a simple rule: + * if a declarator could be interpreted as something other than a function, + * do that. + * + * Since cdecl99 supports things like [*] in any context (in C, such constructs + * are only valid in function parameter lists), we don't treat them specially + * here. + */ + +static struct cdecl_declarator *reduce_function(struct cdecl *param) +{ + struct cdecl_declarator *d, **p = ¶m->declarators; + struct parse_item *spec = (void *)param->specifiers; + + while ((d = *p)->child) + p = &d->child; + + if (d->type != CDECL_DECL_NULL) + return NULL; + + /* + * The child and u.ident members of cdecl_declarator are expected + * to be located at identical offsets as, respectively, the next + * and ident members within cdecl_declspec, so the expectation is + * that the compiler can elide both assignments. + */ + spec->u.declarator.child = (void *)spec->u.declspec.next; + spec->u.declarator.u.ident = spec->u.declspec.ident; + spec->u.declarator.type = CDECL_DECL_IDENT; + *p = &spec->u.declarator; + + d = param->declarators; + free(param); + return d; +} + +static bool function_is_reducible(struct cdecl_declarator *d) +{ + if (d->type != CDECL_DECL_FUNCTION) + return false; + if (d->child->type != CDECL_DECL_NULL) + return false; /* e.g., int (*)(x) */ + + if (!d->u.function.parameters) + return false; /* e.g., int f() */ + if (d->u.function.parameters->next) + return false; /* e.g., int (x, y) */ + if (d->u.function.variadic) + return false; /* e.g., int (x, ...) */ + + if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT) + return false; /* e.g. int (int) */ + if (d->u.function.parameters->specifiers->next) + return false; /* e.g. int (size_t const) */ + if (d->u.function.parameters->declarators->type == CDECL_DECL_POINTER) + return false; /* e.g. int (x *) */ + + return true; +} + +static int +simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + struct cdecl_declarator *new; + + if (!function_is_reducible(d)) + return 0; + + new = reduce_function(d->u.function.parameters); + if (!new) + return 0; /* e.g. int (foo bar) */ + *p = new; + free(d); + + return 1; +} + +/* + * The main parser's bias towards considering things as functions whenever + * possible makes nested parentheses tricky. "(x)" is considered to be part + * of a function declarator until simplify_functions converts it. The problem + * is that "(((x)))" is not valid as part of a function declarator, but it _is_ + * valid as either an identifier enclosed thrice in parentheses, or an abstract + * function declarator enclosed twice in parentheses. + * + * To avoid ambiguities, the main parser actually returns a function declarator + * for every pair of parentheses. The ones we need to look at consist of a + * single parameter with an empty specifier list (noting that every real + * function parameter will have at least one type specifier). + * + * There are two cases: + * + * - For (), the parser emits a parameter with a lone null declarator. + * This fake parameter simply gets deleted, leaving us with a normal + * function declarator with an empty identifier list. + * + * - Otherwise, the parameter's outermost declarator is not null. The + * function itself is deleted, replaced in the parse tree with the + * fake parameter's declarator. + * + * Repeating until there no fake parameters, this reduction transforms, for + * example, "(((x)))" into "(x)", an abstract function declarator. The result + * is then subject to the function simplification step, which will turn "(x)" + * into x (declaring an identifier). + * + * The whole process is repeated until no more changes are made to the parse + * tree, or a syntax error is detected. + */ +static struct cdecl *fake_function_param(struct cdecl_declarator *d) +{ + struct cdecl *param; + + if (d->type != CDECL_DECL_FUNCTION) + return NULL; + + param = d->u.function.parameters; + if (!param || param->specifiers) + return NULL; + + assert(!param->next); + return param; +} + +static int +reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + struct cdecl *param; + + do { + d = *p; + while ((param = fake_function_param(d))) { + struct cdecl_declarator *decl = param->declarators; + d->u.function.parameters = NULL; + + if (decl->type != CDECL_DECL_NULL) { + if (d->child->type != CDECL_DECL_NULL) { + /* Fake parameter on real function. */ + d->u.function.parameters = param; + cdecl__errmsg(CDECL__EBADPARAM); + return -1; + } + + param->declarators = d; + *p = d = decl; + } + + cdecl__free(param); + } + } while (simplify_functions(p, d)); + + return 0; +} + +/* + * Returns nonzero iff the given specifier list contains a specifier + * of the indicated type. + */ +static int have_specifier(struct cdecl_declspec *s, unsigned type) +{ + for (; s; s = s->next) + if (s->type == type) + return 1; + return 0; +} + +/* + * Check syntax restrictions on a function declarator's child declarator. + * That is, "pointer to function", "array of function" and "function + * returning function". + * + * Returns -1 if the declaration is invalid, or 0 otherwise. + */ +static int check_function_child(struct cdecl_declarator *d) +{ + struct cdecl_pointer *ptr; + + switch (d->type) { + case CDECL_DECL_POINTER: + ptr = &d->u.pointer; + if (have_specifier(ptr->qualifiers, CDECL_QUAL_RESTRICT)) { + /* pointer to function cannot be restrict qualified. */ + cdecl__errmsg(CDECL__ERESTRICTFUNC); + return -1; } + return 0; + case CDECL_DECL_FUNCTION: + /* function returning function is never allowed. */ + cdecl__errmsg(CDECL__ERETFUNC); + return -1; + case CDECL_DECL_ARRAY: + /* array of function is never allowed. */ + cdecl__errmsg(CDECL__EFUNCARRAY); + return -1; } - if (typemap_verify(typemap) == -1) { - fprintf(stderr, "conflicting type specifiers\n"); + return 0; +} + +/* + * Check a function parameter declaration for validity, which means it has a + * valid combination of declaration specifiers and, if it is a void parameter, + * that it is the one special case where this is allowed. + * + * Returns -1 if the declaration is invalid, or 0 otherwise. + */ +static int check_function_param(struct cdecl_function *f, struct cdecl *param) +{ + if (!valid_declspecs(param, false)) return -1; + + /* Check for "void" function parameters as a special case. */ + if (param->declarators->type == CDECL_DECL_NULL + && have_specifier(param->specifiers, CDECL_TYPE_VOID)) + { + struct cdecl *fp = f->parameters; + + if (f->variadic || fp->next || fp->specifiers->next) { + cdecl__errmsg(CDECL__EVOIDPARAM); + return -1; + } } return 0; } -static int verify_decl(struct cdecl *decl) +/* + * Normalize the specifier lists for function parameters, and then check the + * function declarator for validity. + * + * Returns -1 if the declaration is invalid, or 0 otherwise. + */ +static int postproc_function(struct cdecl_declarator *d) +{ + struct cdecl_function *func = &d->u.function; + struct cdecl *param; + int rc; + + for (param = func->parameters; param; param = param->next) { + param->specifiers = cdecl__normalize_specs(param->specifiers); + + if ((rc = check_function_param(func, param)) < 0) + return rc; + } + + return check_function_child(d->child); +} + +static int +postproc_common(struct cdecl_declarator **p, struct cdecl_declarator *d) { - return verify_specs(decl->specifiers); + struct cdecl_pointer *ptr; + + switch (d->type) { + case CDECL_DECL_POINTER: + ptr = &d->u.pointer; + ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers); + return 0; + case CDECL_DECL_FUNCTION: + return postproc_function(d); + case CDECL_DECL_ARRAY: + if (d->child && d->child->type == CDECL_DECL_FUNCTION) { + /* function returning array is never allowed. */ + cdecl__errmsg(CDECL__ERETARRAY); + return -1; + } + return 0; + } + + return 0; } -struct cdecl *cdecl_parse_decl(const char *declstr) +/* + * Traverse the parse tree, calling a function on every declarator in a + * depth-first preorder traversal. The function is given a pointer to the + * declarator as well as to the pointer which was used to reach that + * declarator: this can be used to rewrite entire subtrees. + * + * The called function may return a negative value to indicate an error + * which terminates traversal. + * + * Returns 0 on success, or a negative value on failure. + */ +static int forall_declarators(struct cdecl *decl, + int f(struct cdecl_declarator **, struct cdecl_declarator *)) { + struct cdecl_declarator *d, **p; + + for (p = &decl->declarators; *p; p = &d->child) { + int rc; + + rc = f(p, *p); + if (rc < 0) + return rc; + d = *p; + + if (d->type == CDECL_DECL_FUNCTION) { + struct cdecl *i; + + for (i = d->u.function.parameters; i; i = i->next) { + rc = forall_declarators(i, f); + if (rc < 0) + return rc; + } + } + } + + return 0; +} + +static struct cdecl *do_parse(const char *str, int english_mode) +{ + struct cdecl *decl = NULL; YY_BUFFER_STATE state; - struct cdecl *decl; - int rc; + yyscan_t scanner; - state = yy_scan_string(declstr); - rc = yyparse(&decl); - yy_delete_buffer(state); +#if YYDEBUG + extern int cdecl__yydebug; + cdecl__yydebug = 1; +#endif - if (rc != 0) + cdecl__init_i18n(); + if (cdecl__yylex_init_extra(english_mode, &scanner) != 0) return NULL; - rc = verify_decl(decl); - if (rc != 0) { - cdecl_free(decl); + state = cdecl__yy_scan_string(str, scanner); + if (cdecl__yyparse(scanner, &decl) != 0) { + /* + * If the input consists of a complete, valid declaration + * followed by some garbage, that parsed declaration will + * be output by the parser and we need to free it here. + */ + cdecl__free(decl); + decl = NULL; + } + cdecl__yy_delete_buffer(state, scanner); + cdecl__yylex_destroy(scanner); + + return decl; +} + +static int do_postprocess(struct cdecl *decl, int english_mode) +{ + struct cdecl_declspec *norm_specs; + struct cdecl *i; + + /* + * For a C declaration with more than one full declarator, the + * specifier list is common to all of them. Normalize it once, + * then propagate that to all the linked cdecl structures. + * + * In english mode, the cdecl structure list always has exactly + * one entry so we don't need to do anything differently. + */ + norm_specs = cdecl__normalize_specs(decl->specifiers); + for (i = decl; i; i = i->next) + i->specifiers = norm_specs; + + for (i = decl; i; i = i->next) { + if (!english_mode) { + if (forall_declarators(i, reduce_parentheses) < 0) + return 0; + } + + if (forall_declarators(i, postproc_common) < 0) + return 0; + + if (!valid_declspecs(i, true)) + return 0; + + if (decl->next && cdecl_is_abstract(i->declarators)) { + /* Abstract full declarators: there can only be one. */ + cdecl__errmsg(CDECL__EDECLTYPE); + return 0; + } + } + + return 1; +} + +static struct cdecl *parse_common(const char *str, int english_mode) +{ + struct cdecl *decl; + + if (!(decl = do_parse(str, english_mode))) + return NULL; + + if (!do_postprocess(decl, english_mode)) { + cdecl__free(decl); return NULL; } return decl; } + +struct cdecl *cdecl_parse_decl(const char *declstr) +{ + return parse_common(declstr, false); +} + +struct cdecl *cdecl_parse_english(const char *english) +{ + return parse_common(english, true); +} + +void cdecl_free(struct cdecl *decl) +{ + cdecl__free(decl); +}