X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/ad973d63e038f293f2b158f19b04c1582e616af0..HEAD:/src/parse-decl.c diff --git a/src/parse-decl.c b/src/parse-decl.c index f00442d..3c60dd2 100644 --- a/src/parse-decl.c +++ b/src/parse-decl.c @@ -1,6 +1,6 @@ /* * Parse and validate C declarations. - * Copyright © 2011-2012, 2020-2021 Nick Bowler + * Copyright © 2011-2012, 2020-2021, 2023-2024 Nick Bowler * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +25,36 @@ #include "cdecl-internal.h" #include "parse.h" #include "scan.h" +#include "errmsg.h" + +static struct cdecl *fake_function_param(struct cdecl_declarator *); + +/* + * Allocate a "parse item", which is a union of several parse tree + * structure types, together with a string buffer. The s_sz argument + * specifies the size of the string (including its terminator), which + * may be zero. + * + * The union's declarator member is pre-initialized to a valid "identifier" + * declarator, which shares several interesting offsets with the "declspec" + * structure for an "identifier" type specifier. 
+ */ +struct parse_item *cdecl__alloc_item(size_t s_sz) +{ + struct parse_item *ret; + + ret = malloc(offsetof(struct parse_item, s) + s_sz); + if (!ret) { + cdecl__errmsg(CDECL__ENOMEM); + return NULL; + } + + ret->u.declarator.child = NULL; + ret->u.declarator.type = CDECL_DECL_IDENT; + ret->u.declarator.u.ident = ret->s; + + return ret; +} /* * We can represent type specifiers as a bitmap, which gives us a finite @@ -45,9 +75,10 @@ */ static int valid_typespec(struct cdecl_declspec *s) { + struct cdecl_declspec *c; unsigned long map = 0; - for (struct cdecl_declspec *c = s; c; c = c->next) { + for (c = s; c; c = c->next) { unsigned long bit; if (cdecl_spec_kind(c) != CDECL_SPEC_TYPE) @@ -63,9 +94,9 @@ static int valid_typespec(struct cdecl_declspec *s) if (map & bit) { if (bit == 1ul << MAP_LLONG_BIT) - cdecl__err(CDECL_EBADTYPE, _("too many long specifiers")); + cdecl__errmsg(CDECL__ETOOLONG); else - cdecl__err(CDECL_EBADTYPE, _("duplicate type specifier")); + cdecl__errmsg(CDECL__EDUPTYPE); return false; } map |= bit; @@ -75,9 +106,9 @@ static int valid_typespec(struct cdecl_declspec *s) return true; if (map == 0) - cdecl__err(CDECL_EBADTYPE, _("no type specified")); + cdecl__errmsg(CDECL__ENOTYPE); else - cdecl__err(CDECL_EBADTYPE, _("invalid type specified")); + cdecl__errmsg(CDECL__EBADTYPE); return false; } @@ -89,7 +120,7 @@ static int valid_typespec(struct cdecl_declspec *s) */ static bool valid_declspecs(struct cdecl *decl, bool top) { - struct cdecl_declspec *specs = decl->specifiers; + struct cdecl_declspec *c, *specs = decl->specifiers; struct cdecl_declarator *d = decl->declarators; bool abstract = cdecl_is_abstract(d); unsigned num_storage = 0; @@ -97,29 +128,29 @@ static bool valid_declspecs(struct cdecl *decl, bool top) if (!valid_typespec(specs)) return false; - for (struct cdecl_declspec *c = specs; c; c = c->next) { + for (c = specs; c; c = c->next) { switch (cdecl_spec_kind(c)) { case CDECL_SPEC_TYPE: if (c->type == CDECL_TYPE_VOID && 
(d->type == CDECL_DECL_IDENT || d->type == CDECL_DECL_ARRAY)) { - cdecl__err(CDECL_EBADTYPE, _("invalid declaration of type void")); + cdecl__errmsg(CDECL__EBADVOID); return false; } continue; case CDECL_SPEC_STOR: if (top && abstract) { - cdecl__err(CDECL_EBADSTOR, _("type names cannot have storage-class specifiers")); + cdecl__errmsg(CDECL__ETYPESTOR); return false; } if (!top && c->type != CDECL_STOR_REGISTER) { - cdecl__err(CDECL_EBADSTOR, _("function parameters may only have register storage")); + cdecl__errmsg(CDECL__EFUNCSTOR); return false; } if (++num_storage > 1) { - cdecl__err(CDECL_EBADSTOR, _("too many storage-class specifiers")); + cdecl__errmsg(CDECL__EMANYSTOR); return false; } break; @@ -129,20 +160,16 @@ static bool valid_declspecs(struct cdecl *decl, bool top) * pointer qualifier list, which isn't checked here. */ if (c->type == CDECL_QUAL_RESTRICT) { - cdecl__err(CDECL_EBADQUAL, _("only pointer types can be restrict-qualified")); + cdecl__errmsg(CDECL__EBADQUAL); return false; } break; case CDECL_SPEC_FUNC: - if (abstract) { - cdecl__err(CDECL_ENOTFUNC, _("type names cannot have function specifiers")); + if (abstract || !top || d->type != CDECL_DECL_FUNCTION) { + cdecl__errmsg(CDECL__ENOTFUNC); return false; } - if (!top || d->type != CDECL_DECL_FUNCTION) { - cdecl__err(CDECL_ENOTFUNC, _("only function declarations can have function specifiers")); - return false; - } break; default: assert(0); @@ -152,6 +179,36 @@ static bool valid_declspecs(struct cdecl *decl, bool top) return true; } +/* + * Find the tree pointer which leads to the parameter's leaf node. + * + * Return a null pointer if the traversal locates a syntactic element which + * prevents function reduction. This occurs if the leaf node declares an + * identifier, or for nontrivial fake function parameters (see below). 
+ */ +static struct cdecl_declarator **leaf_pointer(struct cdecl *param) +{ + struct cdecl_declarator *d, **p = &param->declarators; + + if ((param = fake_function_param(param->declarators))) { + if (param->declarators->type != CDECL_DECL_NULL) + return NULL; /* e.g. int (x (*)) */ + } + + while ((d = *p)->child) { + p = &d->child; + + if (fake_function_param(d->child)) + return NULL; /* e.g. int (x (*)[][1]) */ + } + + if (d->type != CDECL_DECL_NULL) + return NULL; /* e.g. int (x y) */ + + return p; +} + + /* * The C grammar leaves ambiguous some cases where parentheses represent a * function declarator or just parentheses. The language uses additional @@ -176,22 +233,26 @@ static bool valid_declspecs(struct cdecl *decl, bool top) static struct cdecl_declarator *reduce_function(struct cdecl *param) { - struct cdecl_declspec *spec = param->specifiers; - struct cdecl_declarator *decl = param->declarators; - struct cdecl_declarator *last; - - for (last = decl; last && last->type != CDECL_DECL_NULL;) - last = last->child; + struct parse_item *spec = (void *)param->specifiers; + struct cdecl_declarator *d, **p; - if (!last) + if (!(p = leaf_pointer(param))) return NULL; - last->type = CDECL_DECL_IDENT; - last->u.ident = spec->ident; - free(param); - free(spec); + /* + * The child and u.ident members of cdecl_declarator are expected + * to be located at identical offsets as, respectively, the next + * and ident members within cdecl_declspec, so the expectation is + * that the compiler can elide both assignments. 
+ */ + spec->u.declarator.child = (void *)spec->u.declspec.next; + spec->u.declarator.u.ident = spec->u.declspec.ident; + spec->u.declarator.type = CDECL_DECL_IDENT; + *p = &spec->u.declarator; - return decl; + d = param->declarators; + free(param); + return d; } static bool function_is_reducible(struct cdecl_declarator *d) @@ -228,206 +289,202 @@ simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d) new = reduce_function(d->u.function.parameters); if (!new) - return 0; /* e.g. int (foo bar) */ + return 0; *p = new; - free(d->child); free(d); return 1; } /* - * The parser's bias towards considering things as functions whenever possible - * makes nested parentheses tricky. (x) is considered to be part of a function - * declarator until simplify_functions converts it. The problem is that - * (((x))) is not valid as part of a function declarator, but it *is* valid - * as an identifier enclosed 3 times in parentheses. This is complicated by - * the fact that things like (((int))) are not valid anywhere. + * The main parser's bias towards considering things as functions whenever + * possible makes nested parentheses tricky. "(x)" is considered to be part + * of a function declarator until simplify_functions converts it. The problem + * is that "(((x)))" is not valid as part of a function declarator, but it _is_ + * valid as either an identifier enclosed thrice in parentheses, or an abstract + * function declarator enclosed twice in parentheses. + * + * To avoid ambiguities, the main parser actually returns a function declarator + * for every pair of parentheses. The ones we need to look at consist of a + * single parameter with an empty specifier list (noting that every real + * function parameter will have at least one type specifier). + * + * There are two cases: + * + * - For (), the parser emits a parameter with a lone null declarator. 
+ * This fake parameter simply gets deleted, leaving us with a normal + * function declarator with an empty identifier list. + * + * - Otherwise, the parameter's outermost declarator is not null. The + * function itself is deleted, replaced in the parse tree with the + * fake parameter's declarator. * - * To avoid ambiguities, the parser actually emits a "function" declarator for - * every pair of parentheses. The ones that can't reasonably be functions - * consist of a single "parameter" with no declaration specifiers (note that - * every valid function parameter will have at least one type specifier). + * Repeating until there are no fake parameters, this reduction transforms, for + * example, "(((x)))" into "(x)", an abstract function declarator. The result + * is then subject to the function simplification step, which will turn "(x)" + * into x (declaring an identifier). * - * This pass is to remove these fake functions from the parse tree. We take - * care to avoid turning invalid things like ((int)) into valid things like - * (int) by observing that the only valid function declarators that appear - * in these "fake" parentheses are those that have a non-null child declarator - * (for instance, int ((*)(int)) *or* those that will be eliminated by the - * simplify_functions pass. + * The whole process is repeated until no more changes are made to the parse + * tree, or a syntax error is detected. 
*/ - -static int -reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) +static struct cdecl *fake_function_param(struct cdecl_declarator *d) { struct cdecl *param; if (d->type != CDECL_DECL_FUNCTION) - return 0; + return NULL; param = d->u.function.parameters; - if (param && param->specifiers == NULL) { - struct cdecl_declarator *decl; + if (!param || param->specifiers) + return NULL; - assert(!param->next); + assert(!param->next); + return param; +} - decl = param->declarators; - if (decl->type == CDECL_DECL_NULL) { - free(decl); - free(param); - d->u.function.parameters = NULL; - return 0; - } +static int +reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + struct cdecl *param; - if (d->child->type != CDECL_DECL_NULL) { - cdecl__err(CDECL_EBADPARAMS, _("invalid function parameter")); - return -1; - } + do { + d = *p; + while ((param = fake_function_param(d))) { + struct cdecl_declarator *decl = param->declarators; + d->u.function.parameters = NULL; - free(d->child); - free(param); - free(d); - *p = decl; + if (decl->type != CDECL_DECL_NULL) { + if (d->child->type != CDECL_DECL_NULL) { + /* Fake parameter on real function. */ + d->u.function.parameters = param; + cdecl__errmsg(CDECL__EBADPARAM); + return -1; + } - /* - * We may have replaced d with another fake function which - * also needs to be eliminated. - */ - if (reduce_parentheses(p, decl) < 0) - return -1; + param->declarators = d; + *p = d = decl; + } - /* - * If the remaining declarator is a function, make sure it's - * valid by checking its reducibility. 
- */ - decl = *p; - if (decl->type == CDECL_DECL_FUNCTION - && decl->child->type == CDECL_DECL_NULL - && !function_is_reducible(decl)) { - cdecl__err(CDECL_EBADPARAMS, _("too many parentheses in function")); - return -1; + cdecl__free(param); } - - return 1; - } + } while (simplify_functions(p, d)); return 0; } /* - * Function parameters and return types have a few restrictions that are - * really easy to check in comparison to the above absurdity. + * Returns nonzero iff the given specifier list contains a specifier + * of the indicated type. */ -static int -check_parameters(struct cdecl_declarator **p, struct cdecl_declarator *d) +static int have_specifier(struct cdecl_declspec *s, unsigned type) { - struct cdecl_declspec *spec; - struct cdecl *param; - - if (d->type != CDECL_DECL_FUNCTION) - return 0; - - for (param = d->u.function.parameters; param; param = param->next) { - if (!valid_declspecs(param, false)) - return -1; - - /* Check for "void" function parameters as a special case. */ - for (spec = param->specifiers; spec; spec = spec->next) { - if (param->declarators->type != CDECL_DECL_NULL) - continue; - if (spec->type != CDECL_TYPE_VOID) - continue; - - if (spec != param->specifiers || spec->next != NULL) { - cdecl__err(CDECL_EVOIDPARAM, _("void parameter cannot have extra specifiers")); - return -1; - } else if (d->u.function.parameters->next) { - cdecl__err(CDECL_EVOIDPARAM, _("void parameter must stand alone")); - return -1; - } else if (d->u.function.variadic) { - cdecl__err(CDECL_EVOIDPARAM, _("variadic function cannot have void parameter")); - return -1; - } - } - } - + for (; s; s = s->next) + if (s->type == type) + return 1; return 0; } /* - * Functions cannot return arrays or functions. Since the parse tree is - * "inside-out", we need to look for functions as the child declarator. + * Check syntax restrictions on a function declarator's child declarator. 
+ * That is, "pointer to function", "array of function" and "function + * returning function". + * + * Returns -1 if the declaration is invalid, or 0 otherwise. */ -static int -check_rettypes(struct cdecl_declarator **p, struct cdecl_declarator *d) +static int check_function_child(struct cdecl_declarator *d) { - if (!d->child || d->child->type != CDECL_DECL_FUNCTION) - return 0; + struct cdecl_pointer *ptr; switch (d->type) { + case CDECL_DECL_POINTER: + ptr = &d->u.pointer; + if (have_specifier(ptr->qualifiers, CDECL_QUAL_RESTRICT)) { + /* pointer to function cannot be restrict qualified. */ + cdecl__errmsg(CDECL__ERESTRICTFUNC); + return -1; + } + return 0; case CDECL_DECL_FUNCTION: - cdecl__err(CDECL_EBADRETURN, _("functions cannot return functions")); + /* function returning function is never allowed. */ + cdecl__errmsg(CDECL__ERETFUNC); return -1; case CDECL_DECL_ARRAY: - cdecl__err(CDECL_EBADRETURN, _("functions cannot return arrays")); + /* array of function is never allowed. */ + cdecl__errmsg(CDECL__EFUNCARRAY); return -1; } return 0; } -static int -check_arrays(struct cdecl_declarator **p, struct cdecl_declarator *d) +/* + * Check a function parameter declaration for validity, which means it has a + * valid combination of declaration specifiers and, if it is a void parameter, + * that it is the one special case where this is allowed. + * + * Returns -1 if the declaration is invalid, or 0 otherwise. + */ +static int check_function_param(struct cdecl_function *f, struct cdecl *param) { - if (!d->child || d->child->type != CDECL_DECL_ARRAY) - return 0; - - switch (d->type) { - case CDECL_DECL_FUNCTION: - cdecl__err(CDECL_EBADARRAY, _("array members cannot be functions")); + if (!valid_declspecs(param, false)) return -1; + + /* Check for "void" function parameters as a special case. 
*/ + if (param->declarators->type == CDECL_DECL_NULL + && have_specifier(param->specifiers, CDECL_TYPE_VOID)) + { + struct cdecl *fp = f->parameters; + + if (f->variadic || fp->next || fp->specifiers->next) { + cdecl__errmsg(CDECL__EVOIDPARAM); + return -1; + } } return 0; } -static int -normalize_specs(struct cdecl_declarator **p, struct cdecl_declarator *d) +/* + * Normalize the specifier lists for function parameters, and then check the + * function declarator for validity. + * + * Returns -1 if the declaration is invalid, or 0 otherwise. + */ +static int postproc_function(struct cdecl_declarator *d) { - struct cdecl_function *func; - struct cdecl_pointer *ptr; + struct cdecl_function *func = &d->u.function; + struct cdecl *param; + int rc; - switch (d->type) { - case CDECL_DECL_POINTER: - ptr = &d->u.pointer; - ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers); - break; - case CDECL_DECL_FUNCTION: - func = &d->u.function; - for (struct cdecl *i = func->parameters; i; i = i->next) - i->specifiers = cdecl__normalize_specs(i->specifiers); - break; + for (param = func->parameters; param; param = param->next) { + param->specifiers = cdecl__normalize_specs(param->specifiers); + + if ((rc = check_function_param(func, param)) < 0) + return rc; } - return 0; + return check_function_child(d->child); } static int -check_qualifiers(struct cdecl_declarator **p, struct cdecl_declarator *d) +postproc_common(struct cdecl_declarator **p, struct cdecl_declarator *d) { - struct cdecl_declspec *spec; struct cdecl_pointer *ptr; - if (!d->child || d->child->type != CDECL_DECL_POINTER) + switch (d->type) { + case CDECL_DECL_POINTER: + ptr = &d->u.pointer; + ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers); return 0; - - ptr = &d->child->u.pointer; - for (spec = ptr->qualifiers; spec; spec = spec->next) { - if (spec->type == CDECL_QUAL_RESTRICT - && d->type == CDECL_DECL_FUNCTION) { - cdecl__err(CDECL_EBADPOINTER, _("function pointers cannot be restrict-qualified")); 
+ case CDECL_DECL_FUNCTION: + return postproc_function(d); + case CDECL_DECL_ARRAY: + if (d->child && d->child->type == CDECL_DECL_FUNCTION) { + /* function returning array is never allowed. */ + cdecl__errmsg(CDECL__ERETARRAY); return -1; } + return 0; } return 0; @@ -438,146 +495,132 @@ check_qualifiers(struct cdecl_declarator **p, struct cdecl_declarator *d) * depth-first preorder traversal. The function is given a pointer to the * declarator as well as to the pointer which was used to reach that * declarator: this can be used to rewrite entire subtrees. + * + * The called function may return a negative value to indicate an error + * which terminates traversal. + * + * Returns 0 on success, or a negative value on failure. */ -static bool forall_declarators(struct cdecl *decl, +static int forall_declarators(struct cdecl *decl, int f(struct cdecl_declarator **, struct cdecl_declarator *)) { struct cdecl_declarator *d, **p; - for (p = &decl->declarators, d = *p; d; p = &d->child, d = *p) { - switch (f(p, d)) { - case 0: - break; - case 1: - d = *p; - break; - case -1: - return false; - default: - assert(0); - } + for (p = &decl->declarators; *p; p = &d->child) { + int rc; + + rc = f(p, *p); + if (rc < 0) + return rc; + d = *p; if (d->type == CDECL_DECL_FUNCTION) { struct cdecl *i; for (i = d->u.function.parameters; i; i = i->next) { - if (!forall_declarators(i, f)) - return false; + rc = forall_declarators(i, f); + if (rc < 0) + return rc; } } } - return true; + return 0; } -struct cdecl *cdecl_parse_decl(const char *declstr) +static struct cdecl *do_parse(const char *str, int english_mode) { - struct cdecl_declspec *norm_specs; + struct cdecl *decl = NULL; YY_BUFFER_STATE state; yyscan_t scanner; - struct cdecl *decl; - int rc; - cdecl__init_i18n(); +#if YYDEBUG + extern int cdecl__yydebug; + cdecl__yydebug = 1; +#endif - rc = cdecl__yylex_init(&scanner); - if (rc != 0) + cdecl__init_i18n(); + if (cdecl__yylex_init_extra(english_mode, &scanner) != 0) return NULL; 
- state = cdecl__yy_scan_string(declstr, scanner); - rc = cdecl__yyparse(scanner, &decl); + state = cdecl__yy_scan_string(str, scanner); + if (cdecl__yyparse(scanner, &decl) != 0) { + /* + * If the input consists of a complete, valid declaration + * followed by some garbage, that parsed declaration will + * be output by the parser and we need to free it here. + */ + cdecl__free(decl); + decl = NULL; + } cdecl__yy_delete_buffer(state, scanner); cdecl__yylex_destroy(scanner); - if (rc != 0) - return NULL; + return decl; +} + +static int do_postprocess(struct cdecl *decl, int english_mode) +{ + struct cdecl_declspec *norm_specs; + struct cdecl *i; /* - * Since the top-level specifiers are shared between each top-level - * declarator, we need to normalize them once and then propagate the - * new specifier list. + * For a C declaration with more than one full declarator, the + * specifier list is common to all of them. Normalize it once, + * then propagate that to all the linked cdecl structures. + * + * In english mode, the cdecl structure list always has exactly + * one entry so we don't need to do anything differently. */ norm_specs = cdecl__normalize_specs(decl->specifiers); - for (struct cdecl *i = decl; i; i = i->next) { + for (i = decl; i; i = i->next) i->specifiers = norm_specs; - } - /* Now perform checks and simplifications on each declarator. 
*/ - for (struct cdecl *i = decl; i; i = i->next) { - if (!forall_declarators(i, reduce_parentheses)) - goto err; - if (!forall_declarators(i, simplify_functions)) - goto err; - if (!forall_declarators(i, check_parameters)) - goto err; - if (!forall_declarators(i, check_rettypes)) - goto err; - if (!forall_declarators(i, check_arrays)) - goto err; - if (!forall_declarators(i, normalize_specs)) - goto err; - if (!forall_declarators(i, check_qualifiers)) - goto err; + for (i = decl; i; i = i->next) { + if (!english_mode) { + if (forall_declarators(i, reduce_parentheses) < 0) + return 0; + } + + if (forall_declarators(i, postproc_common) < 0) + return 0; if (!valid_declspecs(i, true)) - goto err; + return 0; - if (cdecl_is_abstract(i->declarators) - && (i != decl || i->next)) { - cdecl__err(CDECL_EBADDECL, _("mixing type names and declarations is not allowed")); - goto err; + if (decl->next && cdecl_is_abstract(i->declarators)) { + /* Abstract full declarators: there can only be one. */ + cdecl__errmsg(CDECL__EDECLTYPE); + return 0; } } - return decl; -err: - cdecl__free(decl); - return NULL; + return 1; } -struct cdecl *cdecl_parse_english(const char *english) +static struct cdecl *parse_common(const char *str, int english_mode) { - YY_BUFFER_STATE state; - yyscan_t scanner; struct cdecl *decl; - int rc; - - cdecl__init_i18n(); - rc = cdecl__yylex_init_extra(true, &scanner); - if (rc != 0) + if (!(decl = do_parse(str, english_mode))) return NULL; - state = cdecl__yy_scan_string(english, scanner); - rc = cdecl__yyparse(scanner, &decl); - cdecl__yy_delete_buffer(state, scanner); - cdecl__yylex_destroy(scanner); - - if (rc != 0) + if (!do_postprocess(decl, english_mode)) { + cdecl__free(decl); return NULL; - - for (struct cdecl *i = decl; i; i = i->next) { - i->specifiers = cdecl__normalize_specs(i->specifiers); - - if (!forall_declarators(i, check_parameters)) - goto err; - if (!forall_declarators(i, check_rettypes)) - goto err; - if (!forall_declarators(i, 
check_arrays)) - goto err; - if (!forall_declarators(i, normalize_specs)) - goto err; - if (!forall_declarators(i, check_qualifiers)) - goto err; - - if (!valid_declspecs(i, true)) - goto err; } return decl; -err: - cdecl__free(decl); - return NULL; +} + +struct cdecl *cdecl_parse_decl(const char *declstr) +{ + return parse_common(declstr, false); +} + +struct cdecl *cdecl_parse_english(const char *english) +{ + return parse_common(english, true); } void cdecl_free(struct cdecl *decl)