X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/2713226fe5e2987e7c9ce2d8307379fa92f634a4..108b2ebc0200fbc6218ca5165c25eea4e7890ac6:/src/parse-decl.c diff --git a/src/parse-decl.c b/src/parse-decl.c index 3137859..8f0bd38 100644 --- a/src/parse-decl.c +++ b/src/parse-decl.c @@ -27,6 +27,33 @@ #include "scan.h" #include "errmsg.h" +/* + * Allocate a "parse item", which is a union of several parse tree + * structure types, together with a string buffer. The s_sz argument + * specifies the size of the string (including its terminator), which + * may be zero. + * + * The union's declarator member is pre-initialized to a valid "identifier" + * declarator, which shares several interesting offsets with the "declspec" + * structure for an "identifier" type specifier. + */ +struct parse_item *cdecl__alloc_item(size_t s_sz) +{ + struct parse_item *ret; + + ret = malloc(offsetof(struct parse_item, s) + s_sz); + if (!ret) { + cdecl__errmsg(CDECL__ENOMEM); + return NULL; + } + + ret->u.declarator.child = NULL; + ret->u.declarator.type = CDECL_DECL_IDENT; + ret->u.declarator.u.ident = ret->s; + + return ret; +} + /* * We can represent type specifiers as a bitmap, which gives us a finite * list of acceptable bitmap values according to the C standard. 
However, @@ -46,9 +73,10 @@ */ static int valid_typespec(struct cdecl_declspec *s) { + struct cdecl_declspec *c; unsigned long map = 0; - for (struct cdecl_declspec *c = s; c; c = c->next) { + for (c = s; c; c = c->next) { unsigned long bit; if (cdecl_spec_kind(c) != CDECL_SPEC_TYPE) @@ -90,7 +118,7 @@ static int valid_typespec(struct cdecl_declspec *s) */ static bool valid_declspecs(struct cdecl *decl, bool top) { - struct cdecl_declspec *specs = decl->specifiers; + struct cdecl_declspec *c, *specs = decl->specifiers; struct cdecl_declarator *d = decl->declarators; bool abstract = cdecl_is_abstract(d); unsigned num_storage = 0; @@ -98,7 +126,7 @@ static bool valid_declspecs(struct cdecl *decl, bool top) if (!valid_typespec(specs)) return false; - for (struct cdecl_declspec *c = specs; c; c = c->next) { + for (c = specs; c; c = c->next) { switch (cdecl_spec_kind(c)) { case CDECL_SPEC_TYPE: if (c->type == CDECL_TYPE_VOID && @@ -173,22 +201,29 @@ static bool valid_declspecs(struct cdecl *decl, bool top) static struct cdecl_declarator *reduce_function(struct cdecl *param) { - struct cdecl_declspec *spec = param->specifiers; - struct cdecl_declarator *decl = param->declarators; - struct cdecl_declarator *last; + struct cdecl_declarator *d, **p = &param->declarators; + struct parse_item *spec = (void *)param->specifiers; - for (last = decl; last && last->type != CDECL_DECL_NULL;) - last = last->child; + while ((d = *p)->child) + p = &d->child; - if (!last) + if (d->type != CDECL_DECL_NULL) return NULL; - last->type = CDECL_DECL_IDENT; - last->u.ident = spec->ident; - free(param); - free(spec); + /* + * The child and u.ident members of cdecl_declarator are expected + * to be located at identical offsets as, respectively, the next + * and ident members within cdecl_declspec, so the expectation is + * that the compiler can elide both assignments. 
+ */ + spec->u.declarator.child = (void *)spec->u.declspec.next; + spec->u.declarator.u.ident = spec->u.declspec.ident; + spec->u.declarator.type = CDECL_DECL_IDENT; + *p = &spec->u.declarator; - return decl; + d = param->declarators; + free(param); + return d; } static bool function_is_reducible(struct cdecl_declarator *d) @@ -227,33 +262,42 @@ simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d) if (!new) return 0; /* e.g. int (foo bar) */ *p = new; - free(d->child); free(d); - return 0; + return 1; } /* - * The parser's bias towards considering things as functions whenever possible - * makes nested parentheses tricky. (x) is considered to be part of a function - * declarator until simplify_functions converts it. The problem is that - * (((x))) is not valid as part of a function declarator, but it *is* valid - * as an identifier enclosed 3 times in parentheses. This is complicated by - * the fact that things like (((int))) are not valid anywhere. + * The main parser's bias towards considering things as functions whenever + * possible makes nested parentheses tricky. "(x)" is considered to be part + * of a function declarator until simplify_functions converts it. The problem + * is that "(((x)))" is not valid as part of a function declarator, but it _is_ + * valid as either an identifier enclosed thrice in parentheses, or an abstract + * function declarator enclosed twice in parentheses. + * + * To avoid ambiguities, the main parser actually returns a function declarator + * for every pair of parentheses. The ones we need to look at consist of a + * single parameter with an empty specifier list (noting that every real + * function parameter will have at least one type specifier). + * + * There are two cases: + * + * - For (), the parser emits a parameter with a lone null declarator. + * This fake parameter simply gets deleted, leaving us with a normal + * function declarator with an empty identifier list. 
* * To avoid ambiguities, the parser actually emits a "function" declarator for - * every pair of parentheses. The ones that can't reasonably be functions - * consist of a single "parameter" with no declaration specifiers (note that - * every valid function parameter will have at least one type specifier). + * - Otherwise, the parameter's outermost declarator is not null. The + * function itself is deleted, replaced in the parse tree with the + * fake parameter's declarator. * - * This pass is to remove these fake functions from the parse tree. We take - * care to avoid turning invalid things like ((int)) into valid things like - * (int) by observing that the only valid function declarators that appear - * in these "fake" parentheses are those that have a non-null child declarator - * (for instance, int ((*)(int)) *or* those that will be eliminated by the - * simplify_functions pass. + * Repeating until there are no fake parameters, this reduction transforms, for + * example, "(((x)))" into "(x)", an abstract function declarator. The result + * is then subject to the function simplification step, which will turn "(x)" + * into x (declaring an identifier). + * + * The whole process is repeated until no more changes are made to the parse + * tree, or a syntax error is detected. */ - static struct cdecl *fake_function_param(struct cdecl_declarator *d) { struct cdecl *param; @@ -273,34 +317,28 @@ static int reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) { struct cdecl *param; - int fake = 0; - - while ((param = fake_function_param(d))) { - struct cdecl_declarator *decl = param->declarators; - d->u.function.parameters = NULL; - - if (decl->type != CDECL_DECL_NULL) { - if (d->child->type != CDECL_DECL_NULL) { - /* Found fake parameter on real function. 
*/ - d->u.function.parameters = param; - cdecl__errmsg(CDECL__EBADPARAM); - return -1; + + do { + d = *p; + while ((param = fake_function_param(d))) { + struct cdecl_declarator *decl = param->declarators; + d->u.function.parameters = NULL; + + if (decl->type != CDECL_DECL_NULL) { + if (d->child->type != CDECL_DECL_NULL) { + /* Fake parameter on real function. */ + d->u.function.parameters = param; + cdecl__errmsg(CDECL__EBADPARAM); + return -1; + } + + param->declarators = d; + *p = d = decl; } - param->declarators = d; - *p = d = decl; - fake = 1; + cdecl__free(param); } - - cdecl__free(param); - } - - simplify_functions(p, d); - if (fake && (*p)->type == CDECL_DECL_FUNCTION) { - /* Started with a fake function but ended with a real one. */ - cdecl__errmsg(CDECL__EMANYPAREN); - return -1; - } + } while (simplify_functions(p, d)); return 0; } @@ -463,9 +501,9 @@ static int forall_declarators(struct cdecl *decl, static struct cdecl *do_parse(const char *str, int english_mode) { + struct cdecl *decl = NULL; YY_BUFFER_STATE state; yyscan_t scanner; - struct cdecl *decl; #if YYDEBUG extern int cdecl__yydebug; @@ -477,8 +515,15 @@ static struct cdecl *do_parse(const char *str, int english_mode) return NULL; state = cdecl__yy_scan_string(str, scanner); - if (cdecl__yyparse(scanner, &decl) != 0) + if (cdecl__yyparse(scanner, &decl) != 0) { + /* + * If the input consists of a complete, valid declaration + * followed by some garbage, that parsed declaration will + * be output by the parser and we need to free it here. + */ + cdecl__free(decl); decl = NULL; + } cdecl__yy_delete_buffer(state, scanner); cdecl__yylex_destroy(scanner);