From 1518db926f5b2b3fc8de28b2f99ee870c42cc230 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Wed, 6 Jul 2011 19:58:35 -0400 Subject: [PATCH] Initial support for function declarators. Wow. That was way harder than I thought it would be. --- src/cdecl.h | 5 ++ src/explain.c | 81 ++++++++++++++---- src/parse-decl.c | 218 +++++++++++++++++++++++++++++++++++++++++++++-- src/parse.y | 66 ++++++++++++-- src/scan.l | 15 ++-- 5 files changed, 351 insertions(+), 34 deletions(-) diff --git a/src/cdecl.h b/src/cdecl.h index ffc01e3..3217f9e 100644 --- a/src/cdecl.h +++ b/src/cdecl.h @@ -62,6 +62,7 @@ enum { CDECL_DECL_IDENT, CDECL_DECL_POINTER, CDECL_DECL_ARRAY, + CDECL_DECL_FUNCTION, }; struct cdecl { @@ -85,6 +86,10 @@ struct cdecl { char *vla; uintmax_t length; } array; + struct cdecl_function { + struct cdecl *parameters; + _Bool variadic; + } function; } u; } *declarators; }; diff --git a/src/explain.c b/src/explain.c index 1cb8602..8c86530 100644 --- a/src/explain.c +++ b/src/explain.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "cdecl.h" @@ -176,16 +177,26 @@ static size_t explain_pre_specs(char *buf, size_t n, struct cdecl_declspec *s) return ret; } -/* Renders the name of the thing being declared. */ +/* + * Renders the start of the thing being declared. If top is true, print + * the "declare" or "type" keywords at the front, as appropriate. + */ static size_t -explain_prologue(char *buf, size_t n, struct cdecl_declarator *d) +explain_prologue(char *buf, size_t n, struct cdecl_declarator *d, bool top) { + size_t ret = 0, rc = 0; + while (d) { switch (d->type) { case CDECL_DECL_NULL: - return snprintf(buf, n, "type"); + if (top) + return snprintf(buf, n, "type"); + return 0; case CDECL_DECL_IDENT: - return snprintf(buf, n, "declare %s as", d->u.ident); + if (top) + rc = snprintf(buf, n, "declare"); + ret += advance(&buf, &n, rc); + return ret + snprintf(buf, n, "%s as", d->u.ident); } d = d->child; @@ -225,6 +236,53 @@ explain_array(char *buf, size_t n, struct cdecl_array *a) return ret + snprintf(buf, n, "of"); } +static size_t +explain_declarators(char *buf, size_t n, struct cdecl_declarator *decl); + +static size_t explain_decl(char *buf, size_t n, struct cdecl *decl, bool top) +{ + size_t ret = 0, rc; + + rc = explain_prologue(buf, n, decl->declarators, top); + ret += advance(&buf, &n, rc); + + rc = explain_pre_specs(buf, n, decl->specifiers); + ret += advance(&buf, &n, rc); + + rc = explain_declarators(buf, n, decl->declarators); + ret += advance(&buf, &n, rc); + + return ret + explain_post_specs(buf, n, decl->specifiers); +} + +static size_t explain_function(char *buf, size_t n, struct cdecl_function *f) +{ + size_t ret = 0, rc = 0; + + rc = snprintf(buf, n, "function"); + ret += advance(&buf, &n, rc); + + if (f->parameters) { + rc = snprintf(buf, n, "("); + ret += advance_(&buf, &n, rc); + + for (struct cdecl *p = f->parameters; p; p = p->next) { + rc = explain_decl(buf, n, p, false); + ret += advance_(&buf, &n, rc); + + if (p->next) + rc = snprintf(buf, n, ","); + else if (f->variadic) + rc = snprintf(buf, n, ", ...)"); + else + rc = snprintf(buf, n, ")"); + ret += advance(&buf, &n, rc); + } + } + + return ret + snprintf(buf, n, "returning"); +} + static size_t explain_declarators(char *buf, size_t n, struct cdecl_declarator *d) { @@ -241,6 +299,8 @@ explain_declarators(char *buf, size_t n, struct cdecl_declarator *d) return ret + explain_pointer(buf, n, &d->u.pointer); case CDECL_DECL_ARRAY: return ret + explain_array(buf, n, &d->u.array); + case CDECL_DECL_FUNCTION: + return ret + explain_function(buf, n, &d->u.function); default: assert(0); } @@ -248,16 +308,5 @@ explain_declarators(char *buf, size_t n, struct cdecl_declarator *d) size_t cdecl_explain(char *buf, size_t n, struct cdecl *decl) { - size_t ret = 0, rc; - - rc = explain_prologue(buf, n, decl->declarators); - ret += advance(&buf, &n, rc); - - rc = explain_pre_specs(buf, n, decl->specifiers); - ret += advance(&buf, &n, rc); - - rc = explain_declarators(buf, n, decl->declarators); - ret += advance(&buf, &n, rc); - - return ret + explain_post_specs(buf, n, decl->specifiers); + return explain_decl(buf, n, decl, true); } diff --git a/src/parse-decl.c b/src/parse-decl.c index bf70a60..25ace5b 100644 --- a/src/parse-decl.c +++ b/src/parse-decl.c @@ -17,6 +17,7 @@ */ #include #include +#include #include "cdecl.h" #include "typemap.h" @@ -66,9 +67,207 @@ static int verify_declspecs(struct cdecl_declspec *s) return 0; } -static int verify_decl(struct cdecl *decl) +/* + * The C grammar leaves ambiguous some cases where parentheses represent a + * function declarator or just parentheses. The language uses additional + * context (whether or not a typedef is in scope, etc.) to resolve these + * ambiguities, but we don't have access to that kind of information. + * + * The cdecl99 parser uses an unambiguous grammar which treats almost + * everything as a function, and thus considers things like 'int (x)' to + * be a function type with a single parameter of type 'x' (a typedef name), + * returning int. This can result in very complicated types for simple + * declarations. Ideally, cdecl99 should try and find the "simplest" + * explanation for a given declaration. + * + * Whether or not it achieves the simplest explanation, we apply a simple rule: + * if a declarator could be interpreted as something other than a function, + * do that. + * + * - The function declarator has a null child declarator. + * - The function declarator has exactly one parameter, and is not variadic. + * - The function parameter has a type specifier, and it is a typedef name. + * - The function parameter has no other declaration specifiers. + * - The function parameter does not declare an identifier. + * + * Since cdecl99 supports things like [*] in any context (in C, such constructs + * are only valid in function parameter lists), we don't treat them specially + * here. + */ + +static struct cdecl_declarator *reduce_function(struct cdecl *param) +{ + struct cdecl_declspec *spec = param->specifiers; + struct cdecl_declarator *decl = param->declarators; + struct cdecl_declarator *last; + + for (last = decl; last && last->type != CDECL_DECL_NULL;) + last = last->child; + + if (!last) + return NULL; + + last->type = CDECL_DECL_IDENT; + last->u.ident = spec->ident; + free(param); + free(spec); + + return decl; +} + +static bool function_is_reducible(struct cdecl_declarator *d) +{ + if (d->type != CDECL_DECL_FUNCTION) + return false; + if (d->child->type != CDECL_DECL_NULL) + return false; /* e.g., int (*)(x) */ + + if (!d->u.function.parameters) + return false; /* e.g., int f() */ + if (d->u.function.parameters->next) + return false; /* e.g., int (x, y) */ + if (d->u.function.variadic) + return false; /* e.g., int (x, ...) */ + + if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT) + return false; /* e.g. int (int) */ + if (d->u.function.parameters->specifiers->next) + return false; /* e.g. int (size_t const) */ + + return true; +} + +static int +simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + struct cdecl_declarator *new; + + if (!function_is_reducible(d)) + return 0; + + new = reduce_function(d->u.function.parameters); + if (!new) + return 0; /* e.g. int (foo bar) */ + *p = new; + free(d->child); + free(d); + + return 1; +} + +/* + * The parser's bias towards considering things as functions whenever possible + * makes nested parentheses tricky. (x) is considered to be part of a function + * declarator until simplify_functions converts it. The problem is that + * (((x))) is not valid as part of a function declarator, but it *is* valid + * as an identifier enclosed 3 times in parentheses. This is complicated by + * the fact that things like (((int))) are not valid anywhere. + * + * To avoid ambiguities, the parser actually emits a "function" declarator for + * every pair of parentheses. The ones that can't reasonably be functions + * consist of a single "parameter" with no declaration specifiers (note that + * every valid function parameter will have at least one type specifier). + * + * This pass is to remove these fake functions from the parse tree. We take + * care to avoid turning invalid things like ((int)) into valid things like + * (int) by observing that the only valid function declarators that appear + * in these "fake" parentheses are those that have a non-null child declarator + * (for instance, int ((*)(int)) *or* those that will be eliminated by the + * simplify_functions pass. + */ + +static int +reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d) +{ + struct cdecl *param; + + if (d->type != CDECL_DECL_FUNCTION) + return 0; + + param = d->u.function.parameters; + if (param && param->specifiers == NULL) { + struct cdecl_declarator *decl; + + assert(!param->next); + + decl = param->declarators; + if (decl->type == CDECL_DECL_NULL) { + free(decl); + free(param); + d->u.function.parameters = NULL; + return 0; + } + + if (d->child->type != CDECL_DECL_NULL) { + fprintf(stderr, "invalid function parameter\n"); + return -1; + } + + free(d->child); + free(param); + free(d); + *p = decl; + + /* + * We may have replaced d with another fake function which + * also needs to be eliminated. + */ + if (reduce_parentheses(p, decl) < 0) + return -1; + + /* + * If the remaining declarator is a function, make sure it's + * valid by checking its reducibility. + */ + decl = *p; + if (decl->type == CDECL_DECL_FUNCTION + && decl->child->type == CDECL_DECL_NULL + && !function_is_reducible(decl)) { + fprintf(stderr, "too many parentheses in function\n"); + return -1; + } + + return 1; + } + + return 0; +} + +/* + * Traverse the parse tree, calling a function on every declarator in a + * depth-first preorder traversal. The function is given a pointer to the + * declarator as well as to the pointer which was used to reach that + * declarator: this can be used to rewrite entire subtrees. + */ +static bool forall_declarators(struct cdecl *decl, + int f(struct cdecl_declarator **, struct cdecl_declarator *)) { - return verify_declspecs(decl->specifiers); + struct cdecl_declarator *d, **p; + + for (p = &decl->declarators, d = *p; d; p = &d->child, d = *p) { + switch (f(p, d)) { + case 0: + break; + case 1: + d = *p; + break; + case -1: + return false; + default: + assert(0); + } + + if (d->type == CDECL_DECL_FUNCTION) { + struct cdecl *i; + + for (i = d->u.function.parameters; i; i = i->next) { + if (!forall_declarators(i, f)) + return false; + } + } + } + + return true; } struct cdecl *cdecl_parse_decl(const char *declstr) @@ -84,11 +283,18 @@ struct cdecl *cdecl_parse_decl(const char *declstr) if (rc != 0) return NULL; - rc = verify_decl(decl); - if (rc != 0) { - cdecl_free(decl); - return NULL; + if (verify_declspecs(decl->specifiers) != 0) + goto err; + + for (struct cdecl *i = decl; i; i = i->next) { + if (!forall_declarators(i, reduce_parentheses)) + goto err; + if (!forall_declarators(i, simplify_functions)) + goto err; } return decl; +err: + cdecl_free(decl); + return NULL; } diff --git a/src/parse.y b/src/parse.y index ac8524c..63d5fa9 100644 --- a/src/parse.y +++ b/src/parse.y @@ -25,6 +25,7 @@ %{ #include +#include #include "scan.h" #include "cdecl.h" @@ -57,6 +58,7 @@ int yyparse(struct cdecl **out); %union { uintmax_t uintval; + _Bool boolval; char *strval; struct cdecl_declspec *declspec; struct cdecl_declarator *declarator; @@ -64,6 +66,8 @@ int yyparse(struct cdecl **out); } %{ +static void free_decl(struct cdecl *); + static void free_declspec(struct cdecl_declspec *x) { struct cdecl_declspec *p; @@ -94,6 +98,9 @@ static void free_declarator(struct cdecl_declarator *x) case CDECL_DECL_ARRAY: free(x->u.array.vla); break; + case CDECL_DECL_FUNCTION: + free_decl(x->u.function.parameters); + break; default: assert(0); } @@ -143,6 +150,7 @@ void cdecl_free(struct cdecl *decl) %token T_LBRACKET "[" %token T_RBRACKET "]" %token T_COMMA "," +%token T_ELLIPSIS "." %token T_TYPEDEF "typedef" %token T_EXTERN "extern" @@ -173,12 +181,15 @@ void cdecl_free(struct cdecl *decl) %token T_ENUM "enum" %type vla_ident +%type varargs %type declspec_simple typespec_simple qualifier_simple %type declspec_notype declspec_noid typespec_noid typespec %type qualifier qualifiers %type declspecs declspecs_noid -%type direct_declarator declarator pointer array +%type direct_declarator declarator pointer array parens postfix +%type direct_declarator_ish declarator_ish parameter_type_list %type declaration declarators declarator_wrap +%type parameter parameters %% @@ -296,6 +307,43 @@ array: T_LBRACKET T_UINT T_RBRACKET { .type = CDECL_DECL_ARRAY); } +parameter: declspecs declarator { + ALLOC_STRUCT($$, struct cdecl, + .specifiers = $1, + .declarators = $2); +} + +parameters: parameter | parameters T_COMMA parameter { + $$ = $3; + $$->next = $1; +} + +varargs: { $$ = false; } | T_COMMA T_ELLIPSIS { $$ = true; } + +parameter_type_list: parameters varargs { + struct cdecl *p, *c, *n; + + /* Parameters were accumulated in reverse order. */ + for (p = NULL, c = $1; c; p = c, c = n) { + n = c->next; + c->next = p; + } + + ALLOC_STRUCT($$, struct cdecl_declarator, + .type = CDECL_DECL_FUNCTION, + .u.function.parameters = p, + .u.function.variadic = $2); +} + +parens: T_LPAREN parameter_type_list T_RPAREN { + $$ = $2; +} | T_LPAREN declarator_ish T_RPAREN { + ALLOC_STRUCT($$, struct cdecl_declarator, + .type = CDECL_DECL_FUNCTION); + ALLOC_STRUCT($$->u.function.parameters, struct cdecl, + .declarators = $2); +} + pointer: T_ASTERISK qualifiers direct_declarator { ALLOC_STRUCT($$, struct cdecl_declarator, .type = CDECL_DECL_POINTER, @@ -309,6 +357,16 @@ pointer: T_ASTERISK qualifiers direct_declarator { } declarator: direct_declarator | pointer +declarator_ish: direct_declarator_ish | pointer +postfix: array | parens + +direct_declarator_ish: { + ALLOC_STRUCT($$, struct cdecl_declarator, + .type = CDECL_DECL_NULL); +} | direct_declarator_ish postfix { + $$ = $2; + $$->child = $1; +} direct_declarator: { ALLOC_STRUCT($$, struct cdecl_declarator, @@ -317,12 +375,10 @@ direct_declarator: { ALLOC_STRUCT($$, struct cdecl_declarator, .type = CDECL_DECL_IDENT, .u.ident = $1); -} | direct_declarator array { +} | direct_declarator postfix { $$ = $2; $$->child = $1; -} | T_LPAREN declarator T_RPAREN { - $$ = $2; -}; +} %% void yyerror(YYLTYPE *loc, struct cdecl **out, const char *err) diff --git a/src/scan.l b/src/scan.l index b17d4ee..f0bf30a 100644 --- a/src/scan.l +++ b/src/scan.l @@ -34,13 +34,14 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ %% -";" return T_SEMICOLON; -"*" return T_ASTERISK; -"(" return T_LPAREN; -")" return T_RPAREN; -"[" return T_LBRACKET; -"]" return T_RBRACKET; -"," return T_COMMA; +"..." return T_ELLIPSIS; +";" return T_SEMICOLON; +"*" return T_ASTERISK; +"(" return T_LPAREN; +")" return T_RPAREN; +"[" return T_LBRACKET; +"]" return T_RBRACKET; +"," return T_COMMA; "typedef" return T_TYPEDEF; "extern" return T_EXTERN; -- 2.43.2