Wow. That was way harder than I thought it would be.
CDECL_DECL_IDENT,
CDECL_DECL_POINTER,
CDECL_DECL_ARRAY,
+ CDECL_DECL_FUNCTION,
};
struct cdecl {
char *vla;
uintmax_t length;
} array;
+ struct cdecl_function {
+ struct cdecl *parameters;
+ _Bool variadic;
+ } function;
} u;
} *declarators;
};
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <assert.h>
#include "cdecl.h"
return ret;
}
-/* Renders the name of the thing being declared. */
+/*
+ * Renders the start of the thing being declared. If top is true, print
+ * the "declare" or "type" keywords at the front, as appropriate.
+ */
static size_t
-explain_prologue(char *buf, size_t n, struct cdecl_declarator *d)
+explain_prologue(char *buf, size_t n, struct cdecl_declarator *d, bool top)
{
+ size_t ret = 0, rc = 0;
+
while (d) {
switch (d->type) {
case CDECL_DECL_NULL:
- return snprintf(buf, n, "type");
+ if (top)
+ return snprintf(buf, n, "type");
+ return 0;
case CDECL_DECL_IDENT:
- return snprintf(buf, n, "declare %s as", d->u.ident);
+ if (top)
+ rc = snprintf(buf, n, "declare");
+ ret += advance(&buf, &n, rc);
+ return ret + snprintf(buf, n, "%s as", d->u.ident);
}
d = d->child;
return ret + snprintf(buf, n, "of");
}
+static size_t
+explain_declarators(char *buf, size_t n, struct cdecl_declarator *decl);
+
+static size_t explain_decl(char *buf, size_t n, struct cdecl *decl, bool top)
+{
+ size_t ret = 0, rc;
+
+ rc = explain_prologue(buf, n, decl->declarators, top);
+ ret += advance(&buf, &n, rc);
+
+ rc = explain_pre_specs(buf, n, decl->specifiers);
+ ret += advance(&buf, &n, rc);
+
+ rc = explain_declarators(buf, n, decl->declarators);
+ ret += advance(&buf, &n, rc);
+
+ return ret + explain_post_specs(buf, n, decl->specifiers);
+}
+
+static size_t explain_function(char *buf, size_t n, struct cdecl_function *f)
+{
+ size_t ret = 0, rc = 0;
+
+ rc = snprintf(buf, n, "function");
+ ret += advance(&buf, &n, rc);
+
+ if (f->parameters) {
+ rc = snprintf(buf, n, "(");
+ ret += advance_(&buf, &n, rc);
+
+ for (struct cdecl *p = f->parameters; p; p = p->next) {
+ rc = explain_decl(buf, n, p, false);
+ ret += advance_(&buf, &n, rc);
+
+ if (p->next)
+ rc = snprintf(buf, n, ",");
+ else if (f->variadic)
+ rc = snprintf(buf, n, ", ...)");
+ else
+ rc = snprintf(buf, n, ")");
+ ret += advance(&buf, &n, rc);
+ }
+ }
+
+ return ret + snprintf(buf, n, "returning");
+}
+
static size_t
explain_declarators(char *buf, size_t n, struct cdecl_declarator *d)
{
return ret + explain_pointer(buf, n, &d->u.pointer);
case CDECL_DECL_ARRAY:
return ret + explain_array(buf, n, &d->u.array);
+ case CDECL_DECL_FUNCTION:
+ return ret + explain_function(buf, n, &d->u.function);
default:
assert(0);
}
size_t cdecl_explain(char *buf, size_t n, struct cdecl *decl)
{
- size_t ret = 0, rc;
-
- rc = explain_prologue(buf, n, decl->declarators);
- ret += advance(&buf, &n, rc);
-
- rc = explain_pre_specs(buf, n, decl->specifiers);
- ret += advance(&buf, &n, rc);
-
- rc = explain_declarators(buf, n, decl->declarators);
- ret += advance(&buf, &n, rc);
-
- return ret + explain_post_specs(buf, n, decl->specifiers);
+ return explain_decl(buf, n, decl, true);
}
*/
#include <stdio.h>
#include <assert.h>
+#include <stdbool.h>
#include "cdecl.h"
#include "typemap.h"
return 0;
}
-static int verify_decl(struct cdecl *decl)
+/*
+ * The C grammar leaves ambiguous some cases where parentheses represent a
+ * function declarator or just parentheses. The language uses additional
+ * context (whether or not a typedef is in scope, etc.) to resolve these
+ * ambiguities, but we don't have access to that kind of information.
+ *
+ * The cdecl99 parser uses an unambiguous grammar which treats almost
+ * everything as a function, and thus considers things like 'int (x)' to
+ * be a function type with a single parameter of type 'x' (a typedef name),
+ * returning int. This can result in very complicated types for simple
+ * declarations. Ideally, cdecl99 should try and find the "simplest"
+ * explanation for a given declaration.
+ *
+ * Whether or not it achieves the simplest explanation, we apply a simple rule:
+ * if a declarator could be interpreted as something other than a function,
+ * do that.
+ *
+ * - The function declarator has a null child declarator.
+ * - The function declarator has exactly one parameter, and is not variadic.
+ * - The function parameter has a type specifier, and it is a typedef name.
+ * - The function parameter has no other declaration specifiers.
+ * - The function parameter does not declare an identifier.
+ *
+ * Since cdecl99 supports things like [*] in any context (in C, such constructs
+ * are only valid in function parameter lists), we don't treat them specially
+ * here.
+ */
+
+static struct cdecl_declarator *reduce_function(struct cdecl *param)
+{
+ struct cdecl_declspec *spec = param->specifiers;
+ struct cdecl_declarator *decl = param->declarators;
+ struct cdecl_declarator *last;
+
+ for (last = decl; last && last->type != CDECL_DECL_NULL;)
+ last = last->child;
+
+ if (!last)
+ return NULL;
+
+ last->type = CDECL_DECL_IDENT;
+ last->u.ident = spec->ident;
+ free(param);
+ free(spec);
+
+ return decl;
+}
+
+static bool function_is_reducible(struct cdecl_declarator *d)
+{
+ if (d->type != CDECL_DECL_FUNCTION)
+ return false;
+ if (d->child->type != CDECL_DECL_NULL)
+ return false; /* e.g., int (*)(x) */
+
+ if (!d->u.function.parameters)
+ return false; /* e.g., int f() */
+ if (d->u.function.parameters->next)
+ return false; /* e.g., int (x, y) */
+ if (d->u.function.variadic)
+ return false; /* e.g., int (x, ...) */
+
+ if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT)
+ return false; /* e.g. int (int) */
+ if (d->u.function.parameters->specifiers->next)
+ return false; /* e.g. int (size_t const) */
+
+ return true;
+}
+
+static int
+simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d)
+{
+ struct cdecl_declarator *new;
+
+ if (!function_is_reducible(d))
+ return 0;
+
+ new = reduce_function(d->u.function.parameters);
+ if (!new)
+ return 0; /* e.g. int (foo bar) */
+ *p = new;
+ free(d->child);
+ free(d);
+
+ return 1;
+}
+
+/*
+ * The parser's bias towards considering things as functions whenever possible
+ * makes nested parentheses tricky. (x) is considered to be part of a function
+ * declarator until simplify_functions converts it. The problem is that
+ * (((x))) is not valid as part of a function declarator, but it *is* valid
+ * as an identifier enclosed 3 times in parentheses. This is complicated by
+ * the fact that things like (((int))) are not valid anywhere.
+ *
+ * To avoid ambiguities, the parser actually emits a "function" declarator for
+ * every pair of parentheses. The ones that can't reasonably be functions
+ * consist of a single "parameter" with no declaration specifiers (note that
+ * every valid function parameter will have at least one type specifier).
+ *
+ * This pass is to remove these fake functions from the parse tree. We take
+ * care to avoid turning invalid things like ((int)) into valid things like
+ * (int) by observing that the only valid function declarators that appear
+ * in these "fake" parentheses are those that have a non-null child declarator
+ * (for instance, int ((*)(int)) *or* those that will be eliminated by the
+ * simplify_functions pass.
+ */
+
+static int
+reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d)
+{
+ struct cdecl *param;
+
+ if (d->type != CDECL_DECL_FUNCTION)
+ return 0;
+
+ param = d->u.function.parameters;
+ if (param && param->specifiers == NULL) {
+ struct cdecl_declarator *decl;
+
+ assert(!param->next);
+
+ decl = param->declarators;
+ if (decl->type == CDECL_DECL_NULL) {
+ free(decl);
+ free(param);
+ d->u.function.parameters = NULL;
+ return 0;
+ }
+
+ if (d->child->type != CDECL_DECL_NULL) {
+ fprintf(stderr, "invalid function parameter\n");
+ return -1;
+ }
+
+ free(d->child);
+ free(param);
+ free(d);
+ *p = decl;
+
+ /*
+ * We may have replaced d with another fake function which
+ * also needs to be eliminated.
+ */
+ if (reduce_parentheses(p, decl) < 0)
+ return -1;
+
+ /*
+ * If the remaining declarator is a function, make sure it's
+ * valid by checking its reducibility.
+ */
+ decl = *p;
+ if (decl->type == CDECL_DECL_FUNCTION
+ && decl->child->type == CDECL_DECL_NULL
+ && !function_is_reducible(decl)) {
+ fprintf(stderr, "too many parentheses in function\n");
+ return -1;
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Traverse the parse tree, calling a function on every declarator in a
+ * depth-first preorder traversal. The function is given a pointer to the
+ * declarator as well as to the pointer which was used to reach that
+ * declarator: this can be used to rewrite entire subtrees.
+ */
+static bool forall_declarators(struct cdecl *decl,
+ int f(struct cdecl_declarator **, struct cdecl_declarator *))
{
- return verify_declspecs(decl->specifiers);
+ struct cdecl_declarator *d, **p;
+
+ for (p = &decl->declarators, d = *p; d; p = &d->child, d = *p) {
+ switch (f(p, d)) {
+ case 0:
+ break;
+ case 1:
+ d = *p;
+ break;
+ case -1:
+ return false;
+ default:
+ assert(0);
+ }
+
+ if (d->type == CDECL_DECL_FUNCTION) {
+ struct cdecl *i;
+
+ for (i = d->u.function.parameters; i; i = i->next) {
+ if (!forall_declarators(i, f))
+ return false;
+ }
+ }
+ }
+
+ return true;
}
struct cdecl *cdecl_parse_decl(const char *declstr)
if (rc != 0)
return NULL;
- rc = verify_decl(decl);
- if (rc != 0) {
- cdecl_free(decl);
- return NULL;
+ if (verify_declspecs(decl->specifiers) != 0)
+ goto err;
+
+ for (struct cdecl *i = decl; i; i = i->next) {
+ if (!forall_declarators(i, reduce_parentheses))
+ goto err;
+ if (!forall_declarators(i, simplify_functions))
+ goto err;
}
return decl;
+err:
+ cdecl_free(decl);
+ return NULL;
}
%{
#include <assert.h>
+#include <stdbool.h>
#include "scan.h"
#include "cdecl.h"
%union {
uintmax_t uintval;
+ _Bool boolval;
char *strval;
struct cdecl_declspec *declspec;
struct cdecl_declarator *declarator;
}
%{
+static void free_decl(struct cdecl *);
+
static void free_declspec(struct cdecl_declspec *x)
{
struct cdecl_declspec *p;
case CDECL_DECL_ARRAY:
free(x->u.array.vla);
break;
+ case CDECL_DECL_FUNCTION:
+ free_decl(x->u.function.parameters);
+ break;
default:
assert(0);
}
%token T_LBRACKET "["
%token T_RBRACKET "]"
%token T_COMMA ","
+%token T_ELLIPSIS "."
%token T_TYPEDEF "typedef"
%token T_EXTERN "extern"
%token T_ENUM "enum"
%type <strval> vla_ident
+%type <boolval> varargs
%type <uintval> declspec_simple typespec_simple qualifier_simple
%type <declspec> declspec_notype declspec_noid typespec_noid typespec
%type <declspec> qualifier qualifiers
%type <declspec> declspecs declspecs_noid
-%type <declarator> direct_declarator declarator pointer array
+%type <declarator> direct_declarator declarator pointer array parens postfix
+%type <declarator> direct_declarator_ish declarator_ish parameter_type_list
%type <decl> declaration declarators declarator_wrap
+%type <decl> parameter parameters
%%
.type = CDECL_DECL_ARRAY);
}
+parameter: declspecs declarator {
+ ALLOC_STRUCT($$, struct cdecl,
+ .specifiers = $1,
+ .declarators = $2);
+}
+
+parameters: parameter | parameters T_COMMA parameter {
+ $$ = $3;
+ $$->next = $1;
+}
+
+varargs: { $$ = false; } | T_COMMA T_ELLIPSIS { $$ = true; }
+
+parameter_type_list: parameters varargs {
+ struct cdecl *p, *c, *n;
+
+ /* Parameters were accumulated in reverse order. */
+ for (p = NULL, c = $1; c; p = c, c = n) {
+ n = c->next;
+ c->next = p;
+ }
+
+ ALLOC_STRUCT($$, struct cdecl_declarator,
+ .type = CDECL_DECL_FUNCTION,
+ .u.function.parameters = p,
+ .u.function.variadic = $2);
+}
+
+parens: T_LPAREN parameter_type_list T_RPAREN {
+ $$ = $2;
+} | T_LPAREN declarator_ish T_RPAREN {
+ ALLOC_STRUCT($$, struct cdecl_declarator,
+ .type = CDECL_DECL_FUNCTION);
+ ALLOC_STRUCT($$->u.function.parameters, struct cdecl,
+ .declarators = $2);
+}
+
pointer: T_ASTERISK qualifiers direct_declarator {
ALLOC_STRUCT($$, struct cdecl_declarator,
.type = CDECL_DECL_POINTER,
}
declarator: direct_declarator | pointer
+declarator_ish: direct_declarator_ish | pointer
+postfix: array | parens
+
+direct_declarator_ish: {
+ ALLOC_STRUCT($$, struct cdecl_declarator,
+ .type = CDECL_DECL_NULL);
+} | direct_declarator_ish postfix {
+ $$ = $2;
+ $$->child = $1;
+}
direct_declarator: {
ALLOC_STRUCT($$, struct cdecl_declarator,
ALLOC_STRUCT($$, struct cdecl_declarator,
.type = CDECL_DECL_IDENT,
.u.ident = $1);
-} | direct_declarator array {
+} | direct_declarator postfix {
$$ = $2;
$$->child = $1;
-} | T_LPAREN declarator T_RPAREN {
- $$ = $2;
-};
+}
%%
void yyerror(YYLTYPE *loc, struct cdecl **out, const char *err)
%%
-";" return T_SEMICOLON;
-"*" return T_ASTERISK;
-"(" return T_LPAREN;
-")" return T_RPAREN;
-"[" return T_LBRACKET;
-"]" return T_RBRACKET;
-"," return T_COMMA;
+"..." return T_ELLIPSIS;
+";" return T_SEMICOLON;
+"*" return T_ASTERISK;
+"(" return T_LPAREN;
+")" return T_RPAREN;
+"[" return T_LBRACKET;
+"]" return T_RBRACKET;
+"," return T_COMMA;
"typedef" return T_TYPEDEF;
"extern" return T_EXTERN;