From 08ac16e49d67a40f0f8127cf43f8a166626615dc Mon Sep 17 00:00:00 2001
From: Nick Bowler
Date: Wed, 13 Jul 2011 19:53:37 -0400
Subject: [PATCH] Add support for parsing English-like declarations.

I think that makes the parser almost feature-complete.
---
 src/cdecl.h | 1 +
 src/cdecl99.c | 28 +++++++
 src/parse-decl.c | 37 +++++++++
 src/parse.y | 190 ++++++++++++++++++++++++++++++++++++++++++++++-
 src/scan.l | 40 ++++++++--
 5 files changed, 287 insertions(+), 9 deletions(-)

diff --git a/src/cdecl.h b/src/cdecl.h
index 29dab35..c4dad87 100644
--- a/src/cdecl.h
+++ b/src/cdecl.h
@@ -96,6 +96,7 @@ struct cdecl {
 };
 
 struct cdecl *cdecl_parse_decl(const char *declstr);
+struct cdecl *cdecl_parse_english(const char *english); /* NULL on failure; the caller in cdecl99.c releases the result with cdecl_free() */
 void cdecl_free(struct cdecl *decl);
 
 size_t cdecl_explain(char *buf, size_t n, struct cdecl *decl);
diff --git a/src/cdecl99.c b/src/cdecl99.c
index 2df7ec8..64b95f7 100644
--- a/src/cdecl99.c
+++ b/src/cdecl99.c
@@ -151,6 +151,32 @@ out:
 	return ret;
 }
 
+static int cmd_declare(const char *cmd, const char *arg) /* handler shared by the "declare" and "type" commands; arg is not used */
+{
+	struct cdecl *decl;
+	const char *str;
+	int ret = -1; /* stays -1 on every early exit through "out" */
+
+	/* The name of the command is significant here: the English grammar itself starts with the "declare" or "type" keyword, so cmd (not arg) is handed to the parser. NOTE(review): presumably cmd points at the command word inside the complete input line -- confirm against the caller. */
+	decl = cdecl_parse_english(cmd);
+	if (!decl)
+		goto out;
+
+	/*
+	 * English parses have at most one full declarator, so no loop is
+	 * needed here.
+	 */
+	str = do_format(cdecl_declare, decl);
+	if (!str)
+		goto out;
+
+	printf("%s\n", str);
+	ret = 1;
+out:
+	cdecl_free(decl); /* decl is NULL here when parsing failed */
+	return ret;
+}
+
 static int cmd_quit(const char *cmd, const char *arg)
 {
 	return 0;
@@ -165,6 +191,8 @@ static const struct command {
 } commands[] = {
 	{ "explain", cmd_explain, "Explain a C declaration." },
 	{ "simplify", cmd_simplify, "Simplify a C declaration." },
+	{ "declare", cmd_declare, "Construct a C declaration." },
+	{ "type", cmd_declare, "Construct a C type name." },
 	{ "help", cmd_help, "Print this list of commands." },
 	{ "quit", cmd_quit, "Quit the program."
},
 	{ "exit", cmd_quit, NULL }
diff --git a/src/parse-decl.c b/src/parse-decl.c
index bce0afe..5002f59 100644
--- a/src/parse-decl.c
+++ b/src/parse-decl.c
@@ -429,3 +429,40 @@ err:
 	cdecl_free(decl);
 	return NULL;
 }
+
+struct cdecl *cdecl_parse_english(const char *english) /* parse and validate an English-like declaration string; returns NULL on any failure */
+{
+	YY_BUFFER_STATE state;
+	yyscan_t scanner;
+	struct cdecl *decl;
+	int rc;
+
+	rc = yylex_init_extra(true, &scanner); /* "true" is the scanner's extra data: it makes the scanner emit T_ENGLISH as its first token */
+	if (rc != 0)
+		return NULL;
+
+	state = yy_scan_string(english, scanner);
+	rc = yyparse(scanner, &decl);
+	yy_delete_buffer(state, scanner);
+	yylex_destroy(scanner);
+
+	if (rc != 0) /* decl is never examined when the parse failed */
+		return NULL;
+
+	for (struct cdecl *i = decl; i; i = i->next) { /* every declaration in the list must pass all of the checks below */
+		if (!forall_declarators(i, check_parameters))
+			goto err;
+		if (!forall_declarators(i, check_rettypes))
+			goto err;
+		if (!forall_declarators(i, check_arrays))
+			goto err;
+
+		if (!valid_declspecs(i, true))
+			goto err;
+	}
+
+	return decl;
+err:
+	cdecl_free(decl); /* a check failed: release the parse result and report failure */
+	return NULL;
+}
diff --git a/src/parse.y b/src/parse.y
index 5887e4d..8764159 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -140,7 +140,9 @@ void cdecl_free(struct cdecl *decl)
 %destructor { free_declarator($$); }
 %destructor { free_decl($$); }
 
+/* Magic tokens: control signals sent by the scanner (lexical error, start of English input), not real syntax */
 %token T_LEX_ERROR
+%token T_ENGLISH
 
 %token T_IDENT "identifier"
 %token T_UINT "integer constant"
@@ -183,6 +185,20 @@ void cdecl_free(struct cdecl *decl)
 %token T_UNION "union"
 %token T_ENUM "enum"
 
+/*
+ * English keywords.
+ */
+%token T_TYPE "type"
+%token T_DECLARE "declare"
+%token T_POINTER "pointer"
+%token T_FUNCTION "function"
+%token T_RETURNING "returning"
+%token T_ARRAY "array"
+%token T_TO "to"
+%token T_OF "of"
+%token T_AS "as"
+%token T_VLA "variable-length"
+
 %type vla_ident
 %type varargs
 %type declspec_simple typespec_simple qualifier_simple
@@ -194,9 +210,25 @@ void cdecl_free(struct cdecl *decl)
 
 %type declaration declarators declarator_wrap
 %type parameter parameters
+%type english_vla
+%type storage_func_specs post_specs
+%type type_qual_spec type_qual_specs typedef_name_qual
+%type english_declarator english_array english_function
+%type english_parameter_list null_decl
+%type english_parameter english_parameters
+%type english english_declaration
+
+/*
+ * Harmless shift/reduce conflicts in english_parameter. See comments below
+ * for more details.
+ */
+%expect 2
+
 %%
 
-input: declaration {
+input: T_ENGLISH english { /* English mode: the scanner sends T_ENGLISH first when its extra flag is set */
+	*out = $2;
+} | declaration {
 	*out = $1;
 };
 
@@ -384,6 +416,162 @@ direct_declarator: {
 	$$->child = $1;
 }
 
+english: T_DECLARE T_IDENT T_AS english_declaration {
+	$$ = $4;
+	for (struct cdecl_declarator *d = $$->declarators; d; d = d->child) { /* walk the whole declarator chain */
+		if (d->type == CDECL_DECL_NULL) { /* the anonymous placeholder created by the empty english_declarator */
+			d->type = CDECL_DECL_IDENT;
+			d->u.ident = $2; /* name it after the identifier given before "as" */
+		}
+	}
+} | T_TYPE english_declaration { /* type names keep the anonymous placeholder */
+	$$ = $2;
+}
+
+storage_func_specs: { $$ = NULL; } | declspec_simple storage_func_specs {
+	ALLOC_STRUCT($$, struct cdecl_declspec,
+		.type = $1,
+		.next = $2);
+}
+
+type_qual_spec: typespec_noid | qualifier
+
+type_qual_specs: { $$ = NULL; } | type_qual_spec type_qual_specs {
+	$$ = $1;
+	$$->next = $2; /* right recursion: the list keeps the specifiers in input order */
+}
+
+/*
+ * The "qualifiers" nonterminal needs to be used here to avoid shift/reduce
+ * conflicts with pointer declarators. So we end up needing to stitch
+ * together three different specifiers lists.
+ */
+post_specs: qualifiers typespec type_qual_specs {
+	$$ = $2; /* the mandatory type specifier heads the stitched list */
+	$$->next = $1; /* then the leading qualifiers, which may be an empty list */
+	for (struct cdecl_declspec *s = $$; s; s = s->next) { /* walk from the head so the tail is found even when there are no qualifiers */
+		if (!s->next) {
+			s->next = $3; /* the trailing specifiers are appended last */
+			break;
+		}
+	}
+}
+
+english_declaration: storage_func_specs english_declarator post_specs {
+	ALLOC_STRUCT($$, struct cdecl,
+		.specifiers = $3,
+		.declarators = $2);
+
+	for (struct cdecl_declspec *s = $$->specifiers; s; s = s->next) { /* post_specs always yields at least the type specifier, so this list is never empty */
+		if (!s->next) {
+			s->next = $1; /* the leading storage-class/function specifiers go at the tail */
+			break;
+		}
+	}
+}
+
+english_declarator: { /* empty: anonymous placeholder at the bottom of the declarator chain */
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_NULL);
+} | english_declarator qualifiers T_POINTER T_TO {
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_POINTER,
+		.child = $1,
+		.u.pointer.qualifiers = $2);
+} | english_declarator english_array {
+	$$ = $2;
+	$$->child = $1; /* what was parsed so far becomes the child of the new array declarator */
+} | english_declarator english_function {
+	$$ = $2;
+	$$->child = $1; /* likewise for a function declarator */
+}
+
+english_function: T_FUNCTION T_RETURNING {
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_FUNCTION,
+		.u.function.parameters = NULL);
+} | T_FUNCTION T_LPAREN english_parameter_list T_RPAREN T_RETURNING {
+	$$ = $3; /* the parameter list rule already built the function declarator */
+}
+
+english_parameter_list: english_parameters varargs {
+	struct cdecl *p, *c, *n;
+
+	/* Parameters were accumulated in reverse order. */
+	for (p = NULL, c = $1; c; p = c, c = n) {
+		n = c->next;
+		c->next = p;
+	}
+
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_FUNCTION,
+		.u.function.parameters = p,
+		.u.function.variadic = $2);
+}
+
+english_parameters: english_parameters T_COMMA english_parameter {
+	$$ = $3;
+	$$->next = $1; /* prepend: the newest parameter becomes the list head */
+} | english_parameter
+
+typedef_name_qual: T_IDENT qualifiers {
+	ALLOC_STRUCT($$, struct cdecl_declspec,
+		.type = CDECL_TYPE_IDENT,
+		.ident = $1,
+		.next = $2);
+}
+
+null_decl: {
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_NULL);
+}
+
+/*
+ * There is a small shift/reduce conflict here.
An unadorned identifier
+ * as the first thing in the parameter might be a typedef name deep in the
+ * first english_declaration (thus empty storage_func_specs and empty
+ * english_declarator need to be reduced) or it might be the identifier
+ * before the "as" (thus the identifier should be shifted).
+ *
+ * The typedef name conflict is the only issue, so treating it as a special
+ * case makes the shift harmless.
+ */
+english_parameter: english_declaration | typedef_name_qual null_decl { /* special case: a bare typedef name, optionally followed by qualifiers */
+	ALLOC_STRUCT($$, struct cdecl,
+		.specifiers = $1,
+		.declarators = $2);
+} | T_IDENT T_AS english_declaration {
+	$$ = $3;
+	for (struct cdecl_declarator *d = $$->declarators; d; d = d->child) { /* walk the whole declarator chain */
+		if (d->type == CDECL_DECL_NULL) {
+			d->type = CDECL_DECL_IDENT;
+			d->u.ident = $1; /* the placeholder becomes the parameter's name */
+		}
+	}
+}
+
+english_array: T_VLA T_ARRAY english_vla T_OF {
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_ARRAY,
+		.u.array.vla = $3);
+} | T_ARRAY T_UINT T_OF {
+	if ($2 == 0) /* an explicit length of zero is rejected */
+		FAIL("array length must be positive");
+
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_ARRAY,
+		.u.array.length = $2);
+} | T_ARRAY T_OF {
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_ARRAY,
+		.u.array.length = 0); /* NOTE(review): presumably length 0 marks an array of unspecified size -- confirm */
+}
+
+english_vla: T_IDENT | {
+	ALLOC($$, sizeof ""); /* no identifier was given: use an empty string */
+	strcpy($$, "");
+}
+
 %%
 
 void yyerror(YYLTYPE *loc, yyscan_t scanner, struct cdecl **out,
diff --git a/src/scan.l b/src/scan.l
index 75657fb..bb6564b 100644
--- a/src/scan.l
+++ b/src/scan.l
@@ -17,23 +17,41 @@
  * along with this program. If not, see .
*/
 
-	#include "parse.h"
+#include "parse.h"
 }
 
 %option noyywrap bison-locations reentrant
+%option extra-type="_Bool"
 
 %{
 #define lex_error(msg) do { \
 	yyerror(yylloc, NULL, NULL, (msg)); \
 	return T_LEX_ERROR; \
 } while(0)
+
+#define dup_token() do { /* store a malloc'd copy of the matched text in yylval->strval */ \
+	yylval->strval = malloc(yyleng+1); \
+	if (!yylval->strval) \
+		lex_error("failed to allocate memory"); /* lex_error returns T_LEX_ERROR from the scanner */ \
+	strcpy(yylval->strval, yytext); \
+} while(0)
 %}
 
+%s ENGLISH
+
 IDENT [_[:alpha:]][_[:alnum:]]*
 INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
 
 %%
 
+%{
+	if (yyextra) { /* extra data of type _Bool, supplied when the scanner is initialised */
+		yyextra = 0; /* one-shot: cleared so T_ENGLISH is sent only once */
+		BEGIN(ENGLISH); /* enter the ENGLISH start condition */
+		return T_ENGLISH;
+	}
+%}
+
 "..." return T_ELLIPSIS;
 ";" return T_SEMICOLON;
 "*" return T_ASTERISK;
@@ -85,15 +103,21 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
 	return T_UINT;
 }
 
-{IDENT} {
-	yylval->strval = malloc(yyleng+1);
-	if (!yylval->strval)
-		lex_error("failed to allocate memory");
-
-	strcpy(yylval->strval, yytext);
-	return T_IDENT;
+{
+	"variable-length" return T_VLA;
+	"type" return T_TYPE;
+	"declare" return T_DECLARE;
+	"pointer" return T_POINTER;
+	"function" return T_FUNCTION;
+	"returning" return T_RETURNING;
+	"array" return T_ARRAY;
+	"to" return T_TO;
+	"of" return T_OF;
+	"as" return T_AS;
 }
 
+{IDENT} { dup_token(); return T_IDENT; }
+
 [[:space:]]+
 . {
 	char buf[] = "syntax error, unexpected #";
-- 
2.43.2