From f5e2e659f116a49afd3b9d9f37553646e62fa4d4 Mon Sep 17 00:00:00 2001
From: Nick Bowler
Date: Mon, 20 Jun 2011 21:34:45 -0400
Subject: [PATCH] Add an initial declaration parser.

---
 .gitignore     |   1 +
 Makefile.am    |  11 ++-
 src/cdecl.h    |  74 ++++++++++++++++++
 src/cdecl99.c  |  21 ++++++
 src/libcdecl.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++
 src/parse.y    | 187 +++++++++++++++++++++++++++++++++++++++++++++++--
 src/scan.l     |  60 ++++++++++++++++
 7 files changed, 539 insertions(+), 5 deletions(-)
 create mode 100644 src/cdecl.h
 create mode 100644 src/cdecl99.c
 create mode 100644 src/libcdecl.c

diff --git a/.gitignore b/.gitignore
index 56a2d23..191edd0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,4 @@ Makefile.in
 /missing
 /stamp-h1
 /install-sh
+/cdecl99
diff --git a/Makefile.am b/Makefile.am
index ecc1524..6921dbb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,11 +5,20 @@ MAINTAINERCLEANFILES = src/scan.c src/scan.h src/scan.stamp \
 
 EXTRA_DIST = src/parse.stamp src/scan.stamp
 
+include_HEADERS = src/cdecl.h
+noinst_HEADERS = src/scan.h src/parse.h
+
 lib_LTLIBRARIES = libcdecl.la
-libcdecl_la_SOURCES = src/scan.c src/scan.h src/parse.c src/parse.h
+libcdecl_la_LDFLAGS = -export-symbols-regex '^cdecl_'
+libcdecl_la_SOURCES = src/scan.c src/parse.c src/libcdecl.c
+
+bin_PROGRAMS = cdecl99
+cdecl99_SOURCES = src/cdecl99.c
+cdecl99_LDADD = libcdecl.la
 
 src/parse.$(OBJEXT): src/scan.h
 src/scan.$(OBJEXT): src/parse.h
+src/libcdecl.$(OBJEXT): src/scan.h src/parse.h
 
 # These are required to prevent the builtin lex/yacc rules from triggering...
 src/scan.c src/scan.h: src/scan.stamp
diff --git a/src/cdecl.h b/src/cdecl.h
new file mode 100644
index 0000000..90a8804
--- /dev/null
+++ b/src/cdecl.h
@@ -0,0 +1,74 @@
+#ifndef CDECL_H_
+#define CDECL_H_
+
+/* Declaration specifier kinds. */
+enum {
+	CDECL_SPEC_TYPE = 0,
+	CDECL_SPEC_STOR = 256,
+	CDECL_SPEC_QUAL = 512,
+	CDECL_SPEC_FUNC = 1024,
+};
+
+enum {
+	CDECL_TYPE_VOID = CDECL_SPEC_TYPE,
+	CDECL_TYPE_CHAR,
+	CDECL_TYPE_SHORT,
+	CDECL_TYPE_INT,
+	CDECL_TYPE_LONG,
+	CDECL_TYPE_FLOAT,
+	CDECL_TYPE_DOUBLE,
+	CDECL_TYPE_SIGNED,
+	CDECL_TYPE_UNSIGNED,
+	CDECL_TYPE_BOOL,
+	CDECL_TYPE_COMPLEX,
+	CDECL_TYPE_STRUCT,
+	CDECL_TYPE_UNION,
+	CDECL_TYPE_ENUM,
+	CDECL_TYPE_IDENT,
+	CDECL_STOR_TYPEDEF = CDECL_SPEC_STOR,
+	CDECL_STOR_EXTERN,
+	CDECL_STOR_STATIC,
+	CDECL_STOR_AUTO,
+	CDECL_STOR_REGISTER,
+	CDECL_QUAL_RESTRICT = CDECL_SPEC_QUAL,
+	CDECL_QUAL_VOLATILE,
+	CDECL_QUAL_CONST,
+	CDECL_FUNC_INLINE = CDECL_SPEC_FUNC,
+};
+
+/* Declarator types. */
+enum {
+	CDECL_DECL_IDENT,
+};
+
+struct cdecl {
+	struct cdecl_declspec {
+		struct cdecl_declspec *next;
+		unsigned type;
+		char *ident;
+	} *specifiers;
+
+	struct cdecl_declarator {
+		struct cdecl_declarator *next;
+		unsigned type;
+		char *ident;
+	} *declarators;
+};
+
+/*
+ * Parse and validate a single declaration.  Returns a newly allocated
+ * declaration on success, which the caller releases with cdecl_free, or
+ * NULL on failure (after a diagnostic has been printed to standard error).
+ */
+struct cdecl *cdecl_parse_decl(const char *declstr);
+
+/* Free a declaration returned by cdecl_parse_decl; decl must not be NULL. */
+void cdecl_free(struct cdecl *decl);
+
+/* Extract the CDECL_SPEC_* kind from a declaration specifier's type. */
+static inline int cdecl_spec_kind(struct cdecl_declspec *spec)
+{
+	return spec->type & ~0xffu;
+}
+
+#endif
diff --git a/src/cdecl99.c b/src/cdecl99.c
new file mode 100644
index 0000000..6eb07cc
--- /dev/null
+++ b/src/cdecl99.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "cdecl.h"
+
+int main(int argc, char **argv)
+{
+	struct cdecl *decl;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: cdecl99 decl\n");
+		return EXIT_FAILURE;
+	}
+
+	decl = cdecl_parse_decl(argv[1]);
+	if (!decl) {
+		return EXIT_FAILURE;
+	}
+
+	cdecl_free(decl);
+	return 0;
+}
diff --git a/src/libcdecl.c b/src/libcdecl.c
new file mode 100644
index 0000000..c06080b
--- /dev/null
+++ b/src/libcdecl.c
@@ -0,0 +1,190 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "cdecl.h"
+#include "parse.h"
+#include "scan.h"
+
+#define PASTE(a, b) a ## b
+#define PASTE2(a, b) PASTE(a, b)
+
+#define BIT1(a) ((1ul<<(CDECL_TYPE_ ## a)))
+#define BIT2(a, b) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b)))
+#define BIT3(a, b, c) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c)))
+#define BIT4(a, b, c, d) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c))|(1ul<<(CDECL_TYPE_ ## d)))
+
+#define NARG_(_4, _3, _2, _1, n, ...) n
+#define NARG(...) NARG_(__VA_ARGS__, 4, 3, 2, 1)
+
+#define BITS(...) PASTE2(BIT, NARG(__VA_ARGS__))(__VA_ARGS__)
+
+/*
+ * We can represent type specifiers as a bitmap, which gives us a finite
+ * list of acceptable bitmap values according to the C standard. However,
+ * the "long" specifier is allowed to occur more than once, but only at most
+ * 2 times. Treat it as a special case, assigning an unused bit to represent
+ * the second long. The bit index must stay below 32, the minimum width of
+ * unsigned long, for the shifts in the BIT macros to be well defined.
+ */
+#define CDECL_TYPE_LLONG 31
+
+/*
+ * Check that a completed type specifier bitmap is one of the combinations
+ * permitted by the C standard. Returns 0 if it is, -1 otherwise.
+ */
+static int typemap_verify(unsigned long map)
+{
+	/*
+	 * This is the complete list of valid type specifiers from C99§6.7.2#2
+	 */
+
+	switch (map) {
+	case BITS(VOID):
+	case BITS(CHAR):
+	case BITS(SIGNED, CHAR):
+	case BITS(UNSIGNED, CHAR):
+	case BITS(SHORT):
+	case BITS(SIGNED, SHORT):
+	case BITS(SHORT, INT):
+	case BITS(SIGNED, SHORT, INT):
+	case BITS(UNSIGNED, SHORT):
+	case BITS(UNSIGNED, SHORT, INT):
+	case BITS(INT):
+	case BITS(SIGNED):
+	case BITS(SIGNED, INT):
+	case BITS(UNSIGNED):
+	case BITS(UNSIGNED, INT):
+	case BITS(LONG):
+	case BITS(LONG, INT):
+	case BITS(SIGNED, LONG):
+	case BITS(SIGNED, LONG, INT):
+	case BITS(UNSIGNED, LONG):
+	case BITS(UNSIGNED, LONG, INT):
+	case BITS(LLONG, LONG):
+	case BITS(LLONG, LONG, INT):
+	case BITS(SIGNED, LLONG, LONG):
+	case BITS(SIGNED, LLONG, LONG, INT):
+	case BITS(UNSIGNED, LLONG, LONG):
+	case BITS(UNSIGNED, LLONG, LONG, INT):
+	case BITS(BOOL):
+	case BITS(FLOAT):
+	case BITS(DOUBLE):
+	case BITS(LONG, DOUBLE):
+	case BITS(FLOAT, COMPLEX):
+	case BITS(DOUBLE, COMPLEX):
+	case BITS(LONG, DOUBLE, COMPLEX):
+	case BITS(STRUCT):
+	case BITS(UNION):
+	case BITS(ENUM):
+	case BITS(IDENT):
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Add the type specifier s to the bitmap. Returns the updated bitmap, or
+ * (unsigned long)-1 after printing a diagnostic if s occurs too many times.
+ */
+static unsigned long
+typemap_add_typespec(unsigned long map, struct cdecl_declspec *s)
+{
+	assert(s->type < CDECL_TYPE_LLONG);
+
+	if (s->type == CDECL_TYPE_LONG) {
+		if (map & BITS(LLONG)) {
+			fprintf(stderr, "too many long specifiers\n");
+			return -1;
+		} else if (map & BITS(LONG)) {
+			return map | BITS(LLONG);
+		}
+	}
+
+	if (map & (1ul<<s->type)) {
+		fprintf(stderr, "duplicate type specifier\n");
+		return -1;
+	}
+
+	return map | (1ul<<s->type);
+}
+
+/*
+ * Validate a list of declaration specifiers. Returns 0 if the list is
+ * acceptable, or -1 after printing a diagnostic to standard error.
+ */
+static int verify_specs(struct cdecl_declspec *s)
+{
+	unsigned long typemap = 0;
+	unsigned num_storage = 0;
+
+	for (struct cdecl_declspec *c = s; c; c = c->next) {
+		switch (cdecl_spec_kind(c)) {
+		case CDECL_SPEC_TYPE:
+			typemap = typemap_add_typespec(typemap, c);
+			if (typemap == -1) {
+				return -1;
+			}
+			break;
+		case CDECL_SPEC_STOR:
+			if (++num_storage > 1) {
+				fprintf(stderr, "too many storage-class specifiers\n");
+				return -1;
+			}
+			break;
+		case CDECL_SPEC_QUAL:
+			/*
+			 * Since we don't support pointer types yet, all
+			 * restrict qualifiers are invalid. Other qualifiers
+			 * are always valid.
+			 */
+			if (c->type == CDECL_QUAL_RESTRICT) {
+				fprintf(stderr, "only pointer types can be restrict-qualified.\n");
+				return -1;
+			}
+			break;
+		case CDECL_SPEC_FUNC:
+			/*
+			 * Likewise for function specifiers.
+			 */
+			fprintf(stderr, "only function declarations may have function specifiers.\n");
+			return -1;
+		default:
+			abort();
+		}
+	}
+
+	if (typemap_verify(typemap) == -1) {
+		fprintf(stderr, "conflicting type specifiers\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int verify_decl(struct cdecl *decl)
+{
+	return verify_specs(decl->specifiers);
+}
+
+struct cdecl *cdecl_parse_decl(const char *declstr)
+{
+	YY_BUFFER_STATE state;
+	struct cdecl *decl;
+	int rc;
+
+	state = yy_scan_string(declstr);
+	rc = yyparse(&decl);
+	yy_delete_buffer(state);
+
+	if (rc != 0)
+		return NULL;
+
+	rc = verify_decl(decl);
+	if (rc != 0) {
+		cdecl_free(decl);
+		return NULL;
+	}
+
+	return decl;
+}
diff --git a/src/parse.y b/src/parse.y
index 70e1878..fdffa40 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -16,28 +16,207 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+%parse-param {struct cdecl **out}
 %define api.pure
 %error-verbose
 %locations
 
 %{
 #include "scan.h"
+#include "cdecl.h"
+
+#define FAIL(msg) do { \
+	yyerror(&yylloc, NULL, msg); \
+	YYERROR; \
+} while (0)
+
+#define ALLOC(ptr, size) do { \
+	(ptr) = malloc(size); \
+	if (!(ptr)) \
+		FAIL("failed to allocate memory"); \
+} while (0)
+
+#define ALLOC_STRUCT(ptr, type, ...) do { \
+	ALLOC(ptr, sizeof (type)); \
+	*(ptr) = (type) { __VA_ARGS__ }; \
+} while (0)
 %}
 
+%code requires {
+#include <inttypes.h>
+}
+
 %code provides {
-void yyerror(const char *);
+void yyerror(YYLTYPE *, struct cdecl **, const char *);
+int yyparse(struct cdecl **out);
 }
 
 %union {
-	int foo;
+	uintmax_t uintval;
+	char *strval;
+	struct cdecl_declspec *declspec;
+	struct cdecl_declarator *declarator;
+	struct cdecl *decl;
+}
+
+%{
+static void free_declspec(struct cdecl_declspec *x)
+{
+	struct cdecl_declspec *p;
+	while (x) {
+		p = x->next;
+		free(x->ident);
+		free(x);
+		x = p;
+	}
 }
 
+static void free_declarator(struct cdecl_declarator *x)
+{
+	struct cdecl_declarator *p;
+	while (x) {
+		p = x->next;
+		free(x->ident);
+		free(x);
+		x = p;
+	}
+}
+
+void cdecl_free(struct cdecl *decl)
+{
+	free_declspec(decl->specifiers);
+	free_declarator(decl->declarators);
+	free(decl);
+}
+%}
+
+%destructor { free($$); } <strval>
+%destructor { free_declspec($$); } <declspec>
+%destructor { free_declarator($$); } <declarator>
+%destructor { cdecl_free($$); } <decl>
+
+%token T_LEX_ERROR
+
+%token <strval> T_IDENT "identifier"
+%token T_SEMICOLON ";"
+%token T_ASTERISK "*"
+%token T_LPAREN "("
+%token T_RPAREN ")"
+%token T_LBRACKET "["
+%token T_RBRACKET "]"
+%token T_COMMA ","
+
+%token T_TYPEDEF "typedef"
+%token T_EXTERN "extern"
+%token T_STATIC "static"
+%token T_AUTO "auto"
+%token T_REGISTER "register"
+
+%token T_INLINE "inline"
+
+%token T_RESTRICT "restrict"
+%token T_VOLATILE "volatile"
+%token T_CONST "const"
+
+%token T_VOID "void"
+%token T_CHAR "char"
+%token T_SHORT "short"
+%token T_INT "int"
+%token T_LONG "long"
+%token T_FLOAT "float"
+%token T_DOUBLE "double"
+%token T_SIGNED "signed"
+%token T_UNSIGNED "unsigned"
+%token T_BOOL "_Bool"
+%token T_COMPLEX "_Complex"
+
+%token T_STRUCT "struct"
+%token T_UNION "union"
+%token T_ENUM "enum"
+
+%type <uintval> declspec_simple
+%type <declspec> declspec declspecs
+%type <declarator> declarator declarators
+%type <decl> declaration
+
 %%
 
-input: ;
+input: declaration {
+	*out = $1;
+};
+
+declaration: declspecs declarators T_SEMICOLON {
+	ALLOC_STRUCT($$, struct cdecl,
+		.specifiers = $1,
+		.declarators = $2);
+};
+
+/* Each new specifier is prepended, so the list is in reverse source order. */
+declspecs: { $$ = NULL; } | declspecs declspec {
+	$$ = $2;
+	$$->next = $1;
+}
+
+declarators: declarator | declarator T_COMMA declarators {
+	$$ = $1;
+	$$->next = $3;
+};
+
+declspec_simple: T_VOID { $$ = CDECL_TYPE_VOID; }
+	| T_CHAR { $$ = CDECL_TYPE_CHAR; }
+	| T_SHORT { $$ = CDECL_TYPE_SHORT; }
+	| T_INT { $$ = CDECL_TYPE_INT; }
+	| T_LONG { $$ = CDECL_TYPE_LONG; }
+	| T_FLOAT { $$ = CDECL_TYPE_FLOAT; }
+	| T_DOUBLE { $$ = CDECL_TYPE_DOUBLE; }
+	| T_SIGNED { $$ = CDECL_TYPE_SIGNED; }
+	| T_UNSIGNED { $$ = CDECL_TYPE_UNSIGNED; }
+	| T_BOOL { $$ = CDECL_TYPE_BOOL; }
+	| T_COMPLEX { $$ = CDECL_TYPE_COMPLEX; }
+	| T_TYPEDEF { $$ = CDECL_STOR_TYPEDEF; }
+	| T_EXTERN { $$ = CDECL_STOR_EXTERN; }
+	| T_STATIC { $$ = CDECL_STOR_STATIC; }
+	| T_AUTO { $$ = CDECL_STOR_AUTO; }
+	| T_REGISTER { $$ = CDECL_STOR_REGISTER; }
+	| T_RESTRICT { $$ = CDECL_QUAL_RESTRICT; }
+	| T_VOLATILE { $$ = CDECL_QUAL_VOLATILE; }
+	| T_CONST { $$ = CDECL_QUAL_CONST; }
+	| T_INLINE { $$ = CDECL_FUNC_INLINE; }
+	;
+
+declspec: declspec_simple {
+	ALLOC_STRUCT($$, struct cdecl_declspec, .type = $1);
+} | T_STRUCT T_IDENT {
+	ALLOC_STRUCT($$, struct cdecl_declspec,
+		.type = CDECL_TYPE_STRUCT,
+		.ident = $2);
+} | T_UNION T_IDENT {
+	ALLOC_STRUCT($$, struct cdecl_declspec,
+		.type = CDECL_TYPE_UNION,
+		.ident = $2);
+} | T_ENUM T_IDENT {
+	ALLOC_STRUCT($$, struct cdecl_declspec,
+		.type = CDECL_TYPE_ENUM,
+		.ident = $2);
+} | T_IDENT {
+	ALLOC_STRUCT($$, struct cdecl_declspec,
+		.type = CDECL_TYPE_IDENT,
+		.ident = $1);
+};
+
+declarator: T_IDENT {
+	ALLOC_STRUCT($$, struct cdecl_declarator,
+		.type = CDECL_DECL_IDENT,
+		.ident = $1);
+} | T_LPAREN declarator T_RPAREN {
+	$$ = $2;
+};
 
 %%
-void yyerror(const char *err)
+void yyerror(YYLTYPE *loc, struct cdecl **out, const char *err)
 {
+	if (strstr(err, "T_LEX_ERROR"))
+		return;
+
 	fprintf(stderr, "%s\n", err);
 }
diff --git a/src/scan.l b/src/scan.l
index 5c1a960..f2e92a8 100644
--- a/src/scan.l
+++ b/src/scan.l
@@ -22,5 +22,65 @@
 
 %option noyywrap bison-locations
 
+%{
+#define lex_error(msg) do { \
+	yyerror(yylloc, NULL, (msg)); \
+	return T_LEX_ERROR; \
+} while(0)
+%}
+
+IDENT [_[:alpha:]][_[:alnum:]]*
+
 %%
 
+";" return T_SEMICOLON;
+"*" return T_ASTERISK;
+"(" return T_LPAREN;
+")" return T_RPAREN;
+"[" return T_LBRACKET;
+"]" return T_RBRACKET;
+"," return T_COMMA;
+
+"typedef" return T_TYPEDEF;
+"extern" return T_EXTERN;
+"static" return T_STATIC;
+"auto" return T_AUTO;
+"register" return T_REGISTER;
+
+"restrict" return T_RESTRICT;
+"volatile" return T_VOLATILE;
+"const" return T_CONST;
+
+"inline" return T_INLINE;
+
+"void" return T_VOID;
+"char" return T_CHAR;
+"short" return T_SHORT;
+"int" return T_INT;
+"long" return T_LONG;
+"float" return T_FLOAT;
+"double" return T_DOUBLE;
+"signed" return T_SIGNED;
+"unsigned" return T_UNSIGNED;
+"_Bool" return T_BOOL;
+"_Complex" return T_COMPLEX;
+
+"struct" return T_STRUCT;
+"union" return T_UNION;
+"enum" return T_ENUM;
+
+{IDENT} {
+	yylval->strval = malloc(yyleng+1);
+	if (!yylval->strval)
+		lex_error("failed to allocate memory");
+
+	strcpy(yylval->strval, yytext);
+	return T_IDENT;
+}
+
+[[:space:]]+
+. {
+	char buf[] = "syntax error, unexpected #";
+	*strchr(buf, '#') = *yytext;
+	lex_error(buf);
+}
-- 
2.43.2