git.draconx.ca Git - cdecl99.git/commitdiff
Add an initial declaration parser.
author Nick Bowler <nbowler@draconx.ca>
Tue, 21 Jun 2011 01:34:45 +0000 (21:34 -0400)
committer Nick Bowler <nbowler@draconx.ca>
Tue, 21 Jun 2011 01:34:45 +0000 (21:34 -0400)
.gitignore
Makefile.am
src/cdecl.h [new file with mode: 0644]
src/cdecl99.c [new file with mode: 0644]
src/libcdecl.c [new file with mode: 0644]
src/parse.y
src/scan.l

index 56a2d230117c8ec983496e5fa3fefc390173c854..191edd065da322ebc6297b27d29c24c2d42b631b 100644 (file)
@@ -17,3 +17,4 @@ Makefile.in
 /missing
 /stamp-h1
 /install-sh
+/cdecl99
index ecc1524060353f6c31a61373729660b74d4a4488..6921dbb16ef078b494342fd9b6d185681d7dbfee 100644 (file)
@@ -5,11 +5,20 @@ MAINTAINERCLEANFILES = src/scan.c src/scan.h src/scan.stamp \
 
 EXTRA_DIST = src/parse.stamp src/scan.stamp
 
+include_HEADERS = src/cdecl.h
+noinst_HEADERS = src/scan.h src/parse.h
+
 lib_LTLIBRARIES = libcdecl.la
-libcdecl_la_SOURCES = src/scan.c src/scan.h src/parse.c src/parse.h
+libcdecl_la_LDFLAGS = -export-symbols-regex '^cdecl_'
+libcdecl_la_SOURCES = src/scan.c src/parse.c src/libcdecl.c
+
+bin_PROGRAMS = cdecl99
+cdecl99_SOURCES = src/cdecl99.c
+cdecl99_LDADD = libcdecl.la
 
 src/parse.$(OBJEXT): src/scan.h
 src/scan.$(OBJEXT): src/parse.h
+src/libcdecl.$(OBJEXT): src/scan.h src/parse.h
 
 # These are required to prevent the builtin lex/yacc rules from triggering...
 src/scan.c src/scan.h: src/scan.stamp
diff --git a/src/cdecl.h b/src/cdecl.h
new file mode 100644 (file)
index 0000000..90a8804
--- /dev/null
@@ -0,0 +1,66 @@
+#ifndef CDECL_H_
+#define CDECL_H_ /* include guard: must spell the same name as the #ifndef */
+
+/*
+ * Declaration specifier kinds.  Every value is a multiple of 256, which
+ * leaves the low 8 bits free for numbering the specifiers within a kind.
+ */
+enum {
+       CDECL_SPEC_TYPE = 0x000,
+       CDECL_SPEC_STOR = 0x100,
+       CDECL_SPEC_QUAL = 0x200,
+       CDECL_SPEC_FUNC = 0x400,
+};
+
+enum { /* Specifier codes; each group counts up from the base value of its kind. */
+       CDECL_TYPE_VOID = CDECL_SPEC_TYPE, /* type specifiers */
+       CDECL_TYPE_CHAR,
+       CDECL_TYPE_SHORT,
+       CDECL_TYPE_INT,
+       CDECL_TYPE_LONG,
+       CDECL_TYPE_FLOAT,
+       CDECL_TYPE_DOUBLE,
+       CDECL_TYPE_SIGNED,
+       CDECL_TYPE_UNSIGNED,
+       CDECL_TYPE_BOOL,
+       CDECL_TYPE_COMPLEX,
+       CDECL_TYPE_STRUCT,
+       CDECL_TYPE_UNION,
+       CDECL_TYPE_ENUM,
+       CDECL_TYPE_IDENT,
+       CDECL_STOR_TYPEDEF = CDECL_SPEC_STOR, /* storage-class specifiers */
+       CDECL_STOR_EXTERN,
+       CDECL_STOR_STATIC,
+       CDECL_STOR_AUTO,
+       CDECL_STOR_REGISTER,
+       CDECL_QUAL_RESTRICT = CDECL_SPEC_QUAL, /* type qualifiers */
+       CDECL_QUAL_VOLATILE,
+       CDECL_QUAL_CONST,
+       CDECL_FUNC_INLINE = CDECL_SPEC_FUNC, /* function specifiers */
+};
+
+/* Declarator types.  A plain identifier is the only one defined here. */
+enum {
+       CDECL_DECL_IDENT,
+};
+
+struct cdecl { /* One parsed declaration: a list of specifiers plus a list of declarators. */
+       struct cdecl_declspec {
+               struct cdecl_declspec *next; /* following list node; NULL ends the list */
+               unsigned type; /* a CDECL_TYPE_, CDECL_STOR_, CDECL_QUAL_ or CDECL_FUNC_ code */
+               char *ident; /* malloc'd name for struct/union/enum tags and identifier specifiers */
+       } *specifiers;
+
+       struct cdecl_declarator {
+               struct cdecl_declarator *next; /* following list node; NULL ends the list */
+               unsigned type; /* a CDECL_DECL_ code */
+               char *ident; /* malloc'd identifier named by the declarator */
+       } *declarators;
+};
+
+struct cdecl *cdecl_parse_decl(const char *declstr); /* returns NULL if parsing or verification fails */
+void cdecl_free(struct cdecl *decl); /* releases a declaration and both of its lists */
+
+/*
+ * Return the kind of a declaration specifier (one of the CDECL_SPEC_
+ * constants) by clearing the low 8 bits of its type code.  The specifier is
+ * only read, so it is accepted through a pointer to const.
+ */
+static inline int cdecl_spec_kind(const struct cdecl_declspec *spec)
+{
+       return spec->type & ~0xffu;
+}
+
+#endif
diff --git a/src/cdecl99.c b/src/cdecl99.c
new file mode 100644 (file)
index 0000000..6eb07cc
--- /dev/null
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "cdecl.h"
+
+/*
+ * Entry point: parse the declaration given as the first command-line
+ * argument and report the outcome through the exit status.
+ */
+int main(int argc, char **argv)
+{
+       struct cdecl *parsed;
+
+       if (argc <= 1) {
+               fputs("usage: cdecl99 decl\n", stderr);
+               return EXIT_FAILURE;
+       }
+
+       parsed = cdecl_parse_decl(argv[1]);
+       if (parsed == NULL)
+               return EXIT_FAILURE;
+
+       /* Nothing is done with the result yet, so just release it. */
+       cdecl_free(parsed);
+       return 0;
+}
diff --git a/src/libcdecl.c b/src/libcdecl.c
new file mode 100644 (file)
index 0000000..c06080b
--- /dev/null
@@ -0,0 +1,175 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "cdecl.h"
+#include "parse.h"
+#include "scan.h"
+
+#define PASTE(a, b) a ## b /* glue two tokens together */
+#define PASTE2(a, b) PASTE(a, b) /* like PASTE, but a and b are macro-expanded first */
+
+#define BIT1(a)          ((1ul<<(CDECL_TYPE_ ## a))) /* BITn: OR together the bits numbered CDECL_TYPE_<arg> */
+#define BIT2(a, b)       ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b)))
+#define BIT3(a, b, c)    ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c)))
+#define BIT4(a, b, c, d) ((1ul<<(CDECL_TYPE_ ## a))|(1ul<<(CDECL_TYPE_ ## b))|(1ul<<(CDECL_TYPE_ ## c))|(1ul<<(CDECL_TYPE_ ## d)))
+
+#define NARG_(_4, _3, _2, _1, n, ...) n /* expands to its fifth argument */
+#define NARG(...) NARG_(__VA_ARGS__, 4, 3, 2, 1) /* number of arguments given, from 1 to 4 */
+
+#define BITS(...) PASTE2(BIT, NARG(__VA_ARGS__))(__VA_ARGS__) /* picks BIT1..BIT4 by argument count */
+
+/*
+ * We can represent type specifiers as a bitmap, which gives us a finite
+ * list of acceptable bitmap values according to the C standard.  However,
+ * the "long" specifier is allowed to occur more than once, but only at most
+ * 2 times.  Treat it as a special case, assigning an unused bit to represent
+ * the second long.
+ *
+ * The bitmap is an unsigned long, which is only guaranteed to be 32 bits
+ * wide, so bit 31 is the highest one that can be used portably: shifting
+ * 1ul left by 32 is undefined wherever unsigned long is a 32-bit type.
+ */
+#define CDECL_TYPE_LLONG       31
+
+/*
+ * Check whether a bitmap of type specifiers is one of the combinations that
+ * the C standard permits.  Bit numbers are the CDECL_TYPE_ codes, with
+ * CDECL_TYPE_LLONG standing for a second "long".
+ *
+ * Returns 0 if the combination is valid and -1 otherwise.
+ */
+static int typemap_verify(unsigned long map)
+{
+       /*
+        * This is the complete list of valid type specifiers from C99§6.7.2#2
+        */
+
+       switch (map) {
+       case BITS(VOID):
+       case BITS(CHAR):
+       case BITS(SIGNED, CHAR):
+       case BITS(UNSIGNED, CHAR):
+       case BITS(SHORT):
+       case BITS(SIGNED, SHORT):
+       case BITS(SHORT, INT):
+       case BITS(SIGNED, SHORT, INT):
+       case BITS(UNSIGNED, SHORT):
+       case BITS(UNSIGNED, SHORT, INT):
+       case BITS(INT):
+       case BITS(SIGNED):
+       case BITS(SIGNED, INT):
+       case BITS(UNSIGNED):
+       case BITS(UNSIGNED, INT):
+       case BITS(LONG):
+       case BITS(LONG, INT):             /* "long int" */
+       case BITS(SIGNED, LONG):
+       case BITS(SIGNED, LONG, INT):
+       case BITS(UNSIGNED, LONG):
+       case BITS(UNSIGNED, LONG, INT):
+       case BITS(LLONG, LONG):
+       case BITS(LLONG, LONG, INT):      /* "long long int" */
+       case BITS(SIGNED, LLONG, LONG):
+       case BITS(SIGNED, LLONG, LONG, INT):
+       case BITS(UNSIGNED, LLONG, LONG):
+       case BITS(UNSIGNED, LLONG, LONG, INT):
+       case BITS(BOOL):
+       case BITS(FLOAT):
+       case BITS(DOUBLE):
+       case BITS(LONG, DOUBLE):
+       case BITS(FLOAT, COMPLEX):
+       case BITS(DOUBLE, COMPLEX):
+       case BITS(LONG, DOUBLE, COMPLEX):
+       case BITS(STRUCT):
+       case BITS(UNION):
+       case BITS(ENUM):
+       case BITS(IDENT):
+               return 0;
+       }
+
+       return -1;
+}
+
+/*
+ * Add the type specifier s to the bitmap map and return the updated bitmap.
+ * The first "long" sets the CDECL_TYPE_LONG bit and the second one sets the
+ * CDECL_TYPE_LLONG bit.
+ *
+ * If the specifier is already present (or is a third "long"), a message is
+ * printed to stderr and -1, converted to unsigned long, is returned.
+ */
+static unsigned long
+typemap_add_typespec(unsigned long map, struct cdecl_declspec *s)
+{
+       unsigned long bit;
+
+       assert(s->type < CDECL_TYPE_LLONG);
+
+       if (s->type == CDECL_TYPE_LONG) {
+               if (map & BITS(LLONG)) {
+                       fprintf(stderr, "too many long specifiers\n");
+                       return -1;
+               } else if (map & BITS(LONG)) {
+                       return map | BITS(LLONG);
+               }
+       }
+
+       /* Shift an unsigned long so the bit has the same type as the map. */
+       bit = 1ul << s->type;
+       if (map & bit) {
+               fprintf(stderr, "duplicate type specifier\n");
+               return -1;
+       }
+
+       return map | bit;
+}
+
+/*
+ * Validate a list of declaration specifiers: the type specifiers must form
+ * a permitted combination, at most one storage-class specifier may appear,
+ * and restrict qualifiers and function specifiers are rejected outright.
+ *
+ * Returns 0 if the list is acceptable.  Otherwise a diagnostic is printed
+ * to stderr and -1 is returned.
+ */
+static int verify_specs(struct cdecl_declspec *s)
+{
+       unsigned long types = 0;
+       unsigned storage_count = 0;
+       struct cdecl_declspec *spec = s;
+
+       while (spec != NULL) {
+               int kind = cdecl_spec_kind(spec);
+
+               if (kind == CDECL_SPEC_TYPE) {
+                       types = typemap_add_typespec(types, spec);
+                       if (types == -1)
+                               return -1;
+               } else if (kind == CDECL_SPEC_STOR) {
+                       storage_count++;
+                       if (storage_count > 1) {
+                               fprintf(stderr, "too many storage-class specifiers\n");
+                               return -1;
+                       }
+               } else if (kind == CDECL_SPEC_QUAL) {
+                       /*
+                        * Pointer types are not supported yet, so a restrict
+                        * qualifier can never be valid.  The other qualifiers
+                        * are accepted unconditionally.
+                        */
+                       if (spec->type == CDECL_QUAL_RESTRICT) {
+                               fprintf(stderr, "only pointer types can be restrict-qualified.\n");
+                               return -1;
+                       }
+               } else if (kind == CDECL_SPEC_FUNC) {
+                       /* The same reasoning applies to function specifiers. */
+                       fprintf(stderr, "only function declarations may have function specifiers.\n");
+                       return -1;
+               } else {
+                       abort();
+               }
+
+               spec = spec->next;
+       }
+
+       if (typemap_verify(types) == -1) {
+               fprintf(stderr, "conflicting type specifiers\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int verify_decl(struct cdecl *decl) /* returns verify_specs' result; only the specifier list is checked */
+{
+       return verify_specs(decl->specifiers);
+}
+
+/*
+ * Parse the C declaration in declstr and verify the result.
+ *
+ * Returns the parsed declaration, to be released with cdecl_free, or NULL
+ * if either the parse or the verification fails.
+ */
+struct cdecl *cdecl_parse_decl(const char *declstr)
+{
+       struct cdecl *result;
+       YY_BUFFER_STATE buffer = yy_scan_string(declstr);
+       int status = yyparse(&result);
+
+       /* The scanner buffer is not needed once parsing has finished. */
+       yy_delete_buffer(buffer);
+
+       if (status != 0) {
+               return NULL;
+       }
+
+       if (verify_decl(result) != 0) {
+               cdecl_free(result);
+               return NULL;
+       }
+
+       return result;
+}
index 70e18783db6cf68cda0ae447881f6188cc0cf0c5..fdffa40939f561c806bac62e06776ae3ae66c02b 100644 (file)
  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+%parse-param {struct cdecl **out}
 %define api.pure
 %error-verbose
 %locations
 
 %{
 #include "scan.h"
+#include "cdecl.h"
+
+#define FAIL(msg) do { /* report msg through yyerror, then raise a parse error */ \
+       yyerror(&yylloc, NULL, msg); \
+       YYERROR; \
+} while (0)
+
+#define ALLOC(ptr, size) do { /* malloc size bytes into ptr; FAIL if that returns NULL */ \
+       (ptr) = malloc(size); \
+       if (!(ptr)) \
+               FAIL("failed to allocate memory"); \
+} while (0)
+
+#define ALLOC_STRUCT(ptr, type, ...) do { /* ALLOC one object of type, then assign it a compound literal built from the initializers */ \
+       ALLOC(ptr, sizeof (type)); \
+       *(ptr) = (type) { __VA_ARGS__ }; \
+} while (0)
 %}
 
+%code requires {
+#include <inttypes.h>
+}
+
 %code provides {
-void yyerror(const char *);
+void yyerror(YYLTYPE *, struct cdecl **, const char *);
+int yyparse(struct cdecl **out);
 }
 
 %union {
-       int foo;
+       uintmax_t uintval;
+       char *strval;
+       struct cdecl_declspec *declspec;
+       struct cdecl_declarator *declarator;
+       struct cdecl *decl;
+}
+
+%{
+/* Release every node of a specifier list, together with each node's ident. */
+static void free_declspec(struct cdecl_declspec *x)
+{
+       struct cdecl_declspec *following;
+
+       for (; x != NULL; x = following) {
+               following = x->next;
+               free(x->ident);
+               free(x);
+       }
 }
 
+/* Release every node of a declarator list, together with each node's ident. */
+static void free_declarator(struct cdecl_declarator *x)
+{
+       struct cdecl_declarator *following;
+
+       for (; x != NULL; x = following) {
+               following = x->next;
+               free(x->ident);
+               free(x);
+       }
+}
+
+/*
+ * Release a declaration together with its specifier and declarator lists.
+ * As with free(), passing NULL is allowed and does nothing.
+ */
+void cdecl_free(struct cdecl *decl)
+{
+       if (!decl)
+               return;
+
+       free_declspec(decl->specifiers);
+       free_declarator(decl->declarators);
+       free(decl);
+}
+%}
+
+%destructor { free($$); }            <strval>
+%destructor { free_declspec($$); }   <declspec>
+%destructor { free_declarator($$); } <declarator>
+%destructor { cdecl_free($$); }      <decl>
+
+%token T_LEX_ERROR
+
+%token <strval> T_IDENT "identifier"
+%token T_SEMICOLON ";"
+%token T_ASTERISK  "*"
+%token T_LPAREN    "("
+%token T_RPAREN    ")"
+%token T_LBRACKET  "["
+%token T_RBRACKET  "]"
+%token T_COMMA     ","
+
+%token T_TYPEDEF  "typedef"
+%token T_EXTERN   "extern"
+%token T_STATIC   "static"
+%token T_AUTO     "auto"
+%token T_REGISTER "register"
+
+%token T_INLINE   "inline"
+
+%token T_RESTRICT "restrict"
+%token T_VOLATILE "volatile"
+%token T_CONST    "const"
+
+%token T_VOID     "void"
+%token T_CHAR     "char"
+%token T_SHORT    "short"
+%token T_INT      "int"
+%token T_LONG     "long"
+%token T_FLOAT    "float"
+%token T_DOUBLE   "double"
+%token T_SIGNED   "signed"
+%token T_UNSIGNED "unsigned"
+%token T_BOOL     "_Bool"
+%token T_COMPLEX  "_Complex"
+
+%token T_STRUCT   "struct"
+%token T_UNION    "union"
+%token T_ENUM     "enum"
+
+%type <uintval>    declspec_simple
+%type <declspec>   declspec declspecs
+%type <declarator> declarator declarators
+%type <decl>       declaration
+
 %%
 
-input: ;
+input: declaration { /* store the finished declaration through the out parse parameter */
+       *out = $1;
+};
+
+declaration: declspecs declarators T_SEMICOLON { /* bundle both lists into a newly allocated struct cdecl */
+       ALLOC_STRUCT($$, struct cdecl,
+               .specifiers = $1,
+               .declarators = $2);
+};
+
+declspecs: { $$ = NULL; } | declspecs declspec { /* new specifier goes on the front, so the list ends up in reverse source order */
+       $$ = $2;
+       $$->next = $1;
+}
+
+declarators: declarator | declarator T_COMMA declarators { /* link the first declarator to the rest, keeping source order */
+       $$ = $1;
+       $$->next = $3;
+};
+
+declspec_simple: T_VOID { $$ = CDECL_TYPE_VOID;     } /* each keyword token maps to its CDECL_ specifier code */
+       | T_CHAR        { $$ = CDECL_TYPE_CHAR;     }
+       | T_SHORT       { $$ = CDECL_TYPE_SHORT;    }
+       | T_INT         { $$ = CDECL_TYPE_INT;      }
+       | T_LONG        { $$ = CDECL_TYPE_LONG;     }
+       | T_FLOAT       { $$ = CDECL_TYPE_FLOAT;    }
+       | T_DOUBLE      { $$ = CDECL_TYPE_DOUBLE;   }
+       | T_SIGNED      { $$ = CDECL_TYPE_SIGNED;   }
+       | T_UNSIGNED    { $$ = CDECL_TYPE_UNSIGNED; }
+       | T_BOOL        { $$ = CDECL_TYPE_BOOL;     }
+       | T_COMPLEX     { $$ = CDECL_TYPE_COMPLEX;  }
+       | T_TYPEDEF     { $$ = CDECL_STOR_TYPEDEF;  }
+       | T_EXTERN      { $$ = CDECL_STOR_EXTERN;   }
+       | T_STATIC      { $$ = CDECL_STOR_STATIC;   }
+       | T_AUTO        { $$ = CDECL_STOR_AUTO;     }
+       | T_REGISTER    { $$ = CDECL_STOR_REGISTER; }
+       | T_RESTRICT    { $$ = CDECL_QUAL_RESTRICT; }
+       | T_VOLATILE    { $$ = CDECL_QUAL_VOLATILE; }
+       | T_CONST       { $$ = CDECL_QUAL_CONST;    }
+       | T_INLINE      { $$ = CDECL_FUNC_INLINE;   }
+       ;
+
+declspec: declspec_simple  { /* one list node per specifier; the tagged and identifier forms also store the name */
+       ALLOC_STRUCT($$, struct cdecl_declspec, .type = $1);
+} | T_STRUCT T_IDENT {
+       ALLOC_STRUCT($$, struct cdecl_declspec,
+               .type = CDECL_TYPE_STRUCT,
+               .ident = $2);
+} | T_UNION T_IDENT {
+       ALLOC_STRUCT($$, struct cdecl_declspec,
+               .type = CDECL_TYPE_UNION,
+               .ident = $2);
+} | T_ENUM T_IDENT {
+       ALLOC_STRUCT($$, struct cdecl_declspec,
+               .type = CDECL_TYPE_ENUM,
+               .ident = $2);
+} | T_IDENT {
+       ALLOC_STRUCT($$, struct cdecl_declspec,
+               .type = CDECL_TYPE_IDENT,
+               .ident = $1);
+};
+
+declarator: T_IDENT { /* a plain identifier; the parenthesized form just yields the inner declarator */
+       ALLOC_STRUCT($$, struct cdecl_declarator,
+               .type = CDECL_DECL_IDENT,
+               .ident = $1);
+} | T_LPAREN declarator T_RPAREN {
+       $$ = $2;
+};
 
 %%
-void yyerror(const char *err)
+void yyerror(YYLTYPE *loc, struct cdecl **out, const char *err) /* print err on its own line to stderr; loc and out are unused */
 {
+       if (strstr(err, "T_LEX_ERROR")) /* drop messages that name the scanner's error token -- presumably the scanner has already reported them */
+               return;
+
        fprintf(stderr, "%s\n", err);
 }
index 5c1a960651d591f4cfd75bcc10658a55b260df95..f2e92a8703509d32c734abfa7a42391ab7d9559c 100644 (file)
 
 %option noyywrap bison-locations
 
+%{
+#define lex_error(msg) do { /* report msg through yyerror, then return T_LEX_ERROR to the parser */ \
+       yyerror(yylloc, NULL, (msg)); \
+       return T_LEX_ERROR; \
+} while(0)
+%}
+
+IDENT [_[:alpha:]][_[:alnum:]]*
+
 %%
 
+";" return T_SEMICOLON;
+"*" return T_ASTERISK;
+"(" return T_LPAREN;
+")" return T_RPAREN;
+"[" return T_LBRACKET;
+"]" return T_RBRACKET;
+"," return T_COMMA;
+
+"typedef"  return T_TYPEDEF;
+"extern"   return T_EXTERN;
+"static"   return T_STATIC;
+"auto"     return T_AUTO;
+"register" return T_REGISTER;
+
+"restrict" return T_RESTRICT;
+"volatile" return T_VOLATILE;
+"const"    return T_CONST;
+
+"inline"   return T_INLINE;
+
+"void"     return T_VOID;
+"char"     return T_CHAR;
+"short"    return T_SHORT;
+"int"      return T_INT;
+"long"     return T_LONG;
+"float"    return T_FLOAT;
+"double"   return T_DOUBLE;
+"signed"   return T_SIGNED;
+"unsigned" return T_UNSIGNED;
+"_Bool"    return T_BOOL;
+"_Complex" return T_COMPLEX;
+
+"struct"   return T_STRUCT;
+"union"    return T_UNION;
+"enum"     return T_ENUM;
+
+{IDENT} { /* copy the matched text into a malloc'd string and pass it to the parser as T_IDENT */
+       yylval->strval = malloc(yyleng+1);
+       if (!yylval->strval)
+               lex_error("failed to allocate memory");
+
+       strcpy(yylval->strval, yytext);
+       return T_IDENT;
+}
+
+[[:space:]]+
+. { /* any other character: substitute it for the '#' placeholder and report a lexical error */
+       char buf[] = "syntax error, unexpected #";
+       *strchr(buf, '#') = *yytext;
+       lex_error(buf);
+}