From 8ef7bb974c538e4802688d45296ee988a6648e02 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Thu, 13 Jul 2023 21:25:04 -0400 Subject: [PATCH] libcdecl: Move specifier type determination into scanner. Instead of having a parser action for every keyword which sets the appropriate specifier type, the scanner can just directly assign the semantic value for the returned specifier tokens. This reduces the size of the generated parser quite a bit, without needing any change to the tokens. --- src/cdecl-internal.h | 31 ++++++++++- src/keywords.gperf | 83 +++++++++++++++--------------- src/parse.y | 119 +++++++++++++++++++++---------------------- src/scan.l | 9 ++-- 4 files changed, 135 insertions(+), 107 deletions(-) diff --git a/src/cdecl-internal.h b/src/cdecl-internal.h index c85ca61..4c5ed0b 100644 --- a/src/cdecl-internal.h +++ b/src/cdecl-internal.h @@ -24,6 +24,23 @@ #define _(s) dgettext(PACKAGE, s) #define N_(s) s +/* Pack the 4 "kind" bits of a valid cdecl specifier value into two bits. */ +#define PACK_KIND(x) ((( (((x)>>8) - ((x)>>11)) ) >> 1) & 3) + +/* Expand a packed "kind" to its original value. */ +#define UNPACK_KIND(x) (0x100 << (x)) + +/* + * Pack a valid cdecl specifier value (CDECL_TYPE_xxx, CDECL_STOR_xxx, etc.) + * into 8 bits. We do this by encoding the specifier kind in the upper two + * bits, and the enumerated sequence in the lower 6 bits. */ +#define PACK_SPEC(x) (((x) & 0x3f) | (PACK_KIND(x) << 6)) + +/* + * Expand a packed specifier to its original value. + */ +#define UNPACK_SPEC(x) (UNPACK_KIND((x) >> 6) | ((x) & 0x3f)) + /* * Pack a parser token into 7 bits. 
* @@ -68,6 +85,18 @@ const char *cdecl__emit_specs(struct output_state *dst, struct cdecl_declspec *s, unsigned mask); -int cdecl__to_keyword(const char *s, int len, int english_mode); +/* + * If s is a (len bytes long) string corresponding to a declaration + * specifier, then: + * + * - bits 7:0 (LSB) of the return value is the packed specifier type, and + * - bits 15:8 of the return value is the packed parser token. + * + * Otherwise, if english_mode is nonzero and s is an "english keyword", + * returns the appropriate packed parser token in bits 15:8. + * + * Otherwise, PACK_TOKEN(T_IDENT) is returned in bits 15:8. + */ +unsigned cdecl__to_keyword(const char *s, int len, int english_mode); #endif diff --git a/src/keywords.gperf b/src/keywords.gperf index 3f8af84..d0caf8b 100644 --- a/src/keywords.gperf +++ b/src/keywords.gperf @@ -20,6 +20,7 @@ #include #include #include "cdecl-internal.h" +#include "cdecl.h" #include "parse.h" static const struct keyword *in_word_set(); @@ -34,63 +35,63 @@ static const struct keyword *in_word_set(); %struct-type struct keyword { - uint_least8_t token; + uint_least16_t val; }; %% -_Bool, PACK_TOKEN(T_BOOL ) -_Complex, PACK_TOKEN(T_COMPLEX ) -_Imaginary, PACK_TOKEN(T_IMAGINARY) -auto, PACK_TOKEN(T_AUTO ) -char, PACK_TOKEN(T_CHAR ) -const, PACK_TOKEN(T_CONST ) -double, PACK_TOKEN(T_DOUBLE ) -enum, PACK_TOKEN(T_ENUM ) -extern, PACK_TOKEN(T_EXTERN ) -float, PACK_TOKEN(T_FLOAT ) -inline, PACK_TOKEN(T_INLINE ) -int, PACK_TOKEN(T_INT ) -long, PACK_TOKEN(T_LONG ) -register, PACK_TOKEN(T_REGISTER ) -restrict, PACK_TOKEN(T_RESTRICT ) -short, PACK_TOKEN(T_SHORT ) -signed, PACK_TOKEN(T_SIGNED ) -static, PACK_TOKEN(T_STATIC ) -struct, PACK_TOKEN(T_STRUCT ) -typedef, PACK_TOKEN(T_TYPEDEF ) -union, PACK_TOKEN(T_UNION ) -unsigned, PACK_TOKEN(T_UNSIGNED ) -void, PACK_TOKEN(T_VOID ) -volatile, PACK_TOKEN(T_VOLATILE ) +_Bool, (PACK_TOKEN(T_BOOL )<<8) | PACK_SPEC(CDECL_TYPE_BOOL) +_Complex, (PACK_TOKEN(T_COMPLEX )<<8) | 
PACK_SPEC(CDECL_TYPE_COMPLEX) +_Imaginary, (PACK_TOKEN(T_IMAGINARY)<<8) | PACK_SPEC(CDECL_TYPE_IMAGINARY) +auto, (PACK_TOKEN(T_AUTO )<<8) | PACK_SPEC(CDECL_STOR_AUTO) +char, (PACK_TOKEN(T_CHAR )<<8) | PACK_SPEC(CDECL_TYPE_CHAR) +const, (PACK_TOKEN(T_CONST )<<8) | PACK_SPEC(CDECL_QUAL_CONST) +double, (PACK_TOKEN(T_DOUBLE )<<8) | PACK_SPEC(CDECL_TYPE_DOUBLE) +enum, (PACK_TOKEN(T_ENUM )<<8) | PACK_SPEC(CDECL_TYPE_ENUM) +extern, (PACK_TOKEN(T_EXTERN )<<8) | PACK_SPEC(CDECL_STOR_EXTERN) +float, (PACK_TOKEN(T_FLOAT )<<8) | PACK_SPEC(CDECL_TYPE_FLOAT) +inline, (PACK_TOKEN(T_INLINE )<<8) | PACK_SPEC(CDECL_FUNC_INLINE) +int, (PACK_TOKEN(T_INT )<<8) | PACK_SPEC(CDECL_TYPE_INT) +long, (PACK_TOKEN(T_LONG )<<8) | PACK_SPEC(CDECL_TYPE_LONG) +register, (PACK_TOKEN(T_REGISTER )<<8) | PACK_SPEC(CDECL_STOR_REGISTER) +restrict, (PACK_TOKEN(T_RESTRICT )<<8) | PACK_SPEC(CDECL_QUAL_RESTRICT) +short, (PACK_TOKEN(T_SHORT )<<8) | PACK_SPEC(CDECL_TYPE_SHORT) +signed, (PACK_TOKEN(T_SIGNED )<<8) | PACK_SPEC(CDECL_TYPE_SIGNED) +static, (PACK_TOKEN(T_STATIC )<<8) | PACK_SPEC(CDECL_STOR_STATIC) +struct, (PACK_TOKEN(T_STRUCT )<<8) | PACK_SPEC(CDECL_TYPE_STRUCT) +typedef, (PACK_TOKEN(T_TYPEDEF )<<8) | PACK_SPEC(CDECL_STOR_TYPEDEF) +union, (PACK_TOKEN(T_UNION )<<8) | PACK_SPEC(CDECL_TYPE_UNION) +unsigned, (PACK_TOKEN(T_UNSIGNED )<<8) | PACK_SPEC(CDECL_TYPE_UNSIGNED) +void, (PACK_TOKEN(T_VOID )<<8) | PACK_SPEC(CDECL_TYPE_VOID) +volatile, (PACK_TOKEN(T_VOLATILE )<<8) | PACK_SPEC(CDECL_QUAL_VOLATILE) # english keywords -array, PACK_TOKEN(T_ARRAY ) | 0x80 -as, PACK_TOKEN(T_AS ) | 0x80 -declare, PACK_TOKEN(T_DECLARE ) | 0x80 -function, PACK_TOKEN(T_FUNCTION ) | 0x80 -of, PACK_TOKEN(T_OF ) | 0x80 -pointer, PACK_TOKEN(T_POINTER ) | 0x80 -returning, PACK_TOKEN(T_RETURNING) | 0x80 -to, PACK_TOKEN(T_TO ) | 0x80 -type, PACK_TOKEN(T_TYPE ) | 0x80 -variable-length, PACK_TOKEN(T_VLA ) | 0x80 +array, (PACK_TOKEN(T_ARRAY )<<8) | 0x8000 +as, (PACK_TOKEN(T_AS )<<8) | 0x8000 +declare, (PACK_TOKEN(T_DECLARE )<<8) | 
0x8000 +function, (PACK_TOKEN(T_FUNCTION )<<8) | 0x8000 +of, (PACK_TOKEN(T_OF )<<8) | 0x8000 +pointer, (PACK_TOKEN(T_POINTER )<<8) | 0x8000 +returning, (PACK_TOKEN(T_RETURNING)<<8) | 0x8000 +to, (PACK_TOKEN(T_TO )<<8) | 0x8000 +type, (PACK_TOKEN(T_TYPE )<<8) | 0x8000 +variable-length, (PACK_TOKEN(T_VLA )<<8) | 0x8000 %% -int cdecl__to_keyword(const char *s, int len, int english_mode) +unsigned cdecl__to_keyword(const char *s, int len, int english_mode) { const struct keyword *k; if ((k = in_word_set(s, len))) { - unsigned x = (k->token & 0x7fu); + uint_least16_t x = k->val; - if (english_mode || !(k->token & ~0x7fu)) { - return UNPACK_TOKEN(x); + if (english_mode || !(x & 0x8000)) { + return x & 0x7fff; } } - return T_IDENT; + return (PACK_TOKEN(T_IDENT)<<8); } static const char *wordlist_func(const struct keyword *k) { - unsigned x = k->token & 0x7f; + unsigned x = (k->val >> 8) & 0x7f; if (!x) return NULL; diff --git a/src/parse.y b/src/parse.y index 263129c..e9df6cb 100644 --- a/src/parse.y +++ b/src/parse.y @@ -173,34 +173,34 @@ yyerror(YYLTYPE *loc, yyscan_t scanner, struct cdecl **out, const char *err) %token T_COMMA "," %token T_ELLIPSIS "..." 
-%token T_TYPEDEF "typedef" -%token T_EXTERN "extern" -%token T_STATIC "static" -%token T_AUTO "auto" -%token T_REGISTER "register" - -%token T_INLINE "inline" - -%token T_RESTRICT "restrict" -%token T_VOLATILE "volatile" -%token T_CONST "const" - -%token T_VOID "void" -%token T_CHAR "char" -%token T_SHORT "short" -%token T_INT "int" -%token T_LONG "long" -%token T_FLOAT "float" -%token T_DOUBLE "double" -%token T_SIGNED "signed" -%token T_UNSIGNED "unsigned" -%token T_BOOL "_Bool" -%token T_COMPLEX "_Complex" -%token T_IMAGINARY "_Imaginary" - -%token T_STRUCT "struct" -%token T_UNION "union" -%token T_ENUM "enum" +%token <spectype> T_TYPEDEF "typedef" +%token <spectype> T_EXTERN "extern" +%token <spectype> T_STATIC "static" +%token <spectype> T_AUTO "auto" +%token <spectype> T_REGISTER "register" + +%token <spectype> T_INLINE "inline" + +%token <spectype> T_RESTRICT "restrict" +%token <spectype> T_VOLATILE "volatile" +%token <spectype> T_CONST "const" + +%token <spectype> T_VOID "void" +%token <spectype> T_CHAR "char" +%token <spectype> T_SHORT "short" +%token <spectype> T_INT "int" +%token <spectype> T_LONG "long" +%token <spectype> T_FLOAT "float" +%token <spectype> T_DOUBLE "double" +%token <spectype> T_SIGNED "signed" +%token <spectype> T_UNSIGNED "unsigned" +%token <spectype> T_BOOL "_Bool" +%token <spectype> T_COMPLEX "_Complex" +%token <spectype> T_IMAGINARY "_Imaginary" + +%token <spectype> T_STRUCT "struct" +%token <spectype> T_UNION "union" +%token <spectype> T_ENUM "enum" /* * English keywords. 
@@ -218,7 +218,8 @@ yyerror(YYLTYPE *loc, yyscan_t scanner, struct cdecl **out, const char *err) %type vla_ident %type varargs -%type declspec_simple typespec_simple qualifier_simple +%type declspec_simple qualifier_simple +%type typespec_simple typespec_tagged %type declspec_notype declspec_noid typespec_noid typespec %type qualifier qualifiers %type declspecs declspecs_noid @@ -285,29 +286,31 @@ declarator_wrap: declarator { ALLOC_STRUCT($$, struct cdecl, .declarators = $1); } -declspec_simple: T_AUTO { $$ = CDECL_STOR_AUTO; } - | T_TYPEDEF { $$ = CDECL_STOR_TYPEDEF; } - | T_EXTERN { $$ = CDECL_STOR_EXTERN; } - | T_STATIC { $$ = CDECL_STOR_STATIC; } - | T_REGISTER { $$ = CDECL_STOR_REGISTER; } - | T_INLINE { $$ = CDECL_FUNC_INLINE; } - -typespec_simple: T_VOID { $$ = CDECL_TYPE_VOID; } - | T_CHAR { $$ = CDECL_TYPE_CHAR; } - | T_SHORT { $$ = CDECL_TYPE_SHORT; } - | T_INT { $$ = CDECL_TYPE_INT; } - | T_LONG { $$ = CDECL_TYPE_LONG; } - | T_FLOAT { $$ = CDECL_TYPE_FLOAT; } - | T_DOUBLE { $$ = CDECL_TYPE_DOUBLE; } - | T_SIGNED { $$ = CDECL_TYPE_SIGNED; } - | T_UNSIGNED { $$ = CDECL_TYPE_UNSIGNED; } - | T_BOOL { $$ = CDECL_TYPE_BOOL; } - | T_COMPLEX { $$ = CDECL_TYPE_COMPLEX; } - | T_IMAGINARY { $$ = CDECL_TYPE_IMAGINARY; } - -qualifier_simple: T_CONST { $$ = CDECL_QUAL_CONST; } - | T_RESTRICT { $$ = CDECL_QUAL_RESTRICT; } - | T_VOLATILE { $$ = CDECL_QUAL_VOLATILE; } +declspec_simple: T_AUTO + | T_TYPEDEF + | T_EXTERN + | T_STATIC + | T_REGISTER + | T_INLINE + +typespec_simple: T_VOID + | T_CHAR + | T_SHORT + | T_INT + | T_LONG + | T_FLOAT + | T_DOUBLE + | T_SIGNED + | T_UNSIGNED + | T_BOOL + | T_COMPLEX + | T_IMAGINARY + +typespec_tagged: T_STRUCT | T_UNION | T_ENUM + +qualifier_simple: T_CONST + | T_RESTRICT + | T_VOLATILE declspec_notype: qualifier | declspec_simple { ALLOC_STRUCT($$, struct cdecl_declspec, .type = $1); @@ -321,17 +324,9 @@ qualifier: qualifier_simple { ALLOC_STRUCT($$, struct cdecl_declspec, .type = $1); } -typespec: typespec_noid | T_STRUCT 
T_IDENT { +typespec: typespec_noid | typespec_tagged T_IDENT { ALLOC_STRUCT($$, struct cdecl_declspec, - .type = CDECL_TYPE_STRUCT, - .ident = $2); -} | T_UNION T_IDENT { - ALLOC_STRUCT($$, struct cdecl_declspec, - .type = CDECL_TYPE_UNION, - .ident = $2); -} | T_ENUM T_IDENT { - ALLOC_STRUCT($$, struct cdecl_declspec, - .type = CDECL_TYPE_ENUM, + .type = $1, .ident = $2); } | T_IDENT { ALLOC_STRUCT($$, struct cdecl_declspec, diff --git a/src/scan.l b/src/scan.l index 6718381..90f5ad9 100644 --- a/src/scan.l +++ b/src/scan.l @@ -169,8 +169,11 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ } {IDENT} { - int ret = cdecl__to_keyword(yytext, yyleng, yyextra); - if (ret == T_IDENT) { + unsigned x = cdecl__to_keyword(yytext, yyleng, yyextra); + int tok; + + yylval->spectype = UNPACK_SPEC(x & 0xff); + if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) { /* * Our IDENT pattern includes hyphens so we can match * "variable-length" as a keyword. In all other cases a @@ -189,7 +192,7 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ #endif dup_token(); } - return ret; + return UNPACK_TOKEN(tok); } [[:space:]]+ -- 2.43.2