From 3a3760ad26782403f6686183be3faa8b22cf849b Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Thu, 13 Jul 2023 21:20:26 -0400 Subject: [PATCH] libcdecl: Use macros for packing tokens into bytes. Instead of coding the same structures everywhere, move the logic to drop/restore the upper bits of tokens into helper macros. Should be no functional change. --- src/cdecl-internal.h | 15 +++++++++ src/gen-specstr.awk | 11 ++----- src/keywords.gperf | 78 ++++++++++++++++++++------------------------ src/scan.l | 26 ++++++--------- 4 files changed, 64 insertions(+), 66 deletions(-) diff --git a/src/cdecl-internal.h b/src/cdecl-internal.h index cf7fc14..c85ca61 100644 --- a/src/cdecl-internal.h +++ b/src/cdecl-internal.h @@ -24,6 +24,21 @@ #define _(s) dgettext(PACKAGE, s) #define N_(s) s +/* + * Pack a parser token into 7 bits. + * + * Bison normally numbers user-defined tokens sequentially starting from 258. + * If api.token.raw is used, then the numbering starts from 3. As we have + * about 50 tokens, the latter case will fit in 7 bits easily; in the former + * case the upper bits are constant so we don't need to store them. + */ +#define PACK_TOKEN(x) ((x) & 0x7f) + +/* + * Expand a packed token to its original value. + */ +#define UNPACK_TOKEN(x) ((x) | ((T_IDENT > 256) << 8)) + struct cdecl_error; struct cdecl_declspec; diff --git a/src/gen-specstr.awk b/src/gen-specstr.awk index 6831667..c9148f8 100755 --- a/src/gen-specstr.awk +++ b/src/gen-specstr.awk @@ -22,7 +22,7 @@ END { BEGIN { kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1; - count = maxwidth = 0; + count = 0; } # Locate all the relevant identifiers in cdecl.h. 
We assume everything @@ -48,9 +48,6 @@ $1 ~ /^CDECL_/ { if (parts[2] in kinds) { kind_counts[parts[2]]++; specs[count++] = parts[3]; - - if (length(parts[3]) > maxwidth) - maxwidth = length(parts[3]); } } @@ -79,7 +76,7 @@ END { if (specs[i] == "IDENT") s = "0"; else - s = "T_" substr(specs[i] " ", 1, maxwidth) " & 0xff"; + s = "PACK_TOKEN(T_" specs[i] ")"; offset_table = offset_table s suffix; } @@ -93,8 +90,6 @@ END { print "\tassert(x < sizeof idx);"; print "\tif (!(x = idx[x]))"; print "\t\treturn \"\";"; - print "\tif (T_" specs[0] " >= 256)"; - print "\t\tx += 256;"; - print "\treturn cdecl__token_name(x);"; + print "\treturn cdecl__token_name(UNPACK_TOKEN(x));"; print "}"; } diff --git a/src/keywords.gperf b/src/keywords.gperf index 22566aa..3f8af84 100644 --- a/src/keywords.gperf +++ b/src/keywords.gperf @@ -37,41 +37,41 @@ struct keyword { uint_least8_t token; }; %% -_Bool, (T_BOOL & 0x7f) -_Complex, (T_COMPLEX & 0x7f) -_Imaginary, (T_IMAGINARY & 0x7f) -auto, (T_AUTO & 0x7f) -char, (T_CHAR & 0x7f) -const, (T_CONST & 0x7f) -double, (T_DOUBLE & 0x7f) -enum, (T_ENUM & 0x7f) -extern, (T_EXTERN & 0x7f) -float, (T_FLOAT & 0x7f) -inline, (T_INLINE & 0x7f) -int, (T_INT & 0x7f) -long, (T_LONG & 0x7f) -register, (T_REGISTER & 0x7f) -restrict, (T_RESTRICT & 0x7f) -short, (T_SHORT & 0x7f) -signed, (T_SIGNED & 0x7f) -static, (T_STATIC & 0x7f) -struct, (T_STRUCT & 0x7f) -typedef, (T_TYPEDEF & 0x7f) -union, (T_UNION & 0x7f) -unsigned, (T_UNSIGNED & 0x7f) -void, (T_VOID & 0x7f) -volatile, (T_VOLATILE & 0x7f) +_Bool, PACK_TOKEN(T_BOOL ) +_Complex, PACK_TOKEN(T_COMPLEX ) +_Imaginary, PACK_TOKEN(T_IMAGINARY) +auto, PACK_TOKEN(T_AUTO ) +char, PACK_TOKEN(T_CHAR ) +const, PACK_TOKEN(T_CONST ) +double, PACK_TOKEN(T_DOUBLE ) +enum, PACK_TOKEN(T_ENUM ) +extern, PACK_TOKEN(T_EXTERN ) +float, PACK_TOKEN(T_FLOAT ) +inline, PACK_TOKEN(T_INLINE ) +int, PACK_TOKEN(T_INT ) +long, PACK_TOKEN(T_LONG ) +register, PACK_TOKEN(T_REGISTER ) +restrict, PACK_TOKEN(T_RESTRICT ) +short, 
PACK_TOKEN(T_SHORT ) +signed, PACK_TOKEN(T_SIGNED ) +static, PACK_TOKEN(T_STATIC ) +struct, PACK_TOKEN(T_STRUCT ) +typedef, PACK_TOKEN(T_TYPEDEF ) +union, PACK_TOKEN(T_UNION ) +unsigned, PACK_TOKEN(T_UNSIGNED ) +void, PACK_TOKEN(T_VOID ) +volatile, PACK_TOKEN(T_VOLATILE ) # english keywords -array, (T_ARRAY & 0x7f) | 0x80 -as, (T_AS & 0x7f) | 0x80 -declare, (T_DECLARE & 0x7f) | 0x80 -function, (T_FUNCTION & 0x7f) | 0x80 -of, (T_OF & 0x7f) | 0x80 -pointer, (T_POINTER & 0x7f) | 0x80 -returning, (T_RETURNING & 0x7f) | 0x80 -to, (T_TO & 0x7f) | 0x80 -type, (T_TYPE & 0x7f) | 0x80 -variable-length, (T_VLA & 0x7f) | 0x80 +array, PACK_TOKEN(T_ARRAY ) | 0x80 +as, PACK_TOKEN(T_AS ) | 0x80 +declare, PACK_TOKEN(T_DECLARE ) | 0x80 +function, PACK_TOKEN(T_FUNCTION ) | 0x80 +of, PACK_TOKEN(T_OF ) | 0x80 +pointer, PACK_TOKEN(T_POINTER ) | 0x80 +returning, PACK_TOKEN(T_RETURNING) | 0x80 +to, PACK_TOKEN(T_TO ) | 0x80 +type, PACK_TOKEN(T_TYPE ) | 0x80 +variable-length, PACK_TOKEN(T_VLA ) | 0x80 %% int cdecl__to_keyword(const char *s, int len, int english_mode) { @@ -81,9 +81,7 @@ int cdecl__to_keyword(const char *s, int len, int english_mode) unsigned x = (k->token & 0x7fu); if (english_mode || !(k->token & ~0x7fu)) { - if (T_VOID >= 256) - x += 256; - return x; + return UNPACK_TOKEN(x); } } @@ -96,9 +94,5 @@ static const char *wordlist_func(const struct keyword *k) if (!x) return NULL; - - if (T_VOID >= 256) - x += 256; - - return cdecl__token_name(x); + return cdecl__token_name(UNPACK_TOKEN(x)); } diff --git a/src/scan.l b/src/scan.l index 7e3e365..6718381 100644 --- a/src/scan.l +++ b/src/scan.l @@ -132,29 +132,23 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ %} "..."|[][;*(),] { + unsigned char *match; static const unsigned char tab[2][8] = { "*[](),.;", { - T_ASTERISK & 0xff, - T_LBRACKET & 0xff, - T_RBRACKET & 0xff, - T_LPAREN & 0xff, - T_RPAREN & 0xff, - T_COMMA & 0xff, - T_ELLIPSIS & 0xff, - T_SEMICOLON & 0xff + PACK_TOKEN(T_ASTERISK), + PACK_TOKEN(T_LBRACKET), + 
PACK_TOKEN(T_RBRACKET), + PACK_TOKEN(T_LPAREN), + PACK_TOKEN(T_RPAREN), + PACK_TOKEN(T_COMMA), + PACK_TOKEN(T_ELLIPSIS), + PACK_TOKEN(T_SEMICOLON) } }; - unsigned char *match; - int x; - match = memchr(&tab, yytext[0], sizeof tab[0]); - x = match[sizeof tab[0]]; - - if (T_VOID >= 256) - x += 256; - return x; + return UNPACK_TOKEN(match[sizeof tab[0]]); } {INTEGER} { -- 2.43.2