From 566e87bb15a98ed499e79b45f6c834ad0ffdc3b7 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Thu, 6 Jul 2023 23:56:29 -0400 Subject: [PATCH] libcdecl: Re-use strings from parser in spec_string. With the fixups applied by fix-yytname, we now have two distinct string tables containing the same strings. To avoid this duplication, add a new internal function to allow spec_string to access the parser's token name table. --- Makefile.am | 6 +-- src/gen-specstr.awk | 91 +++++++++------------------------------------ src/output.c | 1 + src/parse.y | 16 ++++++++ 4 files changed, 38 insertions(+), 76 deletions(-) diff --git a/Makefile.am b/Makefile.am index b8136eb..6d2ea65 100644 --- a/Makefile.am +++ b/Makefile.am @@ -100,11 +100,11 @@ t_rng_test_LDADD = $(TEST_LIBS) $(t_rng_test_OBJECTS): $(gnulib_headers) EXTRA_DIST += t/xos256p.c +src/error.lo: src/errmsg.h +src/output.lo: src/parse.h src/specstr.h +src/parse-decl.lo: src/scan.h src/parse.h src/typemap.h src/errmsg.h src/parse.lo: src/scan.h src/errmsg.h src/scan.lo: src/parse.h src/errmsg.h -src/parse-decl.lo: src/scan.h src/parse.h src/typemap.h src/errmsg.h -src/output.lo: src/specstr.h -src/error.lo: src/errmsg.h t/declgen.$(OBJEXT): t/typegen.h t/cdeclerr.$(OBJEXT): src/errmsg.h diff --git a/src/gen-specstr.awk b/src/gen-specstr.awk index 68401c1..c07856e 100755 --- a/src/gen-specstr.awk +++ b/src/gen-specstr.awk @@ -22,8 +22,7 @@ END { BEGIN { kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1; - underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1; - count = 0; + count = maxwidth = 0; } # Locate all the relevant identifiers in cdecl.h. We assume everything @@ -48,40 +47,17 @@ $1 ~ /^CDECL_/ { if (parts[2] in kinds) { kind_counts[parts[2]]++; + specs[count++] = parts[3]; - if (parts[3] == "IDENT") { - s = ""; - } else if (parts[3] in underscore) { - s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2)); - } else { - s = tolower(parts[3]); - } - rspecs[s] = count; - specs[count++] = s; + if (length(parts[3]) > maxwidth) + maxwidth = length(parts[3]); } } END { - string_table = ""; - - # The basic approach is to first generate a suffix-compressed string - # table containing all the specifier strings (not a lot of overlap in - # C specifiers, but there is (un)signed. - count = bucketsort(sorted_specs, specs); - for (i = 0; i < count; i++) { - s = sorted_specs[i]; - - if ((n = index(string_table, s "\1")) > 0) { - offsets[rspecs[s]] = n - 1; - } else { - offsets[rspecs[s]] = length(string_table); - string_table = string_table s "\1"; - } - } - - # Next, we create the index table. The first 5 entries key off of bits 9 - # through 11, which is sufficient to distinguish the different specifier - # kinds and is used to partition the rest of the index table. + # Create the token table. The first 5 entries key off of bits 9 through 11, + # which is sufficient to distinguish the different specifier kinds and is + # used to partition the rest of the token table. skip_count = 0; for (i in skiptab) { if (skip_count < i) @@ -95,59 +71,28 @@ END { } sub(/ $/, "\n\t\t", offset_table); - # Then, each remaining entry in the index table is an offset into the - # string table. for (i = 0; i < count; i++) { - suffix = "\t/* " (specs[i] ? specs[i] : "\"\"") " */"; + suffix = ""; if (i+1 < count) - suffix = "," suffix "\n\t\t"; - offset_table = offset_table offsets[i] suffix; - } + suffix = ",\n\t\t"; - sub(/\1$/, "", string_table); - gsub(/\1/, "\"\n\t\t\"\\0\" \"", string_table); + if (specs[i] == "IDENT") + s = "0"; + else + s = "T_" substr(specs[i] " ", 1, maxwidth) " - 256"; + offset_table = offset_table s suffix; + } print "static const char *spec_string(unsigned type)" print "{" - print "\tstatic const char tab[] ="; - print "\t\t \"" string_table "\";\n"; print "\tstatic const uint_least8_t idx[] = {"; print "\t\t" offset_table; print "\t};\n"; print "\tunsigned x = (type & 0xff) + idx[type >> 9];"; print "\tassert(x < sizeof idx);"; - print "\treturn tab + idx[x];"; + print "\tif (!(x = idx[x]))"; + print "\t\treturn \"\";"; + print "\treturn cdecl__token_name(x + 256);"; print "}"; } - -# bucketsort(dst, src) -# -# -# Sort the elements of src by descending string length, -# placing them into dst[0] ... dst[n]. -# -# Returns the number of elements. -function bucketsort(dst, src, buckets, max, count, i, t) -{ - for (t in src) { - i = length(src[t]); - if (i > max) { max = i; } - buckets[i]++; - } - - for (i = max; i >= 0; i--) { - if (i in buckets) { - t = buckets[i]; - buckets[i] = count; - count += t; - } - } - - for (t in src) { - i = length(t = src[t]); - dst[buckets[i]++] = t; - } - - return count; -} diff --git a/src/output.c b/src/output.c index 4590a5c..62c9c8f 100644 --- a/src/output.c +++ b/src/output.c @@ -22,6 +22,7 @@ #include "cdecl.h" #include "cdecl-internal.h" +#include "parse.h" #include "specstr.h" #define MIN(a, b) ((a) < (b) ? (a) : (b)) diff --git a/src/parse.y b/src/parse.y index 6faf2d2..263129c 100644 --- a/src/parse.y +++ b/src/parse.y @@ -62,6 +62,7 @@ %code provides { void cdecl__free(struct cdecl *); int cdecl__yyparse(void *scanner, struct cdecl **out); +const char *cdecl__token_name(unsigned token); } %union { @@ -589,3 +590,18 @@ english_vla: T_IDENT | { ALLOC($$, sizeof ""); strcpy($$, ""); } + +%% + +/* + * Expose the token string table to the rest of the library, in order to + * produce strings that match parser keywords. + * + * In order for this to work properly, the Bison output must be postprocessed + * by fix-yytname.awk to remove pointless quotation marks from the keyword + * strings. + */ +const char *cdecl__token_name(unsigned token) +{ + return yytname[YYTRANSLATE(token)]; +} -- 2.43.2