libcdecl: Re-use strings from parser in spec_string.

author Nick Bowler <nbowler@draconx.ca>

Fri, 7 Jul 2023 03:56:29 +0000 (23:56 -0400)

committer Nick Bowler <nbowler@draconx.ca>

Fri, 7 Jul 2023 04:19:02 +0000 (00:19 -0400)
author Nick Bowler <nbowler@draconx.ca>
Fri, 7 Jul 2023 03:56:29 +0000 (23:56 -0400)
committer Nick Bowler <nbowler@draconx.ca>
Fri, 7 Jul 2023 04:19:02 +0000 (00:19 -0400)
diff --git a/Makefile.am b/Makefile.am

index b8136eb3b6e65f85a0c9436baba9f2c962f9677c..6d2ea6574b9f349502b1c73de4b1fab95605b9af 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -100,11 +100,11 @@ t_rng_test_LDADD = $(TEST_LIBS)
  $(t_rng_test_OBJECTS): $(gnulib_headers)
  EXTRA_DIST += t/xos256p.c
  
+src/error.lo: src/errmsg.h
+src/output.lo: src/parse.h src/specstr.h
+src/parse-decl.lo: src/scan.h src/parse.h src/typemap.h src/errmsg.h
  src/parse.lo: src/scan.h src/errmsg.h
  src/scan.lo: src/parse.h src/errmsg.h
-src/parse-decl.lo: src/scan.h src/parse.h src/typemap.h src/errmsg.h
-src/output.lo: src/specstr.h
-src/error.lo: src/errmsg.h
  t/declgen.$(OBJEXT): t/typegen.h
  t/cdeclerr.$(OBJEXT): src/errmsg.h
  
diff --git a/src/gen-specstr.awk b/src/gen-specstr.awk

index 68401c11a6fc537d30a25c875160faaa9c47d57a..c07856ec93518c2d0a6a99d0dbab64414754ddca 100755 (executable)
--- a/src/gen-specstr.awk
+++ b/src/gen-specstr.awk
@@ -22,8 +22,7 @@ END {
  
  BEGIN {
    kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1;
-  underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1;
-  count = 0;
+  count = maxwidth = 0;
  }
  
  # Locate all the relevant identifiers in cdecl.h.  We assume everything
@@ -48,40 +47,17 @@ $1 ~ /^CDECL_/ {
  
    if (parts[2] in kinds) {
      kind_counts[parts[2]]++;
+    specs[count++] = parts[3];
  
-    if (parts[3] == "IDENT") {
-      s = "";
-    } else if (parts[3] in underscore) {
-      s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2));
-    } else {
-      s = tolower(parts[3]);
-    }
-    rspecs[s] = count;
-    specs[count++] = s;
+    if (length(parts[3]) > maxwidth)
+      maxwidth = length(parts[3]);
    }
  }
  
  END {
-  string_table = "";
-
-  # The basic approach is to first generate a suffix-compressed string
-  # table containing all the specifier strings (not a lot of overlap in
-  # C specifiers, but there is (un)signed.
-  count = bucketsort(sorted_specs, specs);
-  for (i = 0; i < count; i++) {
-    s = sorted_specs[i];
-
-    if ((n = index(string_table, s "\1")) > 0) {
-      offsets[rspecs[s]] = n - 1;
-    } else {
-      offsets[rspecs[s]] = length(string_table);
-      string_table = string_table s "\1";
-    }
-  }
-
-  # Next, we create the index table.  The first 5 entries key off of bits 9
-  # through 11, which is sufficient to distinguish the different specifier
-  # kinds and is used to partition the rest of the index table.
+  # Create the token table.  The first 5 entries are indexed by bits 9 through
+  # 11 of the type, which suffices to distinguish the specifier kinds; each is
+  # a base added to the low 8 bits of the type to index the rest of the table.
    skip_count = 0;
    for (i in skiptab) {
      if (skip_count < i)
@@ -95,59 +71,28 @@ END {
    }
    sub(/ $/, "\n\t\t", offset_table);
  
-  # Then, each remaining entry in the index table is an offset into the
-  # string table.
    for (i = 0; i < count; i++) {
-    suffix = "\t/* " (specs[i] ? specs[i] : "\"\"") " */";
+    suffix = "";
      if (i+1 < count)
-      suffix = "," suffix "\n\t\t";
-    offset_table = offset_table offsets[i] suffix;
-  }
+      suffix = ",\n\t\t";
  
-  sub(/\1$/, "", string_table);
-  gsub(/\1/, "\"\n\t\t\"\\0\" \"", string_table);
+    if (specs[i] == "IDENT")
+      s = "0";
+    else
+      s = "T_" substr(specs[i] "                ", 1, maxwidth) " - 256";
+    offset_table = offset_table s suffix;
+  }
  
    print "static const char *spec_string(unsigned type)"
    print "{"
-  print "\tstatic const char tab[] =";
-  print "\t\t     \"" string_table "\";\n";
    print "\tstatic const uint_least8_t idx[] = {";
    print "\t\t" offset_table;
    print "\t};\n";
  
    print "\tunsigned x = (type & 0xff) + idx[type >> 9];";
    print "\tassert(x < sizeof idx);";
-  print "\treturn tab + idx[x];";
+  print "\tif (!(x = idx[x]))";
+  print "\t\treturn \"\";";
+  print "\treturn cdecl__token_name(x + 256);";
    print "}";
  }
-
-# bucketsort(dst, src)
-#
-#
-# Sort the elements of src by descending string length,
-# placing them into dst[0] ... dst[n].
-#
-# Returns the number of elements.
-function bucketsort(dst, src, buckets, max, count, i, t)
-{
-  for (t in src) {
-    i = length(src[t]);
-    if (i > max) { max = i; }
-    buckets[i]++;
-  }
-
-  for (i = max; i >= 0; i--) {
-    if (i in buckets) {
-      t = buckets[i];
-      buckets[i] = count;
-      count += t;
-    }
-  }
-
-  for (t in src) {
-    i = length(t = src[t]);
-    dst[buckets[i]++] = t;
-  }
-
-  return count;
-}
diff --git a/src/output.c b/src/output.c

index 4590a5cfc60b2fb9c3d1bdead751da7d37b25d9b..62c9c8f9bd0839330de432fde4ea79dac5d9f947 100644 (file)
--- a/src/output.c
+++ b/src/output.c
@@ -22,6 +22,7 @@
  #include "cdecl.h"
  #include "cdecl-internal.h"
  
+#include "parse.h"
  #include "specstr.h"
  
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
diff --git a/src/parse.y b/src/parse.y

index 6faf2d2b8b0d6eb4755491a43952b4659ebea9a4..263129cea8d6f3f9cc9a4b35c2341fa139d14d5f 100644 (file)
--- a/src/parse.y
+++ b/src/parse.y
@@ -62,6 +62,7 @@
  %code provides {
  void cdecl__free(struct cdecl *);
  int cdecl__yyparse(void *scanner, struct cdecl **out);
+const char *cdecl__token_name(unsigned token);
  }
  
  %union {
@@ -589,3 +590,18 @@ english_vla: T_IDENT | {
         ALLOC($$, sizeof "");
         strcpy($$, "");
  }
+
+%%
+
+/*
+ * Return the parser's name for a token number (T_xxx value), found by mapping
+ * it through YYTRANSLATE into yytname, so that the rest of the library can
+ * produce strings that match parser keywords.
+ *
+ * For this to work properly, the Bison output must be postprocessed by
+ * fix-yytname.awk to remove pointless quotation marks from keyword strings.
+ */
+const char *cdecl__token_name(unsigned token)
+{
+       return yytname[YYTRANSLATE(token)];
+}
author	Nick Bowler <nbowler@draconx.ca>
	Fri, 7 Jul 2023 03:56:29 +0000 (23:56 -0400)
committer	Nick Bowler <nbowler@draconx.ca>
	Fri, 7 Jul 2023 04:19:02 +0000 (00:19 -0400)
Makefile.am		patch \| blob \| history
src/gen-specstr.awk		patch \| blob \| history
src/output.c		patch \| blob \| history
src/parse.y		patch \| blob \| history