libcdecl: Improve specifier to string conversions.

author Nick Bowler <nbowler@draconx.ca>

Thu, 22 Jun 2023 05:01:46 +0000 (01:01 -0400)

committer Nick Bowler <nbowler@draconx.ca>

Thu, 22 Jun 2023 05:10:27 +0000 (01:10 -0400)
author Nick Bowler <nbowler@draconx.ca>
Thu, 22 Jun 2023 05:01:46 +0000 (01:01 -0400)
committer Nick Bowler <nbowler@draconx.ca>
Thu, 22 Jun 2023 05:10:27 +0000 (01:10 -0400)
diff --git a/configure.ac b/configure.ac

index 91a3e9809b423e38f4e3b1e2ddf1a41136010cbd..f1bd702e331e1c9c2246fa94a10968a788b30b45 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -28,6 +28,7 @@ gl_EARLY
  LT_INIT
  gl_INIT
  
+AC_HEADER_ASSERT
  AC_C_FLEXIBLE_ARRAY_MEMBER
  
  # Work around quoting bug in Gnulib threadlib.m4 which prevents
diff --git a/src/gen-specstr.awk b/src/gen-specstr.awk

index 611b2a851568afe53475ba895bc2e2b79cba3e76..68401c11a6fc537d30a25c875160faaa9c47d57a 100755 (executable)
--- a/src/gen-specstr.awk
+++ b/src/gen-specstr.awk
@@ -1,6 +1,6 @@
  #!/bin/awk -f
  #
-# Copyright © 2021 Nick Bowler
+# Copyright © 2021, 2023 Nick Bowler
  #
  # Generate a function to return the C keyword corresponding to a specifier
  # type as a string, for internal use by the output routines.
@@ -10,46 +10,144 @@
  # There is NO WARRANTY, to the extent permitted by law.
  
  END {
-  print "/*"
+  print "/*";
    if (FILENAME) {
-    print " * Automatically generated by gen-specstr.awk from " FILENAME
+    print " * Automatically generated by gen-specstr.awk from " FILENAME;
    } else {
-    print " * Automatically generated by gen-specstr.awk"
+    print " * Automatically generated by gen-specstr.awk";
    }
-  print " * Do not edit."
-  print " */"
+  print " * Do not edit.";
+  print " */";
  }
  
  BEGIN {
-  kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1
-  underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1
+  kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1;
+  underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1;
+  count = 0;
  }
  
+# Locate all the relevant identifiers in cdecl.h.  We assume everything
+# is in numerically increasing order within the various enums.
  $1 ~ /^CDECL_/ {
-  sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1)
+  sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1);
+
+  split($1, parts, "_");
+  if (parts[2] == "SPEC") {
+    x = $0;
+    sub(/^.*= */, "", x);
+    sub(/,? *$/, "", x);
+
+    x = int(x / 512) % 8;
+    if (skiptab[x]) {
+      print "cannot create skip table";
+      exit 1;
+    }
+
+    skiptab[x] = parts[3];
+  }
  
-  split($1, parts, "_")
    if (parts[2] in kinds) {
+    kind_counts[parts[2]]++;
+
      if (parts[3] == "IDENT") {
-      s = ""
+      s = "";
      } else if (parts[3] in underscore) {
-      s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2))
+      s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2));
      } else {
-      s = tolower(parts[3])
+      s = tolower(parts[3]);
      }
-    specs[$1] = s
+    rspecs[s] = count;
+    specs[count++] = s;
    }
  }
  
  END {
-  print "static inline const char *spec_string(unsigned type)\n{"
-  print "\tswitch (type) {"
+  string_table = "";
+
+  # The basic approach is to first generate a suffix-compressed string
+  # table containing all the specifier strings (not a lot of overlap in
+  # C specifiers, but there is (un)signed.
+  count = bucketsort(sorted_specs, specs);
+  for (i = 0; i < count; i++) {
+    s = sorted_specs[i];
+
+    if ((n = index(string_table, s "\1")) > 0) {
+      offsets[rspecs[s]] = n - 1;
+    } else {
+      offsets[rspecs[s]] = length(string_table);
+      string_table = string_table s "\1";
+    }
+  }
+
+  # Next, we create the index table.  The first 5 entries key off of bits 9
+  # through 11, which is sufficient to distinguish the different specifier
+  # kinds and is used to partition the rest of the index table.
+  skip_count = 0;
+  for (i in skiptab) {
+    if (skip_count < i)
+      skip_count = i;
+  }
+
+  skip_pos = ++skip_count;
+  for (i = 0; i < skip_count; i++) {
+    offset_table = offset_table skip_pos ", ";
+    skip_pos += kind_counts[skiptab[i]];
+  }
+  sub(/ $/, "\n\t\t", offset_table);
+
+  # Then, each remaining entry in the index table is an offset into the
+  # string table.
+  for (i = 0; i < count; i++) {
+    suffix = "\t/* " (specs[i] ? specs[i] : "\"\"") " */";
+    if (i+1 < count)
+      suffix = "," suffix "\n\t\t";
+    offset_table = offset_table offsets[i] suffix;
+  }
+
+  sub(/\1$/, "", string_table);
+  gsub(/\1/, "\"\n\t\t\"\\0\" \"", string_table);
+
+  print "static const char *spec_string(unsigned type)"
+  print "{"
+  print "\tstatic const char tab[] =";
+  print "\t\t     \"" string_table "\";\n";
+  print "\tstatic const uint_least8_t idx[] = {";
+  print "\t\t" offset_table;
+  print "\t};\n";
+
+  print "\tunsigned x = (type & 0xff) + idx[type >> 9];";
+  print "\tassert(x < sizeof idx);";
+  print "\treturn tab + idx[x];";
+  print "}";
+}
+
+# bucketsort(dst, src)
+#
+#
+# Sort the elements of src by descending string length,
+# placing them into dst[0] ... dst[n].
+#
+# Returns the number of elements.
+function bucketsort(dst, src, buckets, max, count, i, t)
+{
+  for (t in src) {
+    i = length(src[t]);
+    if (i > max) { max = i; }
+    buckets[i]++;
+  }
+
+  for (i = max; i >= 0; i--) {
+    if (i in buckets) {
+      t = buckets[i];
+      buckets[i] = count;
+      count += t;
+    }
+  }
  
-  for (s in specs) {
-    print "\tcase " s ": return \"" specs[s] "\";"
+  for (t in src) {
+    i = length(t = src[t]);
+    dst[buckets[i]++] = t;
    }
  
-  print "\t}"
-  print "\n\tassert(0);"
-  print "}"
+  return count;
  }
author	Nick Bowler <nbowler@draconx.ca>
	Thu, 22 Jun 2023 05:01:46 +0000 (01:01 -0400)
committer	Nick Bowler <nbowler@draconx.ca>
	Thu, 22 Jun 2023 05:10:27 +0000 (01:10 -0400)
configure.ac		patch \| blob \| history
src/gen-specstr.awk		patch \| blob \| history