#!/bin/awk -f
#
-# Copyright © 2021 Nick Bowler
+# Copyright © 2021, 2023 Nick Bowler
#
# Generate a function to return the C keyword corresponding to a specifier
# type as a string, for internal use by the output routines.
# There is NO WARRANTY, to the extent permitted by law.
END {
- print "/*"
+ print "/*";
if (FILENAME) {
- print " * Automatically generated by gen-specstr.awk from " FILENAME
+ print " * Automatically generated by gen-specstr.awk from " FILENAME;
} else {
- print " * Automatically generated by gen-specstr.awk"
+ print " * Automatically generated by gen-specstr.awk";
}
- print " * Do not edit."
- print " */"
+ print " * Do not edit.";
+ print " */";
}
BEGIN {
- kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1
- underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1
+ kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1;
+ underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1;
+ count = 0;
}
+# Locate all the relevant identifiers in cdecl.h. We assume everything
+# is in numerically increasing order within the various enums.
$1 ~ /^CDECL_/ {
- sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1)
+ sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1);
+
+ split($1, parts, "_");
+ if (parts[2] == "SPEC") {
+ x = $0;
+ sub(/^.*= */, "", x);
+ sub(/,? *$/, "", x);
+
+ x = int(x / 512) % 8;
+ if (skiptab[x]) {
+ print "cannot create skip table";
+ exit 1;
+ }
+
+ skiptab[x] = parts[3];
+ }
- split($1, parts, "_")
if (parts[2] in kinds) {
+ kind_counts[parts[2]]++;
+
if (parts[3] == "IDENT") {
- s = ""
+ s = "";
} else if (parts[3] in underscore) {
- s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2))
+ s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2));
} else {
- s = tolower(parts[3])
+ s = tolower(parts[3]);
}
- specs[$1] = s
+ rspecs[s] = count;
+ specs[count++] = s;
}
}
END {
- print "static inline const char *spec_string(unsigned type)\n{"
- print "\tswitch (type) {"
+ string_table = "";
+
+ # The basic approach is to first generate a suffix-compressed string
+ # table containing all the specifier strings (not a lot of overlap in
+ # C specifiers, but there is (un)signed.
+ count = bucketsort(sorted_specs, specs);
+ for (i = 0; i < count; i++) {
+ s = sorted_specs[i];
+
+ if ((n = index(string_table, s "\1")) > 0) {
+ offsets[rspecs[s]] = n - 1;
+ } else {
+ offsets[rspecs[s]] = length(string_table);
+ string_table = string_table s "\1";
+ }
+ }
+
+ # Next, we create the index table. The first 5 entries key off of bits 9
+ # through 11, which is sufficient to distinguish the different specifier
+ # kinds and is used to partition the rest of the index table.
+ skip_count = 0;
+ for (i in skiptab) {
+ if (skip_count < i)
+ skip_count = i;
+ }
+
+ skip_pos = ++skip_count;
+ for (i = 0; i < skip_count; i++) {
+ offset_table = offset_table skip_pos ", ";
+ skip_pos += kind_counts[skiptab[i]];
+ }
+ sub(/ $/, "\n\t\t", offset_table);
+
+ # Then, each remaining entry in the index table is an offset into the
+ # string table.
+ for (i = 0; i < count; i++) {
+ suffix = "\t/* " (specs[i] ? specs[i] : "\"\"") " */";
+ if (i+1 < count)
+ suffix = "," suffix "\n\t\t";
+ offset_table = offset_table offsets[i] suffix;
+ }
+
+ sub(/\1$/, "", string_table);
+ gsub(/\1/, "\"\n\t\t\"\\0\" \"", string_table);
+
+ print "static const char *spec_string(unsigned type)"
+ print "{"
+ print "\tstatic const char tab[] =";
+ print "\t\t \"" string_table "\";\n";
+ print "\tstatic const uint_least8_t idx[] = {";
+ print "\t\t" offset_table;
+ print "\t};\n";
+
+ print "\tunsigned x = (type & 0xff) + idx[type >> 9];";
+ print "\tassert(x < sizeof idx);";
+ print "\treturn tab + idx[x];";
+ print "}";
+}
+
+# bucketsort(dst, src)
+#
+#
+# Sort the elements of src by descending string length,
+# placing them into dst[0] ... dst[n].
+#
+# Returns the number of elements.
+function bucketsort(dst, src, buckets, max, count, i, t)
+{
+ for (t in src) {
+ i = length(src[t]);
+ if (i > max) { max = i; }
+ buckets[i]++;
+ }
+
+ for (i = max; i >= 0; i--) {
+ if (i in buckets) {
+ t = buckets[i];
+ buckets[i] = count;
+ count += t;
+ }
+ }
- for (s in specs) {
- print "\tcase " s ": return \"" specs[s] "\";"
+ for (t in src) {
+ i = length(t = src[t]);
+ dst[buckets[i]++] = t;
}
- print "\t}"
- print "\n\tassert(0);"
- print "}"
+ return count;
}