From: Nick Bowler Date: Thu, 22 Jun 2023 05:01:46 +0000 (-0400) Subject: libcdecl: Improve specifier to string conversions. X-Git-Tag: v1.3~157 X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/commitdiff_plain/8acb9c0d66e01f6b1ed25d3819cf7619a9fd2786 libcdecl: Improve specifier to string conversions. Instead of a big switch statement, we can generate some compact lookup tables to convert specifiers to strings, which appears to produce much better results with gcc at least. --- diff --git a/configure.ac b/configure.ac index 91a3e98..f1bd702 100644 --- a/configure.ac +++ b/configure.ac @@ -28,6 +28,7 @@ gl_EARLY LT_INIT gl_INIT +AC_HEADER_ASSERT AC_C_FLEXIBLE_ARRAY_MEMBER # Work around quoting bug in Gnulib threadlib.m4 which prevents diff --git a/src/gen-specstr.awk b/src/gen-specstr.awk index 611b2a8..68401c1 100755 --- a/src/gen-specstr.awk +++ b/src/gen-specstr.awk @@ -1,6 +1,6 @@ #!/bin/awk -f # -# Copyright © 2021 Nick Bowler +# Copyright © 2021, 2023 Nick Bowler # # Generate a function to return the C keyword corresponding to a specifier # type as a string, for internal use by the output routines. @@ -10,46 +10,144 @@ # There is NO WARRANTY, to the extent permitted by law. END { - print "/*" + print "/*"; if (FILENAME) { - print " * Automatically generated by gen-specstr.awk from " FILENAME + print " * Automatically generated by gen-specstr.awk from " FILENAME; } else { - print " * Automatically generated by gen-specstr.awk" + print " * Automatically generated by gen-specstr.awk"; } - print " * Do not edit." - print " */" + print " * Do not edit."; + print " */"; } BEGIN { - kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1 - underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1 + kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1; + underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1; + count = 0; } +# Locate all the relevant identifiers in cdecl.h. We assume everything +# is in numerically increasing order within the various enums. $1 ~ /^CDECL_/ { - sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1) + sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1); + + split($1, parts, "_"); + if (parts[2] == "SPEC") { + x = $0; + sub(/^.*= */, "", x); + sub(/,? *$/, "", x); + + x = int(x / 512) % 8; + if (skiptab[x]) { + print "cannot create skip table"; + exit 1; + } + + skiptab[x] = parts[3]; + } - split($1, parts, "_") if (parts[2] in kinds) { + kind_counts[parts[2]]++; + if (parts[3] == "IDENT") { - s = "" + s = ""; } else if (parts[3] in underscore) { - s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2)) + s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2)); } else { - s = tolower(parts[3]) + s = tolower(parts[3]); } - specs[$1] = s + rspecs[s] = count; + specs[count++] = s; } } END { - print "static inline const char *spec_string(unsigned type)\n{" - print "\tswitch (type) {" + string_table = ""; + + # The basic approach is to first generate a suffix-compressed string + # table containing all the specifier strings (not a lot of overlap in + # C specifiers, but there is (un)signed. + count = bucketsort(sorted_specs, specs); + for (i = 0; i < count; i++) { + s = sorted_specs[i]; + + if ((n = index(string_table, s "\1")) > 0) { + offsets[rspecs[s]] = n - 1; + } else { + offsets[rspecs[s]] = length(string_table); + string_table = string_table s "\1"; + } + } + + # Next, we create the index table. The first 5 entries key off of bits 9 + # through 11, which is sufficient to distinguish the different specifier + # kinds and is used to partition the rest of the index table. + skip_count = 0; + for (i in skiptab) { + if (skip_count < i) + skip_count = i; + } + + skip_pos = ++skip_count; + for (i = 0; i < skip_count; i++) { + offset_table = offset_table skip_pos ", "; + skip_pos += kind_counts[skiptab[i]]; + } + sub(/ $/, "\n\t\t", offset_table); + + # Then, each remaining entry in the index table is an offset into the + # string table. + for (i = 0; i < count; i++) { + suffix = "\t/* " (specs[i] ? specs[i] : "\"\"") " */"; + if (i+1 < count) + suffix = "," suffix "\n\t\t"; + offset_table = offset_table offsets[i] suffix; + } + + sub(/\1$/, "", string_table); + gsub(/\1/, "\"\n\t\t\"\\0\" \"", string_table); + + print "static const char *spec_string(unsigned type)" + print "{" + print "\tstatic const char tab[] ="; + print "\t\t \"" string_table "\";\n"; + print "\tstatic const uint_least8_t idx[] = {"; + print "\t\t" offset_table; + print "\t};\n"; + + print "\tunsigned x = (type & 0xff) + idx[type >> 9];"; + print "\tassert(x < sizeof idx);"; + print "\treturn tab + idx[x];"; + print "}"; +} + +# bucketsort(dst, src) +# +# +# Sort the elements of src by descending string length, +# placing them into dst[0] ... dst[n]. +# +# Returns the number of elements. +function bucketsort(dst, src, buckets, max, count, i, t) +{ + for (t in src) { + i = length(src[t]); + if (i > max) { max = i; } + buckets[i]++; + } + + for (i = max; i >= 0; i--) { + if (i in buckets) { + t = buckets[i]; + buckets[i] = count; + count += t; + } + } - for (s in specs) { - print "\tcase " s ": return \"" specs[s] "\";" + for (t in src) { + i = length(t = src[t]); + dst[buckets[i]++] = t; } - print "\t}" - print "\n\tassert(0);" - print "}" + return count; }