#!/bin/awk -f
#
# Copyright © 2021, 2023 Nick Bowler
#
# Generate a function to return the C keyword corresponding to a specifier
# type as a string, for internal use by the output routines.
#
# License WTFPL2: Do What The Fuck You Want To Public License, version 2.
# This is free software: you are free to do what the fuck you want to.
# There is NO WARRANTY, to the extent permitted by law.

# Emit the "generated file" banner.  END rules run in program order, so this
# text precedes the function printed by the later END rule.
END {
  # An "exit" in a main rule still runs the END rules, so bail out here
  # rather than generating output from rejected input.
  if (failed)
    exit 1;

  print "/*";
  if (FILENAME) {
    print " * Automatically generated by gen-specstr.awk from " FILENAME;
  } else {
    print " * Automatically generated by gen-specstr.awk";
  }
  print " * Do not edit.";
  print " */";
}

BEGIN {
  # Second identifier components that denote a specifier kind.
  kinds["TYPE"] = kinds["STOR"] = kinds["QUAL"] = kinds["FUNC"] = 1;

  # Specifiers spelled with a leading underscore and a capital letter
  # (e.g., BOOL becomes _Bool).
  underscore["BOOL"] = underscore["COMPLEX"] = underscore["IMAGINARY"] = 1;

  count = 0;
  failed = 0;
}

# Locate all the relevant identifiers in cdecl.h.  We assume everything
# is in numerically increasing order within the various enums.
$1 ~ /^CDECL_/ {
  # Cut $1 at the first character that is not an uppercase letter or an
  # underscore, then split what remains on underscores.
  sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1);
  split($1, parts, "_");

  if (parts[2] == "SPEC") {
    # Take the text after the "=" as the enumerator's value and reduce
    # it to bits 9 through 11.
    x = $0;
    sub(/^.*= */, "", x);
    sub(/,? *$/, "", x);
    x = int(x / 512) % 8;

    if (skiptab[x]) {
      # Report on standard error so the message does not end up in the
      # generated source, and flag the failure for the END rules.
      print "cannot create skip table" | "cat 1>&2";
      close("cat 1>&2");
      failed = 1;
      exit 1;
    }
    skiptab[x] = parts[3];
  }

  if (parts[2] in kinds) {
    kind_counts[parts[2]]++;

    if (parts[3] == "IDENT") {
      s = "";
    } else if (parts[3] in underscore) {
      s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2));
    } else {
      s = tolower(parts[3]);
    }

    specs[count++] = s;
  }
}

# Build the tables and print the generated spec_string function.
END {
  if (failed)
    exit 1;

  string_table = "";
  offset_table = "";

  # The basic approach is to first generate a suffix-compressed string
  # table containing all the specifier strings (not a lot of overlap in
  # C specifiers, but there is (un)signed).
  #
  # Strings are visited longest first, so a string that is a suffix of an
  # earlier one is found (together with its \1 terminator) inside the
  # table and shares its storage.  Offsets are recorded per string, so a
  # string that occurs more than once in specs gets the same offset for
  # every occurrence.
  count = bucketsort(sorted_specs, specs);
  for (i = 0; i < count; i++) {
    s = sorted_specs[i];
    if (s in str_offset)
      continue;

    if ((n = index(string_table, s "\1")) > 0) {
      str_offset[s] = n - 1;
    } else {
      str_offset[s] = length(string_table);
      string_table = string_table s "\1";
    }
  }

  # Next, we create the index table.  The leading entries (one for each
  # value of bits 9 through 11, up to the highest value seen) are
  # sufficient to distinguish the different specifier kinds and are used
  # to partition the rest of the index table.
  skip_count = 0;
  for (i in skiptab) {
    # Array subscripts are strings; add 0 to force a numeric comparison.
    if (skip_count < i + 0)
      skip_count = i + 0;
  }

  skip_pos = ++skip_count;
  for (i = 0; i < skip_count; i++) {
    offset_table = offset_table skip_pos ", ";
    skip_pos += kind_counts[skiptab[i]];
  }
  sub(/ $/, "\n\t\t", offset_table);

  # Then, each remaining entry in the index table is an offset into the
  # string table.
  for (i = 0; i < count; i++) {
    suffix = "\t/* " (specs[i] ? specs[i] : "\"\"") " */";
    if (i+1 < count)
      suffix = "," suffix "\n\t\t";
    offset_table = offset_table str_offset[specs[i]] suffix;
  }

  # Drop the final terminator (the C string literal supplies its own) and
  # turn every other terminator into an explicit "\0" between adjacent
  # C string literals.
  sub(/\1$/, "", string_table);
  gsub(/\1/, "\"\n\t\t\"\\0\" \"", string_table);

  print "static const char *spec_string(unsigned type)";
  print "{";
  print "\tstatic const char tab[] =";
  print "\t\t     \"" string_table "\";\n";
  print "\tstatic const uint_least8_t idx[] = {";
  print "\t\t" offset_table;
  print "\t};\n";
  print "\tunsigned x = (type & 0xff) + idx[type >> 9];";
  print "\tassert(x < sizeof idx);";
  print "\treturn tab + idx[x];";
  print "}";
}

# bucketsort(dst, src)
#
# Sort the elements of src by descending string length,
# placing them into dst[0] ... dst[n-1], where n is the
# number of elements in src.  The relative order of
# elements of equal length is unspecified.
#
# Returns the number of elements (0 if src is empty).
#
# The remaining parameters are local variables.
function bucketsort(dst, src, buckets, max, count, i, t, s)
{
  max = 0;
  count = 0;

  # Count how many strings there are of each length.
  for (t in src) {
    i = length(src[t]);
    if (i > max) {
      max = i;
    }
    buckets[i]++;
  }

  # Replace each count with the first output position for that length,
  # assigning positions to the longest strings first.
  for (i = max; i >= 0; i--) {
    if (i in buckets) {
      t = buckets[i];
      buckets[i] = count;
      count += t;
    }
  }

  # Place each string in the next free position for its length.
  for (t in src) {
    s = src[t];
    i = length(s);
    dst[buckets[i]++] = s;
  }

  return count;
}