#!/bin/awk -f
#
# Copyright © 2021, 2023 Nick Bowler
#
# Generate a function to return the C keyword corresponding to a specifier
# type as a string, for internal use by the output routines.
#
# License WTFPL2: Do What The Fuck You Want To Public License, version 2.
# This is free software: you are free to do what the fuck you want to.
# There is NO WARRANTY, to the extent permitted by law.

# Emit the "generated file" banner as a C comment.  This is done in an END
# action, where FILENAME holds the name of the last input file read (if
# any), so the banner can mention where the data came from.
END {
  print "/*";
  print " * Automatically generated by gen-specstr.awk" \
        (FILENAME ? " from " FILENAME : "");
  print " * Do not edit.";
  print " */";
}

# Set up the lookup sets consulted while scanning the input.  Only
# membership ("in") is ever tested, so the stored values are arbitrary.
BEGIN {
  # Number of specifier strings collected so far.
  count = 0;

  # Second component of CDECL_<kind>_<name> identifiers that denote
  # specifiers.
  kinds["TYPE"] = 1;
  kinds["STOR"] = 1;
  kinds["QUAL"] = 1;
  kinds["FUNC"] = 1;

  # Specifier names spelled with a leading underscore and a capital
  # letter (e.g., BOOL becomes _Bool).
  underscore["BOOL"] = 1;
  underscore["COMPLEX"] = 1;
  underscore["IMAGINARY"] = 1;
}

# Locate all the relevant identifiers in cdecl.h.  We assume everything
# is in numerically increasing order within the various enums.
#
# Two sorts of identifiers are of interest:
#
#   CDECL_SPEC_<kind>    - recorded in skiptab[], keyed by a small integer
#                          derived from the enumerator's value.
#   CDECL_<kind>_<name>  - for each <kind> listed in kinds[], the keyword
#                          string is appended to specs[] (rspecs[] is the
#                          reverse mapping) and kind_counts[<kind>] is
#                          incremented.
$1 ~ /^CDECL_/ {
  # Cut the first field off at the first character that cannot be part of
  # an all-uppercase identifier (for example, a trailing comma).
  sub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ_].*/, "", $1);

  split($1, parts, "_");
  if (parts[2] == "SPEC") {
    # Isolate the enumerator's value: drop everything up to and including
    # the "=", then any trailing comma and spaces.
    x = $0;
    sub(/^.*= */, "", x);
    sub(/,? *$/, "", x);

    # Reduce the value to a 3-bit table slot (value / 512, modulo 8).
    x = int(x / 512) % 8;
    if (skiptab[x]) {
      # Two kinds landed in the same slot.  Report the problem on standard
      # error; standard output carries the generated C code, so a message
      # printed there would end up inside the generated file.
      #
      # NOTE: exit in a pattern action still runs the END actions before
      # awk terminates with the given status.
      print "cannot create skip table" | "cat 1>&2";
      exit 1;
    }

    skiptab[x] = parts[3];
  }

  if (parts[2] in kinds) {
    kind_counts[parts[2]]++;

    if (parts[3] == "IDENT") {
      # IDENT is represented by the empty string.
      s = "";
    } else if (parts[3] in underscore) {
      # Leading underscore, first letter kept uppercase, remainder
      # lowercased: BOOL becomes _Bool.
      s = "_" substr(parts[3], 1, 1) tolower(substr(parts[3], 2));
    } else {
      s = tolower(parts[3]);
    }
    rspecs[s] = count;
    specs[count++] = s;
  }
}

# Emit the spec_string() C function, built from the data gathered while
# scanning the input: a string table holding every specifier string, and
# an index table (idx) used to find a given specifier's string.
END {
  string_table = "";
  offset_table = "";

  # Largest value placed into idx[]; checked against the element type's
  # range before anything is printed.
  idx_peak = 0;

  # The basic approach is to first generate a suffix-compressed string
  # table containing all the specifier strings (not a lot of overlap in
  # C specifiers, but there is (un)signed).  Each entry is terminated by
  # a \1 byte for now.  Strings are visited longest first, so a string
  # that is a suffix of an earlier entry is found by index() and shares
  # that entry's storage instead of being appended.
  count = bucketsort(sorted_specs, specs);
  for (i = 0; i < count; i++) {
    s = sorted_specs[i];

    if ((n = index(string_table, s "\1")) > 0) {
      # index() is 1-based; offsets are 0-based.
      offsets[rspecs[s]] = n - 1;
    } else {
      offsets[rspecs[s]] = length(string_table);
      string_table = string_table s "\1";
    }
  }

  # Next, we create the index table.  The first skip_count entries are
  # looked up by the generated code as idx[type >> 9]; each one is the
  # position within idx where the entries for that specifier kind begin,
  # which partitions the rest of the index table.
  skip_count = 0;
  for (i in skiptab) {
    # Array subscripts are strings, so add 0 to make sure this is a
    # numeric comparison rather than a string comparison.
    if (skip_count < i + 0)
      skip_count = i + 0;
  }

  skip_pos = ++skip_count;
  for (i = 0; i < skip_count; i++) {
    if (skip_pos > idx_peak)
      idx_peak = skip_pos;
    offset_table = offset_table skip_pos ", ";
    skip_pos += kind_counts[skiptab[i]];
  }
  sub(/ $/, "\n\t\t", offset_table);

  # Then, each remaining entry in the index table is an offset into the
  # string table.
  for (i = 0; i < count; i++) {
    if (offsets[i] > idx_peak)
      idx_peak = offsets[i];

    suffix = "\t/* " (specs[i] ? specs[i] : "\"\"") " */";
    if (i+1 < count)
      suffix = "," suffix "\n\t\t";
    offset_table = offset_table offsets[i] suffix;
  }

  # idx[] is emitted as an array of uint_least8_t, which is only guaranteed
  # to hold values up to 255.  Refuse to generate a table that would be
  # silently truncated by the C compiler.
  if (idx_peak > 255) {
    print "index table entry " idx_peak " does not fit in uint_least8_t" \
      | "cat 1>&2";
    exit 1;
  }

  # Convert the \1 separators into C source: the final one is dropped
  # (the string literal's own terminator takes its place), and each of
  # the others closes the current literal, starts a new output line and
  # inserts a "\0" literal ahead of the next string.
  sub(/\1$/, "", string_table);
  gsub(/\1/, "\"\n\t\t\"\\0\" \"", string_table);

  print "static const char *spec_string(unsigned type)"
  print "{"
  print "\tstatic const char tab[] =";
  print "\t\t     \"" string_table "\";\n";
  print "\tstatic const uint_least8_t idx[] = {";
  print "\t\t" offset_table;
  print "\t};\n";

  print "\tunsigned x = (type & 0xff) + idx[type >> 9];";
  print "\tassert(x < sizeof idx);";
  print "\treturn tab + idx[x];";
  print "}";
}

# bucketsort(dst, src)
#
# Sort the elements of src by descending string length, placing them
# into dst[0] ... dst[n-1], where n is the number of elements in src.
# Elements of equal length are stored in the order that a for-in
# traversal of src visits them.
#
# Returns the number of elements (0 if src is empty).
#
# The parameters after src are local variables and are not meant to be
# passed by callers.
function bucketsort(dst, src, buckets, max, count, i, t)
{
  # Start from explicit zeros so that an empty src yields a numeric 0
  # rather than an uninitialized value.
  max = 0;
  count = 0;

  # Tally how many elements there are of each length, remembering the
  # greatest length seen.
  for (t in src) {
    i = length(src[t]);
    if (i > max) { max = i; }
    buckets[i]++;
  }

  # Walking from the greatest length down, replace each tally with the
  # index in dst at which elements of that length begin.
  for (i = max; i >= 0; i--) {
    if (i in buckets) {
      t = buckets[i];
      buckets[i] = count;
      count += t;
    }
  }

  # Store each element in the next free slot belonging to its length.
  for (t in src) {
    i = length(t = src[t]);
    dst[buckets[i]++] = t;
  }

  return count;
}