Import getline helper from cdecl99.

[dxcommon.git] / scripts / gen-tree.awk
diff --git a/scripts/gen-tree.awk b/scripts/gen-tree.awk

index 53557f1ce89d9359d3f4800603c1405b54431edd..1334bf8dfe9611ad83bdfce79fcb99020af25f32 100755 (executable)
--- a/scripts/gen-tree.awk
+++ b/scripts/gen-tree.awk
@@ -1,14 +1,20 @@
  #!/bin/awk -f
  #
-# Copyright © 2021 Nick Bowler
+# Copyright © 2021-2022 Nick Bowler
  #
  # Generate one or more C array initializers to encode simple tree structures
  # in a compact format.
  #
-# Each line of the input file defines a tree node.  The first field must be a
-# C identifier, which may be optionally followed by a comma.  The identifiers
-# used on non-leaf nodes must be unique with respect to other non-leaf nodes,
-# but leaf nodes may share the same identifier as other nodes.
+# Each nonempty line of the input file is either a comment, an option
+# specification or a tree specification.  Each line is distinguished by
+# its first character.  A # character introduces a comment, an @ character
+# introduces an option specification (described later), and all other
+# nonempty lines are tree nodes.
+#
+# The first field of a tree specification must be a valid C identifier,
+# optionally followed by a comma.  The identifiers used on non-leaf nodes
+# must be unique with respect to other non-leaf nodes, but leaf nodes may
+# share the same identifier as other nodes.
  #
  # The tree structure is expressed through indentation.  Lines with no leading
  # whitespace designate root nodes.  When the number of white-space characters
@@ -30,11 +36,20 @@
  #
  # For each root node X, the object-like macro X_INITIALIZER is defined.  This
  # can be used to initialize an array with static storage duration.  Each line
-# of the input is surrounded by braces to construct the initializer for one
-# array element.  These are then ordered topologically beginning with the root,
-# but the root itself is not included.  Each subtree is terminated by an
-# "empty" element (with an initializer of {0}).  For example, the above input
-# will produce this (or an equivalent topological ordering) initializer:
+# of the input is defines the initializer for one array element.  These are
+# then ordered topologically beginning with the root, but the root itself is
+# not included.  Each subtree is terminated by an "empty" element (with an
+# initializer of {0}).
+#
+# If there is a comma after the identifier which begins a tree element, or
+# if the identifier is the only text on the line, then the element initializer
+# is constructed by surrounding the entire line by braces.  Otherwise, the
+# identifier is removed and the remainder of the line is surrounded by braces
+# to construct the initializer.  This allows the exact form of the initializer
+# to be customized.
+#
+# For example, the above input will produce this (or an equivalent topological
+# ordering) initializer:
  #
  #    OUTPUT: #define root_INITIALIZER \
  #                 {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0}
@@ -55,6 +70,13 @@
  # constants will normally be in scope when using the root_INITIALIZER macro,
  # which means the resulting array initializers will use them.
  #
+# Options may be used to alter the normal behaviour.  They may be placed
+# anywhere in the input file.  The following options are defined:
+#
+#   @nostrtab
+#     Do not output a definition for tree_strtab or its associated
+#     enumeration constants.
+#
  # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
  # This is free software: you are free to do what the fuck you want to.
  # There is NO WARRANTY, to the extent permitted by law.
@@ -71,6 +93,12 @@ END {
  }
  
  BEGIN {
+  # Check if "\\\\" in substitutions gives just one backslash.
+  bs = "x"; sub(/x/, "\\\\", bs);
+  bs = (length(bs) == 1 ? "\\\\" : "\\");
+
+  opts["strtab"] = 1;
+
    depth = max_depth = 0;
    num_entries = 0;
  
@@ -80,7 +108,26 @@ BEGIN {
    indent_stack[0] = 0;
  }
  
+# Comments
  NF == 0 { next }
+$0 ~ /^#/ { next }
+
+# Options
+sub(/^@/, "", $0) {
+  if (NF == 1) {
+    orig=$1
+    gsub(/-/, "_", $1);
+    val = !sub(/^no_?/, "", $1);
+    if ($1 in opts) {
+      opts[$1] = val;
+    } else {
+      print "error: unrecognized option: @" orig | "cat 1>&2"
+      exit 1
+    }
+  }
+  next
+}
+
  { indent = index($0, $1) - 1 }
  
  indent > 0 {
@@ -101,48 +148,67 @@ indent > 0 {
    entry_name = $1; sub(/,$/, "", entry_name);
    all_items[num_entries++] = entry_name;
  
-  $1 = ", \\\n\t{ " $1; $NF = $NF " }";
+  # Construct the element initializer for this tree node.
+  $1 = " " $1;
+  if (NF > 1) {
+    # Check if entry name is followed by a comma.  If it is not, the entry
+    # name is excluded from the initializer.
+    check_str = $1$2;
+    gsub(/[ \t]/, "", check_str);
+    if (index(check_str, entry_name ",") == 0) {
+      $1 = "";
+    }
+  }
+  $1 = ", \\\n\t{" $1; $NF = $NF " }";
+
    tree_items[depth] = tree_items[depth] $0;
    level_count[depth]++;
  }
  
-indent == 0 && tree_identifier {
+indent == 0 && tree_identifier != "" {
    trees[tree_identifier] = format_items();
    tree_identifier = "";
  }
  END {
-  if (tree_identifier)
+  if (tree_identifier != "")
      trees[tree_identifier] = format_items()
  }
  indent == 0 { tree_identifier = $1 }
  
  END {
-  entry_strtab = "\1";
-  bucketsort(sorted_items, all_items);
-  for (i = 0; i < num_entries; i++) {
-    s = sorted_items[i];
-    if ((n = index(entry_strtab, s "\1")) > 0) {
-      entry_offsets[s] = n-1;
-    } else {
-      entry_offsets[s] = length(entry_strtab);
-      entry_strtab = entry_strtab s "\1";
+  prefix = "\nenum {\n";
+
+  if (opts["strtab"]) {
+    entry_strtab = "\1";
+    bucketsort(sorted_items, all_items);
+    for (i = 0; i < num_entries; i++) {
+      s = sorted_items[i];
+      if ((n = index(entry_strtab, s "\1")) > 0) {
+        entry_offsets[s] = n-1;
+      } else {
+        entry_offsets[s] = length(entry_strtab);
+        entry_strtab = entry_strtab s "\1";
+      }
      }
-  }
  
-  gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab);
-  sub(/^"/, "", entry_strtab);
-  sub(/\n[^\n]*$/, ";", entry_strtab);
-  print "\nstatic const char tree_strtab[] =" entry_strtab
+    gsub("\1", "\"\n\t\"" bs "0\" \"", entry_strtab);
+    sub(/^"/, "", entry_strtab);
+    sub(/\n[^\n]*$/, ";", entry_strtab);
+    print "\nstatic const char tree_strtab[] =" entry_strtab
  
-  prefix = "\nenum {\n";
-  for (item in entry_offsets) {
-    printf "%s\t%s = %d", prefix, item, entry_offsets[item];
-    prefix = ",\n";
+    for (item in entry_offsets) {
+      printf "%s\t%s = %d", prefix, item, entry_offsets[item];
+      prefix = ",\n";
+    }
    }
+
    for (i in subtree_offsets) {
-    printf ",\n\t%s_OFFSET = %d", i, subtree_offsets[i];
+    printf "%s\t%s_OFFSET = %d", prefix, i, subtree_offsets[i];
+    prefix = ",\n";
+  }
+  if (!index(prefix, "enum")) {
+    print "\n};";
    }
-  print "\n};";
  }
  
  END {
@@ -159,7 +225,7 @@ function format_items(s, i)
      depth--;
    }
  
-  for (i = 2; tree_items[i]; i++) {
+  for (i = 2; tree_items[i] != ""; i++) {
      level_count[i] += level_count[i-1];
    }
  
@@ -168,7 +234,7 @@ function format_items(s, i)
      delete subtree_depth[i];
    }
  
-  for (i = 1; tree_items[i]; i++) {
+  for (i = 1; tree_items[i] != ""; i++) {
      s = s tree_items[i];
      delete tree_items[i];
      delete level_count[i];
@@ -184,25 +250,28 @@ function format_items(s, i)
  # placing them into dst[0] ... dst[n].
  #
  # Returns the number of elements.
-function bucketsort(dst, src, buckets, max, count, i, t)
+function bucketsort(dst, src, max, count, i, t)
  {
+  # Note: ULTRIX 4.5 nawk does not support local array parameters
+  split("", bucketsort_buckets);
+
    for (t in src) {
      i = length(src[t])
      if (i > max) { max = i }
-    buckets[i]++
+    bucketsort_buckets[i]++
    }
  
    for (i = max; i > 0; i--) {
-    if (i in buckets) {
-      t = buckets[i]
-      buckets[i] = count
+    if (i in bucketsort_buckets) {
+      t = bucketsort_buckets[i]
+      bucketsort_buckets[i] = count
        count += t
      }
    }
  
    for (t in src) {
      i = length(t = src[t])
-    dst[buckets[i]++] = t
+    dst[bucketsort_buckets[i]++] = t
    }
  
    return count