#!/bin/awk -f
#
-# Copyright © 2021 Nick Bowler
+# Copyright © 2021-2022 Nick Bowler
#
# Generate one or more C array initializers to encode simple tree structures
# in a compact format.
#
-# Each line of the input file defines a tree node. The first field must be a
-# C identifier, which may be optionally followed by a comma. The identifiers
-# used on non-leaf nodes must be unique with respect to other non-leaf nodes,
-# but leaf nodes may share the same identifier as other nodes.
+# Each nonempty line of the input file is either a comment, an option
+# specification or a tree specification. Each line is distinguished by
+# its first character. A # character introduces a comment, an @ character
+# introduces an option specification (described later), and all other
+# nonempty lines are tree nodes.
+#
+# The first field of a tree specification must be a valid C identifier,
+# optionally followed by a comma. The identifiers used on non-leaf nodes
+# must be unique with respect to other non-leaf nodes, but leaf nodes may
+# share the same identifier as other nodes.
#
# The tree structure is expressed through indentation. Lines with no leading
# whitespace designate root nodes. When the number of white-space characters
#
# For each root node X, the object-like macro X_INITIALIZER is defined. This
# can be used to initialize an array with static storage duration. Each line
-# of the input is surrounded by braces to construct the initializer for one
-# array element. These are then ordered topologically beginning with the root,
-# but the root itself is not included. Each subtree is terminated by an
-# "empty" element (with an initializer of {0}). For example, the above input
-# will produce this (or an equivalent topological ordering) initializer:
+# of the input is defines the initializer for one array element. These are
+# then ordered topologically beginning with the root, but the root itself is
+# not included. Each subtree is terminated by an "empty" element (with an
+# initializer of {0}).
+#
+# If there is a comma after the identifier which begins a tree element, or
+# if the identifier is the only text on the line, then the element initializer
+# is constructed by surrounding the entire line by braces. Otherwise, the
+# identifier is removed and the remainder of the line is surrounded by braces
+# to construct the initializer. This allows the exact form of the initializer
+# to be customized.
+#
+# For example, the above input will produce this (or an equivalent topological
+# ordering) initializer:
#
# OUTPUT: #define root_INITIALIZER \
# {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0}
# constants will normally be in scope when using the root_INITIALIZER macro,
# which means the resulting array initializers will use them.
#
+# Options may be used to alter the normal behaviour. They may be placed
+# anywhere in the input file. The following options are defined:
+#
+# @nostrtab
+# Do not output a definition for tree_strtab or its associated
+# enumeration constants.
+#
# License WTFPL2: Do What The Fuck You Want To Public License, version 2.
# This is free software: you are free to do what the fuck you want to.
# There is NO WARRANTY, to the extent permitted by law.
}
BEGIN {
+ # Check if "\\\\" in substitutions gives just one backslash.
+ bs = "x"; sub(/x/, "\\\\", bs);
+ bs = (length(bs) == 1 ? "\\\\" : "\\");
+
+ opts["strtab"] = 1;
+
depth = max_depth = 0;
num_entries = 0;
indent_stack[0] = 0;
}
+# Comments
NF == 0 { next }
+$0 ~ /^#/ { next }
+
+# Options
+sub(/^@/, "", $0) {
+ if (NF == 1) {
+ orig=$1
+ gsub(/-/, "_", $1);
+ val = !sub(/^no_?/, "", $1);
+ if ($1 in opts) {
+ opts[$1] = val;
+ } else {
+ print "error: unrecognized option: @" orig | "cat 1>&2"
+ exit 1
+ }
+ }
+ next
+}
+
{ indent = index($0, $1) - 1 }
indent > 0 {
entry_name = $1; sub(/,$/, "", entry_name);
all_items[num_entries++] = entry_name;
- $1 = ", \\\n\t{ " $1; $NF = $NF " }";
+ # Construct the element initializer for this tree node.
+ $1 = " " $1;
+ if (NF > 1) {
+ # Check if entry name is followed by a comma. If it is not, the entry
+ # name is excluded from the initializer.
+ check_str = $1$2;
+ gsub(/[ \t]/, "", check_str);
+ if (index(check_str, entry_name ",") == 0) {
+ $1 = "";
+ }
+ }
+ $1 = ", \\\n\t{" $1; $NF = $NF " }";
+
tree_items[depth] = tree_items[depth] $0;
level_count[depth]++;
}
-indent == 0 && tree_identifier {
+indent == 0 && tree_identifier != "" {
trees[tree_identifier] = format_items();
tree_identifier = "";
}
END {
- if (tree_identifier)
+ if (tree_identifier != "")
trees[tree_identifier] = format_items()
}
indent == 0 { tree_identifier = $1 }
END {
- entry_strtab = "\1";
- bucketsort(sorted_items, all_items);
- for (i = 0; i < num_entries; i++) {
- s = sorted_items[i];
- if ((n = index(entry_strtab, s "\1")) > 0) {
- entry_offsets[s] = n-1;
- } else {
- entry_offsets[s] = length(entry_strtab);
- entry_strtab = entry_strtab s "\1";
+ prefix = "\nenum {\n";
+
+ if (opts["strtab"]) {
+ entry_strtab = "\1";
+ bucketsort(sorted_items, all_items);
+ for (i = 0; i < num_entries; i++) {
+ s = sorted_items[i];
+ if ((n = index(entry_strtab, s "\1")) > 0) {
+ entry_offsets[s] = n-1;
+ } else {
+ entry_offsets[s] = length(entry_strtab);
+ entry_strtab = entry_strtab s "\1";
+ }
}
- }
- gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab);
- sub(/^"/, "", entry_strtab);
- sub(/\n[^\n]*$/, ";", entry_strtab);
- print "\nstatic const char tree_strtab[] =" entry_strtab
+ gsub("\1", "\"\n\t\"" bs "0\" \"", entry_strtab);
+ sub(/^"/, "", entry_strtab);
+ sub(/\n[^\n]*$/, ";", entry_strtab);
+ print "\nstatic const char tree_strtab[] =" entry_strtab
- prefix = "\nenum {\n";
- for (item in entry_offsets) {
- printf "%s\t%s = %d", prefix, item, entry_offsets[item];
- prefix = ",\n";
+ for (item in entry_offsets) {
+ printf "%s\t%s = %d", prefix, item, entry_offsets[item];
+ prefix = ",\n";
+ }
}
+
for (i in subtree_offsets) {
- printf ",\n\t%s_OFFSET = %d", i, subtree_offsets[i];
+ printf "%s\t%s_OFFSET = %d", prefix, i, subtree_offsets[i];
+ prefix = ",\n";
+ }
+ if (!index(prefix, "enum")) {
+ print "\n};";
}
- print "\n};";
}
END {
depth--;
}
- for (i = 2; tree_items[i]; i++) {
+ for (i = 2; tree_items[i] != ""; i++) {
level_count[i] += level_count[i-1];
}
delete subtree_depth[i];
}
- for (i = 1; tree_items[i]; i++) {
+ for (i = 1; tree_items[i] != ""; i++) {
s = s tree_items[i];
delete tree_items[i];
delete level_count[i];
# placing them into dst[0] ... dst[n].
#
# Returns the number of elements.
-function bucketsort(dst, src, buckets, max, count, i, t)
+function bucketsort(dst, src, max, count, i, t)
{
+ # Note: ULTRIX 4.5 nawk does not support local array parameters
+ split("", bucketsort_buckets);
+
for (t in src) {
i = length(src[t])
if (i > max) { max = i }
- buckets[i]++
+ bucketsort_buckets[i]++
}
for (i = max; i > 0; i--) {
- if (i in buckets) {
- t = buckets[i]
- buckets[i] = count
+ if (i in bucketsort_buckets) {
+ t = bucketsort_buckets[i]
+ bucketsort_buckets[i] = count
count += t
}
}
for (t in src) {
i = length(t = src[t])
- dst[buckets[i]++] = t
+ dst[bucketsort_buckets[i]++] = t
}
return count