3 # Copyright © 2021-2022 Nick Bowler
5 # Generate one or more C array initializers to encode simple tree structures
8 # Each nonempty line of the input file is either an option specification
9 # or # a tree specification. An option specification (described later)
10 # begins with an @ character. Other lines specify tree nodes.
12 # The first field of a tree specification must be a valid C identifier,
13 # optionally followed by a comma. The identifiers used on non-leaf nodes
14 # must be unique with respect to other non-leaf nodes, but leaf nodes may
15 # share the same identifier as other nodes.
17 # The tree structure is expressed through indentation. Lines with no leading
18 # whitespace designate root nodes. When the number of white-space characters
19 # at the beginning of a line is greater than that of the previous line, this
20 # introduces a new subtree rooted at the previous node. When the number of
21 # leading white-space characters is less than the previous line, this concludes
22 # the definition of the current subtree.
24 # For example, the input on the left describes the tree on the right.
26 # INPUT: root TREE: root -- a -- b -- c
35 # For each root node X, the object-like macro X_INITIALIZER is defined. This
36 # can be used to initialize an array with static storage duration. Each line
37 # of the input is defines the initializer for one array element. These are
38 # then ordered topologically beginning with the root, but the root itself is
39 # not included. Each subtree is terminated by an "empty" element (with an
40 # initializer of {0}).
42 # If there is a comma after the identifier which begins a tree element, or
43 # if the identifier is the only text on the line, then the element initializer
44 # is constructed by surrounding the entire line by braces. Otherwise, the
45 # identifier is removed and the remainder of the line is surrounded by braces
46 # to construct the initializer. This allows the exact form of the initializer
49 # For example, the above input will produce this (or an equivalent topological
50 # ordering) initializer:
52 # OUTPUT: #define root_INITIALIZER \
53 # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0}
55 # Constants representing the array offset for each subtree are additionally
56 # defined. For a non-leaf node X, the enumeration constant X_OFFSET gives the
57 # index of the first node rooted at X. With the above example, the results
60 # OUTPUT: enum { a_OFFSET = 3, b_OFFSET = 9, e_OFFSET = 6 };
62 # By using an array of structures to represent the tree and including these
63 # constants in initializers, it is possible for the flattened tree to describe
66 # Finally, each node identifier is defined as an enumeration constant
67 # representing the nonzero offset into a string table tree_strtab. These
68 # constants will normally be in scope when using the root_INITIALIZER macro,
69 # which means the resulting array initializers will use them.
71 # Options may be used to alter the normal behaviour. They may be placed
72 # anywhere in the input file. The following options are defined:
75 # Do not output a definition for tree_strtab or its associated
76 # enumeration constants.
78 # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
79 # This is free software: you are free to do what the fuck you want to.
80 # There is NO WARRANTY, to the extent permitted by law.
85 print " * Automatically generated by gen-tree.awk from " FILENAME
87 print " * Automatically generated by gen-tree.awk"
89 print " * Do not edit."
96 depth = max_depth = 0;
110 val = !sub(/^no_?/, "", $1);
114 print "error: unrecognized option: @" orig | "cat 1>&2"
122 { indent = index($0, $1) - 1 }
125 if (indent > indent_stack[depth]) {
126 indent_stack[++depth] = indent;
128 subtree_offsets[entry_name] = level_count[depth];
129 subtree_depth[entry_name] = depth;
132 while (indent < indent_stack[depth]) {
133 tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }";
134 level_count[depth]++;
139 entry_name = $1; sub(/,$/, "", entry_name);
140 all_items[num_entries++] = entry_name;
142 # Construct the element initializer for this tree node.
145 # Check if entry name is followed by a comma. If it is not, the entry
146 # name is excluded from the initializer.
148 gsub(/[ \t]/, "", check_str);
149 if (index(check_str, entry_name ",") == 0) {
153 $1 = ", \\\n\t{" $1; $NF = $NF " }";
155 tree_items[depth] = tree_items[depth] $0;
156 level_count[depth]++;
159 indent == 0 && tree_identifier {
160 trees[tree_identifier] = format_items();
161 tree_identifier = "";
165 trees[tree_identifier] = format_items()
167 indent == 0 { tree_identifier = $1 }
170 prefix = "\nenum {\n";
172 if (opts["strtab"]) {
174 bucketsort(sorted_items, all_items);
175 for (i = 0; i < num_entries; i++) {
177 if ((n = index(entry_strtab, s "\1")) > 0) {
178 entry_offsets[s] = n-1;
180 entry_offsets[s] = length(entry_strtab);
181 entry_strtab = entry_strtab s "\1";
185 gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab);
186 sub(/^"/, "", entry_strtab);
187 sub(/\n[^\n]*$/, ";", entry_strtab);
188 print "\nstatic const char tree_strtab[] =" entry_strtab
190 for (item in entry_offsets) {
191 printf "%s\t%s = %d", prefix, item, entry_offsets[item];
196 for (i in subtree_offsets) {
197 printf "%s\t%s_OFFSET = %d", prefix, i, subtree_offsets[i];
200 if (!index(prefix, "enum")) {
206 for (tree in trees) {
207 print "\n" trees[tree];
211 function format_items(s, i)
214 tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }";
215 level_count[depth]++;
219 for (i = 2; tree_items[i]; i++) {
220 level_count[i] += level_count[i-1];
223 for (i in subtree_depth) {
224 subtree_offsets[i] += level_count[subtree_depth[i]-1];
225 delete subtree_depth[i];
228 for (i = 1; tree_items[i]; i++) {
230 delete tree_items[i];
231 delete level_count[i];
235 return "#define " tree_identifier "_INITIALIZER" s;
238 # bucketsort(dst, src)
240 # Sort the elements of src by descending string length,
241 # placing them into dst[0] ... dst[n].
243 # Returns the number of elements.
244 function bucketsort(dst, src, buckets, max, count, i, t)
248 if (i > max) { max = i }
252 for (i = max; i > 0; i--) {
261 i = length(t = src[t])
262 dst[buckets[i]++] = t