3 # Copyright © 2021 Nick Bowler
5 # Generate one or more C array initializers to encode simple tree structures
8 # Each line of the input file defines a tree node. The first field must be a
9 # C identifier, which may be optionally followed by a comma. The identifiers
10 # used on non-leaf nodes must be unique with respect to other non-leaf nodes,
11 # but leaf nodes may share the same identifier as other nodes.
13 # The tree structure is expressed through indentation. Lines with no leading
14 # whitespace designate root nodes. When the number of white-space characters
15 # at the beginning of a line is greater than that of the previous line, this
16 # introduces a new subtree rooted at the previous node. When the number of
17 # leading white-space characters is less than the previous line, this concludes
18 # the definition of the current subtree.
20 # For example, the input on the left describes the tree on the right.
22 # INPUT: root TREE: root -- a -- b -- c
31 # For each root node X, the object-like macro X_INITIALIZER is defined. This
32 # can be used to initialize an array with static storage duration. Each line
33 # of the input is surrounded by braces to construct the initializer for one
34 # array element. These are then ordered topologically beginning with the root,
35 # but the root itself is not included. Each subtree is terminated by an
36 # "empty" element (with an initializer of {0}). For example, the above input
37 # will produce this (or an equivalent topological ordering) initializer:
39 # OUTPUT: #define root_INITIALIZER \
40 # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0}
42 # Constants representing the array offset for each subtree are additionally
43 # defined. For a non-leaf node X, the enumeration constant X_OFFSET gives the
44 # index of the first node rooted at X. With the above example, the results
47 # OUTPUT: enum { a_OFFSET = 3, b_OFFSET = 9, e_OFFSET = 6 };
49 # By using an array of structures to represent the tree and including these
50 # constants in initializers, it is possible for the flattened tree to describe
53 # Finally, each node identifier is defined as an enumeration constant
54 # representing the nonzero offset into a string table tree_strtab. These
55 # constants will normally be in scope when using the root_INITIALIZER macro,
56 # which means the resulting array initializers will use them.
58 # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
59 # This is free software: you are free to do what the fuck you want to.
60 # There is NO WARRANTY, to the extent permitted by law.
65 print " * Automatically generated by gen-tree.awk from " FILENAME
67 print " * Automatically generated by gen-tree.awk"
69 print " * Do not edit."
74 depth = max_depth = 0;
84 { indent = index($0, $1) - 1 }
87 if (indent > indent_stack[depth]) {
88 indent_stack[++depth] = indent;
90 subtree_offsets[entry_name] = level_count[depth];
91 subtree_depth[entry_name] = depth;
94 while (indent < indent_stack[depth]) {
95 tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }";
101 entry_name = $1; sub(/,$/, "", entry_name);
102 all_items[num_entries++] = entry_name;
104 $1 = ", \\\n\t{ " $1; $NF = $NF " }";
105 tree_items[depth] = tree_items[depth] $0;
106 level_count[depth]++;
109 indent == 0 && tree_identifier {
110 trees[tree_identifier] = format_items();
111 tree_identifier = "";
115 trees[tree_identifier] = format_items()
117 indent == 0 { tree_identifier = $1 }
121 bucketsort(sorted_items, all_items);
122 for (i = 0; i < num_entries; i++) {
124 if ((n = index(entry_strtab, s "\1")) > 0) {
125 entry_offsets[s] = n-1;
127 entry_offsets[s] = length(entry_strtab);
128 entry_strtab = entry_strtab s "\1";
132 gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab);
133 sub(/^"/, "", entry_strtab);
134 sub(/\n[^\n]*$/, ";", entry_strtab);
135 print "\nstatic const char tree_strtab[] =" entry_strtab
137 prefix = "\nenum {\n";
138 for (item in entry_offsets) {
139 printf "%s\t%s = %d", prefix, item, entry_offsets[item];
142 for (i in subtree_offsets) {
143 printf ",\n\t%s_OFFSET = %d", i, subtree_offsets[i];
149 for (tree in trees) {
150 print "\n" trees[tree];
154 function format_items(s, i)
157 tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }";
158 level_count[depth]++;
162 for (i = 2; tree_items[i]; i++) {
163 level_count[i] += level_count[i-1];
166 for (i in subtree_depth) {
167 subtree_offsets[i] += level_count[subtree_depth[i]-1];
168 delete subtree_depth[i];
171 for (i = 1; tree_items[i]; i++) {
173 delete tree_items[i];
174 delete level_count[i];
178 return "#define " tree_identifier "_INITIALIZER" s;
181 # bucketsort(dst, src)
183 # Sort the elements of src by descending string length,
184 # placing them into dst[0] ... dst[n].
186 # Returns the number of elements.
187 function bucketsort(dst, src, buckets, max, count, i, t)
191 if (i > max) { max = i }
195 for (i = max; i > 0; i--) {
204 i = length(t = src[t])
205 dst[buckets[i]++] = t