From: Nick Bowler Date: Thu, 7 Oct 2021 04:46:33 +0000 (-0400) Subject: Add a script to generate constant tree structures. X-Git-Url: https://git.draconx.ca/gitweb/dxcommon.git/commitdiff_plain/cd522a4c86728df10a96970f6a7bd99b823afc38 Add a script to generate constant tree structures. This can be useful for generating compact compile-time representations of menus or whatever else comes to mind. --- diff --git a/scripts/gen-strtab.awk b/scripts/gen-strtab.awk index f844d4f..1f27b5c 100755 --- a/scripts/gen-strtab.awk +++ b/scripts/gen-strtab.awk @@ -41,6 +41,10 @@ # # The object-like macro STRTAB_MAX_OFFSET is defined and expands to the # greatest string offset, suitable for use in #if preprocessing directives. +# +# License WTFPL2: Do What The Fuck You Want To Public License, version 2. +# This is free software: you are free to do what the fuck you want to. +# There is NO WARRANTY, to the extent permitted by law. END { print "/*" diff --git a/scripts/gen-tree.awk b/scripts/gen-tree.awk new file mode 100755 index 0000000..53557f1 --- /dev/null +++ b/scripts/gen-tree.awk @@ -0,0 +1,209 @@ +#!/bin/awk -f +# +# Copyright © 2021 Nick Bowler +# +# Generate one or more C array initializers to encode simple tree structures +# in a compact format. +# +# Each line of the input file defines a tree node. The first field must be a +# C identifier, which may be optionally followed by a comma. The identifiers +# used on non-leaf nodes must be unique with respect to other non-leaf nodes, +# but leaf nodes may share the same identifier as other nodes. +# +# The tree structure is expressed through indentation. Lines with no leading +# whitespace designate root nodes. When the number of white-space characters +# at the beginning of a line is greater than that of the previous line, this +# introduces a new subtree rooted at the previous node. When the number of +# leading white-space characters is less than the previous line, this concludes +# the definition of the current subtree. +# +# For example, the input on the left describes the tree on the right. +# +# INPUT: root TREE: root -- a -- b -- c +# a | | +# b | + -- d +# c | +# d + -- e -- f +# e | +# f + -- g +# g +# +# For each root node X, the object-like macro X_INITIALIZER is defined. This +# can be used to initialize an array with static storage duration. Each line +# of the input is surrounded by braces to construct the initializer for one +# array element. These are then ordered topologically beginning with the root, +# but the root itself is not included. Each subtree is terminated by an +# "empty" element (with an initializer of {0}). For example, the above input +# will produce this (or an equivalent topological ordering) initializer: +# +# OUTPUT: #define root_INITIALIZER \ +# {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0} +# +# Constants representing the array offset for each subtree are additionally +# defined. For a non-leaf node X, the enumeration constant X_OFFSET gives the +# index of the first node rooted at X. With the above example, the results +# are: +# +# OUTPUT: enum { a_OFFSET = 3, b_OFFSET = 9, e_OFFSET = 6 }; +# +# By using an array of structures to represent the tree and including these +# constants in initializers, it is possible for the flattened tree to describe +# its own structure. +# +# Finally, each node identifier is defined as an enumeration constant +# representing the nonzero offset into a string table tree_strtab. These +# constants will normally be in scope when using the root_INITIALIZER macro, +# which means the resulting array initializers will use them. +# +# License WTFPL2: Do What The Fuck You Want To Public License, version 2. +# This is free software: you are free to do what the fuck you want to. +# There is NO WARRANTY, to the extent permitted by law. + +END { + print "/*" + if (FILENAME) { + print " * Automatically generated by gen-tree.awk from " FILENAME + } else { + print " * Automatically generated by gen-tree.awk" + } + print " * Do not edit." + print " */" +} + +BEGIN { + depth = max_depth = 0; + num_entries = 0; + + tree_name = ""; + entry_name = ""; + + indent_stack[0] = 0; +} + +NF == 0 { next } +{ indent = index($0, $1) - 1 } + +indent > 0 { + if (indent > indent_stack[depth]) { + indent_stack[++depth] = indent; + if (depth > 1) { + subtree_offsets[entry_name] = level_count[depth]; + subtree_depth[entry_name] = depth; + } + } else { + while (indent < indent_stack[depth]) { + tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }"; + level_count[depth]++; + depth--; + } + } + + entry_name = $1; sub(/,$/, "", entry_name); + all_items[num_entries++] = entry_name; + + $1 = ", \\\n\t{ " $1; $NF = $NF " }"; + tree_items[depth] = tree_items[depth] $0; + level_count[depth]++; +} + +indent == 0 && tree_identifier { + trees[tree_identifier] = format_items(); + tree_identifier = ""; +} +END { + if (tree_identifier) + trees[tree_identifier] = format_items() +} +indent == 0 { tree_identifier = $1 } + +END { + entry_strtab = "\1"; + bucketsort(sorted_items, all_items); + for (i = 0; i < num_entries; i++) { + s = sorted_items[i]; + if ((n = index(entry_strtab, s "\1")) > 0) { + entry_offsets[s] = n-1; + } else { + entry_offsets[s] = length(entry_strtab); + entry_strtab = entry_strtab s "\1"; + } + } + + gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab); + sub(/^"/, "", entry_strtab); + sub(/\n[^\n]*$/, ";", entry_strtab); + print "\nstatic const char tree_strtab[] =" entry_strtab + + prefix = "\nenum {\n"; + for (item in entry_offsets) { + printf "%s\t%s = %d", prefix, item, entry_offsets[item]; + prefix = ",\n"; + } + for (i in subtree_offsets) { + printf ",\n\t%s_OFFSET = %d", i, subtree_offsets[i]; + } + print "\n};"; +} + +END { + for (tree in trees) { + print "\n" trees[tree]; + } +} + +function format_items(s, i) +{ + while (depth > 0) { + tree_items[depth] = tree_items[depth] ", \\\n\t{ 0 }"; + level_count[depth]++; + depth--; + } + + for (i = 2; tree_items[i]; i++) { + level_count[i] += level_count[i-1]; + } + + for (i in subtree_depth) { + subtree_offsets[i] += level_count[subtree_depth[i]-1]; + delete subtree_depth[i]; + } + + for (i = 1; tree_items[i]; i++) { + s = s tree_items[i]; + delete tree_items[i]; + delete level_count[i]; + } + + sub(/^,/, "", s); + return "#define " tree_identifier "_INITIALIZER" s; +} + +# bucketsort(dst, src) +# +# Sort the elements of src by descending string length, +# placing them into dst[0] ... dst[n]. +# +# Returns the number of elements. +function bucketsort(dst, src, buckets, max, count, i, t) +{ + for (t in src) { + i = length(src[t]) + if (i > max) { max = i } + buckets[i]++ + } + + for (i = max; i > 0; i--) { + if (i in buckets) { + t = buckets[i] + buckets[i] = count + count += t + } + } + + for (t in src) { + i = length(t = src[t]) + dst[buckets[i]++] = t + } + + return count +} diff --git a/tests/scripts.at b/tests/scripts.at index d97a6c7..f807650 100644 --- a/tests/scripts.at +++ b/tests/scripts.at @@ -299,3 +299,69 @@ oneline ], [ignore]) AT_CLEANUP + +AT_SETUP([gen-tree.awk]) +AT_DATA([tree.def], +[[ROOT0 + r0a, r0a_OFFSET + r0b, r0b_OFFSET + r0c + r0d + r0e, r0e_OFFSET + r0f + r0g +ROOT1 + r1a, r1a_OFFSET + r1b, r1b_OFFSET + r1b + r1e + r1b + r1c, r1c_OFFSET + r1d, r1d_OFFSET + r1e + r1b + r1e +]]) + +AT_CHECK([$AWK -f "$builddir/scripts/gen-tree.awk" tree.h]) + +AT_DATA([test0.c], +[[#include "tree.h" +#include + +struct tree { unsigned id, subtree; }; + +static const struct tree tree0[] = { + ROOT0_INITIALIZER +}; +static const struct tree tree1[] = { + ROOT1_INITIALIZER +}; + +void print_subtree(const struct tree *root, unsigned offset, int depth) +{ + const struct tree *node; + + for (node = &root[offset]; node->id; node++) { + printf("%*s%s", 2*depth, "", &tree_strtab[node->id]); + if (node->subtree) { + printf(", %s_OFFSET\n", &tree_strtab[node->id]); + print_subtree(root, node->subtree, depth+1); + } else { + putchar('\n'); + } + } +} + +int main(void) +{ + printf("ROOT0\n"); + print_subtree(tree0, 0, 1); + printf("ROOT1\n"); + print_subtree(tree1, 0, 1); +} +]]) +cp tree.def expout +AT_CHECK([$CC -o test0$EXEEXT test0.c && ./test0$EXEEXT], [0], [expout]) + +AT_CLEANUP