From: Nick Bowler Date: Sat, 18 Jun 2022 01:29:28 +0000 (-0400) Subject: gen-tree.awk: Add options to tweak the strtab output. X-Git-Url: https://git.draconx.ca/gitweb/dxcommon.git/commitdiff_plain/cd475ab947ddc0338bd54406234f9cbfa5abe2fd gen-tree.awk: Add options to tweak the strtab output. Add a @nostrtab option to disable the string table output in tree generation, and make it possible to omit the node identifiers from the initializers. --- diff --git a/scripts/gen-tree.awk b/scripts/gen-tree.awk index 53557f1..d04378a 100755 --- a/scripts/gen-tree.awk +++ b/scripts/gen-tree.awk @@ -1,14 +1,18 @@ #!/bin/awk -f # -# Copyright © 2021 Nick Bowler +# Copyright © 2021-2022 Nick Bowler # # Generate one or more C array initializers to encode simple tree structures # in a compact format. # -# Each line of the input file defines a tree node. The first field must be a -# C identifier, which may be optionally followed by a comma. The identifiers -# used on non-leaf nodes must be unique with respect to other non-leaf nodes, -# but leaf nodes may share the same identifier as other nodes. +# Each nonempty line of the input file is either an option specification +# or a tree specification. An option specification (described later) +# begins with an @ character. Other lines specify tree nodes. +# +# The first field of a tree specification must be a valid C identifier, +# optionally followed by a comma. The identifiers used on non-leaf nodes +# must be unique with respect to other non-leaf nodes, but leaf nodes may +# share the same identifier as other nodes. # # The tree structure is expressed through indentation. Lines with no leading # whitespace designate root nodes. When the number of white-space characters @@ -30,11 +34,20 @@ # # For each root node X, the object-like macro X_INITIALIZER is defined. This # can be used to initialize an array with static storage duration. 
Each line -# of the input is surrounded by braces to construct the initializer for one -# array element. These are then ordered topologically beginning with the root, -# but the root itself is not included. Each subtree is terminated by an -# "empty" element (with an initializer of {0}). For example, the above input -# will produce this (or an equivalent topological ordering) initializer: +# of the input defines the initializer for one array element. These are +# then ordered topologically beginning with the root, but the root itself is +# not included. Each subtree is terminated by an "empty" element (with an +# initializer of {0}). +# +# If there is a comma after the identifier which begins a tree element, or +# if the identifier is the only text on the line, then the element initializer +# is constructed by surrounding the entire line by braces. Otherwise, the +# identifier is removed and the remainder of the line is surrounded by braces +# to construct the initializer. This allows the exact form of the initializer +# to be customized. +# +# For example, the above input will produce this (or an equivalent topological +# ordering) initializer: # # OUTPUT: #define root_INITIALIZER \ # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0} @@ -55,6 +68,13 @@ # constants will normally be in scope when using the root_INITIALIZER macro, # which means the resulting array initializers will use them. # +# Options may be used to alter the normal behaviour. They may be placed +# anywhere in the input file. The following options are defined: +# +# @nostrtab +# Do not output a definition for tree_strtab or its associated +# enumeration constants. +# # License WTFPL2: Do What The Fuck You Want To Public License, version 2. # This is free software: you are free to do what the fuck you want to. # There is NO WARRANTY, to the extent permitted by law. 
@@ -71,6 +91,8 @@ END { } BEGIN { + opts["strtab"] = 1; + depth = max_depth = 0; num_entries = 0; @@ -80,6 +102,22 @@ BEGIN { indent_stack[0] = 0; } +# Options +sub(/^@/, "", $0) { + if (NF == 1) { + orig=$1 + gsub(/-/, "_", $1); + val = !sub(/^no_?/, "", $1); + if ($1 in opts) { + opts[$1] = val; + } else { + print "error: unrecognized option: @" orig | "cat 1>&2" + exit 1 + } + } + next +} + NF == 0 { next } { indent = index($0, $1) - 1 } @@ -101,7 +139,19 @@ indent > 0 { entry_name = $1; sub(/,$/, "", entry_name); all_items[num_entries++] = entry_name; - $1 = ", \\\n\t{ " $1; $NF = $NF " }"; + # Construct the element initializer for this tree node. + $1 = " " $1; + if (NF > 1) { + # Check if entry name is followed by a comma. If it is not, the entry + # name is excluded from the initializer. + check_str = $1$2; + gsub(/[ \t]/, "", check_str); + if (index(check_str, entry_name ",") == 0) { + $1 = ""; + } + } + $1 = ", \\\n\t{" $1; $NF = $NF " }"; + tree_items[depth] = tree_items[depth] $0; level_count[depth]++; } @@ -117,32 +167,39 @@ END { indent == 0 { tree_identifier = $1 } END { - entry_strtab = "\1"; - bucketsort(sorted_items, all_items); - for (i = 0; i < num_entries; i++) { - s = sorted_items[i]; - if ((n = index(entry_strtab, s "\1")) > 0) { - entry_offsets[s] = n-1; - } else { - entry_offsets[s] = length(entry_strtab); - entry_strtab = entry_strtab s "\1"; + prefix = "\nenum {\n"; + + if (opts["strtab"]) { + entry_strtab = "\1"; + bucketsort(sorted_items, all_items); + for (i = 0; i < num_entries; i++) { + s = sorted_items[i]; + if ((n = index(entry_strtab, s "\1")) > 0) { + entry_offsets[s] = n-1; + } else { + entry_offsets[s] = length(entry_strtab); + entry_strtab = entry_strtab s "\1"; + } } - } - gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab); - sub(/^"/, "", entry_strtab); - sub(/\n[^\n]*$/, ";", entry_strtab); - print "\nstatic const char tree_strtab[] =" entry_strtab + gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab); + sub(/^"/, "", entry_strtab); + 
sub(/\n[^\n]*$/, ";", entry_strtab); + print "\nstatic const char tree_strtab[] =" entry_strtab - prefix = "\nenum {\n"; - for (item in entry_offsets) { - printf "%s\t%s = %d", prefix, item, entry_offsets[item]; - prefix = ",\n"; + for (item in entry_offsets) { + printf "%s\t%s = %d", prefix, item, entry_offsets[item]; + prefix = ",\n"; + } } + for (i in subtree_offsets) { - printf ",\n\t%s_OFFSET = %d", i, subtree_offsets[i]; + printf "%s\t%s_OFFSET = %d", prefix, i, subtree_offsets[i]; + prefix = ",\n"; + } + if (!index(prefix, "enum")) { + print "\n};"; } - print "\n};"; } END { diff --git a/tests/scripts.at b/tests/scripts.at index 4a83807..dcbcce5 100644 --- a/tests/scripts.at +++ b/tests/scripts.at @@ -7,6 +7,7 @@ dnl There is NO WARRANTY, to the extent permitted by law. AT_BANNER([Script tests]) AT_SETUP([gen-options.awk]) +AT_KEYWORDS([gen-options awk script scripts]) AT_DATA([options.def], [[--option-only @@ -220,6 +221,7 @@ p AT_CLEANUP AT_SETUP([gen-strtab.awk]) +AT_KEYWORDS([gen-strtab awk script scripts]) AT_DATA([test.def], [[ @@ -301,6 +303,8 @@ oneline AT_CLEANUP AT_SETUP([gen-tree.awk]) +AT_KEYWORDS([gen-tree awk script scripts]) + AT_DATA([tree.def], [[ROOT0 r0a, r0a_OFFSET @@ -359,6 +363,7 @@ int main(void) print_subtree(tree0, 0, 1); printf("ROOT1\n"); print_subtree(tree1, 0, 1); + return 0; } ]]) cp tree.def expout @@ -366,7 +371,76 @@ AT_CHECK([$CC -o test0$EXEEXT test0.c && ./test0$EXEEXT], [0], [expout]) AT_CLEANUP +# Test the gen-tree features that avoid creating string labels for nodes. 
+AT_SETUP([gen-tree.awk @nostrtab option]) +AT_KEYWORDS([gen-tree awk script scripts]) + +AT_DATA([tree.def], +[[@nostrtab +ROOT + a 1, a_OFFSET + b 1 + c 2 + d 2, d_OFFSET + e 1 + f 2 +]]) +AT_CHECK([$AWK -f "$builddir/scripts/gen-tree.awk" <tree.def >tree.h]) + +AT_DATA([test0.c], +[[float tree_strtab = 0; +#define a [] +#define b [] +#define c [] +#define e [] +#define f [] +#include "tree.h" +#include <stdio.h> + +static struct { int num, offset; } root[] = { ROOT_INITIALIZER }; + +int main(void) +{ + unsigned i; + for (i = 0; i < sizeof root / sizeof root[0]; i++) { + printf("%d, %d\n", root[i].num, root[i].offset); + } +} +]]) + +AT_CHECK([$CC -o test0$EXEEXT test0.c && ./test0$EXEEXT], [0], +[[1, 3 +2, 6 +0, 0 +1, 0 +2, 0 +0, 0 +1, 0 +2, 0 +0, 0 +]]) + +AT_DATA([flat.def], +[[FLAT + a 1 + b 2 + c 3 +@nostrtab +]]) +AT_CHECK([$AWK -f "$builddir/scripts/gen-tree.awk" <flat.def >flat.h]) + +sed -e 's/tree\.h/flat.h/' -e 's/ROOT/FLAT/' test0.c >test1.c +AT_CHECK([$CC -o test1$EXEEXT test1.c && ./test1$EXEEXT], [0], +[[1, 0 +2, 0 +3, 0 +0, 0 +]]) + +AT_CLEANUP + AT_SETUP([join.awk]) +AT_KEYWORDS([join awk script scripts]) JOIN="$AWK -f $builddir/scripts/join.awk --"