X-Git-Url: http://git.draconx.ca/gitweb/dxcommon.git/blobdiff_plain/cd522a4c86728df10a96970f6a7bd99b823afc38..075a8578cc9467d07ccff7688bfd0f4855712e1c:/scripts/gen-tree.awk diff --git a/scripts/gen-tree.awk b/scripts/gen-tree.awk index 53557f1..c461e5d 100755 --- a/scripts/gen-tree.awk +++ b/scripts/gen-tree.awk @@ -1,14 +1,20 @@ #!/bin/awk -f # -# Copyright © 2021 Nick Bowler +# Copyright © 2021-2022 Nick Bowler # # Generate one or more C array initializers to encode simple tree structures # in a compact format. # -# Each line of the input file defines a tree node. The first field must be a -# C identifier, which may be optionally followed by a comma. The identifiers -# used on non-leaf nodes must be unique with respect to other non-leaf nodes, -# but leaf nodes may share the same identifier as other nodes. +# Each nonempty line of the input file is either a comment, an option +# specification or a tree specification. Each line is distinguished by +# its first character. A # character introduces a comment, an @ character +# introduces an option specification (described later), and all other +# nonempty lines are tree nodes. +# +# The first field of a tree specification must be a valid C identifier, +# optionally followed by a comma. The identifiers used on non-leaf nodes +# must be unique with respect to other non-leaf nodes, but leaf nodes may +# share the same identifier as other nodes. # # The tree structure is expressed through indentation. Lines with no leading # whitespace designate root nodes. When the number of white-space characters @@ -30,11 +36,20 @@ # # For each root node X, the object-like macro X_INITIALIZER is defined. This # can be used to initialize an array with static storage duration. Each line -# of the input is surrounded by braces to construct the initializer for one -# array element. These are then ordered topologically beginning with the root, -# but the root itself is not included. Each subtree is terminated by an -# "empty" element (with an initializer of {0}). For example, the above input -# will produce this (or an equivalent topological ordering) initializer: +# of the input is defines the initializer for one array element. These are +# then ordered topologically beginning with the root, but the root itself is +# not included. Each subtree is terminated by an "empty" element (with an +# initializer of {0}). +# +# If there is a comma after the identifier which begins a tree element, or +# if the identifier is the only text on the line, then the element initializer +# is constructed by surrounding the entire line by braces. Otherwise, the +# identifier is removed and the remainder of the line is surrounded by braces +# to construct the initializer. This allows the exact form of the initializer +# to be customized. +# +# For example, the above input will produce this (or an equivalent topological +# ordering) initializer: # # OUTPUT: #define root_INITIALIZER \ # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0} @@ -55,6 +70,13 @@ # constants will normally be in scope when using the root_INITIALIZER macro, # which means the resulting array initializers will use them. # +# Options may be used to alter the normal behaviour. They may be placed +# anywhere in the input file. The following options are defined: +# +# @nostrtab +# Do not output a definition for tree_strtab or its associated +# enumeration constants. +# # License WTFPL2: Do What The Fuck You Want To Public License, version 2. # This is free software: you are free to do what the fuck you want to. # There is NO WARRANTY, to the extent permitted by law. @@ -71,6 +93,8 @@ END { } BEGIN { + opts["strtab"] = 1; + depth = max_depth = 0; num_entries = 0; @@ -80,7 +104,26 @@ BEGIN { indent_stack[0] = 0; } +# Comments NF == 0 { next } +$0 ~ /^#/ { next } + +# Options +sub(/^@/, "", $0) { + if (NF == 1) { + orig=$1 + gsub(/-/, "_", $1); + val = !sub(/^no_?/, "", $1); + if ($1 in opts) { + opts[$1] = val; + } else { + print "error: unrecognized option: @" orig | "cat 1>&2" + exit 1 + } + } + next +} + { indent = index($0, $1) - 1 } indent > 0 { @@ -101,7 +144,19 @@ indent > 0 { entry_name = $1; sub(/,$/, "", entry_name); all_items[num_entries++] = entry_name; - $1 = ", \\\n\t{ " $1; $NF = $NF " }"; + # Construct the element initializer for this tree node. + $1 = " " $1; + if (NF > 1) { + # Check if entry name is followed by a comma. If it is not, the entry + # name is excluded from the initializer. + check_str = $1$2; + gsub(/[ \t]/, "", check_str); + if (index(check_str, entry_name ",") == 0) { + $1 = ""; + } + } + $1 = ", \\\n\t{" $1; $NF = $NF " }"; + tree_items[depth] = tree_items[depth] $0; level_count[depth]++; } @@ -117,32 +172,39 @@ END { indent == 0 { tree_identifier = $1 } END { - entry_strtab = "\1"; - bucketsort(sorted_items, all_items); - for (i = 0; i < num_entries; i++) { - s = sorted_items[i]; - if ((n = index(entry_strtab, s "\1")) > 0) { - entry_offsets[s] = n-1; - } else { - entry_offsets[s] = length(entry_strtab); - entry_strtab = entry_strtab s "\1"; + prefix = "\nenum {\n"; + + if (opts["strtab"]) { + entry_strtab = "\1"; + bucketsort(sorted_items, all_items); + for (i = 0; i < num_entries; i++) { + s = sorted_items[i]; + if ((n = index(entry_strtab, s "\1")) > 0) { + entry_offsets[s] = n-1; + } else { + entry_offsets[s] = length(entry_strtab); + entry_strtab = entry_strtab s "\1"; + } } - } - gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab); - sub(/^"/, "", entry_strtab); - sub(/\n[^\n]*$/, ";", entry_strtab); - print "\nstatic const char tree_strtab[] =" entry_strtab + gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab); + sub(/^"/, "", entry_strtab); + sub(/\n[^\n]*$/, ";", entry_strtab); + print "\nstatic const char tree_strtab[] =" entry_strtab - prefix = "\nenum {\n"; - for (item in entry_offsets) { - printf "%s\t%s = %d", prefix, item, entry_offsets[item]; - prefix = ",\n"; + for (item in entry_offsets) { + printf "%s\t%s = %d", prefix, item, entry_offsets[item]; + prefix = ",\n"; + } } + for (i in subtree_offsets) { - printf ",\n\t%s_OFFSET = %d", i, subtree_offsets[i]; + printf "%s\t%s_OFFSET = %d", prefix, i, subtree_offsets[i]; + prefix = ",\n"; + } + if (!index(prefix, "enum")) { + print "\n};"; } - print "\n};"; } END {