X-Git-Url: https://git.draconx.ca/gitweb/dxcommon.git/blobdiff_plain/cd522a4c86728df10a96970f6a7bd99b823afc38..c3f53b697f5dd5ecd5a5f74e94ec0fd55b0f2764:/scripts/gen-tree.awk diff --git a/scripts/gen-tree.awk b/scripts/gen-tree.awk index 53557f1..1334bf8 100755 --- a/scripts/gen-tree.awk +++ b/scripts/gen-tree.awk @@ -1,14 +1,20 @@ #!/bin/awk -f # -# Copyright © 2021 Nick Bowler +# Copyright © 2021-2022 Nick Bowler # # Generate one or more C array initializers to encode simple tree structures # in a compact format. # -# Each line of the input file defines a tree node. The first field must be a -# C identifier, which may be optionally followed by a comma. The identifiers -# used on non-leaf nodes must be unique with respect to other non-leaf nodes, -# but leaf nodes may share the same identifier as other nodes. +# Each nonempty line of the input file is either a comment, an option +# specification or a tree specification. Each line is distinguished by +# its first character. A # character introduces a comment, an @ character +# introduces an option specification (described later), and all other +# nonempty lines are tree nodes. +# +# The first field of a tree specification must be a valid C identifier, +# optionally followed by a comma. The identifiers used on non-leaf nodes +# must be unique with respect to other non-leaf nodes, but leaf nodes may +# share the same identifier as other nodes. # # The tree structure is expressed through indentation. Lines with no leading # whitespace designate root nodes. When the number of white-space characters @@ -30,11 +36,20 @@ # # For each root node X, the object-like macro X_INITIALIZER is defined. This # can be used to initialize an array with static storage duration. Each line -# of the input is surrounded by braces to construct the initializer for one -# array element. These are then ordered topologically beginning with the root, -# but the root itself is not included. Each subtree is terminated by an -# "empty" element (with an initializer of {0}). For example, the above input -# will produce this (or an equivalent topological ordering) initializer: +# of the input is defines the initializer for one array element. These are +# then ordered topologically beginning with the root, but the root itself is +# not included. Each subtree is terminated by an "empty" element (with an +# initializer of {0}). +# +# If there is a comma after the identifier which begins a tree element, or +# if the identifier is the only text on the line, then the element initializer +# is constructed by surrounding the entire line by braces. Otherwise, the +# identifier is removed and the remainder of the line is surrounded by braces +# to construct the initializer. This allows the exact form of the initializer +# to be customized. +# +# For example, the above input will produce this (or an equivalent topological +# ordering) initializer: # # OUTPUT: #define root_INITIALIZER \ # {a}, {e}, {0}, {b}, {d}, {0}, {f}, {g}, {0}, {c}, {0} @@ -55,6 +70,13 @@ # constants will normally be in scope when using the root_INITIALIZER macro, # which means the resulting array initializers will use them. # +# Options may be used to alter the normal behaviour. They may be placed +# anywhere in the input file. The following options are defined: +# +# @nostrtab +# Do not output a definition for tree_strtab or its associated +# enumeration constants. +# # License WTFPL2: Do What The Fuck You Want To Public License, version 2. # This is free software: you are free to do what the fuck you want to. # There is NO WARRANTY, to the extent permitted by law. @@ -71,6 +93,12 @@ END { } BEGIN { + # Check if "\\\\" in substitutions gives just one backslash. + bs = "x"; sub(/x/, "\\\\", bs); + bs = (length(bs) == 1 ? "\\\\" : "\\"); + + opts["strtab"] = 1; + depth = max_depth = 0; num_entries = 0; @@ -80,7 +108,26 @@ BEGIN { indent_stack[0] = 0; } +# Comments NF == 0 { next } +$0 ~ /^#/ { next } + +# Options +sub(/^@/, "", $0) { + if (NF == 1) { + orig=$1 + gsub(/-/, "_", $1); + val = !sub(/^no_?/, "", $1); + if ($1 in opts) { + opts[$1] = val; + } else { + print "error: unrecognized option: @" orig | "cat 1>&2" + exit 1 + } + } + next +} + { indent = index($0, $1) - 1 } indent > 0 { @@ -101,48 +148,67 @@ indent > 0 { entry_name = $1; sub(/,$/, "", entry_name); all_items[num_entries++] = entry_name; - $1 = ", \\\n\t{ " $1; $NF = $NF " }"; + # Construct the element initializer for this tree node. + $1 = " " $1; + if (NF > 1) { + # Check if entry name is followed by a comma. If it is not, the entry + # name is excluded from the initializer. + check_str = $1$2; + gsub(/[ \t]/, "", check_str); + if (index(check_str, entry_name ",") == 0) { + $1 = ""; + } + } + $1 = ", \\\n\t{" $1; $NF = $NF " }"; + tree_items[depth] = tree_items[depth] $0; level_count[depth]++; } -indent == 0 && tree_identifier { +indent == 0 && tree_identifier != "" { trees[tree_identifier] = format_items(); tree_identifier = ""; } END { - if (tree_identifier) + if (tree_identifier != "") trees[tree_identifier] = format_items() } indent == 0 { tree_identifier = $1 } END { - entry_strtab = "\1"; - bucketsort(sorted_items, all_items); - for (i = 0; i < num_entries; i++) { - s = sorted_items[i]; - if ((n = index(entry_strtab, s "\1")) > 0) { - entry_offsets[s] = n-1; - } else { - entry_offsets[s] = length(entry_strtab); - entry_strtab = entry_strtab s "\1"; + prefix = "\nenum {\n"; + + if (opts["strtab"]) { + entry_strtab = "\1"; + bucketsort(sorted_items, all_items); + for (i = 0; i < num_entries; i++) { + s = sorted_items[i]; + if ((n = index(entry_strtab, s "\1")) > 0) { + entry_offsets[s] = n-1; + } else { + entry_offsets[s] = length(entry_strtab); + entry_strtab = entry_strtab s "\1"; + } } - } - gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab); - sub(/^"/, "", entry_strtab); - sub(/\n[^\n]*$/, ";", entry_strtab); - print "\nstatic const char tree_strtab[] =" entry_strtab + gsub("\1", "\"\n\t\"" bs "0\" \"", entry_strtab); + sub(/^"/, "", entry_strtab); + sub(/\n[^\n]*$/, ";", entry_strtab); + print "\nstatic const char tree_strtab[] =" entry_strtab - prefix = "\nenum {\n"; - for (item in entry_offsets) { - printf "%s\t%s = %d", prefix, item, entry_offsets[item]; - prefix = ",\n"; + for (item in entry_offsets) { + printf "%s\t%s = %d", prefix, item, entry_offsets[item]; + prefix = ",\n"; + } } + for (i in subtree_offsets) { - printf ",\n\t%s_OFFSET = %d", i, subtree_offsets[i]; + printf "%s\t%s_OFFSET = %d", prefix, i, subtree_offsets[i]; + prefix = ",\n"; + } + if (!index(prefix, "enum")) { + print "\n};"; } - print "\n};"; } END { @@ -159,7 +225,7 @@ function format_items(s, i) depth--; } - for (i = 2; tree_items[i]; i++) { + for (i = 2; tree_items[i] != ""; i++) { level_count[i] += level_count[i-1]; } @@ -168,7 +234,7 @@ function format_items(s, i) delete subtree_depth[i]; } - for (i = 1; tree_items[i]; i++) { + for (i = 1; tree_items[i] != ""; i++) { s = s tree_items[i]; delete tree_items[i]; delete level_count[i]; @@ -184,25 +250,28 @@ function format_items(s, i) # placing them into dst[0] ... dst[n]. # # Returns the number of elements. -function bucketsort(dst, src, buckets, max, count, i, t) +function bucketsort(dst, src, max, count, i, t) { + # Note: ULTRIX 4.5 nawk does not support local array parameters + split("", bucketsort_buckets); + for (t in src) { i = length(src[t]) if (i > max) { max = i } - buckets[i]++ + bucketsort_buckets[i]++ } for (i = max; i > 0; i--) { - if (i in buckets) { - t = buckets[i] - buckets[i] = count + if (i in bucketsort_buckets) { + t = bucketsort_buckets[i] + bucketsort_buckets[i] = count count += t } } for (t in src) { i = length(t = src[t]) - dst[buckets[i]++] = t + dst[bucketsort_buckets[i]++] = t } return count