#!/bin/awk -f
#
-# Copyright © 2021 Nick Bowler
+# Copyright © 2021, 2023 Nick Bowler
#
# Generate definitions helpful when using getopt_long from an options
# specification file.
#
# static const struct option lopts[] = { LOPTS_INITIALIZER, {0} };
#
+# If none of the options have action specifications, then an alternate
+# set of macros is also defined, which encode the struct option array
+# into a more compact format that can be used to generate the full
+# 'struct option' array at runtime:
+#
+# * the object-like macro LOPT_PACK_BITS expands to an integer constant
+# expression, suitable for use in #if directives, that specifies the
+# minimum number of bits required by the encoding. LOPT_PACK_BITS2
+# is the same, but rounded up to the next power of two greater than
+# or equal to 8.
+#
+# * the object-like macro LOPTS_PACKED_INITIALIZER expands to a
+# comma-separated sequence of integer constant expressions, suitable
+# for initializing an array of integers. All values are less than
+# 2^LOPT_PACK_BITS.
+#
+# * the function-like macro LOPT_UNPACK(opt, x), where opt is an
+# lvalue of type 'struct option', and x is one of the array
+# elements initialized by LOPTS_PACKED_INITIALIZER. This expands
+# the encoded value and sets the name, has_arg and val members of
+# opt appropriately. The caller should ensure that the flag member
+# is set to zero.
+#
# The help text for an individual struct option element may be obtained by
# the function
#
}
BEGIN {
+ # Check if "\\\\" in substitutions gives just one backslash.
+ bs = "x"; sub(/x/, "\\\\", bs);
+ bs = (length(bs) == 1 ? "\\\\" : "\\");
+
+ has_actions = 0
sopt_string = ""
num_options = 0
lopt = ""
}
# Extract argument name
- if (work ~ /^\[=[^\] \t]+\]/) {
+ if (work ~ /^\[=[^ \t]+\]/ && sub(/\]/, "&", work) == 1) {
if (n = index(work, "]")) {
arg = substr(work, 3, n-3)
work = substr(work, n+1)
# Extract action
sub(/^[ \t]*/, "", work)
if (!sopt && work ~ /^\([^, \t]+(,[ \t]*[^, \t]+)?\)/) {
- n = split(work, a, /,[ \t]*/)
+ # packed form is not possible w/ actions
+ has_actions = 1;
+
+ n = split(work, a, ",[ \t]*")
if (n == 2) {
flag = substr(a[1], 2) ", " substr(a[2], 1, length(a[2])-1)
} else if (n == 1) {
}
# Ignore any line beginning with a #
-$0 ~ /^#/ { next }
+$0 ~ /^#/ { next; }
-lopt {
- sub(/^[ \t]*/, "")
- if (!$0) { next }
+NF && lopt != "" {  # record has at least one field and lopt is non-empty (presumably the option parsed last -- confirm)
+ sub(/^[ \t]*/, "");  # strip leading blanks and tabs from the record
- optionhelp[lopt] = (lopt in optionhelp ? optionhelp[lopt] "\n" : "") $0
+ if (lopt in optionhelp)  # text has already been stored for this lopt
+ $0 = "\n" $0;  # so separate the new line from it with a newline
+ optionhelp[lopt] = optionhelp[lopt] $0;  # append the record to the text stored under lopt
}
# Exit immediately on error
lopt_strings = add_to_strtab(lopt_strings, sorted_options[i], offsets)
}
gsub(/[^ ]+/, "\"&", lopt_strings)
- gsub(/ /, "\\0\"\n\t", lopt_strings)
+ gsub(/ /, bs"0\"\n\t", lopt_strings)
print "static const char lopt_strings[] ="
print "\t" lopt_strings "\";\n"
print "\t" to_enum(opt), "= UCHAR_MAX+1 +", offsets[opt] sep
}
print "};"
- print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))\n"
+ print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))"
+
+ if (!has_actions) {
+ output_packed_macros()
+ }
- print "#define LOPTS_INITIALIZER \\"
+ print "\n#define LOPTS_INITIALIZER \\"
for (i = 0; i < count; i++) {
opt = options[i]
sep = (i+1 == count ? "" : ", \\")
help_offsets[opt] = help_pos
help_pos += length(help) + 1
- gsub(/"/, "\\\"", help)
- gsub(/\n/, "\\n\"\n\t \"", help)
+ gsub(/"/, bs"\"", help)
+ gsub(/\n/, bs"n\"\n\t \"", help)
help = "\tPN_(\"" opt "\",\n\t \"" help "\")"
}
}
opt = options[opt]
if (!(opt in optionhelp)) {
print "\tARG_L10N_(PN_(\"" opt "\", \"\"))"
- help_offsets[opt] = help_pos
+ help_offsets[opt] = help_pos - 1
}
}
print "}"
}
+# Emit the packed initializer macros and the LOPT_UNPACK* helper macros that
+# decode them.  Each LOPTS_PACKED_INITIALIZER element encodes:
+#
+# - short option character offset (low LOPT_SC_BITS bits),
+# - arg value (0, 1 or 2; next LOPT_HA_BITS bits), and
+# - long option string offset (remaining high bits)
+#
+# as a single integer value for each option, in as few bits as practical.
+#
+# Currently, this only works if none of the options use action specifications
+# (as these would require encoding user-specified pointer expressions and
+# arbitrary int values).
+function output_packed_macros(i, tmp, accum, max, totalbits)  # all parameters are scratch locals; called with no arguments
+{
+ print "";  # blank separator line before the macro definitions
+
+ # determine number of bits to encode offsets in SOPT_STRING (0 .. its length)
+ max = length(sopt_string);
+ totalbits = accum = 0;
+ for (i = 1; i <= max; i *= 2) {  # one pass per bit needed to represent max (none when max is 0)
+ accum++;
+ }
+ print "#define LOPT_SC_BITS " accum;
+ totalbits += accum;
+
+ # determine number of bits to encode has_arg values (first optionspec field)
+ max = 0;
+ for (i in optionspec) {
+ tmp = optionspec[i]; sub(/,.*/, "", tmp);  # keep only the text before the first comma
+ if (tmp > max)
+ max = tmp;
+ }
+ accum = (max > 1 ? 2 : max > 0 ? 1 : 0);  # 2 bits if the largest value exceeds 1, 1 bit if it exceeds 0, else none
+ print "#define LOPT_HA_BITS " accum;
+ totalbits += accum;
+
+ # determine number of bits to encode offsets in lopt_strings
+ max = 0;
+ for (i in offsets) {  # find the largest recorded offset
+ if (offsets[i] > max)
+ max = offsets[i];
+ }
+
+ accum = 0;
+ for (i = 1; i <= max; i *= 2) {  # same bit-counting loop as for LOPT_SC_BITS
+ accum++;
+ }
+ print "#define LOPT_LS_BITS " accum;
+ totalbits += accum;
+
+ print "#define LOPT_PACK_BITS " totalbits;
+ for (i = 8; i < totalbits; i *= 2)  # smallest of 8, 16, 32, ... that is >= totalbits
+ ;
+ print "#define LOPT_PACK_BITS2 " i;
+
+ # Now emit the packed initializer macro, one output line per option
+ print "\n#define LOPTS_PACKED_INITIALIZER \\";
+ accum = "";
+ for (i = 0; i < count; i++) {
+ if (accum)  # flush the previous entry, followed by a comma and a line continuation
+ print "\t" accum ", \\";
+
+ tmp = options[i];
+ accum = "("offsets[tmp] "ul" "<<LOPT_HA_BITS)";  # long option string offset, shifted to make room for has_arg
+ max = tmp = optionspec[tmp];
+ sub(/,.*/, "", max)  # max is reused here to hold the has_arg field
+ accum = "((" accum "|" max ")<<LOPT_SC_BITS)";  # shift again to make room for the short option offset
+
+ sub(/.*[, ]/, "", tmp);  # keep only the text after the last comma or space
+ if (tmp ~ /^[']/) {  # starts with a quote: take the character that follows it
+ tmp = index(sopt_string, substr(tmp, 2, 1)) - 1;  # index() is 1-based; store a 0-based offset
+ } else {
+ tmp = length(sopt_string);  # one past the last character (presumably the terminating null of SOPT_STRING)
+ }
+ accum = accum "|" tmp;
+ }
+
+ if (accum)  # last entry is printed without a trailing comma or continuation
+ print "\t" accum;
+
+ # Finally, the unpack helper macros (each one evaluates its x argument more than once or inside a larger expression)
+ tmp = "(x) & ((1ul<<LOPT_SC_BITS)-1)";  # C expression selecting the low LOPT_SC_BITS bits of x
+ print "\n#define LOPT_UNPACK_VAL(x) \\"
+ print "\t( SOPT_STRING[" tmp "] \\";
+ print "\t? SOPT_STRING[" tmp "] \\";
+ print "\t: 1u + UCHAR_MAX + ((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";
+
+ print "\n#define LOPT_UNPACK_ARG(x) \\";
+ print "\t(((x)>>LOPT_SC_BITS)&((1ul<<LOPT_HA_BITS)-1))";
+
+ print "\n#define LOPT_UNPACK_NAME(x) \\"
+ print "\t(lopt_strings+((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";
+
+ print "\n#define LOPT_UNPACK(opt, x) do { \\";
+ print "\t(opt).name = LOPT_UNPACK_NAME(x); \\"
+ print "\t(opt).has_arg = LOPT_UNPACK_ARG(x); \\"
+ print "\t(opt).val = LOPT_UNPACK_VAL(x); \\"
+ print "} while (0)";
+}
+
# bucketsort(dst, src)
#
# Sort the elements of src by descending string length,
# placing them into dst[0] ... dst[n].
#
# Returns the number of elements.
-function bucketsort(dst, src, buckets, max, count, i, t)
+function bucketsort(dst, src, max, count, i, t)
{
+ # Note: ULTRIX 4.5 nawk does not support local array parameters
+ split("", bucketsort_buckets);
+
for (t in src) {
i = length(src[t])
if (i > max) { max = i }
- buckets[i]++
+ bucketsort_buckets[i]++
}
for (i = max; i > 0; i--) {
- if (i in buckets) {
- t = buckets[i]
- buckets[i] = count
+ if (i in bucketsort_buckets) {
+ t = bucketsort_buckets[i]
+ bucketsort_buckets[i] = count
count += t
}
}
for (t in src) {
i = length(t = src[t])
- dst[buckets[i]++] = t
+ dst[bucketsort_buckets[i]++] = t
}
return count
# For optimal results, strings should be added in descending length order.
function add_to_strtab(strtab, str, offsets, pos)
{
- if ( (pos = index(strtab, str " ") - 1) < 0) {
- pos = length(strtab)
- if (pos) {
- strtab = strtab " " str
- pos++
- } else {
- strtab = strtab str
- }
+ if ( (pos = index(strtab, str " ") - 1) < 0) {  # str followed by a space does not occur in strtab yet
+ pos = length(strtab)  # new text is appended at the current end of the table
+ if (pos) {
+ strtab = strtab " " str  # a single space separates str from the existing contents
+ pos++  # step past the separator so pos is where str starts
+ } else {
+ strtab = strtab str  # table was empty: no separator needed
}
- offsets[str] = pos
- return strtab
+ }
+
+ offsets[str] = pos  # record the 0-based position of str within the table
+ return strtab  # the (possibly extended) table is handed back to the caller
}