From 62bc7469bf3ef88c4f81ddf615aa7dabe9ddbf74 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Sat, 7 Jan 2023 21:14:10 -0500 Subject: [PATCH] gen-options.awk: Add a more compact data representation. Add a new set of macros that can be used to encode the long options array as integers, which is possible provided that the "action" feature is not used. In addition to reducing the size of the static array, it should be helpful to avoid relocations in position-independent executables and improve data sharing. --- scripts/gen-options.awk | 126 +++++++++++++++++++++++++++++++++++++++- tests/scripts.at | 66 +++++++++++++++++++++ 2 files changed, 189 insertions(+), 3 deletions(-) diff --git a/scripts/gen-options.awk b/scripts/gen-options.awk index 1fd0fdb..679dc99 100755 --- a/scripts/gen-options.awk +++ b/scripts/gen-options.awk @@ -1,6 +1,6 @@ #!/bin/awk -f # -# Copyright © 2021 Nick Bowler +# Copyright © 2021, 2023 Nick Bowler # # Generate definitions helpful when using getopt_long from an options # specification file. @@ -75,6 +75,27 @@ # # static const struct option lopts[] = { LOPTS_INITIALIZER, {0} }; # +# If none of the options have action specifications, then an alternate +# set of macros is also defined, which encode the struct option array +# into a more compact format that can be used to generate the full +# 'struct option' array at runtime: +# +# * the object-like macro LOPT_PACK_BITS expands to an integer constant +# expression, suitable for use in #if directives, that specifies the +# minimum number of bits required by the encoding. +# +# * the object-like macro LOPTS_PACKED_INITIALIZER expands to a +# comma-separated sequence of integer constant expressions, suitable +# for initializing an array of integers. All values are less than +# 2^LOPT_PACK_BITS. +# +# * the function-like macro LOPT_UNPACK(opt, x), where opt is an +# lvalue of type 'struct option', and x is one of the array +# elements initialized by LOPTS_PACKED_INITIALIZER. 
This expands +# the encoded value and sets the name, has_arg and val members of +# opt appropriately. The caller should ensure that the flag member +# is set to zero. +# # The help text for an individual struct option element may be obtained by # the function # @@ -100,6 +121,7 @@ END { } BEGIN { + has_actions = 0 sopt_string = "" num_options = 0 lopt = "" @@ -150,6 +172,9 @@ $0 ~ /^-/ { # Extract action sub(/^[ \t]*/, "", work) if (!sopt && work ~ /^\([^, \t]+(,[ \t]*[^, \t]+)?\)/) { + # packed form is not possible w/ actions + has_actions = 1; + n = split(work, a, /,[ \t]*/) if (n == 2) { flag = substr(a[1], 2) ", " substr(a[2], 1, length(a[2])-1) @@ -221,9 +246,13 @@ END { print "\t" to_enum(opt), "= UCHAR_MAX+1 +", offsets[opt] sep } print "};" - print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))\n" + print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))" + + if (!has_actions) { + output_packed_macros() + } - print "#define LOPTS_INITIALIZER \\" + print "\n#define LOPTS_INITIALIZER \\" for (i = 0; i < count; i++) { opt = options[i] sep = (i+1 == count ? "" : ", \\") @@ -315,6 +344,97 @@ END { print "}" } +# Emit the packed initializer macros. This is used as an array initializer +# that encodes the following information: +# +# - short option character offset +# - arg value (0, 1 or 2), and +# - long option string offset +# +# as a single integer value for each option, in as few bits as practical. +# +# Currently, this only works if none of the options use action specifications +# (as these would require encoding user-specified pointer expressions and +# arbitrary int values). 
+function output_packed_macros(i, tmp, accum, max) +{ + print "\n#define LOPT_PACK_BITS (LOPT_SC_BITS + LOPT_HA_BITS + LOPT_LS_BITS)"; + + # determine number of bits to encode offsets in SOPT_STRING + max = length(sopt_string); + accum = 0; + for (i = 1; i <= max; i *= 2) { + accum++; + } + print "#define LOPT_SC_BITS " accum; + + # determine number of bits to encode has_arg values + max = 0; + for (i in optionspec) { + tmp = optionspec[i]; sub(/,.*/, "", tmp); + if (tmp > max) + max = tmp; + } + print "#define LOPT_HA_BITS " (max > 1 ? 2 : max > 0 ? 1 : 0); + + # determine number of bits to encode offsets in lopt_strings + max = 0; + for (i in offsets) { + if (offsets[i] > max) + max = offsets[i]; + } + + accum = 0; + for (i = 1; i <= max; i *= 2) { + accum++; + } + print "#define LOPT_LS_BITS " accum; + + # Now emit the packed initializer macro + print "\n#define LOPTS_PACKED_INITIALIZER \\"; + accum = ""; + for (i = 0; i < count; i++) { + if (accum) + print "\t" accum ", \\"; + + tmp = options[i]; + accum = "("offsets[tmp] "ul" "<>(LOPT_SC_BITS+LOPT_HA_BITS)))"; + + print "\n#define LOPT_UNPACK_ARG(x) \\"; + print "\t(((x)>>LOPT_SC_BITS)&((1ul<>(LOPT_SC_BITS+LOPT_HA_BITS)))"; + + print "\n#define LOPT_UNPACK(opt, x) do { \\"; + print "\t(opt).name = LOPT_UNPACK_NAME(x); \\" + print "\t(opt).has_arg = LOPT_UNPACK_ARG(x); \\" + print "\t(opt).val = LOPT_UNPACK_VAL(x); \\" + print "} while (0)"; +} + # bucketsort(dst, src) # # Sort the elements of src by descending string length, diff --git a/tests/scripts.at b/tests/scripts.at index fc8fbc3..70df388 100644 --- a/tests/scripts.at +++ b/tests/scripts.at @@ -220,6 +220,72 @@ p AT_CLEANUP +AT_SETUP([gen-options.awk packed format]) +AT_KEYWORDS([gen-options awk script scripts]) + +AT_DATA([test.c], [[#include +struct option { const char *name; int has_arg; int *flag; int val; }; + +#include "options.h" + +static unsigned opts[] = { LOPTS_PACKED_INITIALIZER }; + +int main(void) +{ + unsigned i; + int x = +#if 
!LOPT_PACK_BITS + 0 +#elif LOPT_PACK_BITS <= 8 + 1 +#elif LOPT_PACK_BITS <= 16 + 2 +#elif LOPT_PACK_BITS <= 32 + 3 +#else +# error too big +#endif + ; + printf("%d\n", x); + for (i = 0; i < sizeof opts / sizeof opts[0]; i++) { + struct option o; + + LOPT_UNPACK(o, opts[i]); + printf("--%s, %d, ", o.name, o.has_arg); + if (o.val > UCHAR_MAX) + printf("%d\n", o.val - UCHAR_MAX - 1); + else + printf("'%c'\n", o.val); + } +} +]]) + +AT_DATA([single.dat], +[[--single-option +]]) +AT_CHECK([$AWK -f "$builddir/scripts/gen-options.awk" <single.dat >options.h]) +AT_CHECK([$CC -o single$EXEEXT test.c && ./single$EXEEXT], [0], +[[0 +--single-option, 0, 0 +]]) + +AT_DATA([16bit.dat], +[[-a, --the-first-option +-b, --the-second-option=ARG +-c, --the-third-option[=ARG] +-d, --the-fourth-option +]]) +AT_CHECK([$AWK -f "$builddir/scripts/gen-options.awk" <16bit.dat >options.h]) +AT_CHECK([$CC -o 16bit$EXEEXT test.c && ./16bit$EXEEXT], [0], +[[2 +--the-first-option, 0, 'a' +--the-second-option, 1, 'b' +--the-third-option, 2, 'c' +--the-fourth-option, 0, 'd' +]]) + +AT_CLEANUP + AT_SETUP([gen-strtab.awk]) AT_KEYWORDS([gen-strtab awk script scripts]) -- 2.43.2