3 # Copyright © 2021, 2023 Nick Bowler
5 # Generate definitions helpful when using getopt_long from an options
8 # The options specification file is processed line by line. Any line
9 # beginning with a - character introduces a new option definition. Each
10 # option definition specifies any or all of a short option name, a long
11 # option name, an argument specification, and an action specification.
13 # Only the long option name is mandatory. It is not possible to define
14 # short options without a corresponding long option.
16 # The optional short option name is first, and consists of a hyphen (which
17 # must be the first character on the line) followed by the one character
18 # short option name, followed by a comma.
20 # The long option name is next on the line, which consists of two hyphens
21 # followed by the desired option name. If the short option name was omitted,
22 # then the first hyphen of the long option name must be the first character
25 # The argument specification is next, consisting of an equals sign followed by
26 # the argument name. The argument name can be any sequence of non-whitespace
27 # characters and is only relevant for --help text.
29 # If the argument specification is surrounded by square brackets, this
30 # indicates an optional argument. If the argument specification is omitted
31 # completely, this option has no argument. Otherwise, the option has a
34 # Finally, the optional action specification defines how the "flag" and
35 # "val" members are set in the option structure for this option. An action
36 # specification may only be provided for options with no short name.
38 # If the action specification is omitted, then flag will be set to a null
39 # pointer and val is set to the short option character, if any, otherwise the
40 # unique enumeration constant LOPT_xxx for this option (described below).
42 # The action specification can be of the form (val) or (flag, val), where flag
43 # and val are C expressions suitable for use in an initializer for objects
44 # with static storage duration. Neither flag nor val may contain commas or
45 # whitespace. In the first form, the option's flag is set to a null pointer.
47 # Any amount of whitespace may follow the short option name, the argument
48 # specification, the action specification, or the comma within an action
49 # specification. Whitespace is not permitted between a long option name
50 # and an argument specification.
52 # Examples of option specifications:
58 # --parse-only (&parse_only, 1)
60 # Each option is assigned an enumeration constant of the form LOPT_xxx,
61 # where xxx is the long option name with all letters in uppercase and
62 # all non-alphanumeric characters replaced with underscores. The value
63 # of the constants is unspecified, except that they will be unique across
64 # all defined options and distinct from the integer value of any short
67 # The object-like macro SOPT_STRING expands to a string literal suitable
68 # for use as the optstring argument to getopt et al.
70 # The object-like macro LOPTS_INITIALIZER expands to a comma-separated
71 # sequence of struct option initializers, suitable for use in a declaration
72 # of an array of struct option elements with static storage duration. The
73 # all-zero terminating element required by getopt_long must be added by the
76 # static const struct option lopts[] = { LOPTS_INITIALIZER, {0} };
78 # If none of the options have action specifications, then an alternate
79 # set of macros is also defined, which encode the struct option array
80 # into a more compact format that can be used to generate the full
81 # 'struct option' array at runtime:
83 # * the object-like macro LOPT_PACK_BITS expands to an integer constant
84 # expression, suitable for use in #if directives, that specifies the
85 # minimum number of bits required by the encoding. LOPT_PACK_BITS2
86 # is the same, but rounded up to the next power of two greater than
89 # * the object-like macro LOPTS_PACKED_INITIALIZER expands to a
90 # comma-separated sequence of integer constant expressions, suitable
91 # for initializing an array of integers. All values are less than
94 # * the function-like macro LOPT_UNPACK(opt, x), where opt is an
95 # lvalue of type 'struct option', and x is one of the array
96 # elements initialized by LOPTS_PACKED_INITIALIZER. This expands
97 # the encoded value and sets the name, has_arg and val members of
98 # opt appropriately. The caller should ensure that the flag member
101 # The help text for an individual struct option element may be obtained by
104 # struct lopt_help { const char *desc, *arg; }
105 # *lopt_get_help(const struct option *opt, struct lopt_help *out);
107 # The returned desc and arg pointers point to the help text and argument name
108 # for the option, respectively, as written in the options specification file.
110 # License WTFPL2: Do What The Fuck You Want To Public License, version 2.
111 # This is free software: you are free to do what the fuck you want to.
112 # There is NO WARRANTY, to the extent permitted by law.
117 print " * Automatically generated by gen-options.awk from " FILENAME
119 print " * Automatically generated by gen-options.awk"
121 print " * Do not edit."
133 # Parse option specifier lines
136 arg = lopt = sopt = ""
139 # Extract short option name
140 if (work ~ /^-[^-]/) {
141 sopt = substr(work, 2, 1)
142 sub(/^-.,[ \t]*/, "", work)
145 # Extract long option name
147 if (n = match(work, /[= \t[]/)) {
148 lopt = substr(work, 3, n-3)
149 work = substr(work, n)
151 lopt = substr(work, 3)
156 # Extract argument name
157 if (work ~ /^\[=[^ \t]+\]/ && sub(/\]/, "&", work) == 1) {
158 if (n = index(work, "]")) {
159 arg = substr(work, 3, n-3)
160 work = substr(work, n+1)
163 } else if (work ~ /^=/) {
164 if (n = match(work, /[ \t]/)) {
165 arg = substr(work, 2, n-2)
166 work = substr(work, n)
168 arg = substr(work, 2)
175 sub(/^[ \t]*/, "", work)
176 if (!sopt && work ~ /^\([^, \t]+(,[ \t]*[^, \t]+)?\)/) {
177 # packed form is not possible w/ actions
180 n = split(work, a, /,[ \t]*/)
182 flag = substr(a[1], 2) ", " substr(a[2], 1, length(a[2])-1)
184 flag = "NULL, " substr(a[1], 2, length(a[1])-2)
186 sub(/^\([^, \t]+(,[ \t]*[^, \t]+)?/, "", work)
188 flag = "NULL, '" sopt "'"
190 flag = "NULL, " to_enum(lopt)
194 print "invalid option specification:", $0 > "/dev/stderr"
200 sopt_string = sopt_string sopt substr("::", 1, has_arg)
202 options[num_options++] = lopt
203 optionspec[lopt] = has_arg ", " flag
205 optionarg[lopt] = arg
211 # Ignore any line beginning with a #
218 if (lopt in optionhelp)
220 optionhelp[lopt] = optionhelp[lopt] $0;
223 # Exit immediately on error
224 END { if (err) { exit err } }
227 print "#include <stddef.h>"
228 print "#include <limits.h>\n"
229 print "#define SOPT_STRING \"" sopt_string "\"\n"
232 # Generate the main options tables
236 count = bucketsort(sorted_options, options)
237 for (i = 0; i < count; i++) {
238 lopt_strings = add_to_strtab(lopt_strings, sorted_options[i], offsets)
240 gsub(/[^ ]+/, "\"&", lopt_strings)
241 gsub(/ /, "\\0\"\n\t", lopt_strings)
243 print "static const char lopt_strings[] ="
244 print "\t" lopt_strings "\";\n"
246 for (i = 0; i < count; i++) {
248 sep = (i+1 == count ? "" : ",")
250 print "\t" to_enum(opt), "= UCHAR_MAX+1 +", offsets[opt] sep
253 print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))"
256 output_packed_macros()
259 print "\n#define LOPTS_INITIALIZER \\"
260 for (i = 0; i < count; i++) {
262 sep = (i+1 == count ? "" : ", \\")
264 print "\t/* --" opt, "*/ \\"
265 print "\t{ lopt_strings+" offsets[opt] ",", optionspec[opt] " }" sep
269 # Generate the help strings
271 # First, sort out the argument names
274 count = bucketsort(sorted_args, optionarg)
275 for (i = 0; i < count; i++) {
276 arg_strings = add_to_strtab(arg_strings, sorted_args[i], arg_offsets)
279 n = split(arg_strings, arg_split)
281 for (i = 1; i <= n; i++) {
282 for (opt in optionarg) {
283 if (optionarg[opt] == arg_split[i]) {
289 sep = (i < n ? "\"\\0\"" : "")
290 arg_strings = arg_strings "\n\tPN_(\"" opt "\", \"" arg_split[i] "\")" sep
293 print "\n#define ARG_L10N_(x)"
295 print "# define PN_(c, x) x"
298 print "static const char arg_strings[] = " arg_strings "\"\";"
299 for (opt in optionarg) {
300 if (opt in l10narg) {
303 print "\tARG_L10N_(PN_(\"" opt "\", \"" optionarg[opt] "\"))"
306 # Then add in the actual descriptions
307 print "\nstatic const char help_strings[] ="
310 for (opt in options) {
312 if (opt in optionhelp) {
317 help = optionhelp[opt]
318 help_offsets[opt] = help_pos
319 help_pos += length(help) + 1
321 gsub(/"/, "\\\"", help)
322 gsub(/\n/, "\\n\"\n\t \"", help)
323 help = "\tPN_(\"" opt "\",\n\t \"" help "\")"
327 for (opt in options) {
329 if (!(opt in optionhelp)) {
330 print "\tARG_L10N_(PN_(\"" opt "\", \"\"))"
331 help_offsets[opt] = help_pos - 1
335 print "\nstatic struct lopt_help { const char *desc, *arg; }"
336 print "*lopt_get_help(const struct option *opt, struct lopt_help *out)\n{"
337 print "\tswitch ((opt->name - lopt_strings) + UCHAR_MAX + 1) {"
338 for (opt in options) {
340 print "\tcase", to_enum(opt) ":"
341 print "\t\tout->desc = help_strings +", help_offsets[opt] ";"
342 if (opt in optionarg) {
343 print "\t\tout->arg = arg_strings +", arg_offsets[optionarg[opt]] ";"
345 print "\t\treturn out;"
347 print "\t}\n\n\treturn NULL;"
351 # Emit the packed initializer macros. This is used as an array initializer
352 # that encodes the following information:
354 # - short option character offset
355 # - arg value (0, 1 or 2), and
356 # - long option string offset
358 # as a single integer value for each option, in as few bits as practical.
360 # Currently, this only works if none of the options use action specifications
361 # (as these would require encoding user-specified pointer expressions and
362 # arbitrary int values).
363 function output_packed_macros(i, tmp, accum, max, totalbits)
367 # determine number of bits to encode offsets in SOPT_STRING
368 max = length(sopt_string);
369 totalbits = accum = 0;
370 for (i = 1; i <= max; i *= 2) {
373 print "#define LOPT_SC_BITS " accum;
376 # determine number of bits to encode has_arg values
378 for (i in optionspec) {
379 tmp = optionspec[i]; sub(/,.*/, "", tmp);
383 accum = (max > 1 ? 2 : max > 0 ? 1 : 0);
384 print "#define LOPT_HA_BITS " accum;
387 # determine number of bits to encode offsets in lopt_strings
390 if (offsets[i] > max)
395 for (i = 1; i <= max; i *= 2) {
398 print "#define LOPT_LS_BITS " accum;
401 print "#define LOPT_PACK_BITS " totalbits;
402 for (i = 8; i < totalbits; i *= 2)
404 print "#define LOPT_PACK_BITS2 " i;
406 # Now emit the packed initializer macro
407 print "\n#define LOPTS_PACKED_INITIALIZER \\";
409 for (i = 0; i < count; i++) {
411 print "\t" accum ", \\";
414 accum = "("offsets[tmp] "ul" "<<LOPT_HA_BITS)";
415 max = tmp = optionspec[tmp];
417 accum = "((" accum "|" max ")<<LOPT_SC_BITS)";
419 sub(/.*[, ]/, "", tmp);
421 tmp = index(sopt_string, substr(tmp, 2, 1)) - 1;
423 tmp = length(sopt_string);
425 accum = accum "|" tmp;
431 # Finally, the unpack helper macros
432 tmp = "(x) & ((1ul<<LOPT_SC_BITS)-1)";
433 print "\n#define LOPT_UNPACK_VAL(x) \\"
434 print "\t( SOPT_STRING[" tmp "] \\";
435 print "\t? SOPT_STRING[" tmp "] \\";
436 print "\t: 1u + UCHAR_MAX + ((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";
438 print "\n#define LOPT_UNPACK_ARG(x) \\";
439 print "\t(((x)>>LOPT_SC_BITS)&((1ul<<LOPT_HA_BITS)-1))";
441 print "\n#define LOPT_UNPACK_NAME(x) \\"
442 print "\t(lopt_strings+((x)>>(LOPT_SC_BITS+LOPT_HA_BITS)))";
444 print "\n#define LOPT_UNPACK(opt, x) do { \\";
445 print "\t(opt).name = LOPT_UNPACK_NAME(x); \\"
446 print "\t(opt).has_arg = LOPT_UNPACK_ARG(x); \\"
447 print "\t(opt).val = LOPT_UNPACK_VAL(x); \\"
451 # bucketsort(dst, src)
453 # Sort the elements of src by descending string length,
454 # placing them into dst[0] ... dst[n-1], where n is the number of elements.
456 # Returns the number of elements.
457 function bucketsort(dst, src, buckets, max, count, i, t)
461 if (i > max) { max = i }
465 for (i = max; i > 0; i--) {
474 i = length(t = src[t])
475 dst[buckets[i]++] = t
483 # Return the string LOPT_xxx, where xxx is the argument with all lowercase
484 # letters converted to uppercase, and all non-alphanumeric characters replaced
486 function to_enum(lopt)
489 gsub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]/, "_", lopt)
493 # add_to_strtab(strtab, str, offsets)
495 # Append string to strtab if there is not already a matching string present
496 # in the table. Newly-added strings are separated by spaces, which must be
497 # translated into null bytes afterwards. The updated strtab is returned, and
498 # the offsets[str] array member is updated with the position (counting from 0)
499 # of str in the strtab.
501 # For optimal results, strings should be added in descending length order.
502 function add_to_strtab(strtab, str, offsets, pos)
504 if ( (pos = index(strtab, str " ") - 1) < 0) {
507 strtab = strtab " " str