#!/bin/awk -f
#
# Copyright © 2021 Nick Bowler
#
# Generate definitions helpful when using getopt_long from an options
# specification file.
#
# The options specification file is processed line by line. Any line
# beginning with a - character introduces a new option definition. Each
# option definition specifies any or all of a short option name, a long
# option name, an argument specification, and an action specification.
#
# Only the long option name is mandatory. It is not possible to define
# short options without a corresponding long option.
#
# The optional short option name is first, and consists of a hyphen (which
# must be the first character on the line) followed by the one character
# short option name, followed by a comma.
#
# The long option name is next on the line, which consists of two hyphens
# followed by the desired option name. If the short option name was omitted,
# then the first hyphen of the long option name must be the first character
# on the line.
#
# The argument specification is next, consisting of an equals sign followed by
# the argument name. The argument name can be any sequence of non-whitespace
# characters and is only relevant for --help text.
#
# If the argument specification is surrounded by square brackets, this
# indicates an optional argument. If the argument specification is omitted
# completely, this option has no argument. Otherwise, the option has a
# mandatory argument.
#
# Finally, the optional action specification defines how the "flag" and
# "val" members are set in the option structure for this option. An action
# specification may only be provided for options with no short name.
#
# If the action specification is omitted, then flag will be set to a null
# pointer and val is set to the short option character, if any, otherwise the
# unique enumeration constant LOPT_xxx for this option (described below).
#
# The action specification can be of the form (val) or (flag, val), where flag
# and val are C expressions suitable for use in an initializer for objects
# with static storage duration. Neither flag nor val may contain commas or
# whitespace. In the first form, the option's flag is set to a null pointer.
#
# Any amount of whitespace may follow the short option name, the argument
# specification, the action specification, or the comma within an action
# specification. Whitespace is not permitted between a long option name
# and an argument specification.
#
# Examples of option specifications:
#
#   -h, --help
#   --do-nothing (0)
#   -o, --output=FILE
#   --pad[=VAL]
#   --parse-only (&parse_only, 1)
#
# Any subsequent line which does not begin with a - or # character is help
# text for the most recently defined option. Leading whitespace is removed
# from each such line, empty lines are discarded, and the remaining lines
# are joined with newlines. Lines beginning with a # character are ignored.
#
# Each option is assigned an enumeration constant of the form LOPT_xxx,
# where xxx is the long option name with all letters in uppercase and
# all non-alphanumeric characters replaced with underscores. The value
# of the constants is unspecified, except that they will be unique across
# all defined options and distinct from the integer value of any short
# option character.
#
# The object-like macro SOPT_STRING expands to a string literal suitable
# for use as the optstring argument to getopt et al.
#
# The object-like macro LOPTS_INITIALIZER expands to a comma-separated
# sequence of struct option initializers, suitable for use in a declaration
# of an array of struct option elements with static storage duration. The
# all-zero terminating element required by getopt_long must be added by the
# user. For example:
#
#   static const struct option lopts[] = { LOPTS_INITIALIZER, {0} };
#
# The help text for an individual struct option element may be obtained by
# the function
#
#   struct lopt_help { const char *desc, *arg; }
#   *lopt_get_help(const struct option *opt, struct lopt_help *out);
#
# which stores its results in *out and returns out. The desc and arg
# members point to the help text and the argument name for the option,
# respectively, as written in the options specification file. The arg
# member is left unmodified if the option has no argument. A null pointer
# is returned if opt is not one of the defined options.
#
# License WTFPL2: Do What The Fuck You Want To Public License, version 2.
# This is free software: you are free to do what the fuck you want to.
# There is NO WARRANTY, to the extent permitted by law.

# Exit immediately on error.  END rules run in program order even after an
# "exit" in a main rule, so this check comes before every rule that prints:
# a failed run then produces no output at all.
END {
  if (err) {
    exit err
  }
}

# Banner comment for the generated file.  This is an END rule rather than a
# BEGIN rule so that FILENAME names the specification file that was read.
END {
  print "/*"
  if (FILENAME) {
    print " * Automatically generated by gen-options.awk from " FILENAME
  } else {
    print " * Automatically generated by gen-options.awk"
  }
  print " * Do not edit."
  print " */"
}

# Parser state:
#   sopt_string  - accumulated optstring for SOPT_STRING
#   num_options  - number of entries stored in options[0 .. num_options-1]
#   lopt         - long name of the option most recently defined (the one
#                  that subsequent help text lines are attached to)
#   err          - exit status to use if parsing fails
BEGIN {
  sopt_string = ""
  num_options = 0
  lopt = ""
  err = 0
}

# Parse option specifier lines
#
# On success this records, for the option's long name:
#   options[k]       - the long name, in definition order
#   optionspec[name] - "has_arg, flag, val" initializer text
#   optionarg[name]  - the argument name (only if one was given)
# and appends any short option to sopt_string.
$0 ~ /^-/ {
  work = $0
  arg = lopt = sopt = ""
  has_arg = 0

  # Trailing whitespace is never significant.  Dropping it here means
  # whitespace after an action specification is accepted, and allows the
  # action pattern below to be anchored at the end of the line.
  sub(/[ \t]+$/, "", work)

  # Extract short option name
  if (work ~ /^-[^-]/) {
    sopt = substr(work, 2, 1)
    sub(/^-.,[ \t]*/, "", work)
  }

  # Extract long option name
  if (work ~ /^--/) {
    if (n = match(work, /[= \t[]/)) {
      lopt = substr(work, 3, n-3)
      work = substr(work, n)
    } else {
      lopt = substr(work, 3)
      work = ""
    }
  }

  # Extract argument name.  The has_arg values 0, 1 and 2 are written
  # verbatim into the generated struct option initializers, and are also
  # the number of colons appended to the short option in the optstring.
  if (work ~ /^\[=[^\] \t]+\]/) {
    if (n = index(work, "]")) {
      arg = substr(work, 3, n-3)
      work = substr(work, n+1)
    }
    has_arg = 2
  } else if (work ~ /^=/) {
    if (n = match(work, /[ \t]/)) {
      arg = substr(work, 2, n-2)
      work = substr(work, n)
    } else {
      arg = substr(work, 2)
      work = ""
    }
    has_arg = 1
  }

  # Extract action.  The pattern must match everything that remains, so
  # trailing junk after the closing parenthesis falls through to the error
  # check below instead of being folded into the value.
  sub(/^[ \t]*/, "", work)
  if (sopt == "" && work ~ /^\([^, \t]+(,[ \t]*[^, \t]+)?\)$/) {
    n = split(work, a, /,[ \t]*/)
    if (n == 2) {
      # (flag, val): strip "(" from the first piece and ")" from the second.
      flag = substr(a[1], 2) ", " substr(a[2], 1, length(a[2])-1)
    } else if (n == 1) {
      # (val): strip both parentheses.
      flag = "NULL, " substr(a[1], 2, length(a[1])-2)
    }
    work = ""
  } else if (sopt != "") {
    flag = "NULL, '" sopt "'"
  } else {
    flag = "NULL, " to_enum(lopt)
  }

  # Anything left over is a syntax error, as is a missing long option name.
  # Explicit string comparisons are used because input-derived values which
  # look numeric (such as "-0") would otherwise be tested as numbers.
  if (work != "" || lopt == "") {
    print "invalid option specification:", $0 > "/dev/stderr"
    err = 1
    exit
  }

  if (sopt != "") {
    sopt_string = sopt_string sopt substr("::", 1, has_arg)
  }
  options[num_options++] = lopt
  optionspec[lopt] = has_arg ", " flag
  if (arg != "") {
    optionarg[lopt] = arg
  }

  next
}

# Ignore any line beginning with a #
$0 ~ /^#/ { next }

# Any other line, once an option has been defined, is help text for that
# option.  Leading whitespace is removed, empty lines are dropped and the
# remaining lines are joined with newlines.
lopt != "" {
  sub(/^[ \t]*/, "")
  if ($0 == "") {
    next
  }

  optionhelp[lopt] = (lopt in optionhelp ? optionhelp[lopt] "\n" : "") $0
}

# The generated code refers to NULL (<stddef.h>) and UCHAR_MAX (<limits.h>).
END {
  print "#include <stddef.h>"
  print "#include <limits.h>\n"
  print "#define SOPT_STRING \"" sopt_string "\"\n"
}

# Generate the main options tables
END {
  lopt_strings = ""

  # Build the string table longest-first so that shorter names have the
  # chance to share storage with the tail of a longer one.
  count = bucketsort(sorted_options, options)
  for (i = 0; i < count; i++) {
    lopt_strings = add_to_strtab(lopt_strings, sorted_options[i], offsets)
  }

  # Turn the space-separated table into a sequence of C string literals,
  # one per line, each ending in an explicit \0 (the final one relies on
  # the literal's own terminator).
  gsub(/[^ ]+/, "\"&", lopt_strings)
  gsub(/ /, "\\0\"\n\t", lopt_strings)

  print "static const char lopt_strings[] ="
  print "\t" lopt_strings "\";\n"

  # Each enumeration constant encodes the option's string table offset,
  # shifted above the range of short option characters.
  print "enum {"
  for (i = 0; i < count; i++) {
    opt = options[i]
    sep = (i+1 == count ? "" : ",")
    print "\t" to_enum(opt), "= UCHAR_MAX+1 +", offsets[opt] sep
  }
  print "};"
  print "#define lopt_str(x) (lopt_strings + (LOPT_ ## x - UCHAR_MAX - 1))\n"

  print "#define LOPTS_INITIALIZER \\"
  for (i = 0; i < count; i++) {
    opt = options[i]
    sep = (i+1 == count ? "" : ", \\")

    print "\t/* --" opt, "*/ \\"
    print "\t{ lopt_strings+" offsets[opt] ",", optionspec[opt] " }" sep
  }
}

# Generate the help strings
#
# Options are always visited in definition order (options[0], options[1],
# ...) so that the generated output does not depend on the unspecified
# iteration order of "for (x in array)".
END {
  # First, sort out the argument names
  arg_strings = ""

  count = bucketsort(sorted_args, optionarg)
  for (i = 0; i < count; i++) {
    arg_strings = add_to_strtab(arg_strings, sorted_args[i], arg_offsets)
  }

  # Argument names never contain whitespace, so the table can be split
  # back into its entries.  The explicit " " separator requests default
  # whitespace splitting regardless of the current FS.
  n = split(arg_strings, arg_split, " ")
  arg_strings = ""
  for (i = 1; i <= n; i++) {
    # Each table entry is emitted under the context of the first option
    # that uses it; that option is marked so it is not listed again below.
    for (j = 0; j < num_options; j++) {
      opt = options[j]
      if ((opt in optionarg) && optionarg[opt] == arg_split[i]) {
        l10narg[opt] = 1
        break
      }
    }

    sep = (i < n ? "\"\\0\"" : "")
    arg_strings = arg_strings "\n\tPN_(\"" opt "\", \"" arg_split[i] "\")" sep
  }

  print "\n#define ARG_L10N_(x)"
  print "#ifndef PN_"
  print "# define PN_(c, x) x"
  print "#endif\n"

  print "static const char arg_strings[] = " arg_strings "\"\";"
  # The remaining option/argument pairs are wrapped in ARG_L10N_, which is
  # defined above to expand to nothing, so they add no code.
  for (i = 0; i < num_options; i++) {
    opt = options[i]
    if (!(opt in optionarg) || (opt in l10narg)) {
      continue
    }
    print "\tARG_L10N_(PN_(\"" opt "\", \"" optionarg[opt] "\"))"
  }

  # Then add in the actual descriptions
  print "\nstatic const char help_strings[] ="
  help = ""
  help_pos = 0
  for (i = 0; i < num_options; i++) {
    opt = options[i]
    if (opt in optionhelp) {
      # Printing lags one entry behind so that the final entry can be
      # terminated differently from the others.
      if (help != "") {
        print help "\"\\0\""
      }

      help = optionhelp[opt]
      help_offsets[opt] = help_pos
      # Measured before escaping: \" and \n each occupy one character in
      # the compiled string.
      help_pos += length(help) + 1

      gsub(/"/, "\\\"", help)
      gsub(/\n/, "\\n\"\n\t \"", help)
      help = "\tPN_(\"" opt "\",\n\t \"" help "\")"
    }
  }
  print help "\"\";"

  # Options without help text share an empty string.  help_pos is now the
  # total size of help_strings, i.e. one past its last element, so use the
  # terminating null of the last entry instead (or offset 0 when the table
  # is just "").
  empty_pos = (help_pos > 0 ? help_pos - 1 : 0)
  for (i = 0; i < num_options; i++) {
    opt = options[i]
    if (!(opt in optionhelp)) {
      print "\tARG_L10N_(PN_(\"" opt "\", \"\"))"
      help_offsets[opt] = empty_pos
    }
  }

  # The switch maps an option's name pointer back to its LOPT_xxx value.
  # Note that out->arg is only assigned for options which have an argument.
  print "\nstatic struct lopt_help { const char *desc, *arg; }"
  print "*lopt_get_help(const struct option *opt, struct lopt_help *out)\n{"
  print "\tswitch ((opt->name - lopt_strings) + UCHAR_MAX + 1) {"
  for (i = 0; i < num_options; i++) {
    opt = options[i]
    print "\tcase", to_enum(opt) ":"
    print "\t\tout->desc = help_strings +", help_offsets[opt] ";"
    if (opt in optionarg) {
      print "\t\tout->arg = arg_strings +", arg_offsets[optionarg[opt]] ";"
    }
    print "\t\treturn out;"
  }
  print "\t}\n\n\treturn NULL;"
  print "}"
}

# bucketsort(dst, src)
#
# Sort the elements of src by descending string length,
# placing them into dst[0] ... dst[n-1], where n is the
# number of elements in src.
#
# Returns the number of elements.
function bucketsort(dst, src, buckets, max, count, i, t)
{
  # buckets, max, count, i and t are locals; callers pass only dst and src.
  # The counters are initialized explicitly so that an empty src yields a
  # numeric 0 rather than an uninitialized value.
  max = 0
  count = 0

  # Pass 1: count how many elements there are of each length.
  for (t in src) {
    i = length(src[t])
    if (i > max) {
      max = i
    }
    buckets[i]++
  }

  # Pass 2: replace each count by the first dst index for that length,
  # handing out indices from the longest length down.  Length 0 is
  # included so that empty strings also receive a slot of their own.
  for (i = max; i >= 0; i--) {
    if (i in buckets) {
      t = buckets[i]
      buckets[i] = count
      count += t
    }
  }

  # Pass 3: drop every element into the next free slot of its bucket.
  for (t in src) {
    i = length(src[t])
    dst[buckets[i]++] = src[t]
  }

  return count
}

# to_enum(lopt)
#
# Return the string LOPT_xxx, where xxx is the argument with all lowercase
# letters converted to uppercase, and all non-alphanumeric characters replaced
# with underscores.
function to_enum(lopt)
{
  lopt = toupper(lopt)
  gsub(/[^ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]/, "_", lopt)
  return "LOPT_" lopt
}

# add_to_strtab(strtab, str, offsets)
#
# Append string to strtab if there is not already a matching string present
# in the table.  A match is either an identical entry or the tail of a longer
# entry.  Newly-added strings are separated by spaces, which must be
# translated into null bytes afterwards.  The updated strtab is returned, and
# the offsets[str] array member is updated with the position (counting from 0)
# of str in the strtab.
#
# For optimal results, strings should be added in descending length order.
function add_to_strtab(strtab, str, offsets, pos)
{
  # pos is a local; callers pass only the first three arguments.
  #
  # Every entry except the last is followed by a space, so the search is
  # made against the table with one extra space appended.  Without it a
  # string equal to (or a tail of) the last entry would never be found and
  # would be appended a second time.
  pos = index(strtab " ", str " ") - 1
  if (pos < 0) {
    pos = length(strtab)
    if (pos) {
      strtab = strtab " " str
      pos++
    } else {
      strtab = strtab str
    }
  }
  offsets[str] = pos
  return strtab
}