--- /dev/null
+#!/bin/awk -f
+#
+# Copyright © 2021 Nick Bowler
+#
+# Generate a C string table based on an input string specification file.
+#
+# A string table is a single large char array containing all of
+# the specified (0-terminated) strings, which is then offset to obtain
+# the desired string. By storing these offsets instead of string pointers
+# into read-only data structures, this can reduce the need for relocation
+# processing at startup when programs are built in PIC mode.
+#
+# The string specification file is processed line by line. Comment
+# lines may be included by beginning the line with a # character, which
+# must be the very first character on the line. If a comment is encountered,
+# processing immediately moves on to the next line and the result is as if
+# the comment line were omitted from the input.
+#
+# A string is defined by beginning a line with an & character, which must
+# be immediately followed by a C identifier. A nonempty sequence of
+# whitespace (with at most one newline) separates the identifier from the
+# beginning of the string itself. This whitespace is never included in the
+# output.
+#
+# The string is then interpreted as follows:
+#
+# - Leading blanks on each line are ignored.
+# - The sequences \\, \a, \b, \t, \n, \v, \f and \r can be entered and
+# mean the same as they do in C string literals. The "\\" sequence
+# prevents any special interpretation of the second backslash.
+# - Newlines in the input are included in the output, except for the
+# case where the entire string (including its identifier) is on one line.
+# - If this is not desired, a newline which is immediately preceded by an
+# unescaped backslash will be deleted, along with the backslash.
+# - All other backslashes are deleted. This can be used to prevent special
+# handling of whitespace, # or & characters at the beginning of a line.
+#
+# The output defines a variable, strtab, which contains all of the strings,
+# and each identifier in the input is declared as an enumeration constant
+# whose value is the offset of the associated string within strtab.
+#
+# The object-like macro STRTAB_MAX_OFFSET is defined and expands to the
+# greatest string offset, suitable for use in #if preprocessing directives.
+
+# Emit the "generated file" banner comment at the top of the output,
+# mentioning the input file name when awk reports a non-empty FILENAME.
+END {
+  print "/*"
+  print " * Automatically generated by gen-strtab.awk" (FILENAME ? " from " FILENAME : "")
+  print " * Do not edit."
+  print " */"
+}
+
+# Initialise the parser state: no string definition is active yet.
+BEGIN {
+  ident = ""
+  collected = ""
+  startline = 0
+  endline = 0
+  num_vars = 0
+}
+
+# Lines whose very first character is # are comments; skip them entirely.
+/^[#]/ {
+  next
+}
+
+# A line beginning with & starts a new string definition: flush the string
+# collected so far (if any), record the new identifier, and leave the text
+# following the identifier in $0 so that it can be collected as the start
+# of the new string.
+$0 ~ /^[&]/ {
+  if (ident) {
+    finish_string_input(strings, ident, collected)
+    vars[num_vars++] = ident
+  }
+
+  # Save the raw text that follows "&identifier" before touching any field.
+  # Assigning to a field makes awk rebuild $0 from the fields joined by OFS,
+  # which would collapse runs of blanks inside the string text and drop
+  # blanks at the end of the line.
+  rest = $0
+  sub(/^[&][^ \t]*/, "", rest)
+
+  sub(/^[&]/, "", $1)
+  startline = NR
+  ident = $1
+
+  # Restore the untouched remainder of the line as the current record.
+  $0 = rest
+  collected = ""
+}
+
+# While a string definition is active, append the current line (minus its
+# leading blanks) to the text collected so far.
+ident {
+  sub(/^[ \t]*/, "")
+
+  # Compare against the empty string explicitly.  Text copied from $0 that
+  # looks like a number (for example a line consisting of "0") would
+  # otherwise be tested numerically, be mistaken for "nothing collected
+  # yet", and be overwritten by the next line.
+  if (collected != "") {
+    collected = collected "\n" $0
+  } else {
+    collected = $0
+  }
+
+  endline = NR
+}
+
+# At end of input, flush the string definition that was still being
+# collected, if there is one.
+END {
+  if (ident) {
+    finish_string_input(strings, ident, collected)
+    vars[num_vars] = ident
+    num_vars++
+  }
+}
+
+# Emit the string table, the enumeration of string offsets and the
+# STRTAB_MAX_OFFSET macro.
+#
+# While the table is being built, strtab holds its text with \1 standing
+# for the NUL between strings and \2 standing for an escaped backslash
+# (\\).  Any backslash still present therefore begins a two-character
+# escape sequence, which real_length counts as a single char.
+END {
+  strtab = ""
+  strtab_len = 0
+  count = bucketsort(sorted_strings, strings)
+  max = 0
+
+  print "\n#define STR_L10N_(x)"
+  print "#ifndef N_"
+  print "# define N_(x) x"
+  print "#endif"
+  print "\nstatic const char strtab[] ="
+
+  for (i = 0; i < count; i++) {
+    s = sorted_strings[i]
+    gsub(/\\\\/, "\2", s)
+
+    # Try to reuse the tail of a string that is already in the table.  A
+    # candidate position is rejected when the character before it is a
+    # backslash: the match would then begin in the middle of an escape
+    # sequence (e.g. "n" matching the end of "line\n") and the computed
+    # offset would not address the wanted text.
+    hay = strtab "\1"
+    needle = s "\1"
+    pos = 0
+    while ((n = index(substr(hay, pos + 1), needle)) > 0) {
+      pos += n
+      if (pos == 1 || substr(hay, pos - 1, 1) != "\\") {
+        break
+      }
+    }
+
+    if (n > 0) {
+      # Shared tail: no new table text, but still show the string to
+      # message extraction tools through the no-op STR_L10N_ macro.
+      offsets[sorted_strings[i]] = real_length(substr(strtab, 1, pos - 1))
+      print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))"
+    } else if (strtab != "") {
+      # Append after a NUL separator, which occupies one char itself.
+      strtab = strtab "\1" s
+      offsets[sorted_strings[i]] = strtab_len + 1
+      strtab_len += real_length(s) + 1
+    } else {
+      # First string in the table.
+      strtab = s
+      offsets[sorted_strings[i]] = 0
+      strtab_len += real_length(s)
+    }
+  }
+
+  # Turn the \2 placeholders back into escaped backslashes.  This is done
+  # with index/substr rather than gsub because awk implementations do not
+  # agree on whether "\\\\" as a replacement yields one backslash or two.
+  out = ""
+  while ((n = index(strtab, "\2")) > 0) {
+    out = out substr(strtab, 1, n - 1) "\\\\"
+    strtab = substr(strtab, n + 1)
+  }
+  strtab = out strtab
+
+  # Each NUL placeholder closes the current literal, emits "\0" and opens
+  # the next literal on a new line.
+  gsub(/\1/, "\")\"\\0\"\n\tN_(\"", strtab)
+  print "\tN_(\"" strtab "\")"
+  print "\t\"\";"
+
+  print "enum {"
+  for (i = 0; i < num_vars; i++) {
+    sep = (i+1) != num_vars ? "," : ""
+    s = vars[i]
+    o = offsets[strings[s]]
+    print "\t" s " = " o sep
+    if (o > max) {
+      max = o
+    }
+  }
+  print "};"
+  print "\n#define STRTAB_MAX_OFFSET " max
+}
+
+# finish_string_input(strings, ident, val)
+#
+# Deal with backslash-escapes and special characters in val, then set
+# strings[ident] to the result, which is the text to be placed between
+# double quotes in the generated C source.
+#
+# Reads the globals startline and endline: a newline is appended to val
+# when the definition spanned more than one input line.
+function finish_string_input(strings, ident, val, n, tmpval)
+{
+  # Hide escaped backslashes behind a \1 placeholder so that the steps
+  # below never see them as the start of another escape.
+  gsub(/\\\\/, "\1", val)
+  val = val (endline > startline ? "\n" : "")
+
+  # A backslash directly before a newline removes both.
+  gsub(/\\\n/, "", val)
+
+  # Delete every backslash that does not introduce one of the supported
+  # escapes; the character that followed it is kept.
+  tmpval = ""
+  while ((n = match(val, /\\[^abtnvfr]/)) > 0) {
+    tmpval = tmpval substr(val, 1, n-1)
+    val = substr(val, n+1)
+  }
+
+  # A backslash with nothing after it cannot be matched by the loop above;
+  # delete it too, otherwise it would escape the closing quote of the
+  # generated string literal.
+  sub(/\\$/, "", val)
+  tmpval = tmpval val
+
+  # Escape special characters
+  gsub(/"/, "\\\"", tmpval)
+  gsub(/\t/, "\\t", tmpval)
+  gsub(/\n/, "\\n", tmpval)
+
+  # Put the escaped backslashes back.  This is done with index/substr
+  # rather than gsub because awk implementations do not agree on whether
+  # "\\\\" as a replacement yields one backslash or two.
+  val = ""
+  while ((n = index(tmpval, "\1")) > 0) {
+    val = val substr(tmpval, 1, n - 1) "\\\\"
+    tmpval = substr(tmpval, n + 1)
+  }
+
+  strings[ident] = val tmpval
+}
+
+# real_length(s)
+#
+# Returns the length of s with every backslash + character pair counted
+# as a single character.
+function real_length(s, t)
+{
+  # Replacing each pair with itself leaves s unchanged; only the number of
+  # replacements reported by gsub is of interest.
+  t = gsub(/\\./, "&", s)
+  return length(s) - t
+}
+
+# bucketsort(dst, src)
+#
+# Sort the elements of src by descending string length,
+# placing them into dst[0] ... dst[n-1], where n is the number of
+# elements.  Zero-length elements are included and come last.  The
+# relative order of elements of equal length follows the iteration order
+# of src.
+#
+# Returns the number of elements.
+function bucketsort(dst, src, buckets, max, count, i, t, v)
+{
+  # Start from real numbers: an uninitialised max would be used as the
+  # subscript "" rather than "0" when every element is empty.
+  max = 0
+  count = 0
+
+  # Pass 1: count how many elements there are of each length.
+  for (t in src) {
+    i = length(src[t])
+    if (i > max) { max = i }
+    buckets[i]++
+  }
+
+  # Pass 2: replace each count by the first dst index for that length,
+  # walking from the longest length down to (and including) length zero.
+  for (i = max; i >= 0; i--) {
+    if (i in buckets) {
+      t = buckets[i]
+      buckets[i] = count
+      count += t
+    }
+  }
+
+  # Pass 3: drop every element into the next free slot of its bucket.
+  for (t in src) {
+    v = src[t]
+    dst[buckets[length(v)]++] = v
+  }
+
+  return count
+}