X-Git-Url: https://git.draconx.ca/gitweb/dxcommon.git/blobdiff_plain/b84052135e522d00278d33e0b3d0afbb46e03f51..HEAD:/scripts/gen-strtab.awk diff --git a/scripts/gen-strtab.awk b/scripts/gen-strtab.awk index 7e3de12..2a56fd6 100755 --- a/scripts/gen-strtab.awk +++ b/scripts/gen-strtab.awk @@ -1,6 +1,6 @@ #!/bin/awk -f # -# Copyright © 2021, 2023 Nick Bowler +# Copyright © 2021, 2023-2024 Nick Bowler # # Generate a C string table based on an input string specification file. # @@ -23,6 +23,10 @@ # @nozero # All strings will have a non-zero offset in the strtab. # +# @macro +# Instead of a variable declaration, the generated header will define an +# object-like macro that can be used as the initializer for a char array. +# # A string is defined by beginning a line with one or two & characters, which # must be immediately followed by a C identifier. Two & characters indicates # a string that should not be translated, as described below. A nonempty @@ -43,9 +47,12 @@ # - All other backslashes are deleted. This can be used to prevent special # handling of whitespace, # or & characters at the beginning of a line. # -# The output defines a variable, strtab, which contains all of the strings, -# and each identifier in the input is declared as an emumeration constant -# whose value is the offset of the associated string within strtab. +# Unless the @macro option is specified, the output defines a variable, +# strtab, which contains all of the strings, and each identifier in the input +# is declared as an emumeration constant whose value is the offset of the +# associated string within strtab. Otherwise, if the @macro option is +# specified, no variables are defined and STRTAB_INITIALIZER object-like macro +# may be used to initialize a char array with static storage duration. # # Normally, the generated source code wraps strings using the identity macro # N_(x), which has no effect on the resulting data structures but enables tools @@ -73,7 +80,12 @@ END { } BEGIN { + # Check if "\\\\" in substitutions gives just one backslash. + bs = "x"; sub(/x/, "\\\\", bs); + bs = (length(bs) == 1 ? "\\\\" : "\\"); + opts["zero"] = 1 + opts["macro"] = 0 collected = ident = "" startline = endline = 0 num_vars = 0 @@ -99,39 +111,36 @@ sub(/^@/, "", $0) { } sub(/^[&]/, "") { - if (ident) { - finish_string_input(strings, ident, collected) - vars[num_vars++] = ident + if (ident != "") { + finish_string_input(strings, ident, collected); + vars[num_vars++] = ident; } - current_l10n = !sub(/^[&]/, "", $1); - startline = NR - ident = $1 + current_l10n = !sub(/^[&]/, ""); + startline = NR; + ident = $1; - $1 = "" - collected = "" + collected = ""; + sub(/^[^ \t]*/, ""); } -ident { - sub(/^[ \t]*/, "") - if (collected) { - collected = collected "\n" $0 - } else { - collected = $0 - } +ident != "" { + sub(/^[ \t]*/, ""); - endline = NR + sep = collected != "" ? "\n" : ""; + collected = collected sep $0; + endline = NR; } END { - if (ident) { + if (ident != "") { finish_string_input(strings, ident, collected) vars[num_vars++] = ident } } END { - strtab = "" + strtab = cont = "" strtab_len = 0 count = bucketsort(sorted_strings, strings) max = 0 @@ -140,7 +149,12 @@ END { print "#ifndef N_" print "# define N_(x) x" print "#endif" - print "\nstatic const char strtab[] =" + if (opts["macro"]) { + cont = " \\"; + print "\n#define STRTAB_INITIALIZER" cont; + } else { + print "\nstatic const char strtab[] ="; + } for (i = 0; i < count; i++) { s = sorted_strings[i] @@ -148,7 +162,7 @@ END { if ((n = index(strtab "\1", s "\1")) > 0) { offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1)); if (!(sorted_strings[i] in nol10n)) - print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))"; + print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))" cont; } else if (strtab) { strtab = strtab "\1" s offsets[sorted_strings[i]] = strtab_len + 1 @@ -160,18 +174,18 @@ END { } } - gsub(/\2/, "\\\\", strtab); + gsub("\2", bs bs, strtab); n = split(strtab, split_strtab, "\1"); for (i = 1; i <= n; i++) { printf("\t%4s ", i > !!opts["zero"] ? "\"\\0\"" : ""); if (split_strtab[i] in nol10n) { - print "\"" split_strtab[i] "\""; + print "\"" split_strtab[i] "\"" cont; } else { - print "N_(\"" split_strtab[i] "\")"; + print "N_(\"" split_strtab[i] "\")" cont; } } - print "\t\"\";"; + print "\t\"\"" substr(";", 1, !opts["macro"]); print "enum {" for (i = 0; i < num_vars; i++) { @@ -187,30 +201,31 @@ END { print "\n#define STRTAB_MAX_OFFSET " max } -# finish_input_string(strings, ident, val) +# finish_string_input(strings, ident, val) # # Deal with backslash-escapes and special characters in val, then set # strings[ident] = val. function finish_string_input(strings, ident, val, n, tmpval) { - gsub(/\\\\/, "\1", val) - val = val (endline > startline ? "\n" : "") - gsub(/\\\n/, "", val) + gsub(/\\\\/, "\2", val); + if (endline > startline) + val = val "\n"; + gsub(/\\\n/, "", val); tmpval = "" while ((n = match(val, /\\[^abtnvfr]/)) > 0) { - tmpval = tmpval substr(val, 1, n-1) - val = substr(val, n+1) + tmpval = tmpval substr(val, 1, n-1); + val = substr(val, n+1); } - tmpval = tmpval val + tmpval = tmpval val; # Escape special characters - gsub(/"/, "\\\"", tmpval) - gsub(/\t/, "\\t", tmpval) - gsub(/\n/, "\\n", tmpval) - gsub(/\1/, "\\\\", tmpval) + gsub(/"/, bs"\"", tmpval); + gsub(/\t/, bs"t", tmpval); + gsub(/\n/, bs"n", tmpval); + gsub("\2", bs bs, tmpval); - strings[ident] = tmpval + strings[ident] = tmpval; if (!current_l10n) { nol10n[tmpval] = 1; } @@ -228,25 +243,28 @@ function real_length(s, t) # placing them into dst[0] ... dst[n]. # # Returns the number of elements. -function bucketsort(dst, src, buckets, max, count, i, t) +function bucketsort(dst, src, max, count, i, t) { + # Note: ULTRIX 4.5 nawk does not support local array parameters + split("", bucketsort_buckets); + for (t in src) { i = length(src[t]) if (i > max) { max = i } - buckets[i]++ + bucketsort_buckets[i]++ } for (i = max; i > 0; i--) { - if (i in buckets) { - t = buckets[i] - buckets[i] = count + if (i in bucketsort_buckets) { + t = bucketsort_buckets[i] + bucketsort_buckets[i] = count count += t } } for (t in src) { i = length(t = src[t]) - dst[buckets[i]++] = t + dst[bucketsort_buckets[i]++] = t } return count