X-Git-Url: http://git.draconx.ca/gitweb/dxcommon.git/blobdiff_plain/1688bad1e5dc89cacf33bc426c92a4abf2bc0647..075a8578cc9467d07ccff7688bfd0f4855712e1c:/scripts/gen-strtab.awk diff --git a/scripts/gen-strtab.awk b/scripts/gen-strtab.awk index f844d4f..2265bb5 100755 --- a/scripts/gen-strtab.awk +++ b/scripts/gen-strtab.awk @@ -1,6 +1,6 @@ #!/bin/awk -f # -# Copyright © 2021 Nick Bowler +# Copyright © 2021, 2023 Nick Bowler # # Generate a C string table based on an input string specification file. # @@ -16,11 +16,23 @@ # processing immediately moves on to the next line and the result is as if # the comment line were omitted from the input. # -# A string is defined by beginning a line with an & character, which must -# be immediately followed by a C identifier. A nonempty sequence of -# whitespace (with at most one newline) separates the identifier from the -# beginning of the string itself. This whitespace is never included in the -# output. +# Options may be used to alter the normal behaviour. An option is placed +# on a line by itself beginning with an @ character, and may appear anywhere +# in the input file. The following options are defined: +# +# @nozero +# All strings will have a non-zero offset in the strtab. +# +# @macro +# Instead of a variable declaration, the generated header will define an +# object-like macro that can be used as the initializer for a char array. +# +# A string is defined by beginning a line with one or two & characters, which +# must be immediately followed by a C identifier. Two & characters indicate +# a string that should not be translated, as described below. A nonempty +# sequence of whitespace (with at most one newline) separates the identifier +# from the beginning of the string itself. This whitespace is never included +# in the output. # # The string is then interpreted as follows: # @@ -35,12 +47,26 @@ # - All other backslashes are deleted. 
This can be used to prevent special # handling of whitespace, # or & characters at the beginning of a line. # -# The output defines a variable, strtab, which contains all of the strings, -# and each identifier in the input is declared as an emumeration constant -# whose value is the offset of the associated string within strtab. +# Unless the @macro option is specified, the output defines a variable, +# strtab, which contains all of the strings, and each identifier in the input +# is declared as an enumeration constant whose value is the offset of the +# associated string within strtab. Otherwise, if the @macro option is +# specified, no variables are defined and the STRTAB_INITIALIZER object-like macro +# may be used to initialize a char array with static storage duration. +# +# Normally, the generated source code wraps strings using the identity macro +# N_(x), which has no effect on the resulting data structures but enables tools +# such as xgettext to extract translatable strings from the source code. An +# identifier preceded by two ampersands (&&) suppresses this output to allow +# a single string table to contain both translatable strings and +# ones that should not be translated. # # The object-like macro STRTAB_MAX_OFFSET is defined and expands to the # greatest string offset, suitable for use in #if preprocessing directives. +# +# License WTFPL2: Do What The Fuck You Want To Public License, version 2. +# This is free software: you are free to do what the fuck you want to. +# There is NO WARRANTY, to the extent permitted by law. 
END { print "/*" @@ -54,20 +80,39 @@ END { } BEGIN { + opts["zero"] = 1 + opts["macro"] = 0 collected = ident = "" startline = endline = 0 num_vars = 0 } -$0 ~ /^[#]/ { next } +# Comments +NF == 0 || $0 ~ /^[#]/ { next } -$0 ~ /^[&]/ { +# Options +sub(/^@/, "", $0) { + if (NF == 1) { + orig=$1 + gsub(/-/, "_", $1); + val = !sub(/^no_?/, "", $1); + if ($1 in opts) { + opts[$1] = val; + } else { + print "error: unrecognized option: @" orig | "cat 1>&2" + exit 1 + } + } + next +} + +sub(/^[&]/, "") { if (ident) { finish_string_input(strings, ident, collected) vars[num_vars++] = ident } - sub(/^[&]/, "", $1) + current_l10n = !sub(/^[&]/, "", $1); startline = NR ident = $1 @@ -94,7 +139,7 @@ END { } END { - strtab = "" + strtab = cont = "" strtab_len = 0 count = bucketsort(sorted_strings, strings) max = 0 @@ -103,14 +148,20 @@ END { print "#ifndef N_" print "# define N_(x) x" print "#endif" - print "\nstatic const char strtab[] =" + if (opts["macro"]) { + cont = " \\"; + print "\n#define STRTAB_INITIALIZER" cont; + } else { + print "\nstatic const char strtab[] ="; + } for (i = 0; i < count; i++) { s = sorted_strings[i] - gsub(/\\\\/, "\2", s) + gsub(/\\\\/, "\2\2", s) if ((n = index(strtab "\1", s "\1")) > 0) { - offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1)) - print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))" + offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1)); + if (!(sorted_strings[i] in nol10n)) + print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))" cont; } else if (strtab) { strtab = strtab "\1" s offsets[sorted_strings[i]] = strtab_len + 1 @@ -122,16 +173,24 @@ END { } } - gsub(/\2/, "\\\\", strtab) - gsub(/\1/, "\")\"\\0\"\n\tN_(\"", strtab) - print "\tN_(\"" strtab "\")" - print "\t\"\";" + gsub(/\2/, "\\", strtab); + n = split(strtab, split_strtab, "\1"); + for (i = 1; i <= n; i++) { + printf("\t%4s ", i > !!opts["zero"] ? 
"\"\\0\"" : ""); + + if (split_strtab[i] in nol10n) { + print "\"" split_strtab[i] "\"" cont; + } else { + print "N_(\"" split_strtab[i] "\")" cont; + } + } + print "\t\"\"" substr(";", 1, !opts["macro"]); print "enum {" for (i = 0; i < num_vars; i++) { sep = (i+1) != num_vars ? "," : "" s = vars[i] - o = offsets[strings[s]] + o = offsets[strings[s]] + (!opts["zero"]) print "\t" s " = " o sep if (o > max) { max = o @@ -141,13 +200,13 @@ END { print "\n#define STRTAB_MAX_OFFSET " max } -# finish_input_string(strings, ident, val) +# finish_string_input(strings, ident, val) # # Deal with backslash-escapes and special characters in val, then set # strings[ident] = val. function finish_string_input(strings, ident, val, n, tmpval) { - gsub(/\\\\/, "\1", val) + gsub(/\\\\/, "\1\1", val) val = val (endline > startline ? "\n" : "") gsub(/\\\n/, "", val) @@ -162,15 +221,18 @@ function finish_string_input(strings, ident, val, n, tmpval) gsub(/"/, "\\\"", tmpval) gsub(/\t/, "\\t", tmpval) gsub(/\n/, "\\n", tmpval) - gsub(/\1/, "\\\\", tmpval) + gsub(/\1/, "\\", tmpval) strings[ident] = tmpval + if (!current_l10n) { + nol10n[tmpval] = 1; + } } function real_length(s, t) { t = length(s) - return t - gsub(/\\./, "&", s) + return t - gsub(/\\.|\2\2/, "&", s) } # bucketsort(dst, src)