#!/bin/awk -f
#
-# Copyright © 2021, 2023 Nick Bowler
+# Copyright © 2021, 2023-2024 Nick Bowler
#
# Generate a C string table based on an input string specification file.
#
# @nozero
# All strings will have a non-zero offset in the strtab.
#
+# @macro
+# Instead of a variable declaration, the generated header will define an
+# object-like macro that can be used as the initializer for a char array.
+#
# A string is defined by beginning a line with one or two & characters, which
# must be immediately followed by a C identifier. Two & characters indicates
# a string that should not be translated, as described below. A nonempty
# - All other backslashes are deleted. This can be used to prevent special
# handling of whitespace, # or & characters at the beginning of a line.
#
-# The output defines a variable, strtab, which contains all of the strings,
-# and each identifier in the input is declared as an emumeration constant
-# whose value is the offset of the associated string within strtab.
+# Unless the @macro option is specified, the output defines a variable,
+# strtab, which contains all of the strings, and each identifier in the input
+# is declared as an enumeration constant whose value is the offset of the
+# associated string within strtab. Otherwise, if the @macro option is
+# specified, no variables are defined and the STRTAB_INITIALIZER
+# object-like macro may be used to initialize a char array with static
+# storage duration.
#
# Normally, the generated source code wraps strings using the identity macro
# N_(x), which has no effect on the resulting data structures but enables tools
}
BEGIN {
+ # Check if "\\\\" in substitutions gives just one backslash.
+ bs = "x"; sub(/x/, "\\\\", bs);
+ bs = (length(bs) == 1 ? "\\\\" : "\\");
+
opts["zero"] = 1
+ opts["macro"] = 0
collected = ident = ""
startline = endline = 0
num_vars = 0
}
sub(/^[&]/, "") {
- if (ident) {
- finish_string_input(strings, ident, collected)
- vars[num_vars++] = ident
+ if (ident != "") {
+ finish_string_input(strings, ident, collected);
+ vars[num_vars++] = ident;
}
- current_l10n = !sub(/^[&]/, "", $1);
- startline = NR
- ident = $1
+ current_l10n = !sub(/^[&]/, "");
+ startline = NR;
+ ident = $1;
- $1 = ""
- collected = ""
+ collected = "";
+ sub(/^[^ \t]*/, "");
}
-ident {
- sub(/^[ \t]*/, "")
- if (collected) {
- collected = collected "\n" $0
- } else {
- collected = $0
- }
+ident != "" {
+ sub(/^[ \t]*/, "");
- endline = NR
+ sep = collected != "" ? "\n" : "";
+ collected = collected sep $0;
+ endline = NR;
}
END {
- if (ident) {
+ if (ident != "") {
finish_string_input(strings, ident, collected)
vars[num_vars++] = ident
}
}
END {
- strtab = ""
+ strtab = cont = ""
strtab_len = 0
count = bucketsort(sorted_strings, strings)
max = 0
print "#ifndef N_"
print "# define N_(x) x"
print "#endif"
- print "\nstatic const char strtab[] ="
+ if (opts["macro"]) {
+ cont = " \\";
+ print "\n#define STRTAB_INITIALIZER" cont;
+ } else {
+ print "\nstatic const char strtab[] =";
+ }
for (i = 0; i < count; i++) {
s = sorted_strings[i]
if ((n = index(strtab "\1", s "\1")) > 0) {
offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1));
if (!(sorted_strings[i] in nol10n))
- print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))";
+ print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))" cont;
} else if (strtab) {
strtab = strtab "\1" s
offsets[sorted_strings[i]] = strtab_len + 1
}
}
- gsub(/\2/, "\\\\", strtab);
+ gsub("\2", bs bs, strtab);
n = split(strtab, split_strtab, "\1");
for (i = 1; i <= n; i++) {
printf("\t%4s ", i > !!opts["zero"] ? "\"\\0\"" : "");
if (split_strtab[i] in nol10n) {
- print "\"" split_strtab[i] "\"";
+ print "\"" split_strtab[i] "\"" cont;
} else {
- print "N_(\"" split_strtab[i] "\")";
+ print "N_(\"" split_strtab[i] "\")" cont;
}
}
- print "\t\"\";";
+ print "\t\"\"" substr(";", 1, !opts["macro"]);
print "enum {"
for (i = 0; i < num_vars; i++) {
print "\n#define STRTAB_MAX_OFFSET " max
}
-# finish_input_string(strings, ident, val)
+# finish_string_input(strings, ident, val)
#
# Deal with backslash-escapes and special characters in val, then set
# strings[ident] = val.
function finish_string_input(strings, ident, val, n, tmpval)
{
- gsub(/\\\\/, "\1", val)
- val = val (endline > startline ? "\n" : "")
- gsub(/\\\n/, "", val)
+ gsub(/\\\\/, "\2", val);
+ if (endline > startline)
+ val = val "\n";
+ gsub(/\\\n/, "", val);
tmpval = ""
while ((n = match(val, /\\[^abtnvfr]/)) > 0) {
- tmpval = tmpval substr(val, 1, n-1)
- val = substr(val, n+1)
+ tmpval = tmpval substr(val, 1, n-1);
+ val = substr(val, n+1);
}
- tmpval = tmpval val
+ tmpval = tmpval val;
# Escape special characters
- gsub(/"/, "\\\"", tmpval)
- gsub(/\t/, "\\t", tmpval)
- gsub(/\n/, "\\n", tmpval)
- gsub(/\1/, "\\\\", tmpval)
+ gsub(/"/, bs"\"", tmpval);
+ gsub(/\t/, bs"t", tmpval);
+ gsub(/\n/, bs"n", tmpval);
+ gsub("\2", bs bs, tmpval);
- strings[ident] = tmpval
+ strings[ident] = tmpval;
if (!current_l10n) {
nol10n[tmpval] = 1;
}
# placing them into dst[0] ... dst[n].
#
# Returns the number of elements.
-function bucketsort(dst, src, buckets, max, count, i, t)
+function bucketsort(dst, src, max, count, i, t)
{
+ # Note: ULTRIX 4.5 nawk does not support local array parameters
+ split("", bucketsort_buckets);
+
for (t in src) {
i = length(src[t])
if (i > max) { max = i }
- buckets[i]++
+ bucketsort_buckets[i]++
}
for (i = max; i > 0; i--) {
- if (i in buckets) {
- t = buckets[i]
- buckets[i] = count
+ if (i in bucketsort_buckets) {
+ t = bucketsort_buckets[i]
+ bucketsort_buckets[i] = count
count += t
}
}
for (t in src) {
i = length(t = src[t])
- dst[buckets[i]++] = t
+ dst[bucketsort_buckets[i]++] = t
}
return count