From: Nick Bowler <nbowler@draconx.ca>
Date: Tue, 3 Jan 2023 04:08:00 +0000 (-0500)
Subject: gen-strtab.awk: Add a feature to disable l10n markings.
X-Git-Url: https://git.draconx.ca/gitweb/dxcommon.git/commitdiff_plain/ccd4df684019395c163eb3174c5e8bf2d3d8bdfc

gen-strtab.awk: Add a feature to disable l10n markings.

Preceding an identifier with two ampersands now causes the output to
not include the N_ macro for that string, so xgettext won't pick it up.
This enables creating a mixed string table with some strings marked as
translatable and others not.
---

diff --git a/scripts/gen-strtab.awk b/scripts/gen-strtab.awk
index 118cf54..b91736d 100755
--- a/scripts/gen-strtab.awk
+++ b/scripts/gen-strtab.awk
@@ -23,11 +23,12 @@
 #   @nozero
 #     All strings will have a non-zero offset in the strtab.
 #
-# A string is defined by beginning a line with an & character, which must
-# be immediately followed by a C identifier.  A nonempty sequence of
-# whitespace (with at most one newline) separates the identifier from the
-# beginning of the string itself.  This whitespace is never included in the
-# output.
+# A string is defined by beginning a line with one or two & characters, which
+# must be immediately followed by a C identifier.  Two & characters indicate
+# a string that should not be translated, as described below.  A nonempty
+# sequence of whitespace (with at most one newline) separates the identifier
+# from the beginning of the string itself.  This whitespace is never included
+# in the output.
 #
 # The string is then interpreted as follows:
 #
@@ -46,6 +47,13 @@
 # and each identifier in the input is declared as an emumeration constant
 # whose value is the offset of the associated string within strtab.
 #
+# Normally, the generated source code wraps strings using the identity macro
+# N_(x), which has no effect on the resulting data structures but enables tools
+# such as xgettext to extract translatable strings from the source code.  An
+# identifier preceded by two ampersands (&&) suppresses this wrapping, allowing
+# a single string table to contain both translatable strings and ones that
+# should not be translated.
+#
 # The object-like macro STRTAB_MAX_OFFSET is defined and expands to the
 # greatest string offset, suitable for use in #if preprocessing directives.
 #
@@ -90,13 +98,13 @@ sub(/^@/, "", $0) {
   next
 }
 
-$0 ~ /^[&]/ {
+sub(/^[&]/, "") {
   if (ident) {
     finish_string_input(strings, ident, collected)
     vars[num_vars++] = ident
   }
 
-  sub(/^[&]/, "", $1)
+  current_l10n = !sub(/^[&]/, "", $1);
   startline = NR
   ident = $1
 
@@ -134,15 +142,13 @@ END {
   print "#endif"
   print "\nstatic const char strtab[] ="
 
-  if (!opts["zero"])
-    print "\t\"\\0\"";
-
   for (i = 0; i < count; i++) {
     s = sorted_strings[i]
     gsub(/\\\\/, "\2", s)
     if ((n = index(strtab "\1", s "\1")) > 0) {
-      offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1))
-      print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))"
+      offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1));
+      if (!(sorted_strings[i] in nol10n))
+        print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))";
     } else if (strtab) {
       strtab = strtab "\1" s
       offsets[sorted_strings[i]] = strtab_len + 1
@@ -154,10 +160,18 @@ END {
     }
   }
 
-  gsub(/\2/, "\\\\", strtab)
-  gsub(/\1/, "\")\"\\0\"\n\tN_(\"", strtab)
-  print "\tN_(\"" strtab "\")"
-  print "\t\"\";"
+  gsub(/\2/, "\\\\", strtab);
+  n = split(strtab, split_strtab, "\1");
+  for (i = 1; i <= n; i++) {
+    printf("\t%4s ", i > !!opts["zero"] ? "\"\\0\"" : "");
+
+    if (split_strtab[i] in nol10n) {
+      print "\"" split_strtab[i] "\"";
+    } else {
+      print "N_(\"" split_strtab[i] "\")";
+    }
+  }
+  print "\t\"\";";
 
   print "enum {"
   for (i = 0; i < num_vars; i++) {
@@ -197,6 +211,9 @@ function finish_string_input(strings, ident, val, n, tmpval)
   gsub(/\1/, "\\\\", tmpval)
 
   strings[ident] = tmpval
+  if (!current_l10n) {
+    nol10n[tmpval] = 1;
+  }
 }
 
 function real_length(s, t)
diff --git a/tests/scripts.at b/tests/scripts.at
index f4a4a10..fc8fbc3 100644
--- a/tests/scripts.at
+++ b/tests/scripts.at
@@ -328,6 +328,58 @@ AT_CHECK([$CC -DHEADER='"test1.h"' -o test1$EXEEXT test.c && ./test1$EXEEXT],
   [0], [[1 hello
 ]])
 
+AT_CLEANUP
+
+AT_SETUP([gen-strtab.awk l10n options])
+AT_KEYWORDS([gen-strtab awk script scripts])
+
+AT_DATA([l10n.sed], dnl (
+[[/^#/b
+s/.*N_(\([^)]*\)).*/\1/p
+]])
+
+AT_DATA([test0.def],
+[[&a hello world
+&b world
+&c goodbye
+]])
+AT_CHECK([$AWK -f "$builddir/scripts/gen-strtab.awk" <test0.def >test0.h])
+AT_CHECK([sed -n -f l10n.sed test0.h | LC_ALL=C sort], [0],
+[["goodbye"
+"hello world"
+"world"
+]])
+
+AT_DATA([test1.def],
+[[&a hello world
+&&b world
+&&c goodbye
+]])
+AT_CHECK([$AWK -f "$builddir/scripts/gen-strtab.awk" <test1.def >test1.h])
+AT_CHECK([sed -n -f l10n.sed test1.h], [0],
+[["hello world"
+]])
+
+AT_DATA([test.c],
+[[#include <stdio.h>
+#include HEADER
+
+int main(void)
+{
+  printf("%s %s %s\n", strtab+a, strtab+b, strtab+c);
+  return 0;
+}
+]])
+
+AT_CHECK([$CC -DHEADER='"test0.h"' -o test0$EXEEXT test.c && ./test0$EXEEXT],
+  [0], [[hello world world goodbye
+]])
+
+AT_CHECK([$CC -DHEADER='"test1.h"' -o test1$EXEEXT test.c && ./test1$EXEEXT],
+  [0], [[hello world world goodbye
+]])
+
+
 AT_CLEANUP
 
 AT_SETUP([gen-tree.awk])