gen-strtab.awk: Add a feature to disable l10n markings.

author Nick Bowler <nbowler@draconx.ca>

Tue, 3 Jan 2023 04:08:00 +0000 (23:08 -0500)

committer Nick Bowler <nbowler@draconx.ca>

Tue, 3 Jan 2023 04:08:00 +0000 (23:08 -0500)
author Nick Bowler <nbowler@draconx.ca>
Tue, 3 Jan 2023 04:08:00 +0000 (23:08 -0500)
committer Nick Bowler <nbowler@draconx.ca>
Tue, 3 Jan 2023 04:08:00 +0000 (23:08 -0500)
diff --git a/scripts/gen-strtab.awk b/scripts/gen-strtab.awk

index 118cf54430015cabcf15fab7b1c7db090e73618f..b91736d0f4985fff7a0bbf9c52ec3fd9379f9c3e 100755 (executable)
--- a/scripts/gen-strtab.awk
+++ b/scripts/gen-strtab.awk
@@ -23,11 +23,12 @@
  #   @nozero
  #     All strings will have a non-zero offset in the strtab.
  #
-# A string is defined by beginning a line with an & character, which must
-# be immediately followed by a C identifier.  A nonempty sequence of
-# whitespace (with at most one newline) separates the identifier from the
-# beginning of the string itself.  This whitespace is never included in the
-# output.
+# A string is defined by beginning a line with one or two & characters, which
+# must be immediately followed by a C identifier.  Two & characters indicate
+# a string that should not be translated, as described below.  A nonempty
+# sequence of whitespace (with at most one newline) separates the identifier
+# from the beginning of the string itself.  This whitespace is never included
+# in the output.
  #
  # The string is then interpreted as follows:
  #
@@ -46,6 +47,13 @@
  # and each identifier in the input is declared as an emumeration constant
  # whose value is the offset of the associated string within strtab.
  #
+# Normally, the generated source code wraps strings using the identity macro
+# N_(x), which has no effect on the resulting data structures but enables tools
+# such as xgettext to extract translatable strings from the source code.  An
+# identifier preceded by two ampersands (&&) suppresses this wrapping so that
+# a single string table can contain both translatable strings and strings
+# that should not be translated.
+#
  # The object-like macro STRTAB_MAX_OFFSET is defined and expands to the
  # greatest string offset, suitable for use in #if preprocessing directives.
  #
@@ -90,13 +98,13 @@ sub(/^@/, "", $0) {
    next
  }
  
-$0 ~ /^[&]/ {
+sub(/^[&]/, "") {
    if (ident) {
      finish_string_input(strings, ident, collected)
      vars[num_vars++] = ident
    }
  
-  sub(/^[&]/, "", $1)
+  current_l10n = !sub(/^[&]/, "", $1);
    startline = NR
    ident = $1
  
@@ -134,15 +142,13 @@ END {
    print "#endif"
    print "\nstatic const char strtab[] ="
  
-  if (!opts["zero"])
-    print "\t\"\\0\"";
-
    for (i = 0; i < count; i++) {
      s = sorted_strings[i]
      gsub(/\\\\/, "\2", s)
      if ((n = index(strtab "\1", s "\1")) > 0) {
-      offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1))
-      print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))"
+      offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1));
+      if (!(sorted_strings[i] in nol10n))
+        print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))";
      } else if (strtab) {
        strtab = strtab "\1" s
        offsets[sorted_strings[i]] = strtab_len + 1
@@ -154,10 +160,18 @@ END {
      }
    }
  
-  gsub(/\2/, "\\\\", strtab)
-  gsub(/\1/, "\")\"\\0\"\n\tN_(\"", strtab)
-  print "\tN_(\"" strtab "\")"
-  print "\t\"\";"
+  gsub(/\2/, "\\\\", strtab);
+  n = split(strtab, split_strtab, "\1");
+  for (i = 1; i <= n; i++) {
+    printf("\t%4s ", i > !!opts["zero"] ? "\"\\0\"" : "");
+
+    if (split_strtab[i] in nol10n) {
+      print "\"" split_strtab[i] "\"";
+    } else {
+      print "N_(\"" split_strtab[i] "\")";
+    }
+  }
+  print "\t\"\";";
  
    print "enum {"
    for (i = 0; i < num_vars; i++) {
@@ -197,6 +211,9 @@ function finish_string_input(strings, ident, val, n, tmpval)
    gsub(/\1/, "\\\\", tmpval)
  
    strings[ident] = tmpval
+  if (!current_l10n) {
+    nol10n[tmpval] = 1;
+  }
  }
  
  function real_length(s, t)
diff --git a/tests/scripts.at b/tests/scripts.at

index f4a4a105fb057ede633b10df765502855e9e3066..fc8fbc375a15b4f67652b66419e81dcfaa4ab8e4 100644 (file)
--- a/tests/scripts.at
+++ b/tests/scripts.at
@@ -328,6 +328,58 @@ AT_CHECK([$CC -DHEADER='"test1.h"' -o test1$EXEEXT test.c && ./test1$EXEEXT],
    [0], [[1 hello
  ]])
  
+AT_CLEANUP
+
+AT_SETUP([gen-strtab.awk l10n options])
+AT_KEYWORDS([gen-strtab awk script scripts])
+
+AT_DATA([l10n.sed], dnl (
+[[/^#/b
+s/.*N_(\([^)]*\)).*/\1/p
+]])
+
+AT_DATA([test0.def],
+[[&a hello world
+&b world
+&c goodbye
+]])
+AT_CHECK([$AWK -f "$builddir/scripts/gen-strtab.awk" <test0.def >test0.h])
+AT_CHECK([sed -n -f l10n.sed test0.h | LC_ALL=C sort], [0],
+[["goodbye"
+"hello world"
+"world"
+]])
+
+AT_DATA([test1.def],
+[[&a hello world
+&&b world
+&&c goodbye
+]])
+AT_CHECK([$AWK -f "$builddir/scripts/gen-strtab.awk" <test1.def >test1.h])
+AT_CHECK([sed -n -f l10n.sed test1.h], [0],
+[["hello world"
+]])
+
+AT_DATA([test.c],
+[[#include <stdio.h>
+#include HEADER
+
+int main(void)
+{
+  printf("%s %s %s\n", strtab+a, strtab+b, strtab+c);
+  return 0;
+}
+]])
+
+AT_CHECK([$CC -DHEADER='"test0.h"' -o test0$EXEEXT test.c && ./test0$EXEEXT],
+  [0], [[hello world world goodbye
+]])
+
+AT_CHECK([$CC -DHEADER='"test1.h"' -o test1$EXEEXT test.c && ./test1$EXEEXT],
+  [0], [[hello world world goodbye
+]])
+
+
  AT_CLEANUP
  
  AT_SETUP([gen-tree.awk])
author	Nick Bowler <nbowler@draconx.ca>
	Tue, 3 Jan 2023 04:08:00 +0000 (23:08 -0500)
committer	Nick Bowler <nbowler@draconx.ca>
	Tue, 3 Jan 2023 04:08:00 +0000 (23:08 -0500)
scripts/gen-strtab.awk		patch \| blob \| history
tests/scripts.at		patch \| blob \| history