From 95d8e7745d9b00097d50d9903d62c48268324e7a Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Sat, 2 Dec 2023 23:13:24 -0500 Subject: [PATCH] Rework backslash substitutions in awk scripts. It appears that even using plain "\\" in a replacement string for sub and gsub is still problematic. Most awks handle this OK (returning a literal backslash) provided the next character is not a backslash, but on ULTRIX 4.5 nawk eats the next character regardless and interprets it as a backslash-escape sequence, oops. So let's try something a bit different. We can ask awk what it does with "\\\\", and use either "\\\\" or "\\" depending on the answer. Hopefully this actually works everywhere. --- scripts/gen-options.awk | 10 +++++++--- scripts/gen-strtab.awk | 25 +++++++++++++++---------- scripts/gen-tree.awk | 6 +++++- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/scripts/gen-options.awk b/scripts/gen-options.awk index 80adcb4..c6d361c 100755 --- a/scripts/gen-options.awk +++ b/scripts/gen-options.awk @@ -123,6 +123,10 @@ END { } BEGIN { + # Check if "\\\\" in substitutions gives just one backslash. + bs = "x"; sub(/x/, "\\\\", bs); + bs = (length(bs) == 1 ? "\\\\" : "\\"); + has_actions = 0 sopt_string = "" num_options = 0 @@ -238,7 +242,7 @@ END { lopt_strings = add_to_strtab(lopt_strings, sorted_options[i], offsets) } gsub(/[^ ]+/, "\"&", lopt_strings) - gsub(/ /, "\\0\"\n\t", lopt_strings) + gsub(/ /, bs"0\"\n\t", lopt_strings) print "static const char lopt_strings[] =" print "\t" lopt_strings "\";\n" @@ -318,8 +322,8 @@ END { help_offsets[opt] = help_pos help_pos += length(help) + 1 - gsub(/"/, "\\\"", help) - gsub(/\n/, "\\n\"\n\t \"", help) + gsub(/"/, bs"\"", help) + gsub(/\n/, bs"n\"\n\t \"", help) help = "\tPN_(\"" opt "\",\n\t \"" help "\")" } } diff --git a/scripts/gen-strtab.awk b/scripts/gen-strtab.awk index 2265bb5..c5b02c6 100755 --- a/scripts/gen-strtab.awk +++ b/scripts/gen-strtab.awk @@ -80,6 +80,10 @@ END { } BEGIN { + # Check if "\\\\" in substitutions gives just one backslash. + bs = "x"; sub(/x/, "\\\\", bs); + bs = (length(bs) == 1 ? "\\\\" : "\\"); + opts["zero"] = 1 opts["macro"] = 0 collected = ident = "" @@ -157,7 +161,7 @@ END { for (i = 0; i < count; i++) { s = sorted_strings[i] - gsub(/\\\\/, "\2\2", s) + gsub(/\\\\/, "\2", s) if ((n = index(strtab "\1", s "\1")) > 0) { offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1)); if (!(sorted_strings[i] in nol10n)) @@ -173,7 +177,7 @@ END { } } - gsub(/\2/, "\\", strtab); + gsub("\2", bs bs, strtab); n = split(strtab, split_strtab, "\1"); for (i = 1; i <= n; i++) { printf("\t%4s ", i > !!opts["zero"] ? "\"\\0\"" : ""); @@ -206,9 +210,10 @@ END { # strings[ident] = val. function finish_string_input(strings, ident, val, n, tmpval) { - gsub(/\\\\/, "\1\1", val) - val = val (endline > startline ? "\n" : "") - gsub(/\\\n/, "", val) + gsub(/\\\\/, "\1", val); + if (endline > startline) + val = val "\n"; + gsub(/\\\n/, "", val); tmpval = "" while ((n = match(val, /\\[^abtnvfr]/)) > 0) { @@ -218,10 +223,10 @@ function finish_string_input(strings, ident, val, n, tmpval) tmpval = tmpval val # Escape special characters - gsub(/"/, "\\\"", tmpval) - gsub(/\t/, "\\t", tmpval) - gsub(/\n/, "\\n", tmpval) - gsub(/\1/, "\\", tmpval) + gsub(/"/, bs"\"", tmpval) + gsub(/\t/, bs"t", tmpval) + gsub(/\n/, bs"n", tmpval) + gsub("\1", bs bs, tmpval) strings[ident] = tmpval if (!current_l10n) { @@ -232,7 +237,7 @@ function finish_string_input(strings, ident, val, n, tmpval) function real_length(s, t) { t = length(s) - return t - gsub(/\\.|\2\2/, "&", s) + return t - gsub(/\\./, "&", s) } # bucketsort(dst, src) diff --git a/scripts/gen-tree.awk b/scripts/gen-tree.awk index c461e5d..55a7d5c 100755 --- a/scripts/gen-tree.awk +++ b/scripts/gen-tree.awk @@ -93,6 +93,10 @@ END { } BEGIN { + # Check if "\\\\" in substitutions gives just one backslash. + bs = "x"; sub(/x/, "\\\\", bs); + bs = (length(bs) == 1 ? "\\\\" : "\\"); + opts["strtab"] = 1; depth = max_depth = 0; @@ -187,7 +191,7 @@ END { } } - gsub(/\1/, "\"\n\t\"\\0\" \"", entry_strtab); + gsub("\1", "\"\n\t\"" bs "0\" \"", entry_strtab); sub(/^"/, "", entry_strtab); sub(/\n[^\n]*$/, ";", entry_strtab); print "\nstatic const char tree_strtab[] =" entry_strtab -- 2.43.2