--- /dev/null
+/*
+ * Copyright © 2023 Nick Bowler
+ *
+ * Helper function to output the copyright symbol in a specified encoding.
+ *
+ * License WTFPL2: Do What The Fuck You Want To Public License, version 2.
+ * This is free software: you are free to do what the fuck you want to.
+ * There is NO WARRANTY, to the extent permitted by law.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if ENABLE_NLS
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "xtra.h"
+
+static int compar_5arr(const void *key, const void *elem_)
+{
+ const char (*elem)[5] = elem_;
+
+ return strncmp(key, *elem, sizeof *elem);
+}
+
+/*
+ * Return, as a multibyte string, the copyright symbol for the
+ * given character encoding, which is one of the strings returned
+ * by Gnulib's locale_charset function. In particular, we are
+ * looking for one of the strings:
+ *
+ * CP1129
+ * CP1250
+ * CP1251
+ * CP1252
+ * CP1253
+ * CP1254
+ * CP1256
+ * CP1257
+ * CP1258
+ * CP775
+ * CP850
+ * CP856
+ * CP857
+ * CP869
+ * CP922
+ * GEORGIAN-PS
+ * ISO-8859-1
+ * ISO-8859-13
+ * ISO-8859-14
+ * ISO-8859-15
+ * ISO-8859-7
+ * ISO-8859-8
+ * ISO-8859-9
+ * PT154
+ * EUC-JP
+ * GB18030
+ * KOI8-R
+ * KOI8-T
+ * KOI8-U
+ * UTF-8
+ *
+ * All of these are ASCII supersets. EBCDIC code pages like CP1122 are
+ * presently handled by returning (C), even if the character set does
+ * include the copyright symbol.
+ *
+ * To simplify the implementation, we allow some slop in the matching,
+ * as long as the result is valid for any actual encoding names.
+ *
+ * If NLS support is disabled, or if the character set does not
+ * include the copyright symbol, then the string (C) is returned
+ * in the C execution character set.
+ */
+const char *copyright_symbol(const char *charset)
+{
+ /* All known encodings of the copyright symbol */
+ static const char codes[] =
+ "\xc2\xa9" "\0"
+ "\x97" "\0"
+ "\xa8" "\0"
+ "\xb8" "\0"
+ "\xbf" "\0"
+ "\x8f\xa2\xed" "\0"
+ "\x81\x30\x84\x38" "\0"
+ "(C)";
+
+ /*
+ * We need the list below to be in lexicographic order in
+ * the C execution character encoding.
+ */
+#if 'B'>'E' || 'C'>'E' || 'E'>'G' || 'G'>'I' || 'K'>'P' || 'P'>'U'
+# error this character encoding is unsupported, please report a bug.
+#endif
+
+ /*
+ * For character sets that include the copyright symbol,
+ * the first 5 characters suffices to distinguish amongst
+ * all the different possible encodings.
+ */
+ static const char t1[][5] = {
+ "CP112",
+ "CP125",
+ "CP775",
+ "CP850",
+ "CP856",
+ "CP857",
+ "CP869",
+ "CP922",
+ "EUC-J",
+ "GB180",
+ "GEORG",
+ "ISO-8",
+ "KOI8-",
+ "PT154",
+ "UTF-8"
+ };
+
+ /*
+ * Each nibble in the results value contains the offset in the
+ * codes array for the corresponding index in t1, except that
+ * ISO-8859 matches the special value '2' (handled below).
+ */
+ uint_least64_t results = 0x001921fb13777511;
+ const char (*m1)[sizeof *t1];
+ unsigned x, cindex;
+
+ if (!charset)
+ goto no_conv;
+
+ m1 = bsearch(charset, t1, XTRA_ARRAYSIZE(t1), sizeof *t1, compar_5arr);
+ if (!m1)
+ goto no_conv;
+ charset += 5;
+
+ x = m1-t1;
+ cindex = (results >> (x << 2)) & 0xf;
+
+ /*
+ * We now need to identify encodings that match one of the 5-character
+ * prefixes above but don't actually have the copyright symbol in their
+ * character set. Specifically, these are:
+ *
+ * CP1122 (does have it, but EBCDIC)
+ * CP1124
+ * CP1125
+ * ISO-8859-10
+ * ISO-8859-11
+ * ISO-8859-2
+ * ISO-8859-3
+ * ISO-8859-4
+ * ISO-8859-5
+ * ISO-8859-6
+ */
+ if ((x == 0) != (*charset == '9')) {
+ /* CP112x, x != '9', no copyright symbol. */
+ goto no_conv;
+ } else if (cindex == 2) {
+ /*
+ * ISO-8859 special case. Simply find and look at the final
+ * two digits. The set bits in the 'accept' value indicate
+ * which encodings have the copyright symbol.
+ */
+ uint_least32_t accept = 0x00380383;
+ uint_least32_t collect = 0;
+ char c;
+
+ while ((c = *charset++)) {
+ collect <<= 4;
+
+ if (c != '-')
+ collect |= c - '0';
+ }
+
+ cindex = (accept >> (collect & 0x1f)) & 1;
+ if (!cindex)
+ goto no_conv;
+ }
+
+ return &codes[cindex];
+no_conv:
+ return &codes[20];
+}
+
+#endif