/*
- * Copyright © 2023 Nick Bowler
+ * Copyright © 2023-2024 Nick Bowler
*
* Helper function to output the copyright symbol in a specified encoding.
*
#include <stdlib.h>
#include <string.h>
-#include <stdint.h>
+#include <stddef.h>
+
+#if HAVE_INTTYPES_H
+#include <inttypes.h>
+typedef uint_least32_t dx_u32;
+#else
+#include <limits.h>
+#if UINT_MAX >= 0xffffffff
+typedef unsigned dx_u32;
+#else
+typedef unsigned long dx_u32;
+#endif
+#endif
-#define ARRAYSIZE(x) (sizeof (x) / sizeof (x)[0])
+#define BSEARCH_ARRAY(key, arr, cmp) \
+ bsearch(key, arr, sizeof (arr) / sizeof *(arr), sizeof *(arr), cmp)
-static int compar_5arr(const void *key, const void *elem_)
+enum { PREFIXLEN = 5 };
+
+static int compar_prefix(const void *key, const void *elem_)
{
- const char (*elem)[5] = (void *)elem_;
+ const char (*elem)[PREFIXLEN+1] = (void *)elem_;
- return strncmp(key, *elem, sizeof *elem);
+ return strncmp(key, *elem, PREFIXLEN);
}
/*
*/
const char *copyright_symbol(const char *charset)
{
- /* All known encodings of the copyright symbol */
- static const char codes[] =
- "\xc2\xa9" "\0"
- "\x97" "\0"
- "\xa8" "\0"
- "\xb8" "\0"
- "\xbf" "\0"
- "\x8f\xa2\xed" "\0"
- "\x81\x30\x84\x38" "\0"
- "(C)";
+ struct copysym_data {
+ char tab[15][PREFIXLEN+1];
+ char codes[24];
+ };
/*
* We need the list below to be in lexicographic order in
# error this character encoding is unsupported, please report a bug.
#endif
- /*
- * For character sets that include the copyright symbol,
- * the first 5 characters suffices to distinguish amongst
- * all the different possible encodings.
- */
- static const char t1[][5] = {
- "CP112",
- "CP125",
- "CP775",
- "CP850",
- "CP856",
- "CP857",
- "CP869",
- "CP922",
- "EUC-J",
- "GB180",
- "GEORG",
- "ISO-8",
- "KOI8-",
- "PT154",
- "UTF-8"
+ static const struct copysym_data data = {
+ /*
+ * For character sets that include the copyright symbol,
+ * the first 5 characters suffice to distinguish amongst
+ * all the different possible encodings.
+ *
+ * The final byte of each entry indicates the corresponding
+ * offset into the codes array, except for CP112x and ISO-8859
+ * which use the values 0 and 1, respectively (handled below).
+ */
+ .tab =
+ {
+ "CP112\x00",
+ "CP125\x05",
+ "CP775\x09",
+ "CP850\x0b",
+ "CP856\x0b",
+ "CP857\x0b",
+ "CP869\x07",
+ "CP922\x05",
+ "EUC-J\x0f",
+ "GB180\x13",
+ "GEORG\x05",
+ "ISO-8\x01",
+ "KOI8-\x0d",
+ "PT154\x05",
+ "UTF-8\x04"
+ },
+
+ /* All known encodings of the copyright symbol. */
+ .codes =
+ "(C)" "\0"
+ "\xc2\xa9" "\0"
+ "\x97" "\0"
+ "\xa8" "\0"
+ "\xb8" "\0"
+ "\xbf" "\0"
+ "\x8f\xa2\xed" "\0"
+ "\x81\x30\x84\x38"
};
- /*
- * Each nibble in the results value contains the offset in the
- * codes array for the corresponding index in t1, except that
- * ISO-8859 matches the special value '2' (handled below).
- */
- uint_least64_t results = 0x001921fb13777511;
- const char (*m1)[sizeof *t1];
- unsigned x, cindex;
+ unsigned cindex = 0;
+ const char *m;
- if (!charset)
+ if (!charset || !(m = BSEARCH_ARRAY(charset, data.tab, compar_prefix)))
goto no_conv;
- m1 = bsearch(charset, t1, ARRAYSIZE(t1), sizeof *t1, compar_5arr);
- if (!m1)
- goto no_conv;
- charset += 5;
-
- x = m1-t1;
- cindex = (results >> (x << 2)) & 0xf;
+ cindex = m[PREFIXLEN];
+ charset += PREFIXLEN;
/*
* We now need to identify encodings that match one of the 5-character
* ISO-8859-5
* ISO-8859-6
*/
- if ((x == 0) != (*charset == '9')) {
- /* CP112x, x != '9', no copyright symbol. */
- goto no_conv;
- } else if (cindex == 2) {
+ if (cindex == 0) {
+ /* CP112x: only CP1129 has the copyright symbol. */
+ cindex = 5 * (*charset == '9');
+ } else if (cindex == 1) {
/*
* ISO-8859 special case. Simply find and look at the final
* two digits. The set bits in the 'accept' value indicate
* which encodings have the copyright symbol.
*/
- uint_least32_t accept = 0x00380383;
- uint_least32_t collect = 0;
+ dx_u32 accept = 0x00380383;
+ dx_u32 collect = 0;
char c;
while ((c = *charset++)) {
collect |= c - '0';
}
- cindex = (accept >> (collect & 0x1f)) & 1;
- if (!cindex)
- goto no_conv;
+ cindex = 5 * ((accept >> (collect & 0x1f)) & 1);
}
-
- return &codes[cindex];
no_conv:
- return &codes[20];
+ return (char *)&data + offsetof(struct copysym_data, codes) + cindex;
}
#endif