X-Git-Url: https://git.draconx.ca/gitweb/dxcommon.git/blobdiff_plain/207d0dbfbbfa7ad70b59ada3a741895842958885..HEAD:/src/copysym.c diff --git a/src/copysym.c b/src/copysym.c index 8ecace7..2106965 100644 --- a/src/copysym.c +++ b/src/copysym.c @@ -1,5 +1,5 @@ /* - * Copyright © 2023 Nick Bowler + * Copyright © 2023-2024 Nick Bowler * * Helper function to output the copyright symbol in a specified encoding. * @@ -16,14 +16,30 @@ #include #include -#include -#include "xtra.h" +#include + +#if HAVE_INTTYPES_H +#include +typedef uint_least32_t dx_u32; +#else +#include +#if UINT_MAX >= 0xffffffff +typedef unsigned dx_u32; +#else +typedef unsigned long dx_u32; +#endif +#endif + +#define BSEARCH_ARRAY(key, arr, cmp) \ + bsearch(key, arr, sizeof (arr) / sizeof *(arr), sizeof *(arr), cmp) + +enum { PREFIXLEN = 5 }; -static int compar_5arr(const void *key, const void *elem_) +static int compar_prefix(const void *key, const void *elem_) { - const char (*elem)[5] = elem_; + const char (*elem)[PREFIXLEN+1] = (void *)elem_; - return strncmp(key, *elem, sizeof *elem); + return strncmp(key, *elem, PREFIXLEN); } /* @@ -76,16 +92,10 @@ static int compar_5arr(const void *key, const void *elem_) */ const char *copyright_symbol(const char *charset) { - /* All known encodings of the copyright symbol */ - static const char codes[] = - "\xc2\xa9" "\0" - "\x97" "\0" - "\xa8" "\0" - "\xb8" "\0" - "\xbf" "\0" - "\x8f\xa2\xed" "\0" - "\x81\x30\x84\x38" "\0" - "(C)"; + struct copysym_data { + char tab[15][PREFIXLEN+1]; + char codes[24]; + }; /* * We need the list below to be in lexicographic order in @@ -95,48 +105,55 @@ const char *copyright_symbol(const char *charset) # error this character encoding is unsupported, please report a bug. #endif - /* - * For character sets that include the copyright symbol, - * the first 5 characters suffices to distinguish amongst - * all the different possible encodings. - */ - static const char t1[][5] = { - "CP112", - "CP125", - "CP775", - "CP850", - "CP856", - "CP857", - "CP869", - "CP922", - "EUC-J", - "GB180", - "GEORG", - "ISO-8", - "KOI8-", - "PT154", - "UTF-8" + static const struct copysym_data data = { + /* + * For character sets that include the copyright symbol, + * the first 5 characters suffices to distinguish amongst + * all the different possible encodings. + * + * The final byte of each entry indicates the corresponding + * offset into the codes array, except for CP112x and ISO-8859 + * which use the values 0 and 1, respectively (handled below). + */ + .tab = + { + "CP112\x00", + "CP125\x05", + "CP775\x09", + "CP850\x0b", + "CP856\x0b", + "CP857\x0b", + "CP869\x07", + "CP922\x05", + "EUC-J\x0f", + "GB180\x13", + "GEORG\x05", + "ISO-8\x01", + "KOI8-\x0d", + "PT154\x05", + "UTF-8\x04" + }, + + /* All known encodings of the copyright symbol. */ + .codes = + "(C)" "\0" + "\xc2\xa9" "\0" + "\x97" "\0" + "\xa8" "\0" + "\xb8" "\0" + "\xbf" "\0" + "\x8f\xa2\xed" "\0" + "\x81\x30\x84\x38" }; - /* - * Each nibble in the results value contains the offset in the - * codes array for the corresponding index in t1, except that - * ISO-8859 matches the special value '2' (handled below). - */ - uint_least64_t results = 0x001921fb13777511; - const char (*m1)[sizeof *t1]; - unsigned x, cindex; + unsigned cindex = 0; + const char *m; - if (!charset) + if (!charset || !(m = BSEARCH_ARRAY(charset, data.tab, compar_prefix))) goto no_conv; - m1 = bsearch(charset, t1, XTRA_ARRAYSIZE(t1), sizeof *t1, compar_5arr); - if (!m1) - goto no_conv; - charset += 5; - - x = m1-t1; - cindex = (results >> (x << 2)) & 0xf; + cindex = m[PREFIXLEN]; + charset += PREFIXLEN; /* * We now need to identify encodings that match one of the 5-character @@ -154,17 +171,17 @@ const char *copyright_symbol(const char *charset) * ISO-8859-5 * ISO-8859-6 */ - if ((x == 0) != (*charset == '9')) { - /* CP112x, x != '9', no copyright symbol. */ - goto no_conv; - } else if (cindex == 2) { + if (cindex == 0) { + /* CP112x, only CP1129 has copyright symbol. */ + cindex = 5 * (*charset == '9'); + } else if (cindex == 1) { /* * ISO-8859 special case. Simply find and look at the final * two digits. The set bits in the 'accept' value indicate * which encodings have the copyright symbol. */ - uint_least32_t accept = 0x00380383; - uint_least32_t collect = 0; + dx_u32 accept = 0x00380383; + dx_u32 collect = 0; char c; while ((c = *charset++)) { @@ -174,14 +191,10 @@ const char *copyright_symbol(const char *charset) collect |= c - '0'; } - cindex = (accept >> (collect & 0x1f)) & 1; - if (!cindex) - goto no_conv; + cindex = 5 * ((accept >> (collect & 0x1f)) & 1); } - - return &codes[cindex]; no_conv: - return &codes[20]; + return (char *)&data + offsetof(struct copysym_data, codes) + cindex; } #endif