Import getline helper from cdecl99.

[dxcommon.git] / src / copysym.c
diff --git a/src/copysym.c b/src/copysym.c

index 8ecace7c32a68d924bb9a2ca2932ad9d578f04a5..2106965cd8b87615bb3b4bb456e1052585669df5 100644 (file)
--- a/src/copysym.c
+++ b/src/copysym.c
@@ -1,5 +1,5 @@
  /*
- * Copyright © 2023 Nick Bowler
+ * Copyright © 2023-2024 Nick Bowler
   *
   * Helper function to output the copyright symbol in a specified encoding.
   *
@@ -16,14 +16,30 @@
  
  #include <stdlib.h>
  #include <string.h>
-#include <stdint.h>
-#include "xtra.h"
+#include <stddef.h>
+
+#if HAVE_INTTYPES_H
+#include <inttypes.h>
+typedef uint_least32_t dx_u32;
+#else
+#include <limits.h>
+#if UINT_MAX >= 0xffffffff
+typedef unsigned dx_u32;
+#else
+typedef unsigned long dx_u32;
+#endif
+#endif
+
+#define BSEARCH_ARRAY(key, arr, cmp) \
+       bsearch(key, arr, sizeof (arr) / sizeof *(arr), sizeof *(arr), cmp)
+
+enum { PREFIXLEN = 5 };
  
-static int compar_5arr(const void *key, const void *elem_)
+static int compar_prefix(const void *key, const void *elem_)
  {
-       const char (*elem)[5] = elem_;
+       const char (*elem)[PREFIXLEN+1] = (void *)elem_;
  
-       return strncmp(key, *elem, sizeof *elem);
+       return strncmp(key, *elem, PREFIXLEN);
  }
  
  /*
@@ -76,16 +92,10 @@ static int compar_5arr(const void *key, const void *elem_)
   */
  const char *copyright_symbol(const char *charset)
  {
-       /* All known encodings of the copyright symbol */
-       static const char codes[] =
-               "\xc2\xa9"         "\0"
-               "\x97"             "\0"
-               "\xa8"             "\0"
-               "\xb8"             "\0"
-               "\xbf"             "\0"
-               "\x8f\xa2\xed"     "\0"
-               "\x81\x30\x84\x38" "\0"
-               "(C)";
+       struct copysym_data {
+               char tab[15][PREFIXLEN+1];
+               char codes[24];
+       };
  
         /*
          * We need the list below to be in lexicographic order in
@@ -95,48 +105,55 @@ const char *copyright_symbol(const char *charset)
  #  error this character encoding is unsupported, please report a bug.
  #endif
  
-       /*
-        * For character sets that include the copyright symbol,
-        * the first 5 characters suffices to distinguish amongst
-        * all the different possible encodings.
-        */
-       static const char t1[][5] = {
-               "CP112",
-               "CP125",
-               "CP775",
-               "CP850",
-               "CP856",
-               "CP857",
-               "CP869",
-               "CP922",
-               "EUC-J",
-               "GB180",
-               "GEORG",
-               "ISO-8",
-               "KOI8-",
-               "PT154",
-               "UTF-8"
+       static const struct copysym_data data = {
+               /*
+                * For character sets that include the copyright symbol,
+                * the first 5 characters suffice to distinguish amongst
+                * all the different possible encodings.
+                *
+                * The final byte of each entry indicates the corresponding
+                * offset into the codes array, except for CP112x and ISO-8859
+                * which use the values 0 and 1, respectively (handled below).
+                */
+               .tab =
+               {
+               "CP112\x00",
+               "CP125\x05",
+               "CP775\x09",
+               "CP850\x0b",
+               "CP856\x0b",
+               "CP857\x0b",
+               "CP869\x07",
+               "CP922\x05",
+               "EUC-J\x0f",
+               "GB180\x13",
+               "GEORG\x05",
+               "ISO-8\x01",
+               "KOI8-\x0d",
+               "PT154\x05",
+               "UTF-8\x04"
+               },
+
+               /* All known encodings of the copyright symbol. */
+               .codes =
+               "(C)"          "\0"
+               "\xc2\xa9"     "\0"
+               "\x97"         "\0"
+               "\xa8"         "\0"
+               "\xb8"         "\0"
+               "\xbf"         "\0"
+               "\x8f\xa2\xed" "\0"
+               "\x81\x30\x84\x38"
         };
  
-       /*
-        * Each nibble in the results value contains the offset in the
-        * codes array for the corresponding index in t1, except that
-        * ISO-8859 matches the special value '2' (handled below).
-        */
-       uint_least64_t results = 0x001921fb13777511;
-       const char (*m1)[sizeof *t1];
-       unsigned x, cindex;
+       unsigned cindex = 0;
+       const char *m;
  
-       if (!charset)
+       if (!charset || !(m = BSEARCH_ARRAY(charset, data.tab, compar_prefix)))
                 goto no_conv;
  
-       m1 = bsearch(charset, t1, XTRA_ARRAYSIZE(t1), sizeof *t1, compar_5arr);
-       if (!m1)
-               goto no_conv;
-       charset += 5;
-
-       x = m1-t1;
-       cindex = (results >> (x << 2)) & 0xf;
+       cindex = m[PREFIXLEN];
+       charset += PREFIXLEN;
  
         /*
          * We now need to identify encodings that match one of the 5-character
@@ -154,17 +171,17 @@ const char *copyright_symbol(const char *charset)
          *   ISO-8859-5
          *   ISO-8859-6
          */
-       if ((x == 0) != (*charset == '9')) {
-               /* CP112x, x != '9', no copyright symbol. */
-               goto no_conv;
-       } else if (cindex == 2) {
+       if (cindex == 0) {
+               /* CP112x, only CP1129 has the copyright symbol. */
+               cindex = 5 * (*charset == '9');
+       } else if (cindex == 1) {
                 /*
                  * ISO-8859 special case.  Simply find and look at the final
                  * two digits.  The set bits in the 'accept' value indicate
                  * which encodings have the copyright symbol.
                  */
-               uint_least32_t accept  = 0x00380383;
-               uint_least32_t collect = 0;
+               dx_u32 accept  = 0x00380383;
+               dx_u32 collect = 0;
                 char c;
  
                 while ((c = *charset++)) {
@@ -174,14 +191,10 @@ const char *copyright_symbol(const char *charset)
                                 collect |= c - '0';
                 }
  
-               cindex = (accept >> (collect & 0x1f)) & 1;
-               if (!cindex)
-                       goto no_conv;
+               cindex = 5 * ((accept >> (collect & 0x1f)) & 1);
         }
-
-       return &codes[cindex];
  no_conv:
-       return &codes[20];
+       return (char *)&data + offsetof(struct copysym_data, codes) + cindex;
  }
  
  #endif