dx_getline: Fix EOF handling in standard C fallback.

[dxcommon.git] / src / copysym.c
diff --git a/src/copysym.c b/src/copysym.c

index 3128c3e92d9da790ce77a995a502901f3fdf0562..2106965cd8b87615bb3b4bb456e1052585669df5 100644 (file)
--- a/src/copysym.c
+++ b/src/copysym.c
@@ -16,11 +16,11 @@
  
  #include <stdlib.h>
  #include <string.h>
  
  #include <stdlib.h>
  #include <string.h>
+#include <stddef.h>
  
  #if HAVE_INTTYPES_H
  #include <inttypes.h>
  typedef uint_least32_t dx_u32;
  
  #if HAVE_INTTYPES_H
  #include <inttypes.h>
  typedef uint_least32_t dx_u32;
-typedef uint_least64_t dx_u64;
  #else
  #include <limits.h>
  #if UINT_MAX >= 0xffffffff
  #else
  #include <limits.h>
  #if UINT_MAX >= 0xffffffff
@@ -28,16 +28,18 @@ typedef unsigned dx_u32;
  #else
  typedef unsigned long dx_u32;
  #endif
  #else
  typedef unsigned long dx_u32;
  #endif
-typedef unsigned long long dx_u64;
  #endif
  
  #endif
  
-#define ARRAYSIZE(x) (sizeof (x) / sizeof (x)[0])
+#define BSEARCH_ARRAY(key, arr, cmp) \
+       bsearch(key, arr, sizeof (arr) / sizeof *(arr), sizeof *(arr), cmp)
  
  
-static int compar_5arr(const void *key, const void *elem_)
+enum { PREFIXLEN = 5 };
+
+static int compar_prefix(const void *key, const void *elem_)
  {
  {
-       const char (*elem)[5] = (void *)elem_;
+       const char (*elem)[PREFIXLEN+1] = (void *)elem_;
  
  
-       return strncmp(key, *elem, sizeof *elem);
+       return strncmp(key, *elem, PREFIXLEN);
  }
  
  /*
  }
  
  /*
@@ -90,16 +92,10 @@ static int compar_5arr(const void *key, const void *elem_)
   */
  const char *copyright_symbol(const char *charset)
  {
   */
  const char *copyright_symbol(const char *charset)
  {
-       /* All known encodings of the copyright symbol */
-       static const char codes[] =
-               "\xc2\xa9"         "\0"
-               "\x97"             "\0"
-               "\xa8"             "\0"
-               "\xb8"             "\0"
-               "\xbf"             "\0"
-               "\x8f\xa2\xed"     "\0"
-               "\x81\x30\x84\x38" "\0"
-               "(C)";
+       struct copysym_data {
+               char tab[15][PREFIXLEN+1];
+               char codes[24];
+       };
  
         /*
          * We need the list below to be in lexicographic order in
  
         /*
          * We need the list below to be in lexicographic order in
@@ -109,48 +105,55 @@ const char *copyright_symbol(const char *charset)
  #  error this character encoding is unsupported, please report a bug.
  #endif
  
  #  error this character encoding is unsupported, please report a bug.
  #endif
  
-       /*
-        * For character sets that include the copyright symbol,
-        * the first 5 characters suffices to distinguish amongst
-        * all the different possible encodings.
-        */
-       static const char t1[][5] = {
-               "CP112",
-               "CP125",
-               "CP775",
-               "CP850",
-               "CP856",
-               "CP857",
-               "CP869",
-               "CP922",
-               "EUC-J",
-               "GB180",
-               "GEORG",
-               "ISO-8",
-               "KOI8-",
-               "PT154",
-               "UTF-8"
+       static const struct copysym_data data = {
+               /*
+                * For character sets that include the copyright symbol,
+                * the first 5 characters suffice to distinguish amongst
+                * all the different possible encodings.
+                *
+                * The final byte of each entry indicates the corresponding
+                * offset into the codes array, except for CP112x and ISO-8859
+                * which use the values 0 and 1, respectively (handled below).
+                */
+               .tab =
+               {
+               "CP112\x00",
+               "CP125\x05",
+               "CP775\x09",
+               "CP850\x0b",
+               "CP856\x0b",
+               "CP857\x0b",
+               "CP869\x07",
+               "CP922\x05",
+               "EUC-J\x0f",
+               "GB180\x13",
+               "GEORG\x05",
+               "ISO-8\x01",
+               "KOI8-\x0d",
+               "PT154\x05",
+               "UTF-8\x04"
+               },
+
+               /* All known encodings of the copyright symbol. */
+               .codes =
+               "(C)"          "\0"
+               "\xc2\xa9"     "\0"
+               "\x97"         "\0"
+               "\xa8"         "\0"
+               "\xb8"         "\0"
+               "\xbf"         "\0"
+               "\x8f\xa2\xed" "\0"
+               "\x81\x30\x84\x38"
         };
  
         };
  
-       /*
-        * Each nibble in the results value contains the offset in the
-        * codes array for the corresponding index in t1, except that
-        * ISO-8859 matches the special value '2' (handled below).
-        */
-       dx_u64 results = 0x001921fb13777511ull;
-       const char (*m1)[sizeof *t1];
-       unsigned x, cindex;
-
-       if (!charset)
-               goto no_conv;
+       unsigned cindex = 0;
+       const char *m;
  
  
-       m1 = bsearch(charset, t1, ARRAYSIZE(t1), sizeof *t1, compar_5arr);
-       if (!m1)
+       if (!charset || !(m = BSEARCH_ARRAY(charset, data.tab, compar_prefix)))
                 goto no_conv;
                 goto no_conv;
-       charset += 5;
  
  
-       x = m1-t1;
-       cindex = (results >> (x << 2)) & 0xf;
+       cindex = m[PREFIXLEN];
+       charset += PREFIXLEN;
  
         /*
          * We now need to identify encodings that match one of the 5-character
  
         /*
          * We now need to identify encodings that match one of the 5-character
@@ -168,10 +171,10 @@ const char *copyright_symbol(const char *charset)
          *   ISO-8859-5
          *   ISO-8859-6
          */
          *   ISO-8859-5
          *   ISO-8859-6
          */
-       if ((x == 0) != (*charset == '9')) {
-               /* CP112x, x != '9', no copyright symbol. */
-               goto no_conv;
-       } else if (cindex == 2) {
+       if (cindex == 0) {
+               /* CP112x, only CP1129 has copyright symbol. */
+               cindex = 5 * (*charset == '9');
+       } else if (cindex == 1) {
                 /*
                  * ISO-8859 special case.  Simply find and look at the final
                  * two digits.  The set bits in the 'accept' value indicate
                 /*
                  * ISO-8859 special case.  Simply find and look at the final
                  * two digits.  The set bits in the 'accept' value indicate
@@ -188,14 +191,10 @@ const char *copyright_symbol(const char *charset)
                                 collect |= c - '0';
                 }
  
                                 collect |= c - '0';
                 }
  
-               cindex = (accept >> (collect & 0x1f)) & 1;
-               if (!cindex)
-                       goto no_conv;
+               cindex = 5 * ((accept >> (collect & 0x1f)) & 1);
         }
         }
-
-       return &codes[cindex];
  no_conv:
  no_conv:
-       return &codes[20];
+       return (char *)&data + offsetof(struct copysym_data, codes) + cindex;
  }
  
  #endif
  }
  
  #endif