X-Git-Url: https://git.draconx.ca/gitweb/dxcommon.git/blobdiff_plain/207d0dbfbbfa7ad70b59ada3a741895842958885..HEAD:/src/copysym.c

diff --git a/src/copysym.c b/src/copysym.c
index 8ecace7..2106965 100644
--- a/src/copysym.c
+++ b/src/copysym.c
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2023 Nick Bowler
+ * Copyright © 2023-2024 Nick Bowler
  *
  * Helper function to output the copyright symbol in a specified encoding.
  *
@@ -16,14 +16,30 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include <stdint.h>
-#include "xtra.h"
+#include <stddef.h>
+
+#if HAVE_INTTYPES_H
+#include <inttypes.h>
+typedef uint_least32_t dx_u32;
+#else
+#include <limits.h>
+#if UINT_MAX >= 0xffffffff
+typedef unsigned dx_u32;
+#else
+typedef unsigned long dx_u32;
+#endif
+#endif
+
+#define BSEARCH_ARRAY(key, arr, cmp) \
+	bsearch(key, arr, sizeof (arr) / sizeof *(arr), sizeof *(arr), cmp)
+
+enum { PREFIXLEN = 5 };
 
-static int compar_5arr(const void *key, const void *elem_)
+static int compar_prefix(const void *key, const void *elem_)
 {
-	const char (*elem)[5] = elem_;
+	const char (*elem)[PREFIXLEN+1] = (void *)elem_;
 
-	return strncmp(key, *elem, sizeof *elem);
+	return strncmp(key, *elem, PREFIXLEN);
 }
 
 /*
@@ -76,16 +92,10 @@ static int compar_5arr(const void *key, const void *elem_)
  */
 const char *copyright_symbol(const char *charset)
 {
-	/* All known encodings of the copyright symbol */
-	static const char codes[] =
-		"\xc2\xa9"         "\0"
-		"\x97"             "\0"
-		"\xa8"             "\0"
-		"\xb8"             "\0"
-		"\xbf"             "\0"
-		"\x8f\xa2\xed"     "\0"
-		"\x81\x30\x84\x38" "\0"
-		"(C)";
+	struct copysym_data {
+		char tab[15][PREFIXLEN+1];
+		char codes[24];
+	};
 
 	/*
 	 * We need the list below to be in lexicographic order in
@@ -95,48 +105,55 @@ const char *copyright_symbol(const char *charset)
 #  error this character encoding is unsupported, please report a bug.
 #endif
 
-	/*
-	 * For character sets that include the copyright symbol,
-	 * the first 5 characters suffices to distinguish amongst
-	 * all the different possible encodings.
-	 */
-	static const char t1[][5] = {
-		"CP112",
-		"CP125",
-		"CP775",
-		"CP850",
-		"CP856",
-		"CP857",
-		"CP869",
-		"CP922",
-		"EUC-J",
-		"GB180",
-		"GEORG",
-		"ISO-8",
-		"KOI8-",
-		"PT154",
-		"UTF-8"
+	static const struct copysym_data data = {
+		/*
+		 * For character sets that include the copyright symbol,
+		 * the first 5 characters suffices to distinguish amongst
+		 * all the different possible encodings.
+		 *
+		 * The final byte of each entry indicates the corresponding
+		 * offset into the codes array, except for CP112x and ISO-8859
+		 * which use the values 0 and 1, respectively (handled below).
+		 */
+		.tab =
+		{
+		"CP112\x00",
+		"CP125\x05",
+		"CP775\x09",
+		"CP850\x0b",
+		"CP856\x0b",
+		"CP857\x0b",
+		"CP869\x07",
+		"CP922\x05",
+		"EUC-J\x0f",
+		"GB180\x13",
+		"GEORG\x05",
+		"ISO-8\x01",
+		"KOI8-\x0d",
+		"PT154\x05",
+		"UTF-8\x04"
+		},
+
+		/* All known encodings of the copyright symbol. */
+		.codes =
+		"(C)"          "\0"
+		"\xc2\xa9"     "\0"
+		"\x97"         "\0"
+		"\xa8"         "\0"
+		"\xb8"         "\0"
+		"\xbf"         "\0"
+		"\x8f\xa2\xed" "\0"
+		"\x81\x30\x84\x38"
 	};
 
-	/*
-	 * Each nibble in the results value contains the offset in the
-	 * codes array for the corresponding index in t1, except that
-	 * ISO-8859 matches the special value '2' (handled below).
-	 */
-	uint_least64_t results = 0x001921fb13777511;
-	const char (*m1)[sizeof *t1];
-	unsigned x, cindex;
+	unsigned cindex = 0;
+	const char *m;
 
-	if (!charset)
+	if (!charset || !(m = BSEARCH_ARRAY(charset, data.tab, compar_prefix)))
 		goto no_conv;
 
-	m1 = bsearch(charset, t1, XTRA_ARRAYSIZE(t1), sizeof *t1, compar_5arr);
-	if (!m1)
-		goto no_conv;
-	charset += 5;
-
-	x = m1-t1;
-	cindex = (results >> (x << 2)) & 0xf;
+	cindex = m[PREFIXLEN];
+	charset += PREFIXLEN;
 
 	/*
 	 * We now need to identify encodings that match one of the 5-character
@@ -154,17 +171,17 @@ const char *copyright_symbol(const char *charset)
 	 *   ISO-8859-5
 	 *   ISO-8859-6
 	 */
-	if ((x == 0) != (*charset == '9')) {
-		/* CP112x, x != '9', no copyright symbol. */
-		goto no_conv;
-	} else if (cindex == 2) {
+	if (cindex == 0) {
+		/* CP112x, only CP1129 has copyright symbol. */
+		cindex = 5 * (*charset == '9');
+	} else if (cindex == 1) {
 		/*
 		 * ISO-8859 special case.  Simply find and look at the final
 		 * two digits.  The set bits in the 'accept' value indicate
 		 * which encodings have the copyright symbol.
 		 */
-		uint_least32_t accept  = 0x00380383;
-		uint_least32_t collect = 0;
+		dx_u32 accept  = 0x00380383;
+		dx_u32 collect = 0;
 		char c;
 
 		while ((c = *charset++)) {
@@ -174,14 +191,10 @@ const char *copyright_symbol(const char *charset)
 				collect |= c - '0';
 		}
 
-		cindex = (accept >> (collect & 0x1f)) & 1;
-		if (!cindex)
-			goto no_conv;
+		cindex = 5 * ((accept >> (collect & 0x1f)) & 1);
 	}
-
-	return &codes[cindex];
 no_conv:
-	return &codes[20];
+	return (char *)&data + offsetof(struct copysym_data, codes) + cindex;
 }
 
 #endif