2 * Copyright © 2023-2024 Nick Bowler
4 * Helper function to output the copyright symbol in a specified encoding.
6 * License WTFPL2: Do What The Fuck You Want To Public License, version 2.
7 * This is free software: you are free to do what the fuck you want to.
8 * There is NO WARRANTY, to the extent permitted by law.
/*
 * dx_u32: an unsigned integer type at least 32 bits wide.  It holds the
 * ISO-8859 'accept' bitmask in copyright_symbol, which is shifted right
 * by up to 31 bit positions, so all 32 low-order bits must be available.
 *
 * NOTE(review): only part of the preprocessor logic that selects among
 * the alternatives below is visible here.  Presumably uint_least32_t is
 * used when <stdint.h> is available, otherwise plain unsigned when
 * UINT_MAX shows it has at least 32 bits, otherwise unsigned long --
 * confirm against the full conditional.
 */
23 typedef uint_least32_t dx_u32;
26 #if UINT_MAX >= 0xffffffff
27 typedef unsigned dx_u32;
29 typedef unsigned long dx_u32;
/*
 * Binary-search the array arr for key with bsearch, using cmp as the
 * comparison function.  The element count and element size are both
 * derived with sizeof, so arr must be a real array object (not a pointer
 * to its first element), and it must already be sorted consistently
 * with cmp.  Expands to the bsearch call itself, which yields a pointer
 * to a matching element or a null pointer if there is none.
 */
33 #define BSEARCH_ARRAY(key, arr, cmp) \
34 bsearch(key, arr, sizeof (arr) / sizeof *(arr), sizeof *(arr), cmp)
/*
 * Number of leading characters of a charset name that are used as the
 * lookup key.  Lookup table entries are PREFIXLEN+1 bytes long: the
 * prefix itself followed by one extra byte read back as m[PREFIXLEN]
 * after a successful search.
 */
36 enum { PREFIXLEN = 5 };
/*
 * bsearch comparison callback for tables of (PREFIXLEN+1)-byte entries.
 *
 * key is the string being looked up and elem_ points at one table entry.
 * Only the first PREFIXLEN characters are compared (via strncmp), so the
 * final byte of the entry never takes part in the match and any text in
 * key beyond the prefix is ignored.
 *
 * Returns the strncmp result: negative, zero or positive according to
 * whether key's prefix sorts before, equal to or after the entry.
 */
38 static int compar_prefix(const void *key, const void *elem_)
40 const char (*elem)[PREFIXLEN+1] = (void *)elem_;
42 return strncmp(key, *elem, PREFIXLEN);
46 * Return, as a multibyte string, the copyright symbol for the
47 * given character encoding, which is one of the strings returned
48 * by Gnulib's locale_charset function. In particular, we are
49 * looking for one of the strings:
82 * All of these are ASCII supersets. EBCDIC code pages like CP1122 are
83 * presently handled by returning (C), even if the character set does
84 * include the copyright symbol.
86 * To simplify the implementation, we allow some slop in the matching,
87 * as long as the result is valid for any actual encoding names.
89 * If NLS support is disabled, or if the character set does not
90 * include the copyright symbol, then the string (C) is returned
91 * in the C execution character set.
93 const char *copyright_symbol(const char *charset)
96 char tab[15][PREFIXLEN+1];
101 * We need the list below to be in lexicographic order in
102 * the C execution character encoding.
104 #if 'B'>'E' || 'C'>'E' || 'E'>'G' || 'G'>'I' || 'K'>'P' || 'P'>'U'
105 # error this character encoding is unsupported, please report a bug.
108 static const struct copysym_data data = {
110 * For character sets that include the copyright symbol,
111 * the first 5 characters suffice to distinguish amongst
112 * all the different possible encodings.
114 * The final byte of each entry indicates the corresponding
115 * offset into the codes array, except for CP112x and ISO-8859
116 * which use the values 0 and 1, respectively (handled below).
137 /* All known encodings of the copyright symbol. */
152 if (!charset || !(m = BSEARCH_ARRAY(charset, data.tab, compar_prefix)))
155 cindex = m[PREFIXLEN];
156 charset += PREFIXLEN;
159 * We now need to identify encodings that match one of the 5-character
160 * prefixes above but don't actually have the copyright symbol in their
161 * character set. Specifically, these are:
163 * CP1122 (does have it, but EBCDIC)
175 /* CP112x: only CP1129 has the copyright symbol. */
176 cindex = 5 * (*charset == '9');
177 } else if (cindex == 1) {
179 * ISO-8859 special case. Simply find and look at the final
180 * two digits. The set bits in the 'accept' value indicate
181 * which encodings have the copyright symbol.
183 dx_u32 accept = 0x00380383;
187 while ((c = *charset++)) {
194 cindex = 5 * ((accept >> (collect & 0x1f)) & 1);
197 return (char *)&data + offsetof(struct copysym_data, codes) + cindex;