From 984752355c18f9cfac6425e6387a2d157c011846 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Tue, 20 Feb 2024 21:20:37 -0500 Subject: [PATCH] copysym: Merge tables into a single static allocation. On x86_64, gcc overaligns static arrays at least 16 bytes long onto a 16-byte boundary, and arrays at least 32 bytes long on a 32-byte boundary, which for these tables seems to be totally unnecessary. Putting both arrays into one structure means that at least within this one translation unit, we don't pointlessly waste space padding between the two arrays to maintain unneeded alignment. --- src/copysym.c | 53 +++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/src/copysym.c b/src/copysym.c index ca23c44..2106965 100644 --- a/src/copysym.c +++ b/src/copysym.c @@ -92,16 +92,10 @@ static int compar_prefix(const void *key, const void *elem_) */ const char *copyright_symbol(const char *charset) { - /* All known encodings of the copyright symbol */ - static const char codes[] = - "(C)" "\0" - "\xc2\xa9" "\0" - "\x97" "\0" - "\xa8" "\0" - "\xb8" "\0" - "\xbf" "\0" - "\x8f\xa2\xed" "\0" - "\x81\x30\x84\x38"; + struct copysym_data { + char tab[15][PREFIXLEN+1]; + char codes[24]; + }; /* * We need the list below to be in lexicographic order in @@ -111,17 +105,18 @@ const char *copyright_symbol(const char *charset) # error this character encoding is unsupported, please report a bug. #endif - /* - * For character sets that include the copyright symbol, - * the first 5 characters suffices to distinguish amongst - * all the different possible encodings. - * - * The final byte of each entry indicates the corresponding - * offset into the codes array, except for CP112x and ISO-8859-x - * which use the special values 0 and 1, respectively (handled - * below). - */ - static const char t1[][PREFIXLEN+1] = { + static const struct copysym_data data = { + /* + * For character sets that include the copyright symbol, + * the first 5 characters suffices to distinguish amongst + * all the different possible encodings. + * + * The final byte of each entry indicates the corresponding + * offset into the codes array, except for CP112x and ISO-8859 + * which use the values 0 and 1, respectively (handled below). + */ + .tab = + { "CP112\x00", "CP125\x05", "CP775\x09", @@ -137,12 +132,24 @@ const char *copyright_symbol(const char *charset) "KOI8-\x0d", "PT154\x05", "UTF-8\x04" + }, + + /* All known encodings of the copyright symbol. */ + .codes = + "(C)" "\0" + "\xc2\xa9" "\0" + "\x97" "\0" + "\xa8" "\0" + "\xb8" "\0" + "\xbf" "\0" + "\x8f\xa2\xed" "\0" + "\x81\x30\x84\x38" }; unsigned cindex = 0; const char *m; - if (!charset || !(m = BSEARCH_ARRAY(charset, t1, compar_prefix))) + if (!charset || !(m = BSEARCH_ARRAY(charset, data.tab, compar_prefix))) goto no_conv; cindex = m[PREFIXLEN]; @@ -187,7 +194,7 @@ const char *copyright_symbol(const char *charset) cindex = 5 * ((accept >> (collect & 0x1f)) & 1); } no_conv: - return codes+cindex; + return (char *)&data + offsetof(struct copysym_data, codes) + cindex; } #endif -- 2.43.2