From 62fa1dac8cf3098d212c0e8f4b3cc2cadaddd4c5 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Tue, 26 Dec 2023 20:52:01 -0500 Subject: [PATCH] libcdecl: Implement portable alternative to strtoumax. It's not really OK for the scanner to use a strtoumax fallback with limited range, as this means the library can't actually parse its own output in general. This causes test failures on systems which don't have a suitable function in the C library. Try hard to have a compact implementation, letting the scanner do as much of the job as possible to reduce the amount of extra checks that have to happen later. For the ELF shared library case, the result seems to be pretty good overall: with GCC, the size savings from the no-longer-needed PLT entry are similar to the amount of extra code added. As a bonus, this also avoids any unwanted locale dependency, since the C standard permits strtoumax et al. to do weird things outside of the "C" locale, although I'm not sure to what extent any real-world systems take advantage of that freedom. 
--- Makefile.am | 4 +-- src/intconv.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/scan.l | 50 +++++++++++++++++-------------------- 3 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 src/intconv.h diff --git a/Makefile.am b/Makefile.am index b0ba8bd..eae1366 100644 --- a/Makefile.am +++ b/Makefile.am @@ -31,8 +31,8 @@ EXTRA_DIST = bootstrap $(DX_BASEDIR)/scripts/fix-gnulib.pl m4/gnulib-cache.m4 \ dist_man_MANS = doc/cdecl99.1 doc/libcdecl.3 noinst_HEADERS = conf_pre.h conf_post.h common/src/help.h common/src/tap.h \ - common/src/xtra.h src/cdecl.h src/scan.h src/parse.h \ - src/version.h t/declgen.h t/test.h + common/src/xtra.h src/cdecl.h src/intconv.h src/scan.h \ + src/parse.h src/version.h t/declgen.h t/test.h noinst_DATA = $(MOFILES) diff --git a/src/intconv.h b/src/intconv.h new file mode 100644 index 0000000..17644f2 --- /dev/null +++ b/src/intconv.h @@ -0,0 +1,68 @@ +/* + * Copyright © 2023 Nick Bowler + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#ifndef CDECL99_INTCONV_H_ +#define CDECL99_INTCONV_H_ + +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> + +enum { INTCONV_OCTAL = 8, INTCONV_DECIMAL = 10, INTCONV_HEXADECIMAL = 16 }; + +/* + * Multiply *v by base, which must be one of the above enumeration constants, + * and add digit, updating *v with the result. + * + * If the result does not fit in uintmax_t, then 0 is returned. 
Otherwise, + * a non-zero result is returned. + */ +static inline bool intconv_shift(uintmax_t *v, unsigned base, unsigned digit) +{ + uintmax_t old_v = *v; + + if (old_v > (uintmax_t)-1 / base) + return false; + old_v *= base; + + return (*v = old_v + digit) >= old_v; +} + +/* + * Assuming d is a hexadecimal digit character (converted to unsigned char), + * return the corresponding value of that digit (between 0 and 15, inclusive). + */ +static inline unsigned char intconv_digit(unsigned char d) +{ + if (d >= '0' && d <= '9') { + return d - '0'; + } else { +#if ('A' & 7) == 1 && ('B' & 7) == 2 && ('C' & 7) == 3 \ + && ('a' & 7) == 1 && ('b' & 7) == 2 && ('c' & 7) == 3 \ + && ('D' & 7) == 4 && ('E' & 7) == 5 && ('F' & 7) == 6 \ + && ('d' & 7) == 4 && ('e' & 7) == 5 && ('f' & 7) == 6 + /* EBCDIC or ASCII-like encoding */ + return 9 + (d & 7); +#else + /* Something else */ + static const char idx[] = "abcdef..ABCDEF"; + return 10 + (((char *)memchr(idx, d, sizeof idx) - idx) & 7); +#endif + } +} + +#endif diff --git a/src/scan.l b/src/scan.l index fbe80b2..48314d4 100644 --- a/src/scan.l +++ b/src/scan.l @@ -33,20 +33,7 @@ #include "cdecl-internal.h" #include "cdecl.h" #include "errmsg.h" - -#if HAVE_STRTOUMAX -/* Best case, implementation provides strtoumax. */ -# define STRTOUMAX strtoumax -#elif HAVE_STRTOULL -/* Fall back to strtoull, with possibly reduced range. */ -#define STRTOUMAX strtoull -#elif HAVE___STRTOULL -/* HP-UX 11 has __strtoull in */ -#define STRTOUMAX __strtoull -#else -/* Fall back to strtoul, with possibly reduced range. 
*/ -#define STRTOUMAX strtoul -#endif +#include "intconv.h" static char *to_octal(char *dst, unsigned val) { @@ -111,11 +98,11 @@ static void to_readable_ch(char *dst, char c) %} IDENT [_[:alpha:]][-_[:alnum:]]* -INTEGER 0[Xx][[:xdigit:]]*|[[:digit:]]+ %% %{ + int intconv_base; char *c; %} @@ -139,22 +126,29 @@ INTEGER 0[Xx][[:xdigit:]]*|[[:digit:]]+ return UNPACK_TOKEN(match[sizeof tab[0]]); } -{INTEGER} { - char *end; - - errno = 0; - yylval->uintval = STRTOUMAX(yytext, &end, 0); - if (errno == ERANGE) { - cdecl__errmsg(CDECL__ERANGE); - return T_LEX_ERROR; - } - if (*end) { - cdecl__errmsg(CDECL__EBADINT); - return T_LEX_ERROR; +0[0-7]* { intconv_base = INTCONV_OCTAL; goto int_parse; } +[1-9][0-9]* { intconv_base = INTCONV_DECIMAL; goto int_parse; } +0[Xx][[:xdigit:]]+ { + unsigned char d; + uintmax_t v; + + yytext += 2; + intconv_base = INTCONV_HEXADECIMAL; +int_parse: + for (v = 0; (d = *yytext++);) { + if (!intconv_shift(&v, intconv_base, intconv_digit(d))) { + cdecl__errmsg(CDECL__ERANGE); + return T_LEX_ERROR; + } } + yylval->uintval = v; return T_UINT; } +0[Xx]|[0-9]+ { + cdecl__errmsg(CDECL__EBADINT); + return T_LEX_ERROR; +} {IDENT} { int len = yyleng, tok; @@ -180,7 +174,7 @@ INTEGER 0[Xx][[:xdigit:]]*|[[:digit:]]+ yyless(strcspn(yytext, "-")); #endif if (!(yylval->item = cdecl__alloc_item(len+1))) - return T_LEX_ERROR; \ + return T_LEX_ERROR; memcpy(yylval->item->s, yytext, len+1); } return UNPACK_TOKEN(tok); -- 2.43.2