From: Nick Bowler Date: Wed, 27 Dec 2023 01:34:20 +0000 (-0500) Subject: libcdecl: Fix scanning of hexadecimal constants. X-Git-Tag: v1.3~54 X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/commitdiff_plain/fed6418a32d3b0fd6036d169bd325c1172b391b5 libcdecl: Fix scanning of hexadecimal constants. Due to an oversight, hexadecimal constants such as 0X1234 are not parsed correctly, scanning as two tokens: the integer 0, and the identifier X1234. A mistake in the same area also scans plain 0x as two tokens in the same way, rather than returning a syntax error (although there should not be any valid parses if such tokens are returned, so this is a minor issue). Fix both of these problems, and add a new internal test case which directly verifies the scanner output for a selection of integer constants. --- diff --git a/Makefile.am b/Makefile.am index 90818f5..b0ba8bd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -118,6 +118,10 @@ check_PROGRAMS += t/rendertest t_rendertest_LDADD = $(TEST_LIBS) $(t_rendertest_OBJECTS): $(gnulib_headers) +check_PROGRAMS += t/scantest +t_scantest_LDADD = src/scan.lo src/parse.lo src/keywords.lo $(TEST_LIBS) +$(t_scantest_OBJECTS): $(gnulib_headers) src/scan.h src/parse.h + src/error.lo: src/errmsg.h src/keywords.lo: src/parse.h src/output.lo: src/parse.h src/specstr.h diff --git a/src/scan.l b/src/scan.l index 2b92846..fbe80b2 100644 --- a/src/scan.l +++ b/src/scan.l @@ -111,7 +111,7 @@ static void to_readable_ch(char *dst, char c) %} IDENT [_[:alpha:]][-_[:alnum:]]* -INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ +INTEGER 0[Xx][[:xdigit:]]*|[[:digit:]]+ %% diff --git a/t/.gitignore b/t/.gitignore index 58a948e..895ab96 100644 --- a/t/.gitignore +++ b/t/.gitignore @@ -5,4 +5,5 @@ /randomdecl /rendertest /rng-test +/scantest /typegen.h diff --git a/t/crossparse.c b/t/crossparse.c index 4122b1c..f66ca62 100644 --- a/t/crossparse.c +++ b/t/crossparse.c @@ -52,7 +52,7 @@ static void print_help(void) enum { MODE_CDECL, - MODE_ENGLISH, + 
MODE_ENGLISH }; typedef struct cdecl *parse_func(const char *); diff --git a/t/scantest.c b/t/scantest.c new file mode 100644 index 0000000..093c6ca --- /dev/null +++ b/t/scantest.c @@ -0,0 +1,166 @@ +/* + * Helper to verify scanner output. + * Copyright © 2023 Nick Bowler + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <getopt.h> + +#include "scan.h" +#include "cdecl-internal.h" +#include "test.h" + +/* Stubs */ +static union { struct parse_item item; char buf[1000]; } stub_item; +struct parse_item *cdecl__alloc_item(size_t sz) +{ + return &stub_item.item; +} + +void cdecl__errmsg(unsigned msg) +{ +} + +void cdecl__err(unsigned code, const char *fmt, const char *arg) +{ +} + +size_t cdecl__strlcpy(char *dst, const char *src, size_t len) +{ + abort(); +} + +#define PROGNAME "scantest" +static const char *progname = PROGNAME; +static const char sopts[] = "ECVH"; +static const struct option lopts[] = { + { "cdecl", 0, NULL, 'C' }, + { "english", 0, NULL, 'E' }, + { "version", 0, NULL, 'V' }, + { "help", 0, NULL, 'H' }, + { 0 } +}; + +static void print_usage(FILE *f) +{ + fprintf(f, "Usage: %s [options] string [string ...]\n", progname); +} + +static void print_help(void) +{ + print_usage(stdout); + puts("Test the scanner by tokenizing one or more strings.\n"); + test_print_options(lopts); +} + +enum { + MODE_CDECL, + MODE_ENGLISH +}; + +static int 
print_uintmax_rec(char *buf, uintmax_t v) +{ + int rc = 0; + + if (v > 9) { + rc = print_uintmax_rec(buf, v/10); + v %= 10; + } + + buf[rc] = '0' + v; + return rc + 1; +} + +static void print_uintmax(char *buf, uintmax_t v) +{ + buf[print_uintmax_rec(buf, v)] = 0; +} + +static int do_scan(const char *s, int mode) +{ + YY_BUFFER_STATE state; + yyscan_t scanner; + YYSTYPE lval; + YYLTYPE lloc; + int tok; + + if (cdecl__yylex_init_extra(mode, &scanner) != 0) + return -1; + + state = cdecl__yy_scan_string(s, scanner); + + while ((tok = cdecl__yylex(&lval, &lloc, scanner))) { + const char *tname = cdecl__token_name(tok); + + switch (tok) { + case T_UINT: + print_uintmax(stub_item.item.s, lval.uintval); + lval.item = &stub_item.item; + case T_IDENT: + printf("%s %s\n", tname, lval.item->s); + break; + default: + printf("%s\n", tname); + } + } + printf("eof\n"); + + cdecl__yy_delete_buffer(state, scanner); + cdecl__yylex_destroy(scanner); + return 0; +} + +int main(int argc, char **argv) +{ + int i, opt, mode = MODE_CDECL; + + if (argc > 0) + progname = argv[0]; + + while ((opt = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { + switch (opt) { + case 'C': + mode = MODE_CDECL; + break; + case 'E': + mode = MODE_ENGLISH; + break; + case 'V': + test_print_version(PROGNAME); + return EXIT_SUCCESS; + case 'H': + print_help(); + return EXIT_SUCCESS; + default: + print_usage(stderr); + return EXIT_FAILURE; + } + } + + if (!argv[optind]) { + print_usage(stderr); + return EXIT_FAILURE; + } + + for (i = optind; i < argc; i++) { + if (do_scan(argv[i], mode) != 0) + return EXIT_FAILURE; + } + + return 0; +} diff --git a/tests/internal.at b/tests/internal.at index 3fa785a..ee87919 100644 --- a/tests/internal.at +++ b/tests/internal.at @@ -113,3 +113,37 @@ AT_CHECK([rendertest --english -n 15