X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/9a50128ac12ab834fd97699c7ca543fc040ea8f1..HEAD:/src/scan.l diff --git a/src/scan.l b/src/scan.l index 0a4db93..5d460ed 100644 --- a/src/scan.l +++ b/src/scan.l @@ -1,7 +1,7 @@ %top{ /* * Scanner for C declarations. - * Copyright © 2011, 2021, 2023 Nick Bowler + * Copyright © 2011, 2021, 2023-2024 Nick Bowler * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,11 +18,67 @@ */ #include +#include #include "parse.h" + +/* Disable various generated code we don't use */ +#define YY_INPUT(a, b, c) do {} while (0) +#define YY_NO_INPUT 1 +#define YY_NO_UNPUT 1 + +/* + * The flex-generated scanner defines a huge pile of external functions of + * which we use almost none elsewhere. Explicitly declare any unneeded + * functions static, which allows better optimization (especially wrt. + * dead code elimination). + */ +#if !cdecl__yyIN_HEADER + +#if __GNUC__ +# define static __attribute__((__unused__)) static +#endif + +static struct yy_buffer_state *cdecl__yy_create_buffer(FILE *, int, void *); +static struct yy_buffer_state *cdecl__yy_scan_bytes(const char *, int, void *); +static struct yy_buffer_state *cdecl__yy_scan_buffer(char *, size_t, void *); +static void cdecl__yy_switch_to_buffer(struct yy_buffer_state *, void *); +static void cdecl__yy_flush_buffer(struct yy_buffer_state *, void *); +static void cdecl__yypush_buffer_state(struct yy_buffer_state *, void *); +static void cdecl__yypop_buffer_state(void *); +static void cdecl__yyrestart(FILE *, void *); +static int cdecl__yylex_init(void **); + +static int cdecl__yyget_extra(void *); +static YYLTYPE *cdecl__yyget_lloc(void *); +static YYSTYPE *cdecl__yyget_lval(void *); +static char *cdecl__yyget_text(void *); +static FILE *cdecl__yyget_out(void *); +static FILE *cdecl__yyget_in(void *); +static int cdecl__yyget_debug(void *); +static int cdecl__yyget_lineno(void *); +static int cdecl__yyget_column(void *); +static int cdecl__yyget_leng(void *); + +static void cdecl__yyset_extra(int, void *); +static void cdecl__yyset_lloc(YYLTYPE *, void *); +static void cdecl__yyset_lval(YYSTYPE *, void *); +static void cdecl__yyset_in(FILE *, void *); +static void cdecl__yyset_out(FILE *, void *); +static void cdecl__yyset_debug(int, void *); +static void cdecl__yyset_lineno(int, void *); +static void cdecl__yyset_column(int, void *); + +static void *cdecl__yyrealloc(void *, size_t, void *); +static void *cdecl__yyalloc(size_t, void *); +static void cdecl__yyfree(void *, void *); + +#undef static + +#endif } %option nodefault noyywrap bison-locations reentrant never-interactive -%option extra-type="_Bool" +%option extra-type="int" %option prefix="cdecl__yy" %{ @@ -30,29 +86,7 @@ #include "cdecl-internal.h" #include "cdecl.h" #include "errmsg.h" - -#if HAVE_STRTOUMAX -/* Best case, implementation provides strtoumax. */ -# define STRTOUMAX strtoumax -#elif HAVE_STRTOULL -/* Fall back to strtoull, with possibly reduced range. */ -#define STRTOUMAX strtoull -#elif HAVE___STRTOULL -/* HP-UX 11 has __strtoull in */ -#define STRTOUMAX __strtoull -#else -/* Fall back to strtoul, with possibly reduced range. */ -#define STRTOUMAX strtoul -#endif - -#define dup_token() do { \ - yylval->strval = malloc(yyleng+1); \ - if (!yylval->strval) { \ - cdecl__errmsg(CDECL__ENOMEM); \ - return T_LEX_ERROR; \ - } \ - strcpy(yylval->strval, yytext); \ -} while(0) +#include "intconv.h" static char *to_octal(char *dst, unsigned val) { @@ -116,96 +150,96 @@ static void to_readable_ch(char *dst, char c) %} -%s ENGLISH - -IDENT [_[:alpha:]][_[:alnum:]]* -INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ +IDENT [_[:alpha:]][-_[:alnum:]]* %% %{ - if (yyextra) { - yyextra = 0; - BEGIN(ENGLISH); - return T_ENGLISH; - } + int intconv_base; + char *c; %} -"..." return T_ELLIPSIS; -";" return T_SEMICOLON; -"*" return T_ASTERISK; -"(" return T_LPAREN; -")" return T_RPAREN; -"[" return T_LBRACKET; -"]" return T_RBRACKET; -"," return T_COMMA; - -"typedef" return T_TYPEDEF; -"extern" return T_EXTERN; -"static" return T_STATIC; -"auto" return T_AUTO; -"register" return T_REGISTER; - -"restrict" return T_RESTRICT; -"volatile" return T_VOLATILE; -"const" return T_CONST; - -"inline" return T_INLINE; - -"void" return T_VOID; -"char" return T_CHAR; -"short" return T_SHORT; -"int" return T_INT; -"long" return T_LONG; -"float" return T_FLOAT; -"double" return T_DOUBLE; -"signed" return T_SIGNED; -"unsigned" return T_UNSIGNED; -"_Bool" return T_BOOL; -"_Complex" return T_COMPLEX; -"_Imaginary" return T_IMAGINARY; - -"struct" return T_STRUCT; -"union" return T_UNION; -"enum" return T_ENUM; - -{INTEGER} { - char *end; - - errno = 0; - yylval->uintval = STRTOUMAX(yytext, &end, 0); - if (errno == ERANGE) { - cdecl__errmsg(CDECL__ERANGE); - return T_LEX_ERROR; - } - if (*end) { - cdecl__errmsg(CDECL__EBADINT); - return T_LEX_ERROR; +"..."|[][;*(),] { + unsigned char *match; + static const unsigned char tab[2][8] = { + "*[](),.;", + { + PACK_TOKEN(T_ASTERISK), + PACK_TOKEN(T_LBRACKET), + PACK_TOKEN(T_RBRACKET), + PACK_TOKEN(T_LPAREN), + PACK_TOKEN(T_RPAREN), + PACK_TOKEN(T_COMMA), + PACK_TOKEN(T_ELLIPSIS), + PACK_TOKEN(T_SEMICOLON) + } + }; + + match = memchr(&tab, yytext[0], sizeof tab[0]); + return UNPACK_TOKEN(match[sizeof tab[0]]); +} + +0[0-7]* { intconv_base = INTCONV_OCTAL; goto int_parse; } +[1-9][0-9]* { intconv_base = INTCONV_DECIMAL; goto int_parse; } +0[Xx][[:xdigit:]]+ { + unsigned char d; + cdecl_uintmax v; + + yytext += 2; + intconv_base = INTCONV_HEXADECIMAL; +int_parse: + for (v = 0; (d = *yytext++);) { + if (!intconv_shift(&v, intconv_base, intconv_digit(d))) { + cdecl__errmsg(CDECL__ERANGE); + return T_LEX_ERROR; + } } + yylval->uintval = v; return T_UINT; } - -{ - "variable-length" return T_VLA; - "type" return T_TYPE; - "declare" return T_DECLARE; - "pointer" return T_POINTER; - "function" return T_FUNCTION; - "returning" return T_RETURNING; - "array" return T_ARRAY; - "to" return T_TO; - "of" return T_OF; - "as" return T_AS; +0[Xx]|[0-9]+ { + cdecl__errmsg(CDECL__EBADINT); + return T_LEX_ERROR; } -{IDENT} { dup_token(); return T_IDENT; } +{IDENT} { + int len = yyleng, tok; + unsigned x; + + x = cdecl__to_keyword(yytext, len, yyextra); + yylval->spectype = UNPACK_SPEC(x & 0xff); + if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) { + /* + * Our IDENT pattern includes hyphens so we can match + * "variable-length" as a keyword. In all other cases a + * hyphen is an error. + * + * We could use yyless to re-scan the hyphen and hit the + * error catch-all, but jumping straight to the error code + * seems to produce better results with gcc with no obvious + * downsides. + */ +#if 1 + if ((c = memchr(yytext, '-', len))) + goto invalid_char; +#else + yyless(strcspn(yytext, "-")); +#endif + if (!(yylval->item = cdecl__alloc_item(len+1))) + return T_LEX_ERROR; + memcpy(yylval->item->s, yytext, len+1); + } + return UNPACK_TOKEN(tok); +} [[:space:]]+ . { char buf[8]; - to_readable_ch(buf, yytext[0]); - cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf); + c = yytext; +invalid_char: + to_readable_ch(buf, *c); + cdecl__err(_("syntax error, unexpected %s"), buf); return T_LEX_ERROR; }