%top{
/*
* Scanner for C declarations.
- * Copyright © 2011 Nick Bowler
+ * Copyright © 2011, 2021, 2023-2024 Nick Bowler
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <config.h>
+#include <stdio.h>
#include "parse.h"
+
+/*
+ * Disable various generated code we don't use: YY_INPUT is replaced by an
+ * empty statement, and YY_NO_INPUT / YY_NO_UNPUT ask flex to leave out its
+ * input() and unput() helpers.
+ */
+#define YY_INPUT(a, b, c) do {} while (0)
+#define YY_NO_INPUT 1
+#define YY_NO_UNPUT 1
+
+/*
+ * The flex-generated scanner defines a huge pile of external functions of
+ * which we use almost none elsewhere. Explicitly declare any unneeded
+ * functions static, which allows better optimization (especially wrt.
+ * dead code elimination).
+ *
+ * Every name must be spelled with the full cdecl__yy prefix (see the
+ * prefix %option): this %top block is emitted ahead of the generated code,
+ * so flex's own renaming of the short yy names is not in effect here. A
+ * declaration using a short name would describe some other, never-defined
+ * function and leave the real one with external linkage.
+ */
+static struct yy_buffer_state *cdecl__yy_create_buffer(FILE *, int, void *);
+static struct yy_buffer_state *cdecl__yy_scan_bytes(const char *, int, void *);
+static struct yy_buffer_state *cdecl__yy_scan_buffer(char *, size_t, void *);
+static void cdecl__yy_switch_to_buffer(struct yy_buffer_state *, void *);
+static void cdecl__yy_flush_buffer(struct yy_buffer_state *, void *);
+static void cdecl__yypush_buffer_state(struct yy_buffer_state *, void *);
+static void cdecl__yypop_buffer_state(void *);
+static void cdecl__yyrestart(FILE *, void *);
+static int cdecl__yylex_init(void **);
+
+static int cdecl__yyget_extra(void *);
+static YYLTYPE *cdecl__yyget_lloc(void *);
+static YYSTYPE *cdecl__yyget_lval(void *);
+static char *cdecl__yyget_text(void *);
+static FILE *cdecl__yyget_out(void *);
+static FILE *cdecl__yyget_in(void *);
+static int cdecl__yyget_debug(void *);
+static int cdecl__yyget_lineno(void *);
+static int cdecl__yyget_column(void *);
+static int cdecl__yyget_leng(void *);
+
+static void cdecl__yyset_extra(int, void *);
+static void cdecl__yyset_lloc(YYLTYPE *, void *);
+static void cdecl__yyset_lval(YYSTYPE *, void *);
+static void cdecl__yyset_in(FILE *, void *);
+static void cdecl__yyset_out(FILE *, void *);
+static void cdecl__yyset_debug(int, void *);
+static void cdecl__yyset_lineno(int, void *);
+static void cdecl__yyset_column(int, void *);
+static void cdecl__yyset_leng(int, void *);
+
+static void *cdecl__yyrealloc(void *, size_t, void *);
+static void *cdecl__yyalloc(size_t, void *);
+static void cdecl__yyfree(void *, void *);
}
-%option noyywrap bison-locations reentrant
-%option extra-type="_Bool"
+%option nodefault noyywrap bison-locations reentrant never-interactive
+%option extra-type="int"
%option prefix="cdecl__yy"
%{
-#define lex_error(msg) do { \
- cdecl__yyerror(yylloc, NULL, NULL, (msg)); \
- return T_LEX_ERROR; \
-} while(0)
-
-#define dup_token() do { \
- yylval->strval = malloc(yyleng+1); \
- if (!yylval->strval) \
- lex_error("failed to allocate memory"); \
- strcpy(yylval->strval, yytext); \
-} while(0)
-%}
+#include <ctype.h>
+#include "cdecl-internal.h"
+#include "cdecl.h"
+#include "errmsg.h"
+#include "intconv.h"
+
+/*
+ * Store the low nine bits of val at dst as exactly three octal digits,
+ * most significant digit first.  No terminator is written.  Returns a
+ * pointer to the byte just past the last digit stored.
+ */
+static char *to_octal(char *dst, unsigned val)
+{
+	int shift;
+
+	/* Walk the three 3-bit groups from the top one (bits 6-8) down. */
+	for (shift = 6; shift >= 0; shift -= 3)
+		*dst++ = '0' + ((val >> shift) & 7u);
+
+	return dst;
+}
+
+/*
+ * Convert a single character to a C-style character constant, including quote
+ * characters. At most 7 bytes are written to the buffer for the longest
+ * octal encoding, e.g., '\177' (six characters plus the terminating null
+ * byte), so dst must have room for at least that many.
+ *
+ * The character is written as a simple escape when one applies (the seven
+ * control characters below, backslash and single quote), as itself when
+ * isprint accepts it, and as a three-digit octal escape otherwise.
+ */
+static void to_readable_ch(char *dst, char c)
+{
+ unsigned char uc = c;
+ unsigned i;
+ char esc;
+
+ /*
+ * The 7 standard C control characters are contiguous in ASCII,
+ * permitting a simple and compact lookup table; separating their
+ * handling from backslash and quote characters hopefully allows
+ * the compiler to recognize that.
+ */
+ switch (c) {
+ case '\a': i = 0; break;
+ case '\b': i = 1; break;
+ case '\t': i = 2; break;
+ case '\n': i = 3; break;
+ case '\v': i = 4; break;
+ case '\f': i = 5; break;
+ case '\r': i = 6; break;
+ default: i = 7; break; /* index of the literal's terminating null: esc becomes 0 */
+ }
+ esc = "abtnvfr"[i];
+
+ /* Otherwise printable characters that should still be escaped. */
+ switch (c) {
+ case '\\': case '\'': esc = c; break;
+ }
-%s ENGLISH
+ *dst++ = '\'';
+ if (esc) {
+ *dst++ = '\\';
+ *dst++ = esc;
+ } else if (isprint(uc)) { /* uc rather than c: isprint needs an unsigned char value */
+ *dst++ = c;
+ } else {
+ *dst++ = '\\';
+ dst = to_octal(dst, uc);
+ }
+ *dst++ = '\'';
+ *dst++ = 0;
+}
-IDENT [_[:alpha:]][_[:alnum:]]*
-INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
+%}
+
+IDENT [_[:alpha:]][-_[:alnum:]]*
%%
%{
- if (yyextra) {
- yyextra = 0;
- BEGIN(ENGLISH);
- return T_ENGLISH;
- }
+ int intconv_base; /* set by whichever integer rule matched, read at int_parse */
+ char *c; /* character to report once control reaches invalid_char */
%}
-"..." return T_ELLIPSIS;
-";" return T_SEMICOLON;
-"*" return T_ASTERISK;
-"(" return T_LPAREN;
-")" return T_RPAREN;
-"[" return T_LBRACKET;
-"]" return T_RBRACKET;
-"," return T_COMMA;
-
-"typedef" return T_TYPEDEF;
-"extern" return T_EXTERN;
-"static" return T_STATIC;
-"auto" return T_AUTO;
-"register" return T_REGISTER;
-
-"restrict" return T_RESTRICT;
-"volatile" return T_VOLATILE;
-"const" return T_CONST;
-
-"inline" return T_INLINE;
-
-"void" return T_VOID;
-"char" return T_CHAR;
-"short" return T_SHORT;
-"int" return T_INT;
-"long" return T_LONG;
-"float" return T_FLOAT;
-"double" return T_DOUBLE;
-"signed" return T_SIGNED;
-"unsigned" return T_UNSIGNED;
-"_Bool" return T_BOOL;
-"_Complex" return T_COMPLEX;
-"_Imaginary" return T_IMAGINARY;
-
-"struct" return T_STRUCT;
-"union" return T_UNION;
-"enum" return T_ENUM;
-
-{INTEGER} {
- char *end;
-
- errno = 0;
- yylval->uintval = strtoumax(yytext, &end, 0);
- if (errno == ERANGE)
- lex_error("integer constant out of range");
- if (*end)
- lex_error("invalid integer constant");
+"..."|[][;*(),] {
+ unsigned char *match;
+ static const unsigned char tab[2][8] = { /* row 0: first byte of each token; row 1: its packed token */
+ "*[](),.;",
+ {
+ PACK_TOKEN(T_ASTERISK),
+ PACK_TOKEN(T_LBRACKET),
+ PACK_TOKEN(T_RBRACKET),
+ PACK_TOKEN(T_LPAREN),
+ PACK_TOKEN(T_RPAREN),
+ PACK_TOKEN(T_COMMA),
+ PACK_TOKEN(T_ELLIPSIS), /* the ellipsis is looked up by its leading dot */
+ PACK_TOKEN(T_SEMICOLON)
+ }
+ };
+
+ match = memchr(&tab, yytext[0], sizeof tab[0]); /* searches row 0 only; every byte this pattern can start with is in it */
+ return UNPACK_TOKEN(match[sizeof tab[0]]); /* one row further on: the token in the same column */
+}
+0[0-7]* { intconv_base = INTCONV_OCTAL; goto int_parse; }
+[1-9][0-9]* { intconv_base = INTCONV_DECIMAL; goto int_parse; }
+0[Xx][[:xdigit:]]+ {
+ unsigned char d;
+ uintmax_t v;
+
+ yytext += 2; /* step over the two-character 0x or 0X prefix */
+ intconv_base = INTCONV_HEXADECIMAL;
+int_parse: /* the octal and decimal rules above enter here by goto */
+ for (v = 0; (d = *yytext++);) { /* runs until the first zero byte */
+ if (!intconv_shift(&v, intconv_base, intconv_digit(d))) { /* a false result is reported as CDECL__ERANGE */
+ cdecl__errmsg(CDECL__ERANGE);
+ return T_LEX_ERROR;
+ }
+ }
+
+ yylval->uintval = v;
return T_UINT;
}
-
-<ENGLISH>{
- "variable-length" return T_VLA;
- "type" return T_TYPE;
- "declare" return T_DECLARE;
- "pointer" return T_POINTER;
- "function" return T_FUNCTION;
- "returning" return T_RETURNING;
- "array" return T_ARRAY;
- "to" return T_TO;
- "of" return T_OF;
- "as" return T_AS;
+0[Xx]|[0-9]+ {
+ cdecl__errmsg(CDECL__EBADINT); /* digit strings no integer rule above accepts in full, e.g. a bare 0x or 089; relies on flex preferring the longest match, then the earliest rule */
+ return T_LEX_ERROR;
}
-{IDENT} { dup_token(); return T_IDENT; }
+{IDENT} {
+ int len = yyleng, tok;
+ unsigned x;
+
+ x = cdecl__to_keyword(yytext, len, yyextra); /* low 8 bits: packed spec type; bits above: packed token */
+ yylval->spectype = UNPACK_SPEC(x & 0xff);
+ if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) {
+ /*
+ * Our IDENT pattern includes hyphens so we can match
+ * "variable-length" as a keyword. In all other cases a
+ * hyphen is an error.
+ *
+ * We could use yyless to re-scan the hyphen and hit the
+ * error catch-all, but jumping straight to the error code
+ * seems to produce better results with gcc with no obvious
+ * downsides.
+ */
+#if 1
+ if ((c = memchr(yytext, '-', len))) /* c is left at the first hyphen for the error path */
+ goto invalid_char;
+#else
+ yyless(strcspn(yytext, "-"));
+#endif
+ if (!(yylval->item = cdecl__alloc_item(len+1)))
+ return T_LEX_ERROR;
+ memcpy(yylval->item->s, yytext, len+1); /* len+1 also copies the byte after the match (the terminator, assuming yytext is null-terminated) */
+ }
+ return UNPACK_TOKEN(tok);
+}
[[:space:]]+
. {
- char buf[] = "syntax error, unexpected #";
- *strchr(buf, '#') = *yytext;
- lex_error(buf);
+ char buf[8]; /* to_readable_ch writes at most 7 bytes */
+
+ c = yytext;
+invalid_char: /* the identifier rule also jumps here, with c at a hyphen */
+ to_readable_ch(buf, *c);
+ cdecl__err(_("syntax error, unexpected %s"), buf);
+ return T_LEX_ERROR;
}