git.draconx.ca Git - cdecl99.git/blobdiff - src/scan.l
Port to use getline.h from dxcommon.
[cdecl99.git] / src / scan.l
index 0a4db93bb2ec74e4b64b7ca8085cd82873c7225b..5d460ed0366dd15ca3c20c6e9ac4ec21c74b6e95 100644 (file)
@@ -1,7 +1,7 @@
 %top{
 /*
  *  Scanner for C declarations.
- *  Copyright © 2011, 2021, 2023 Nick Bowler
+ *  Copyright © 2011, 2021, 2023-2024 Nick Bowler
  *
  *  This program is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  */
 
 #include <config.h>
+#include <stdio.h>
 #include "parse.h"
+
+/* Disable generated code that we don't use. */
+#define YY_INPUT(a, b, c) do {} while (0)
+#define YY_NO_INPUT 1
+#define YY_NO_UNPUT 1
+
+/*
+ * The flex-generated scanner defines a huge pile of external functions of
+ * which we use almost none elsewhere.  Explicitly declare any unneeded
+ * functions static, which allows better optimization (especially with
+ * respect to dead code elimination).
+ */
+#if !cdecl__yyIN_HEADER
+
+#if __GNUC__
+#  define static __attribute__((__unused__)) static
+#endif
+
+static struct yy_buffer_state *cdecl__yy_create_buffer(FILE *, int, void *);
+static struct yy_buffer_state *cdecl__yy_scan_bytes(const char *, int, void *);
+static struct yy_buffer_state *cdecl__yy_scan_buffer(char *, size_t, void *);
+static void cdecl__yy_switch_to_buffer(struct yy_buffer_state *, void *);
+static void cdecl__yy_flush_buffer(struct yy_buffer_state *, void *);
+static void cdecl__yypush_buffer_state(struct yy_buffer_state *, void *);
+static void cdecl__yypop_buffer_state(void *);
+static void cdecl__yyrestart(FILE *, void *);
+static int cdecl__yylex_init(void **);
+
+static int cdecl__yyget_extra(void *);
+static YYLTYPE *cdecl__yyget_lloc(void *);
+static YYSTYPE *cdecl__yyget_lval(void *);
+static char *cdecl__yyget_text(void *);
+static FILE *cdecl__yyget_out(void *);
+static FILE *cdecl__yyget_in(void *);
+static int cdecl__yyget_debug(void *);
+static int cdecl__yyget_lineno(void *);
+static int cdecl__yyget_column(void *);
+static int cdecl__yyget_leng(void *);
+
+static void cdecl__yyset_extra(int, void *);
+static void cdecl__yyset_lloc(YYLTYPE *, void *);
+static void cdecl__yyset_lval(YYSTYPE *, void *);
+static void cdecl__yyset_in(FILE *, void *);
+static void cdecl__yyset_out(FILE *, void *);
+static void cdecl__yyset_debug(int, void *);
+static void cdecl__yyset_lineno(int, void *);
+static void cdecl__yyset_column(int, void *);
+
+static void *cdecl__yyrealloc(void *, size_t, void *);
+static void *cdecl__yyalloc(size_t, void *);
+static void cdecl__yyfree(void *, void *);
+
+#undef static
+
+#endif
 }
 
 %option nodefault noyywrap bison-locations reentrant never-interactive
-%option extra-type="_Bool"
+%option extra-type="int"
 %option prefix="cdecl__yy"
 
 %{
 #include "cdecl-internal.h"
 #include "cdecl.h"
 #include "errmsg.h"
-
-#if HAVE_STRTOUMAX
-/* Best case, implementation provides strtoumax. */
-#  define STRTOUMAX strtoumax
-#elif HAVE_STRTOULL
-/* Fall back to strtoull, with possibly reduced range. */
-#define STRTOUMAX strtoull
-#elif HAVE___STRTOULL
-/* HP-UX 11 has __strtoull in <inttypes.h> */
-#define STRTOUMAX __strtoull
-#else
-/* Fall back to strtoul, with possibly reduced range. */
-#define STRTOUMAX strtoul
-#endif
-
-#define dup_token() do { \
-       yylval->strval = malloc(yyleng+1); \
-       if (!yylval->strval) { \
-               cdecl__errmsg(CDECL__ENOMEM); \
-               return T_LEX_ERROR; \
-       } \
-       strcpy(yylval->strval, yytext); \
-} while(0)
+#include "intconv.h"
 
 static char *to_octal(char *dst, unsigned val)
 {
@@ -116,96 +150,96 @@ static void to_readable_ch(char *dst, char c)
 
 %}
 
-%s ENGLISH
-
-IDENT [_[:alpha:]][_[:alnum:]]*
-INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
+IDENT [_[:alpha:]][-_[:alnum:]]*
 
 %%
 
 %{
-       if (yyextra) {
-               yyextra = 0;
-               BEGIN(ENGLISH);
-               return T_ENGLISH;
-       }
+       int intconv_base;
+       char *c;
 %}
 
-"..." return T_ELLIPSIS;
-";"   return T_SEMICOLON;
-"*"   return T_ASTERISK;
-"("   return T_LPAREN;
-")"   return T_RPAREN;
-"["   return T_LBRACKET;
-"]"   return T_RBRACKET;
-","   return T_COMMA;
-
-"typedef"    return T_TYPEDEF;
-"extern"     return T_EXTERN;
-"static"     return T_STATIC;
-"auto"       return T_AUTO;
-"register"   return T_REGISTER;
-
-"restrict"   return T_RESTRICT;
-"volatile"   return T_VOLATILE;
-"const"      return T_CONST;
-
-"inline"     return T_INLINE;
-
-"void"       return T_VOID;
-"char"       return T_CHAR;
-"short"      return T_SHORT;
-"int"        return T_INT;
-"long"       return T_LONG;
-"float"      return T_FLOAT;
-"double"     return T_DOUBLE;
-"signed"     return T_SIGNED;
-"unsigned"   return T_UNSIGNED;
-"_Bool"      return T_BOOL;
-"_Complex"   return T_COMPLEX;
-"_Imaginary" return T_IMAGINARY;
-
-"struct"     return T_STRUCT;
-"union"      return T_UNION;
-"enum"       return T_ENUM;
-
-{INTEGER} {
-       char *end;
-
-       errno = 0;
-       yylval->uintval = STRTOUMAX(yytext, &end, 0);
-       if (errno == ERANGE) {
-               cdecl__errmsg(CDECL__ERANGE);
-               return T_LEX_ERROR;
-       }
-       if (*end) {
-               cdecl__errmsg(CDECL__EBADINT);
-               return T_LEX_ERROR;
+"..."|[][;*(),] {
+       unsigned char *match;
+       static const unsigned char tab[2][8] = {
+               "*[](),.;",
+               {
+                       PACK_TOKEN(T_ASTERISK),
+                       PACK_TOKEN(T_LBRACKET),
+                       PACK_TOKEN(T_RBRACKET),
+                       PACK_TOKEN(T_LPAREN),
+                       PACK_TOKEN(T_RPAREN),
+                       PACK_TOKEN(T_COMMA),
+                       PACK_TOKEN(T_ELLIPSIS),
+                       PACK_TOKEN(T_SEMICOLON)
+               }
+       };
+
+       match = memchr(&tab, yytext[0], sizeof tab[0]);
+       return UNPACK_TOKEN(match[sizeof tab[0]]);
+}
+
+0[0-7]* { intconv_base = INTCONV_OCTAL; goto int_parse; }
+[1-9][0-9]* { intconv_base = INTCONV_DECIMAL; goto int_parse; }
+0[Xx][[:xdigit:]]+ {
+       unsigned char d;
+       cdecl_uintmax v;
+
+       yytext += 2;
+       intconv_base = INTCONV_HEXADECIMAL;
+int_parse:
+       for (v = 0; (d = *yytext++);) {
+               if (!intconv_shift(&v, intconv_base, intconv_digit(d))) {
+                       cdecl__errmsg(CDECL__ERANGE);
+                       return T_LEX_ERROR;
+               }
        }
 
+       yylval->uintval = v;
        return T_UINT;
 }
-
-<ENGLISH>{
-       "variable-length" return T_VLA;
-       "type"            return T_TYPE;
-       "declare"         return T_DECLARE;
-       "pointer"         return T_POINTER;
-       "function"        return T_FUNCTION;
-       "returning"       return T_RETURNING;
-       "array"           return T_ARRAY;
-       "to"              return T_TO;
-       "of"              return T_OF;
-       "as"              return T_AS;
+0[Xx]|[0-9]+ {
+       cdecl__errmsg(CDECL__EBADINT);
+       return T_LEX_ERROR;
 }
 
-{IDENT} { dup_token(); return T_IDENT; }
+{IDENT} {
+       int len = yyleng, tok;
+       unsigned x;
+
+       x = cdecl__to_keyword(yytext, len, yyextra);
+       yylval->spectype = UNPACK_SPEC(x & 0xff);
+       if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) {
+               /*
+                * Our IDENT pattern includes hyphens so we can match
+                * "variable-length" as a keyword.  In all other cases a
+                * hyphen is an error.
+                *
+                * We could use yyless to re-scan the hyphen and hit the
+                * error catch-all, but jumping straight to the error code
+                * seems to produce better results with gcc with no obvious
+                * downsides.
+                */
+#if 1
+               if ((c = memchr(yytext, '-', len)))
+                       goto invalid_char;
+#else
+               yyless(strcspn(yytext, "-"));
+#endif
+               if (!(yylval->item = cdecl__alloc_item(len+1)))
+                       return T_LEX_ERROR;
+               memcpy(yylval->item->s, yytext, len+1);
+       }
+       return UNPACK_TOKEN(tok);
+}
 
 [[:space:]]+
 . {
        char buf[8];
 
-       to_readable_ch(buf, yytext[0]);
-       cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf);
+       c = yytext;
+invalid_char:
+       to_readable_ch(buf, *c);
+       cdecl__err(_("syntax error, unexpected %s"), buf);
        return T_LEX_ERROR;
 }