git.draconx.ca Git - cdecl99.git/blobdiff - src/scan.l
Avoid warnings about unused scanner functions.
[cdecl99.git] / src / scan.l
index 1688873eea7a98744f68041bd16a858cfd82b332..29dd33da4374060ea6a9428e7e297a34e0ee6190 100644 (file)
 
 #include <config.h>
 #include "parse.h"
+
+#define YY_NO_INPUT 1
+#define YY_NO_UNPUT 1
 }
 
 %option nodefault noyywrap bison-locations reentrant never-interactive
-%option extra-type="_Bool"
+%option extra-type="int"
 %option prefix="cdecl__yy"
 
 %{
 #include <ctype.h>
 #include "cdecl-internal.h"
 #include "cdecl.h"
+#include "errmsg.h"
 
 #if HAVE_STRTOUMAX
 /* Best case, implementation provides strtoumax. */
 #define STRTOUMAX strtoul
 #endif
 
-#define lex_error(...) do { \
-       cdecl__err(CDECL_ENOPARSE, __VA_ARGS__); \
-       return T_LEX_ERROR; \
-} while(0)
-
 #define dup_token() do { \
        yylval->strval = malloc(yyleng+1); \
        if (!yylval->strval) { \
-               cdecl__err(CDECL_ENOMEM); \
+               cdecl__errmsg(CDECL__ENOMEM); /* allocation failed: report it, then return from the code expanding this macro */ \
                return T_LEX_ERROR; \
        } \
-       strcpy(yylval->strval, yytext); \
+       memcpy(yylval->strval, yytext, yyleng); /* exactly yyleng bytes; terminator is stored on the next line */ \
+       yylval->strval[yyleng] = 0; /* fits: yyleng+1 bytes were allocated */ \
 } while(0)
-%}
 
-%s ENGLISH
+static char *to_octal(char *dst, unsigned val)
+{
+       unsigned i;
+       /*
+        * Store the low nine bits of val at dst as exactly three octal
+        * digits, most significant first: each pass emits bits 6..8 and
+        * then shifts the next lower three bits up into that position.
+        * No null terminator is written; the returned pointer is one past
+        * the last digit stored.
+        */
+       for (i = 0; i < 3; i++) {
+               *dst++ = '0' + ((val >> 6) & 7u);
+               val <<= 3;
+       }
+
+       return dst;
+}
+
+/*
+ * Convert a single character to a C-style character constant, including quote
+ * characters.  At most 7 bytes are written to the buffer for the longest
+ * octal encoding, e.g., '\177'
+ *
+ * dst receives the opening quote, then one of: a backslash escape, the
+ * character itself if isprint() accepts it, or a backslash followed by three
+ * octal digits; then the closing quote and a terminating null byte (which is
+ * counted in the 7-byte maximum above).  The caller supplies the buffer.
+ */
+static void to_readable_ch(char *dst, char c)
+{
+       unsigned char uc = c;
+       unsigned i;
+       char esc;
+
+       /*
+        * The 7 standard C control characters are contiguous in ASCII,
+        * permitting a simple and compact lookup table; separating their
+        * handling from backslash and quote characters hopefully allows
+        * the compiler to recognize that.
+        *
+        * Index 7 (any other character) selects the terminating null byte
+        * of the string literal itself, leaving esc == 0, which the code
+        * below treats as "no escape letter".
+        */
+       switch (c) {
+       case '\a': i = 0; break;
+       case '\b': i = 1; break;
+       case '\t': i = 2; break;
+       case '\n': i = 3; break;
+       case '\v': i = 4; break;
+       case '\f': i = 5; break;
+       case '\r': i = 6; break;
+       default:   i = 7; break;
+       }
+       esc = "abtnvfr"[i];
+
+       /* Otherwise printable characters that should still be escaped. */
+       switch (c) {
+       case '\\': case '\'': esc = c; break; /* escaped as themselves */
+       }
 
-IDENT [_[:alpha:]][_[:alnum:]]*
+       *dst++ = '\'';
+       if (esc) {
+               *dst++ = '\\';
+               *dst++ = esc;
+       } else if (isprint(uc)) { /* uc rather than c: <ctype.h> expects an unsigned char value */
+               *dst++ = c;
+       } else {
+               *dst++ = '\\';
+               dst = to_octal(dst, uc); /* three octal digits, no terminator */
+       }
+       *dst++ = '\'';
+       *dst++ = 0;
+}
+
+%}
+
+IDENT [_[:alpha:]][-_[:alnum:]]*
 INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
 
 %%
 
 %{
-       if (yyextra) {
-               yyextra = 0;
-               BEGIN(ENGLISH);
-               return T_ENGLISH;
-       }
+       char *c;
 %}
 
-"..." return T_ELLIPSIS;
-";"   return T_SEMICOLON;
-"*"   return T_ASTERISK;
-"("   return T_LPAREN;
-")"   return T_RPAREN;
-"["   return T_LBRACKET;
-"]"   return T_RBRACKET;
-","   return T_COMMA;
-
-"typedef"    return T_TYPEDEF;
-"extern"     return T_EXTERN;
-"static"     return T_STATIC;
-"auto"       return T_AUTO;
-"register"   return T_REGISTER;
-
-"restrict"   return T_RESTRICT;
-"volatile"   return T_VOLATILE;
-"const"      return T_CONST;
-
-"inline"     return T_INLINE;
-
-"void"       return T_VOID;
-"char"       return T_CHAR;
-"short"      return T_SHORT;
-"int"        return T_INT;
-"long"       return T_LONG;
-"float"      return T_FLOAT;
-"double"     return T_DOUBLE;
-"signed"     return T_SIGNED;
-"unsigned"   return T_UNSIGNED;
-"_Bool"      return T_BOOL;
-"_Complex"   return T_COMPLEX;
-"_Imaginary" return T_IMAGINARY;
-
-"struct"     return T_STRUCT;
-"union"      return T_UNION;
-"enum"       return T_ENUM;
+"..."|[][;*(),] {
+       unsigned char *match;
+       static const unsigned char tab[2][8] = {
+               "*[](),.;",
+               {
+                       PACK_TOKEN(T_ASTERISK),
+                       PACK_TOKEN(T_LBRACKET),
+                       PACK_TOKEN(T_RBRACKET),
+                       PACK_TOKEN(T_LPAREN),
+                       PACK_TOKEN(T_RPAREN),
+                       PACK_TOKEN(T_COMMA),
+                       PACK_TOKEN(T_ELLIPSIS),
+                       PACK_TOKEN(T_SEMICOLON)
+               }
+       };
+       /*
+        * tab[0] holds the first character of every punctuator this rule
+        * can match ("..." is represented by '.'), and tab[1] holds the
+        * corresponding token codes in the same column order.  The
+        * 8-character string literal exactly fills its 8-element row, so no
+        * null byte is stored after it.  memchr() finds the column within
+        * the first row, and indexing the result by sizeof tab[0] reaches
+        * the same column of the second row.  The search cannot fail
+        * because the pattern only matches text that starts with one of the
+        * listed characters.
+        *
+        * NOTE(review): PACK_TOKEN and UNPACK_TOKEN are defined outside this
+        * view; presumably they squeeze token numbers into an unsigned char
+        * and restore them -- confirm.
+        */
+       match = memchr(&tab, yytext[0], sizeof tab[0]);
+       return UNPACK_TOKEN(match[sizeof tab[0]]);
+}
 
 {INTEGER} {
        char *end;
 
        errno = 0;
        yylval->uintval = STRTOUMAX(yytext, &end, 0);
-       if (errno == ERANGE)
-               lex_error(_("integer constant out of range"));
-       if (*end)
-               lex_error(_("invalid integer constant"));
+       if (errno == ERANGE) { /* errno was cleared above, so this comes from the conversion */
+               cdecl__errmsg(CDECL__ERANGE);
+               return T_LEX_ERROR;
+       }
+       if (*end) { /* conversion stopped before the end of the matched text */
+               cdecl__errmsg(CDECL__EBADINT);
+               return T_LEX_ERROR;
+       }
 
        return T_UINT;
 }
 
-<ENGLISH>{
-       "variable-length" return T_VLA;
-       "type"            return T_TYPE;
-       "declare"         return T_DECLARE;
-       "pointer"         return T_POINTER;
-       "function"        return T_FUNCTION;
-       "returning"       return T_RETURNING;
-       "array"           return T_ARRAY;
-       "to"              return T_TO;
-       "of"              return T_OF;
-       "as"              return T_AS;
+{IDENT} {
+       unsigned x = cdecl__to_keyword(yytext, yyleng, yyextra);
+       int tok;
+       /*
+        * The low 8 bits of x are passed through UNPACK_SPEC and the bits
+        * above them are treated as a packed token code.
+        * NOTE(review): cdecl__to_keyword and the PACK/UNPACK macros are
+        * defined outside this view -- confirm the exact encoding there.
+        */
+       yylval->spectype = UNPACK_SPEC(x & 0xff);
+       if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) {
+               /*
+                * Our IDENT pattern includes hyphens so we can match
+                * "variable-length" as a keyword.  In all other cases a
+                * hyphen is an error.
+                *
+                * We could use yyless to re-scan the hyphen and hit the
+                * error catch-all, but jumping straight to the error code
+                * seems to produce better results with gcc with no obvious
+                * downsides.
+                *
+                * c is the pointer declared at the top of the rules section;
+                * it is left at the first hyphen so that the invalid_char
+                * code in the catch-all rule reports that character.
+                */
+#if 1
+               if ((c = strchr(yytext, '-')))
+                       goto invalid_char;
+#else
+               yyless(strcspn(yytext, "-"));
+#endif
+               dup_token(); /* only reached for a hyphen-free plain identifier */
+       }
+       return UNPACK_TOKEN(tok);
 }
 
-{IDENT} { dup_token(); return T_IDENT; }
-
 [[:space:]]+
 . {
-       char buf[5] = { yytext[0] };
-       unsigned char c = buf[0];
-
-       if (!isprint(c) || c == '\\' || c == '\'') {
-               /* Encode nonprinting characters with C-style escapes */
-               buf[0] = '\\';
-               switch (c) {
-               case '\a': buf[1] = 'a'; break;
-               case '\b': buf[1] = 'b'; break;
-               case '\f': buf[1] = 'f'; break;
-               case '\n': buf[1] = 'n'; break;
-               case '\r': buf[1] = 'r'; break;
-               case '\t': buf[1] = 't'; break;
-               case '\v': buf[1] = 'v'; break;
-               case '\\': buf[1] = '\\'; break;
-               case '\'': buf[1] = '\''; break;
-               default:
-                       buf[1] = '0' + ((c >> 6) & 3);
-                       buf[2] = '0' + ((c >> 3) & 7);
-                       buf[3] = '0' + ((c >> 0) & 7);
-               }
-       }
+       char buf[8]; /* to_readable_ch() is documented to write at most 7 bytes */
 
-       lex_error(_("syntax error, unexpected '%s'"), buf);
+       c = yytext; /* the single character matched by this rule */
+invalid_char: /* also entered by goto from the identifier rule, with c already set */
+       to_readable_ch(buf, *c); /* buf now holds the quoted form, quotes included */
+       cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf);
+       return T_LEX_ERROR;
 }