git.draconx.ca Git - cdecl99.git/blobdiff - src/scan.l
libcdecl: Use gperf to identify keywords during scanning.
[cdecl99.git] / src / scan.l
index 0a4db93bb2ec74e4b64b7ca8085cd82873c7225b..1342e5fd3dbbac0f095686e1bebd5ec2dea59d96 100644 (file)
@@ -22,7 +22,7 @@
 }
 
 %option nodefault noyywrap bison-locations reentrant never-interactive
-%option extra-type="_Bool"
+%option extra-type="int"
 %option prefix="cdecl__yy"
 
 %{
@@ -51,7 +51,8 @@
                cdecl__errmsg(CDECL__ENOMEM); \
                return T_LEX_ERROR; \
        } \
-       strcpy(yylval->strval, yytext); \
+       memcpy(yylval->strval, yytext, yyleng); \
+       yylval->strval[yyleng] = 0; \
 } while(0)
 
 static char *to_octal(char *dst, unsigned val)
@@ -116,17 +117,16 @@ static void to_readable_ch(char *dst, char c)
 
 %}
 
-%s ENGLISH
-
-IDENT [_[:alpha:]][_[:alnum:]]*
+IDENT [_[:alpha:]][-_[:alnum:]]*
 INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
 
 %%
 
 %{
-       if (yyextra) {
-               yyextra = 0;
-               BEGIN(ENGLISH);
+       char *c; /* offending character; set by the {IDENT} and catch-all rules */
+
+       if (yyextra > 0) {
+               yyextra = -yyextra; /* becomes negative: the test above no longer holds */
                 return T_ENGLISH;
         }
 %}
@@ -140,35 +140,6 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
 "]"   return T_RBRACKET;
 ","   return T_COMMA;
 
-"typedef"    return T_TYPEDEF;
-"extern"     return T_EXTERN;
-"static"     return T_STATIC;
-"auto"       return T_AUTO;
-"register"   return T_REGISTER;
-
-"restrict"   return T_RESTRICT;
-"volatile"   return T_VOLATILE;
-"const"      return T_CONST;
-
-"inline"     return T_INLINE;
-
-"void"       return T_VOID;
-"char"       return T_CHAR;
-"short"      return T_SHORT;
-"int"        return T_INT;
-"long"       return T_LONG;
-"float"      return T_FLOAT;
-"double"     return T_DOUBLE;
-"signed"     return T_SIGNED;
-"unsigned"   return T_UNSIGNED;
-"_Bool"      return T_BOOL;
-"_Complex"   return T_COMPLEX;
-"_Imaginary" return T_IMAGINARY;
-
-"struct"     return T_STRUCT;
-"union"      return T_UNION;
-"enum"       return T_ENUM;
-
 {INTEGER} {
        char *end;
 
@@ -186,26 +157,37 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
        return T_UINT;
 }
 
-<ENGLISH>{
-       "variable-length" return T_VLA;
-       "type"            return T_TYPE;
-       "declare"         return T_DECLARE;
-       "pointer"         return T_POINTER;
-       "function"        return T_FUNCTION;
-       "returning"       return T_RETURNING;
-       "array"           return T_ARRAY;
-       "to"              return T_TO;
-       "of"              return T_OF;
-       "as"              return T_AS;
+{IDENT} {
+       int ret = cdecl__to_keyword(yytext, yyleng, yyextra);
+       if (ret == T_IDENT) {
+               /*
+                * The IDENT pattern admits hyphens only so that the keyword
+                * "variable-length" can be matched.  In a plain identifier
+                * (ret == T_IDENT) a hyphen is an error.
+                *
+                * Using yyless to re-scan from the hyphen would reach the
+                * catch-all error rule too, but jumping directly to its
+                * invalid_char label seems to produce better results with
+                * gcc, with no obvious downsides.
+                */
+#if 1
+               if ((c = strchr(yytext, '-'))) /* c -> first hyphen, if any */
+                       goto invalid_char;
+#else
+               yyless(strcspn(yytext, "-"));
+#endif
+               dup_token();
+       }
+       return ret;
 }
 
-{IDENT} { dup_token(); return T_IDENT; }
-
 [[:space:]]+
 . {
        char buf[8];
 
-       to_readable_ch(buf, yytext[0]);
+       c = yytext; /* the single character that no other rule matched */
+invalid_char: /* also reached by goto from the {IDENT} rule, with c at a '-' */
+       to_readable_ch(buf, *c);
        cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf);
        return T_LEX_ERROR;
 }