X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/blobdiff_plain/9a50128ac12ab834fd97699c7ca543fc040ea8f1..41ff7ec97691:/src/scan.l diff --git a/src/scan.l b/src/scan.l index 0a4db93..767ba22 100644 --- a/src/scan.l +++ b/src/scan.l @@ -22,7 +22,7 @@ } %option nodefault noyywrap bison-locations reentrant never-interactive -%option extra-type="_Bool" +%option extra-type="int" %option prefix="cdecl__yy" %{ @@ -51,7 +51,8 @@ cdecl__errmsg(CDECL__ENOMEM); \ return T_LEX_ERROR; \ } \ - strcpy(yylval->strval, yytext); \ + memcpy(yylval->strval, yytext, yyleng); \ + yylval->strval[yyleng] = 0; \ } while(0) static char *to_octal(char *dst, unsigned val) @@ -116,58 +117,39 @@ static void to_readable_ch(char *dst, char c) %} -%s ENGLISH - -IDENT [_[:alpha:]][_[:alnum:]]* +IDENT [_[:alpha:]][-_[:alnum:]]* INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ %% %{ - if (yyextra) { - yyextra = 0; - BEGIN(ENGLISH); + char *c; + + /* Code placed before the first rule runs on every entry to the scanner: a positive yyextra is negated and T_ENGLISH is returned before any input is matched. */ if (yyextra > 0) { + yyextra = -yyextra; return T_ENGLISH; } %} -"..." return T_ELLIPSIS; -";" return T_SEMICOLON; -"*" return T_ASTERISK; -"(" return T_LPAREN; -")" return T_RPAREN; -"[" return T_LBRACKET; -"]" return T_RBRACKET; -"," return T_COMMA; - -"typedef" return T_TYPEDEF; -"extern" return T_EXTERN; -"static" return T_STATIC; -"auto" return T_AUTO; -"register" return T_REGISTER; - -"restrict" return T_RESTRICT; -"volatile" return T_VOLATILE; -"const" return T_CONST; - -"inline" return T_INLINE; - -"void" return T_VOID; -"char" return T_CHAR; -"short" return T_SHORT; -"int" return T_INT; -"long" return T_LONG; -"float" return T_FLOAT; -"double" return T_DOUBLE; -"signed" return T_SIGNED; -"unsigned" return T_UNSIGNED; -"_Bool" return T_BOOL; -"_Complex" return T_COMPLEX; -"_Imaginary" return T_IMAGINARY; - -"struct" return T_STRUCT; -"union" return T_UNION; -"enum" return T_ENUM; +"..."|[][;*(),] { + unsigned char *match; + static const unsigned char tab[2][8] = { + "*[](),.;", + { + PACK_TOKEN(T_ASTERISK), + PACK_TOKEN(T_LBRACKET), + 
PACK_TOKEN(T_RBRACKET), + PACK_TOKEN(T_LPAREN), + PACK_TOKEN(T_RPAREN), + PACK_TOKEN(T_COMMA), + PACK_TOKEN(T_ELLIPSIS), + PACK_TOKEN(T_SEMICOLON) + } + }; + + /* tab[0] holds the first character of each punctuator and tab[1] the packed token at the same index, so the token lies sizeof tab[0] bytes past the matched character. */ match = memchr(&tab, yytext[0], sizeof tab[0]); + return UNPACK_TOKEN(match[sizeof tab[0]]); +} {INTEGER} { char *end; @@ -186,26 +168,40 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ return T_UINT; } -{ - "variable-length" return T_VLA; - "type" return T_TYPE; - "declare" return T_DECLARE; - "pointer" return T_POINTER; - "function" return T_FUNCTION; - "returning" return T_RETURNING; - "array" return T_ARRAY; - "to" return T_TO; - "of" return T_OF; - "as" return T_AS; +{IDENT} { + unsigned x = cdecl__to_keyword(yytext, yyleng, yyextra); + int tok; + + yylval->spectype = UNPACK_SPEC(x & 0xff); + if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) { + /* + * Our IDENT pattern includes hyphens so we can match + * "variable-length" as a keyword. In all other cases a + * hyphen is an error. + * + * We could use yyless to re-scan the hyphen and hit the + * error catch-all, but jumping straight to the error code + * seems to produce better results with gcc with no obvious + * downsides. + */ +#if 1 + if ((c = strchr(yytext, '-'))) + goto invalid_char; +#else + yyless(strcspn(yytext, "-")); +#endif + dup_token(); + } + return UNPACK_TOKEN(tok); } -{IDENT} { dup_token(); return T_IDENT; } - [[:space:]]+ . { char buf[8]; - to_readable_ch(buf, yytext[0]); + c = yytext; /* The {IDENT} action jumps to invalid_char with c already pointing at the offending hyphen. */ +invalid_char: + to_readable_ch(buf, *c); cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf); return T_LEX_ERROR; }