%top{
/*
* Scanner for C declarations.
* Copyright © 2011, 2021, 2023-2024 Nick Bowler
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include
#include "parse.h"
/* Disable various generated code we don't use */
#define YY_INPUT(a, b, c) do {} while (0)
#define YY_NO_INPUT 1
#define YY_NO_UNPUT 1
}
/*
 * Scanner options: no default rule and no yywrap; a reentrant scanner
 * using the bison-locations interface that is never treated as
 * interactive.  The extra data type is int (the identifier rule passes
 * yyextra to cdecl__to_keyword), and the cdecl__yy prefix is used for
 * generated symbol names.
 */
%option nodefault noyywrap bison-locations reentrant never-interactive
%option extra-type="int"
%option prefix="cdecl__yy"
%{
#include
#include "cdecl-internal.h"
#include "cdecl.h"
#include "errmsg.h"
#include "intconv.h"
/*
 * Store val as exactly three octal digits, most significant first, at dst.
 * Only the low 9 bits of val contribute and no terminator is written.
 * Returns a pointer just past the last digit stored.
 */
static char *to_octal(char *dst, unsigned val)
{
	int shift;

	for (shift = 6; shift >= 0; shift -= 3)
		*dst++ = '0' + ((val >> shift) & 7u);

	return dst;
}
/*
 * Render one character as a C character constant, surrounding quotes
 * included, and NUL-terminate the result.  The longest output is an octal
 * escape such as '\177', so dst needs room for 7 bytes.
 */
static void to_readable_ch(char *dst, char c)
{
	static const char ctrl_letters[] = "abtnvfr";
	unsigned char raw = c;
	unsigned slot;
	char letter;

	/*
	 * The 7 standard C control characters are contiguous in ASCII, so
	 * this switch amounts to a small table lookup; it is kept apart from
	 * the backslash/quote handling in the hope that the compiler spots
	 * that.  Slot 7 selects the string terminator, i.e., "no escape
	 * letter".
	 */
	switch (c) {
	case '\a': slot = 0; break;
	case '\b': slot = 1; break;
	case '\t': slot = 2; break;
	case '\n': slot = 3; break;
	case '\v': slot = 4; break;
	case '\f': slot = 5; break;
	case '\r': slot = 6; break;
	default:   slot = 7; break;
	}
	letter = ctrl_letters[slot];

	/* Printable, but must still be escaped inside a character constant. */
	if (c == '\\' || c == '\'')
		letter = c;

	*dst++ = '\'';
	if (letter != 0) {
		/* Simple two-character escape sequence. */
		dst[0] = '\\';
		dst[1] = letter;
		dst += 2;
	} else if (!isprint(raw)) {
		/* No symbolic escape exists: fall back to three octal digits. */
		*dst++ = '\\';
		dst = to_octal(dst, raw);
	} else {
		*dst++ = c;
	}
	dst[0] = '\'';
	dst[1] = 0;
}
%}
/*
 * Identifier-like words.  Hyphens are accepted after the first character so
 * that hyphenated keywords can be matched; the identifier rule rejects a
 * hyphen in anything that does not turn out to be a keyword.
 */
IDENT [_[:alpha:]][-_[:alnum:]]*
%%
%{
/* Radix chosen by the matching integer rule; read at the int_parse label. */
int intconv_base;
/* Character handed to the invalid_char error path. */
char *c;
%}
"..."|[][;*(),] {
	/*
	 * Punctuation tokens.  Every alternative is identified by its first
	 * character (an ellipsis by its leading '.'), so that character is
	 * located in punct[] and the packed token at the same index is
	 * unpacked and returned.  The pattern guarantees a hit.
	 */
	static const char punct[8] = "*[](),.;";
	static const unsigned char packed[8] = {
		PACK_TOKEN(T_ASTERISK),
		PACK_TOKEN(T_LBRACKET),
		PACK_TOKEN(T_RBRACKET),
		PACK_TOKEN(T_LPAREN),
		PACK_TOKEN(T_RPAREN),
		PACK_TOKEN(T_COMMA),
		PACK_TOKEN(T_ELLIPSIS),
		PACK_TOKEN(T_SEMICOLON)
	};
	const char *hit = memchr(punct, yytext[0], sizeof punct);

	return UNPACK_TOKEN(packed[hit - punct]);
}
0[0-7]* { intconv_base = INTCONV_OCTAL; goto int_parse; }
[1-9][0-9]* { intconv_base = INTCONV_DECIMAL; goto int_parse; }
0[Xx][[:xdigit:]]+ {
/*
 * Shared integer conversion.  The octal and decimal rules above only set
 * intconv_base and jump to int_parse below; the hexadecimal rule reaches
 * the same code after stepping over its two-character prefix.
 */
unsigned char d;
uintmax_t v;
/* Skip the 0x/0X prefix so that only digits remain. */
yytext += 2;
intconv_base = INTCONV_HEXADECIMAL;
int_parse:
/* Feed each character to the converter until a zero byte ends the text. */
for (v = 0; (d = *yytext++);) {
/* A false result from intconv_shift is reported as a range error. */
if (!intconv_shift(&v, intconv_base, intconv_digit(d))) {
cdecl__errmsg(CDECL__ERANGE);
return T_LEX_ERROR;
}
}
yylval->uintval = v;
return T_UINT;
}
0[Xx]|[0-9]+ {
/*
 * Malformed integer.  This rule wins only when its match is longer than
 * anything the valid integer rules accept: a bare 0x/0X prefix with no
 * hex digits, or a digit string with a leading 0 that contains 8 or 9.
 */
cdecl__errmsg(CDECL__EBADINT);
return T_LEX_ERROR;
}
{IDENT} {
int len = yyleng, tok;
unsigned x;
/*
 * The lookup result carries two packed fields: the low 8 bits are the
 * packed specifier type and the bits above them are the packed token.
 */
x = cdecl__to_keyword(yytext, len, yyextra);
yylval->spectype = UNPACK_SPEC(x & 0xff);
if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) {
/*
* Our IDENT pattern includes hyphens so we can match
* "variable-length" as a keyword. In all other cases a
* hyphen is an error.
*
* We could use yyless to re-scan the hyphen and hit the
* error catch-all, but jumping straight to the error code
* seems to produce better results with gcc with no obvious
* downsides.
*/
#if 1
if ((c = memchr(yytext, '-', len)))
goto invalid_char;
#else
yyless(strcspn(yytext, "-"));
#endif
/* Plain identifier: store a copy of the text, terminator included. */
if (!(yylval->item = cdecl__alloc_item(len+1)))
return T_LEX_ERROR;
memcpy(yylval->item->s, yytext, len+1);
}
return UNPACK_TOKEN(tok);
}
[[:space:]]+ /* Whitespace is matched and discarded; no token is returned. */
. {
/*
 * Catch-all for any character no other rule accepts: report it as a
 * syntax error, rendered as a C character constant.  buf has room for
 * the at most 7 bytes that to_readable_ch writes.
 */
char buf[8];
c = yytext;
/* Also entered from the identifier rule, with c at a stray hyphen. */
invalid_char:
to_readable_ch(buf, *c);
cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf);
return T_LEX_ERROR;
}