3 * Scanner for C declarations.
4 * Copyright © 2011, 2021, 2023-2024 Nick Bowler
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
23 /* Disable the pieces of generated code that we don't use. */
24 #define YY_INPUT(a, b, c) do {} while (0)
29 %option nodefault noyywrap bison-locations reentrant never-interactive
30 %option extra-type="int"
31 %option prefix="cdecl__yy"
35 #include "cdecl-internal.h"
40 static char *to_octal(char *dst, unsigned val)
44 for (i = 0; i < 3; i++) {
45 *dst++ = '0' + ((val >> 6) & 7u);
53 * Convert a single character to a C-style character constant, including quote
54 * characters. At most 7 bytes are written to the buffer: the longest octal
55 * encoding, e.g., '\177', plus the terminating null byte.
57 static void to_readable_ch(char *dst, char c)
64 * The 7 standard C control characters are contiguous in ASCII,
65 * permitting a simple and compact lookup table; separating their
66 * handling from backslash and quote characters hopefully allows
67 * the compiler to recognize the contiguous range.
70 case '\a': i = 0; break;
71 case '\b': i = 1; break;
72 case '\t': i = 2; break;
73 case '\n': i = 3; break;
74 case '\v': i = 4; break;
75 case '\f': i = 5; break;
76 case '\r': i = 6; break;
77 default: i = 7; break;
81 /* Otherwise printable characters that should still be escaped. */
83 case '\\': case '\'': esc = c; break;
90 } else if (isprint(uc)) {
94 dst = to_octal(dst, uc);
102 IDENT [_[:alpha:]][-_[:alnum:]]*
112 unsigned char *match;
113 static const unsigned char tab[2][8] = {
116 PACK_TOKEN(T_ASTERISK),
117 PACK_TOKEN(T_LBRACKET),
118 PACK_TOKEN(T_RBRACKET),
119 PACK_TOKEN(T_LPAREN),
120 PACK_TOKEN(T_RPAREN),
122 PACK_TOKEN(T_ELLIPSIS),
123 PACK_TOKEN(T_SEMICOLON)
127 match = memchr(&tab, yytext[0], sizeof tab[0]);
128 return UNPACK_TOKEN(match[sizeof tab[0]]);
131 0[0-7]* { intconv_base = INTCONV_OCTAL; goto int_parse; }
132 [1-9][0-9]* { intconv_base = INTCONV_DECIMAL; goto int_parse; }
138 intconv_base = INTCONV_HEXADECIMAL;
140 for (v = 0; (d = *yytext++);) {
141 if (!intconv_shift(&v, intconv_base, intconv_digit(d))) {
142 cdecl__errmsg(CDECL__ERANGE);
151 cdecl__errmsg(CDECL__EBADINT);
156 int len = yyleng, tok;
159 x = cdecl__to_keyword(yytext, len, yyextra);
160 yylval->spectype = UNPACK_SPEC(x & 0xff);
161 if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) {
163 * Our IDENT pattern includes hyphens so we can match
164 * "variable-length" as a keyword. In all other cases a
165 * hyphen is an error.
167 * We could use yyless to re-scan the hyphen and hit the
168 * error catch-all, but jumping straight to the error code
169 * seems to produce better results with gcc with no obvious
173 if ((c = memchr(yytext, '-', len)))
176 yyless(strcspn(yytext, "-"));
178 if (!(yylval->item = cdecl__alloc_item(len+1)))
180 memcpy(yylval->item->s, yytext, len+1);
182 return UNPACK_TOKEN(tok);
191 to_readable_ch(buf, *c);
192 cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf);