3 * Scanner for C declarations.
4 * Copyright © 2011, 2021, 2023 Nick Bowler
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
/*
 * Scanner options:
 *   nodefault          - no default rule; unmatched input is a scanner error
 *   noyywrap           - do not call yywrap at end of input
 *   bison-locations    - yylex also receives a location pointer for bison
 *   reentrant          - generate a reentrant scanner
 *   never-interactive  - treat the input as never being interactive
 *   extra-type         - yyextra has type int
 *   prefix             - generated names use cdecl__yy in place of yy
 */
24 %option nodefault noyywrap bison-locations reentrant never-interactive
25 %option extra-type="int"
26 %option prefix="cdecl__yy"
30 #include "cdecl-internal.h"
/*
 * STRTOUMAX names the string-to-unsigned-integer conversion function used
 * when scanning integer tokens.  Four candidate definitions follow, listed
 * from the preferred one (strtoumax) down to the last resort (strtoul).
 * NOTE(review): the preprocessor conditionals that select exactly one of
 * these definitions are not visible in this excerpt -- confirm against the
 * full file.
 */
35 /* Best case, implementation provides strtoumax. */
36 # define STRTOUMAX strtoumax
38 /* Fall back to strtoull, with possibly reduced range. */
39 #define STRTOUMAX strtoull
41 /* HP-UX 11 has __strtoull in <inttypes.h> */
42 #define STRTOUMAX __strtoull
44 /* Fall back to strtoul, with possibly reduced range. */
45 #define STRTOUMAX strtoul
/*
 * dup_token(): store a heap-allocated, null-terminated copy of the current
 * token text (yyleng bytes starting at yytext) in yylval->strval.  If malloc
 * fails, CDECL__ENOMEM is reported through cdecl__errmsg.
 * NOTE(review): the rest of the failure branch and the end of the macro are
 * not visible in this excerpt; presumably the action bails out before the
 * memcpy, and whoever consumes yylval->strval is responsible for freeing
 * it -- confirm.
 */
48 #define dup_token() do { \
49 yylval->strval = malloc(yyleng+1); \
50 if (!yylval->strval) { \
51 cdecl__errmsg(CDECL__ENOMEM); \
54 memcpy(yylval->strval, yytext, yyleng); \
55 yylval->strval[yyleng] = 0; \
/*
 * Write octal digits for val at dst, advancing dst past each one.  Every pass
 * of the three-iteration loop stores the ASCII digit for bits 6..8 of val.
 * NOTE(review): the statement that updates val between passes and the return
 * statement are not visible in this excerpt; presumably val is shifted left
 * by 3 each time (most significant digit first) and the advanced dst is
 * returned -- confirm.
 */
58 static char *to_octal(char *dst, unsigned val)
62 for (i = 0; i < 3; i++) {
/* Bits 6..8 form one octal digit in the range 0-7. */
63 *dst++ = '0' + ((val >> 6) & 7u);
71 * Convert a single character to a C-style character constant, including quote
72 * characters. At most 7 bytes, counting the terminating null byte, are written
73 * to the buffer; the longest case is an octal escape, e.g., '\177'.
/*
 * to_readable_ch: render c into dst in a form fit for display.  Four cases
 * are visible below: the standard control characters are mapped to a small
 * index, backslash and single quote are escaped as themselves, other
 * printable characters are recognised with isprint, and anything left over
 * is written in octal through to_octal.
 * NOTE(review): the statements that actually emit the quotes, the escape
 * letter and the terminator are not visible in this excerpt.
 */
75 static void to_readable_ch(char *dst, char c)
82 * The 7 standard C control characters are contiguous in ASCII,
83 * permitting a simple and compact lookup table; separating their
84 * handling from backslash and quote characters hopefully allows
85 * the compiler to recognize that.
/* i is 0..6 for the seven control characters and 7 for everything else. */
88 case '\a': i = 0; break;
89 case '\b': i = 1; break;
90 case '\t': i = 2; break;
91 case '\n': i = 3; break;
92 case '\v': i = 4; break;
93 case '\f': i = 5; break;
94 case '\r': i = 6; break;
95 default: i = 7; break;
99 /* Otherwise printable characters that should still be escaped. */
101 case '\\': case '\'': esc = c; break;
/* NOTE(review): uc is presumably c converted to unsigned char -- confirm. */
108 } else if (isprint(uc)) {
/* Not printable and no named escape: fall back to an octal escape. */
112 dst = to_octal(dst, uc);
/*
 * IDENT:   an alphabetic character or underscore, followed by any mix of
 *          alphanumerics, underscores and hyphens.
 * INTEGER: hexadecimal with a lowercase 0x prefix, octal with a leading 0
 *          and at least one further octal digit, or a plain run of decimal
 *          digits.
 */
120 IDENT [_[:alpha:]][-_[:alnum:]]*
121 INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
/*
 * Punctuation lookup.  tab holds two parallel 8-byte rows: memchr searches
 * the first row for the first character of the token, and the byte exactly
 * one row (sizeof tab[0]) further on is the packed token code, which is
 * unpacked and returned.
 * NOTE(review): the first row's initializer is not visible in this excerpt
 * and only seven entries of the second row are; match is also not checked
 * for NULL, so presumably the rule's pattern only admits characters that
 * are present in the table -- confirm.
 */
135 unsigned char *match;
136 static const unsigned char tab[2][8] = {
139 PACK_TOKEN(T_ASTERISK),
140 PACK_TOKEN(T_LBRACKET),
141 PACK_TOKEN(T_RBRACKET),
142 PACK_TOKEN(T_LPAREN),
143 PACK_TOKEN(T_RPAREN),
145 PACK_TOKEN(T_ELLIPSIS),
146 PACK_TOKEN(T_SEMICOLON)
/* Search only the first row (sizeof tab[0] bytes) for the character. */
150 match = memchr(&tab, yytext[0], sizeof tab[0]);
/* Same column, second row. */
151 return UNPACK_TOKEN(match[sizeof tab[0]]);
/*
 * Integer token: convert the matched text with STRTOUMAX using base 0, so
 * the prefix of the text selects the radix, and store the result in
 * yylval->uintval.  errno == ERANGE is reported as CDECL__ERANGE;
 * CDECL__EBADINT is reported under a second condition whose test is not
 * visible in this excerpt.
 * NOTE(review): the ERANGE test is only reliable if errno was cleared before
 * the conversion; that reset is not visible here -- confirm.
 */
158 yylval->uintval = STRTOUMAX(yytext, &end, 0);
159 if (errno == ERANGE) {
160 cdecl__errmsg(CDECL__ERANGE);
164 cdecl__errmsg(CDECL__EBADINT);
/*
 * Identifier or keyword.  cdecl__to_keyword returns a packed value: its low
 * 8 bits decode (via UNPACK_SPEC) to the specifier type stored in
 * yylval->spectype, and the remaining high bits are the packed token code
 * that is unpacked and returned at the end.
 */
172 unsigned x = cdecl__to_keyword(yytext, yyleng, yyextra);
175 yylval->spectype = UNPACK_SPEC(x & 0xff);
/* Only plain identifiers (not keywords) need the hyphen check below. */
176 if ((tok = (x >> 8)) == PACK_TOKEN(T_IDENT)) {
178 * Our IDENT pattern includes hyphens so we can match
179 * "variable-length" as a keyword. In all other cases a
180 * hyphen is an error.
182 * We could use yyless to re-scan the hyphen and hit the
183 * error catch-all, but jumping straight to the error code
184 * seems to produce better results with gcc with no obvious
/* c is set to the first hyphen in the token, if there is one. */
188 if ((c = strchr(yytext, '-')))
/*
 * Keep only the text before the first hyphen and push the rest back.
 * NOTE(review): how this line relates to the strchr test above is not
 * visible in this excerpt -- confirm.
 */
191 yyless(strcspn(yytext, "-"));
195 return UNPACK_TOKEN(tok);
/*
 * Syntax error report: format the offending character *c into buf with
 * to_readable_ch, then raise CDECL_ENOPARSE with the message below.
 * NOTE(review): buf's declaration is not visible in this excerpt; the
 * comment on to_readable_ch says up to 7 bytes may be written, so buf must
 * be at least that large -- confirm.
 */
204 to_readable_ch(buf, *c);
205 cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf);