%top{
/*
* Scanner for C declarations.
- * Copyright © 2011 Nick Bowler
+ * Copyright © 2011, 2021, 2023 Nick Bowler
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
- #include "parse.h"
+#include <config.h>
+#include "parse.h"
}
-%option noyywrap bison-locations reentrant
+%option nodefault noyywrap bison-locations reentrant never-interactive
+%option extra-type="_Bool"
+%option prefix="cdecl__yy"
%{
-#define lex_error(msg) do { \
- yyerror(yylloc, NULL, NULL, (msg)); \
- return T_LEX_ERROR; \
+#include <ctype.h>
+#include "cdecl-internal.h"
+#include "cdecl.h"
+#include "errmsg.h"
+
+#if HAVE_STRTOUMAX
+/*
+ * STRTOUMAX names the string-to-unsigned conversion function that the
+ * {INTEGER} rule calls. The HAVE_* feature macros tested here are
+ * presumably provided by <config.h> (TODO confirm).
+ *
+ * Best case, implementation provides strtoumax.
+ */
+# define STRTOUMAX strtoumax
+#elif HAVE_STRTOULL
+/* Fall back to strtoull, with possibly reduced range. */
+#define STRTOUMAX strtoull
+#elif HAVE___STRTOULL
+/* HP-UX 11 has __strtoull in <inttypes.h> */
+#define STRTOUMAX __strtoull
+#else
+/* Fall back to strtoul, with possibly reduced range. */
+#define STRTOUMAX strtoul
+#endif
+
+#define dup_token() do { /* store a heap copy of yytext in yylval->strval */ \
+ yylval->strval = malloc(yyleng+1); /* +1 leaves room for the terminating NUL */ \
+ if (!yylval->strval) { /* allocation failed */ \
+ cdecl__errmsg(CDECL__ENOMEM); \
+ return T_LEX_ERROR; /* leaves the function in which the macro is expanded */ \
+ } \
+ strcpy(yylval->strval, yytext); \
} while(0)
+
+static char *to_octal(char *dst, unsigned val) /* writes the low 9 bits of val as 3 octal digits at dst */
+{
+ unsigned i;
+
+ for (i = 0; i < 3; i++) {
+ *dst++ = '0' + ((val >> 6) & 7u); /* bits 8..6 hold the most significant remaining digit */
+ val <<= 3; /* move the next lower digit up into bits 8..6 */
+ }
+
+ return dst; /* points just past the third digit; no NUL is written here */
+}
+
+/*
+ * Convert a single character to a C-style character constant, including quote
+ * characters. At most 7 bytes are written to the buffer for the longest
+ * octal encoding, e.g., '\177'
+ *
+ * dst: output buffer; receives the quoted constant followed by a NUL.
+ * c: the character to render.
+ *
+ * One of three forms is written between the quotes: a two-character
+ * backslash escape (for the seven control characters listed below, for
+ * backslash and for single quote), the character itself when isprint
+ * accepts it, or a backslash followed by three octal digits.
+ */
+static void to_readable_ch(char *dst, char c)
+{
+ unsigned char uc = c; /* unsigned copy, used for isprint and to_octal */
+ unsigned i;
+ char esc;
+
+ /*
+ * The 7 standard C control characters are contiguous in ASCII,
+ * permitting a simple and compact lookup table; separating their
+ * handling from backslash and quote characters hopefully allows
+ * the compiler to recognize that.
+ */
+ switch (c) {
+ case '\a': i = 0; break;
+ case '\b': i = 1; break;
+ case '\t': i = 2; break;
+ case '\n': i = 3; break;
+ case '\v': i = 4; break;
+ case '\f': i = 5; break;
+ case '\r': i = 6; break;
+ default: i = 7; break; /* index of the string literal terminator */
+ }
+ esc = "abtnvfr"[i]; /* i == 7 reads the terminating NUL, so esc is 0: no escape letter */
+
+ /* Otherwise printable characters that should still be escaped. */
+ switch (c) {
+ case '\\': case '\'': esc = c; break;
+ }
+
+ *dst++ = '\''; /* opening quote */
+ if (esc) {
+ *dst++ = '\\';
+ *dst++ = esc;
+ } else if (isprint(uc)) {
+ *dst++ = c;
+ } else {
+ *dst++ = '\\';
+ dst = to_octal(dst, uc); /* three octal digits; dst is advanced past them */
+ }
+ *dst++ = '\''; /* closing quote */
+ *dst++ = 0; /* terminating NUL */
+}
+
%}
+%s ENGLISH
+
IDENT [_[:alpha:]][_[:alnum:]]*
INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+
%%
+%{
+ if (yyextra) { /* flag is set: emit T_ENGLISH once, before any rule is tried */
+ yyextra = 0; /* clear it so later calls fall through to the rules */
+ BEGIN(ENGLISH); /* activate the <ENGLISH> keyword rules from now on */
+ return T_ENGLISH;
+ }
+%}
+
"..." return T_ELLIPSIS;
";" return T_SEMICOLON;
"*" return T_ASTERISK;
"]" return T_RBRACKET;
"," return T_COMMA;
-"typedef" return T_TYPEDEF;
-"extern" return T_EXTERN;
-"static" return T_STATIC;
-"auto" return T_AUTO;
-"register" return T_REGISTER;
-
-"restrict" return T_RESTRICT;
-"volatile" return T_VOLATILE;
-"const" return T_CONST;
-
-"inline" return T_INLINE;
-
-"void" return T_VOID;
-"char" return T_CHAR;
-"short" return T_SHORT;
-"int" return T_INT;
-"long" return T_LONG;
-"float" return T_FLOAT;
-"double" return T_DOUBLE;
-"signed" return T_SIGNED;
-"unsigned" return T_UNSIGNED;
-"_Bool" return T_BOOL;
-"_Complex" return T_COMPLEX;
-
-"struct" return T_STRUCT;
-"union" return T_UNION;
-"enum" return T_ENUM;
+"typedef" return T_TYPEDEF;
+"extern" return T_EXTERN;
+"static" return T_STATIC;
+"auto" return T_AUTO;
+"register" return T_REGISTER;
+
+"restrict" return T_RESTRICT;
+"volatile" return T_VOLATILE;
+"const" return T_CONST;
+
+"inline" return T_INLINE;
+
+"void" return T_VOID;
+"char" return T_CHAR;
+"short" return T_SHORT;
+"int" return T_INT;
+"long" return T_LONG;
+"float" return T_FLOAT;
+"double" return T_DOUBLE;
+"signed" return T_SIGNED;
+"unsigned" return T_UNSIGNED;
+"_Bool" return T_BOOL;
+"_Complex" return T_COMPLEX;
+"_Imaginary" return T_IMAGINARY;
+
+"struct" return T_STRUCT;
+"union" return T_UNION;
+"enum" return T_ENUM;
{INTEGER} {
char *end;
errno = 0;
- yylval->uintval = strtoumax(yytext, &end, 0);
- if (errno == ERANGE)
- lex_error("integer constant out of range");
- if (*end)
- lex_error("invalid integer constant");
+ yylval->uintval = STRTOUMAX(yytext, &end, 0); /* base 0: the prefix selects hex, octal or decimal */
+ if (errno == ERANGE) { /* errno was cleared above, so this comes from the conversion */
+ cdecl__errmsg(CDECL__ERANGE);
+ return T_LEX_ERROR;
+ }
+ if (*end) { /* conversion stopped early, e.g. 089 matches the pattern but is not valid octal */
+ cdecl__errmsg(CDECL__EBADINT);
+ return T_LEX_ERROR;
+ }
return T_UINT;
}
-{IDENT} {
- yylval->strval = malloc(yyleng+1);
- if (!yylval->strval)
- lex_error("failed to allocate memory");
-
- strcpy(yylval->strval, yytext);
- return T_IDENT;
+<ENGLISH>{
+ "variable-length" return T_VLA;
+ "type" return T_TYPE;
+ "declare" return T_DECLARE;
+ "pointer" return T_POINTER;
+ "function" return T_FUNCTION;
+ "returning" return T_RETURNING;
+ "array" return T_ARRAY;
+ "to" return T_TO;
+ "of" return T_OF;
+ "as" return T_AS;
}
+{IDENT} { /* listed after the keyword rules so an equal-length keyword match wins */ dup_token(); return T_IDENT; }
+
[[:space:]]+
. {
- char buf[] = "syntax error, unexpected #";
- *strchr(buf, '#') = *yytext;
- lex_error(buf);
+ char buf[8]; /* to_readable_ch writes at most 7 bytes including the NUL */
+
+ to_readable_ch(buf, yytext[0]); /* quoted, escaped form of the unmatched character */
+ cdecl__err(CDECL_ENOPARSE, _("syntax error, unexpected %s"), buf);
+ return T_LEX_ERROR;
}