/*
* Parse and validate C declarations.
* Copyright © 2011-2012, 2020-2021 Nick Bowler
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#include
#include
#include
#include
#include "cdecl.h"
#include "cdecl-internal.h"
#include "parse.h"
#include "scan.h"
/*
* We can represent type specifiers as a bitmap, which gives us a finite
* list of acceptable bitmap values according to the C standard. However,
* the "long" specifier is allowed to occur more than once, but only at most
* 2 times. Treat it as a special case, assigning an unused bit to represent
* the second long.
*/
#define MAP_LLONG_BIT 31
#define MAP_LONG_BIT (CDECL_TYPE_LONG-CDECL_SPEC_TYPE)
#define CDECL_TYPE_LLONG (CDECL_SPEC_TYPE+MAP_LLONG_BIT)
#include "typemap.h"
/*
* Convert the declaration specifiers to a bitmap with each bit
* corresponding to one specific type specifier.
*/
static int valid_typespec(struct cdecl_declspec *s)
{
unsigned long map = 0;
for (struct cdecl_declspec *c = s; c; c = c->next) {
unsigned long bit;
if (cdecl_spec_kind(c) != CDECL_SPEC_TYPE)
continue;
bit = c->type - CDECL_SPEC_TYPE;
assert(bit < MAP_LLONG_BIT);
bit = 1ul << bit;
/* "long" special case */
if ((map & bit) == 1ul << MAP_LONG_BIT)
bit = 1ul << MAP_LLONG_BIT;
if (map & bit) {
if (bit == 1ul << MAP_LLONG_BIT)
cdecl__err(CDECL_EBADTYPE, _("too many long specifiers"));
else
cdecl__err(CDECL_EBADTYPE, _("duplicate type specifier"));
return false;
}
map |= bit;
}
if (typemap_is_valid(map))
return true;
if (map == 0)
cdecl__err(CDECL_EBADTYPE, _("no type specified"));
else
cdecl__err(CDECL_EBADTYPE, _("invalid type specified"));
return false;
}
/*
* Verify the declaration specifiers of a declaration. If top is true, treat
* this as a top-level declaration. Otherwise, treat this as a function
* parameter (which carries additional constraints).
*/
static bool valid_declspecs(struct cdecl *decl, bool top)
{
struct cdecl_declspec *specs = decl->specifiers;
struct cdecl_declarator *d = decl->declarators;
bool abstract = cdecl_is_abstract(d);
unsigned num_storage = 0;
if (!valid_typespec(specs))
return false;
for (struct cdecl_declspec *c = specs; c; c = c->next) {
switch (cdecl_spec_kind(c)) {
case CDECL_SPEC_TYPE:
if (c->type == CDECL_TYPE_VOID &&
(d->type == CDECL_DECL_IDENT
|| d->type == CDECL_DECL_ARRAY)) {
cdecl__err(CDECL_EBADTYPE, _("invalid declaration of type void"));
return false;
}
continue;
case CDECL_SPEC_STOR:
if (top && abstract) {
cdecl__err(CDECL_EBADSTOR, _("type names cannot have storage-class specifiers"));
return false;
}
if (!top && c->type != CDECL_STOR_REGISTER) {
cdecl__err(CDECL_EBADSTOR, _("function parameters may only have register storage"));
return false;
}
if (++num_storage > 1) {
cdecl__err(CDECL_EBADSTOR, _("too many storage-class specifiers"));
return false;
}
break;
case CDECL_SPEC_QUAL:
/*
* Restrict qualifiers are only valid in the
* pointer qualifier list, which isn't checked here.
*/
if (c->type == CDECL_QUAL_RESTRICT) {
cdecl__err(CDECL_EBADQUAL, _("only pointer types can be restrict-qualified"));
return false;
}
break;
case CDECL_SPEC_FUNC:
if (abstract) {
cdecl__err(CDECL_ENOTFUNC, _("type names cannot have function specifiers"));
return false;
}
if (!top || d->type != CDECL_DECL_FUNCTION) {
cdecl__err(CDECL_ENOTFUNC, _("only function declarations can have function specifiers"));
return false;
}
break;
default:
assert(0);
}
}
return true;
}
/*
* The C grammar leaves ambiguous some cases where parentheses represent a
* function declarator or just parentheses. The language uses additional
* context (whether or not a typedef is in scope, etc.) to resolve these
* ambiguities, but we don't have access to that kind of information.
*
* The cdecl99 parser uses an unambiguous grammar which treats almost
* everything as a function, and thus considers things like 'int (x)' to
* be a function type with a single parameter of type 'x' (a typedef name),
* returning int. This can result in very complicated types for simple
* declarations. Ideally, cdecl99 should try and find the "simplest"
* explanation for a given declaration.
*
* Whether or not it achieves the simplest explanation, we apply a simple rule:
* if a declarator could be interpreted as something other than a function,
* do that.
*
* Since cdecl99 supports things like [*] in any context (in C, such constructs
* are only valid in function parameter lists), we don't treat them specially
* here.
*/
static struct cdecl_declarator *reduce_function(struct cdecl *param)
{
struct cdecl_declspec *spec = param->specifiers;
struct cdecl_declarator *decl = param->declarators;
struct cdecl_declarator *last;
for (last = decl; last && last->type != CDECL_DECL_NULL;)
last = last->child;
if (!last)
return NULL;
last->type = CDECL_DECL_IDENT;
last->u.ident = spec->ident;
free(param);
free(spec);
return decl;
}
static bool function_is_reducible(struct cdecl_declarator *d)
{
if (d->type != CDECL_DECL_FUNCTION)
return false;
if (d->child->type != CDECL_DECL_NULL)
return false; /* e.g., int (*)(x) */
if (!d->u.function.parameters)
return false; /* e.g., int f() */
if (d->u.function.parameters->next)
return false; /* e.g., int (x, y) */
if (d->u.function.variadic)
return false; /* e.g., int (x, ...) */
if (d->u.function.parameters->specifiers->type != CDECL_TYPE_IDENT)
return false; /* e.g. int (int) */
if (d->u.function.parameters->specifiers->next)
return false; /* e.g. int (size_t const) */
if (d->u.function.parameters->declarators->type == CDECL_DECL_POINTER)
return false; /* e.g. int (x *) */
return true;
}
static int
simplify_functions(struct cdecl_declarator **p, struct cdecl_declarator *d)
{
struct cdecl_declarator *new;
if (!function_is_reducible(d))
return 0;
new = reduce_function(d->u.function.parameters);
if (!new)
return 0; /* e.g. int (foo bar) */
*p = new;
free(d->child);
free(d);
return 1;
}
/*
* The parser's bias towards considering things as functions whenever possible
* makes nested parentheses tricky. (x) is considered to be part of a function
* declarator until simplify_functions converts it. The problem is that
* (((x))) is not valid as part of a function declarator, but it *is* valid
* as an identifier enclosed 3 times in parentheses. This is complicated by
* the fact that things like (((int))) are not valid anywhere.
*
* To avoid ambiguities, the parser actually emits a "function" declarator for
* every pair of parentheses. The ones that can't reasonably be functions
* consist of a single "parameter" with no declaration specifiers (note that
* every valid function parameter will have at least one type specifier).
*
* This pass is to remove these fake functions from the parse tree. We take
* care to avoid turning invalid things like ((int)) into valid things like
* (int) by observing that the only valid function declarators that appear
* in these "fake" parentheses are those that have a non-null child declarator
* (for instance, int ((*)(int)) *or* those that will be eliminated by the
* simplify_functions pass.
*/
static int
reduce_parentheses(struct cdecl_declarator **p, struct cdecl_declarator *d)
{
struct cdecl *param;
if (d->type != CDECL_DECL_FUNCTION)
return 0;
param = d->u.function.parameters;
if (param && param->specifiers == NULL) {
struct cdecl_declarator *decl;
assert(!param->next);
decl = param->declarators;
if (decl->type == CDECL_DECL_NULL) {
free(decl);
free(param);
d->u.function.parameters = NULL;
return 0;
}
if (d->child->type != CDECL_DECL_NULL) {
cdecl__err(CDECL_EBADPARAMS, _("invalid function parameter"));
return -1;
}
free(d->child);
free(param);
free(d);
*p = decl;
/*
* We may have replaced d with another fake function which
* also needs to be eliminated.
*/
if (reduce_parentheses(p, decl) < 0)
return -1;
/*
* If the remaining declarator is a function, make sure it's
* valid by checking its reducibility.
*/
decl = *p;
if (decl->type == CDECL_DECL_FUNCTION
&& decl->child->type == CDECL_DECL_NULL
&& !function_is_reducible(decl)) {
cdecl__err(CDECL_EBADPARAMS, _("too many parentheses in function"));
return -1;
}
return 1;
}
return 0;
}
/*
* Function parameters and return types have a few restrictions that are
* really easy to check in comparison to the above absurdity.
*/
static int
check_parameters(struct cdecl_declarator **p, struct cdecl_declarator *d)
{
struct cdecl_declspec *spec;
struct cdecl *param;
if (d->type != CDECL_DECL_FUNCTION)
return 0;
for (param = d->u.function.parameters; param; param = param->next) {
if (!valid_declspecs(param, false))
return -1;
/* Check for "void" function parameters as a special case. */
for (spec = param->specifiers; spec; spec = spec->next) {
if (param->declarators->type != CDECL_DECL_NULL)
continue;
if (spec->type != CDECL_TYPE_VOID)
continue;
if (spec != param->specifiers || spec->next != NULL) {
cdecl__err(CDECL_EVOIDPARAM, _("void parameter cannot have extra specifiers"));
return -1;
} else if (d->u.function.parameters->next) {
cdecl__err(CDECL_EVOIDPARAM, _("void parameter must stand alone"));
return -1;
} else if (d->u.function.variadic) {
cdecl__err(CDECL_EVOIDPARAM, _("variadic function cannot have void parameter"));
return -1;
}
}
}
return 0;
}
/*
* Functions cannot return arrays or functions. Since the parse tree is
* "inside-out", we need to look for functions as the child declarator.
*/
static int
check_rettypes(struct cdecl_declarator **p, struct cdecl_declarator *d)
{
if (!d->child || d->child->type != CDECL_DECL_FUNCTION)
return 0;
switch (d->type) {
case CDECL_DECL_FUNCTION:
cdecl__err(CDECL_EBADRETURN, _("functions cannot return functions"));
return -1;
case CDECL_DECL_ARRAY:
cdecl__err(CDECL_EBADRETURN, _("functions cannot return arrays"));
return -1;
}
return 0;
}
static int
check_arrays(struct cdecl_declarator **p, struct cdecl_declarator *d)
{
if (!d->child || d->child->type != CDECL_DECL_ARRAY)
return 0;
switch (d->type) {
case CDECL_DECL_FUNCTION:
cdecl__err(CDECL_EBADARRAY, _("array members cannot be functions"));
return -1;
}
return 0;
}
static int
normalize_specs(struct cdecl_declarator **p, struct cdecl_declarator *d)
{
struct cdecl_function *func;
struct cdecl_pointer *ptr;
switch (d->type) {
case CDECL_DECL_POINTER:
ptr = &d->u.pointer;
ptr->qualifiers = cdecl__normalize_specs(ptr->qualifiers);
break;
case CDECL_DECL_FUNCTION:
func = &d->u.function;
for (struct cdecl *i = func->parameters; i; i = i->next)
i->specifiers = cdecl__normalize_specs(i->specifiers);
break;
}
return 0;
}
static int
check_qualifiers(struct cdecl_declarator **p, struct cdecl_declarator *d)
{
struct cdecl_declspec *spec;
struct cdecl_pointer *ptr;
if (!d->child || d->child->type != CDECL_DECL_POINTER)
return 0;
ptr = &d->child->u.pointer;
for (spec = ptr->qualifiers; spec; spec = spec->next) {
if (spec->type == CDECL_QUAL_RESTRICT
&& d->type == CDECL_DECL_FUNCTION) {
cdecl__err(CDECL_EBADPOINTER, _("function pointers cannot be restrict-qualified"));
return -1;
}
}
return 0;
}
/*
* Traverse the parse tree, calling a function on every declarator in a
* depth-first preorder traversal. The function is given a pointer to the
* declarator as well as to the pointer which was used to reach that
* declarator: this can be used to rewrite entire subtrees.
*/
static bool forall_declarators(struct cdecl *decl,
int f(struct cdecl_declarator **, struct cdecl_declarator *))
{
struct cdecl_declarator *d, **p;
for (p = &decl->declarators, d = *p; d; p = &d->child, d = *p) {
switch (f(p, d)) {
case 0:
break;
case 1:
d = *p;
break;
case -1:
return false;
default:
assert(0);
}
if (d->type == CDECL_DECL_FUNCTION) {
struct cdecl *i;
for (i = d->u.function.parameters; i; i = i->next) {
if (!forall_declarators(i, f))
return false;
}
}
}
return true;
}
struct cdecl *cdecl_parse_decl(const char *declstr)
{
struct cdecl_declspec *norm_specs;
YY_BUFFER_STATE state;
yyscan_t scanner;
struct cdecl *decl;
int rc;
cdecl__init_i18n();
rc = cdecl__yylex_init(&scanner);
if (rc != 0)
return NULL;
state = cdecl__yy_scan_string(declstr, scanner);
rc = cdecl__yyparse(scanner, &decl);
cdecl__yy_delete_buffer(state, scanner);
cdecl__yylex_destroy(scanner);
if (rc != 0)
return NULL;
/*
* Since the top-level specifiers are shared between each top-level
* declarator, we need to normalize them once and then propagate the
* new specifier list.
*/
norm_specs = cdecl__normalize_specs(decl->specifiers);
for (struct cdecl *i = decl; i; i = i->next) {
i->specifiers = norm_specs;
}
/* Now perform checks and simplifications on each declarator. */
for (struct cdecl *i = decl; i; i = i->next) {
if (!forall_declarators(i, reduce_parentheses))
goto err;
if (!forall_declarators(i, simplify_functions))
goto err;
if (!forall_declarators(i, check_parameters))
goto err;
if (!forall_declarators(i, check_rettypes))
goto err;
if (!forall_declarators(i, check_arrays))
goto err;
if (!forall_declarators(i, normalize_specs))
goto err;
if (!forall_declarators(i, check_qualifiers))
goto err;
if (!valid_declspecs(i, true))
goto err;
if (cdecl_is_abstract(i->declarators)
&& (i != decl || i->next)) {
cdecl__err(CDECL_EBADDECL, _("mixing type names and declarations is not allowed"));
goto err;
}
}
return decl;
err:
cdecl__free(decl);
return NULL;
}
struct cdecl *cdecl_parse_english(const char *english)
{
YY_BUFFER_STATE state;
yyscan_t scanner;
struct cdecl *decl;
int rc;
cdecl__init_i18n();
rc = cdecl__yylex_init_extra(true, &scanner);
if (rc != 0)
return NULL;
state = cdecl__yy_scan_string(english, scanner);
rc = cdecl__yyparse(scanner, &decl);
cdecl__yy_delete_buffer(state, scanner);
cdecl__yylex_destroy(scanner);
if (rc != 0)
return NULL;
for (struct cdecl *i = decl; i; i = i->next) {
i->specifiers = cdecl__normalize_specs(i->specifiers);
if (!forall_declarators(i, check_parameters))
goto err;
if (!forall_declarators(i, check_rettypes))
goto err;
if (!forall_declarators(i, check_arrays))
goto err;
if (!forall_declarators(i, normalize_specs))
goto err;
if (!forall_declarators(i, check_qualifiers))
goto err;
if (!valid_declspecs(i, true))
goto err;
}
return decl;
err:
cdecl__free(decl);
return NULL;
}
void cdecl_free(struct cdecl *decl)
{
cdecl__free(decl);
}