From: Nick Bowler Date: Sun, 23 Jul 2023 19:06:29 +0000 (-0400) Subject: libcdecl: Combine identifier and struct allocation. X-Git-Tag: v1.3~97 X-Git-Url: https://git.draconx.ca/gitweb/cdecl99.git/commitdiff_plain/1ef30936f5fdb253b397347b91521455d0c39172 libcdecl: Combine identifier and struct allocation. Whenever the parser uses an identifier token, it is always to stuff it into a newly-allocated declarator or declspec structure. So we can reduce the amount of work that needs to be done in the parser actions by pre-allocating a declarator structure in the scanner. By using a union, we can also use this same allocation for the declspec case. And by happy accident, some of the structure members are identical between both cases, and GCC at least recognizes that it doesn't need to do anything to copy e.g., declarator.u.ident to declspec.ident within the union. --- diff --git a/src/cdecl-internal.h b/src/cdecl-internal.h index a5f3272..cae248b 100644 --- a/src/cdecl-internal.h +++ b/src/cdecl-internal.h @@ -20,6 +20,7 @@ #include #include +#include "cdecl.h" #define _(s) dgettext(PACKAGE, s) #define N_(s) s @@ -100,4 +101,16 @@ const char *cdecl__emit_specs(struct output_state *dst, */ unsigned cdecl__to_keyword(const char *s, int len, int english_mode); +/* Container for an allocated parser token's value */ +struct parse_item { + union { + struct cdecl_declarator declarator; + struct cdecl_declspec declspec; + } u; + + char s[FLEXIBLE_ARRAY_MEMBER]; +}; + +struct parse_item *cdecl__alloc_item(size_t s_sz); + #endif diff --git a/src/parse-decl.c b/src/parse-decl.c index a6204a1..8516a3a 100644 --- a/src/parse-decl.c +++ b/src/parse-decl.c @@ -27,6 +27,33 @@ #include "scan.h" #include "errmsg.h" +/* + * Allocate a "parse item", which is a union of several parse tree + * structure types, together with a string buffer. The s_sz argument + * specifies the size of the string (including its terminator), which + * may be zero. 
+ * + * The union's declarator member is pre-initialized to a valid "identifier" + * declarator, which shares several interesting offsets with the "declspec" + * structure for an "identifier" type specifier. + */ +struct parse_item *cdecl__alloc_item(size_t s_sz) +{ + struct parse_item *ret; + + ret = malloc(offsetof(struct parse_item, s) + s_sz); + if (!ret) { + cdecl__errmsg(CDECL__ENOMEM); + return NULL; + } + + ret->u.declarator.child = NULL; + ret->u.declarator.type = CDECL_DECL_IDENT; + ret->u.declarator.u.ident = ret->s; + + return ret; +} + /* * We can represent type specifiers as a bitmap, which gives us a finite * list of acceptable bitmap values according to the C standard. However, @@ -173,22 +200,30 @@ static bool valid_declspecs(struct cdecl *decl, bool top) static struct cdecl_declarator *reduce_function(struct cdecl *param) { - struct cdecl_declspec *spec = param->specifiers; - struct cdecl_declarator *decl = param->declarators; - struct cdecl_declarator *last; + struct cdecl_declarator *d, **p = &param->declarators; + struct parse_item *spec = (void *)param->specifiers; - for (last = decl; last && last->type != CDECL_DECL_NULL;) - last = last->child; + while ((d = *p)->child) + p = &d->child; - if (!last) + if (d->type != CDECL_DECL_NULL) return NULL; - last->type = CDECL_DECL_IDENT; - last->u.ident = spec->ident; - free(param); - free(spec); + /* + * The child and u.ident members of cdecl_declarator are expected + * to be located at identical offsets as, respectively, the next + * and ident members within cdecl_declspec, so the expectation is + * that the compiler can elide both assignments. 
+ */ + spec->u.declarator.child = (void *)spec->u.declspec.next; + spec->u.declarator.u.ident = spec->u.declspec.ident; + spec->u.declarator.type = CDECL_DECL_IDENT; + *p = &spec->u.declarator; - return decl; + free(d); + d = param->declarators; + free(param); + return d; } static bool function_is_reducible(struct cdecl_declarator *d) diff --git a/src/parse.y b/src/parse.y index ec7ea1b..8631f3b 100644 --- a/src/parse.y +++ b/src/parse.y @@ -76,10 +76,10 @@ const char *cdecl__token_name(unsigned token); uintmax_t uintval; unsigned spectype; _Bool boolval; - char *strval; struct cdecl_declspec *declspec; struct cdecl_declarator *declarator; struct cdecl *decl; + struct parse_item *item; } %{ @@ -91,7 +91,6 @@ static void free_declspec(struct cdecl_declspec *x) struct cdecl_declspec *p; while (x) { p = x->next; - free(x->ident); free(x); x = p; } @@ -106,16 +105,12 @@ static void free_declarator(struct cdecl_declarator *x) switch (x->type) { case CDECL_DECL_NULL: - break; case CDECL_DECL_IDENT: - free(x->u.ident); + case CDECL_DECL_ARRAY: break; case CDECL_DECL_POINTER: free_declspec(x->u.pointer.qualifiers); break; - case CDECL_DECL_ARRAY: - free(x->u.array.vla); - break; case CDECL_DECL_FUNCTION: free_decl(x->u.function.parameters); break; @@ -167,21 +162,20 @@ static void join_specs(struct cdecl_declspec *a, struct cdecl_declspec *b) * Alter an abstract declarator (type name) to declare an identifier instead, * used by the English parser rules to reduce "identifier as type" sequences. 
*/ -static struct cdecl *insert_identifier(struct cdecl *decl, char *ident) +static struct cdecl *insert_identifier(struct cdecl *decl, struct parse_item *ident) { - struct cdecl_declarator *d = decl->declarators; + struct cdecl_declarator *d, **p = &decl->declarators; - while (d->child) - d = d->child; - - d->type = CDECL_DECL_IDENT; - d->u.ident = ident; + while ((d = *p)->child) + p = &d->child; + free(d); + *p = d = &ident->u.declarator; return decl; } %} -%destructor { free($$); } +%destructor { free($$); } %destructor { free_declspec($$); } %destructor { free_declarator($$); } %destructor { free_decl($$); } @@ -189,8 +183,8 @@ static struct cdecl *insert_identifier(struct cdecl *decl, char *ident) /* Magic tokens */ %token T_LEX_ERROR -%token T_IDENT "identifier" -%token T_UINT "integer constant" +%token T_IDENT "identifier" +%token T_UINT "integer constant" %token T_SEMICOLON ";" %token T_ASTERISK "*" @@ -244,7 +238,7 @@ static struct cdecl *insert_identifier(struct cdecl *decl, char *ident) %token T_AS "as" %token T_VLA "variable-length" -%type vla_ident +%type vla_ident %type array_length %type varargs %type declspec_simple qualifier_simple @@ -257,7 +251,7 @@ static struct cdecl *insert_identifier(struct cdecl *decl, char *ident) %type declaration declarators declarator_wrap %type parameter -%type english_vla +%type english_vla %type storage_func_specs post_specs %type type_qual_spec type_qual_specs typedef_name_qual %type english_declarator english_array english_function @@ -366,16 +360,19 @@ qualifier: qualifier_simple { } typespec: typespec_noid | typespec_tagged T_IDENT { - ALLOC_STRUCT($$, struct cdecl_declspec, - .type = $1, - .ident = $2); + /* Compiler should be able to elide this assignment. 
*/ + $2->u.declspec.ident = $2->u.declarator.u.ident; + + $$ = &$2->u.declspec; + $$->type = $1; } declspec_noid: declspec_notype | typespec_noid vla_ident: T_IDENT | T_ASTERISK { - ALLOC($$, sizeof ""); - *$$ = 0; + if (!($$ = cdecl__alloc_item(1))) + YYERROR; + *$$->s = 0; } array: T_LBRACKET array_length T_RBRACKET { @@ -383,9 +380,10 @@ array: T_LBRACKET array_length T_RBRACKET { .type = CDECL_DECL_ARRAY, .u.array.length = $2); } | T_LBRACKET vla_ident T_RBRACKET { - ALLOC_STRUCT($$, struct cdecl_declarator, - .type = CDECL_DECL_ARRAY, - .u.array.vla = $2); + $$ = &$2->u.declarator; + $$->type = CDECL_DECL_ARRAY; + $$->u.array.vla = $$->u.ident; + $$->u.array.length = 0; } parameter: declspecs declarator { @@ -444,9 +442,7 @@ direct_declarator: { ALLOC_STRUCT($$, struct cdecl_declarator, .type = CDECL_DECL_NULL); } | T_IDENT { - ALLOC_STRUCT($$, struct cdecl_declarator, - .type = CDECL_DECL_IDENT, - .u.ident = $1); + $$ = &$1->u.declarator; } | direct_declarator postfix { $$ = $2; $$->child = $1; @@ -530,10 +526,12 @@ english_parameter_list: english_parameter varargs { } typedef_name_qual: T_IDENT qualifiers { - ALLOC_STRUCT($$, struct cdecl_declspec, - .type = CDECL_TYPE_IDENT, - .ident = $1, - .next = $2); + /* Compiler should be able to elide this assignment. 
*/ + $1->u.declspec.ident = $1->u.declarator.u.ident; + + $$ = &$1->u.declspec; + $$->type = CDECL_TYPE_IDENT; + $$->next = $2; } null_decl: { @@ -564,9 +562,10 @@ english_parameter: english_declaration | typedef_name_qual null_decl { } english_array: T_VLA T_ARRAY english_vla T_OF { - ALLOC_STRUCT($$, struct cdecl_declarator, - .type = CDECL_DECL_ARRAY, - .u.array.vla = $3); + $$ = &$3->u.declarator; + $$->type = CDECL_DECL_ARRAY; + $$->u.array.vla = $$->u.ident; + $$->u.array.length = 0; } | T_ARRAY array_length T_OF { ALLOC_STRUCT($$, struct cdecl_declarator, .type = CDECL_DECL_ARRAY, @@ -580,8 +579,9 @@ array_length: T_UINT { } english_vla: T_IDENT | { - ALLOC($$, sizeof ""); - *$$ = 0; + if (!($$ = cdecl__alloc_item(1))) + YYERROR; + *$$->s = 0; } %% diff --git a/src/scan.l b/src/scan.l index 29dd33d..7549e6f 100644 --- a/src/scan.l +++ b/src/scan.l @@ -48,16 +48,6 @@ #define STRTOUMAX strtoul #endif -#define dup_token() do { \ - yylval->strval = malloc(yyleng+1); \ - if (!yylval->strval) { \ - cdecl__errmsg(CDECL__ENOMEM); \ - return T_LEX_ERROR; \ - } \ - memcpy(yylval->strval, yytext, yyleng); \ - yylval->strval[yyleng] = 0; \ -} while(0) - static char *to_octal(char *dst, unsigned val) { unsigned i; @@ -188,7 +178,9 @@ INTEGER 0x[[:xdigit:]]+|0[0-7]+|[[:digit:]]+ #else yyless(strcspn(yytext, "-")); #endif - dup_token(); + if (!(yylval->item = cdecl__alloc_item(yyleng+1))) + return T_LEX_ERROR; \ + memcpy(yylval->item->s, yytext, yyleng+1); } return UNPACK_TOKEN(tok); }