From d45087889eac31f237bc9b10bddca7c9d2871296 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Tue, 11 Jul 2023 00:57:04 -0400 Subject: [PATCH] libcdecl: Avoid duplicating keyword strings w/ gperf. The string table generated by gperf is quite redundant: since we already resolved to the parser token, we can easily get the string from the parser's token name table. Unfortunately gperf provides no options to customize the string table generation to the extent needed, so let's continue our theme of post-processing everything in awk. --- Makefile.am | 7 ++- src/gperf-wordwrap.awk | 105 +++++++++++++++++++++++++++++++++++++++++ src/keywords.gperf | 22 +++++++-- 3 files changed, 128 insertions(+), 6 deletions(-) create mode 100755 src/gperf-wordwrap.awk diff --git a/Makefile.am b/Makefile.am index 4fdf566..e7cfb6b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -313,9 +313,12 @@ V_GPERF_0 = @printf ' %$(DX_ALIGN_V)s %s\n' 'GPERF ' $@; GPERFFILES = src/execute.gperf src/keywords.gperf .gperf.c: $(V_GPERF) $(GPERF) $< >$@.tmp - $(AM_V_at) mv $@.tmp $@ + $(AM_V_at) $(AWK) -f $(srcdir)/src/gperf-wordwrap.awk $@.tmp >$@.t2 + $(AM_V_at) mv $@.t2 $@ + $(AM_V_at) rm -f $@.tmp MAINTAINERCLEANFILES += $(GPERFFILES:.gperf=.c) -EXTRA_DIST += $(GPERFFILES) +EXTRA_DIST += $(GPERFFILES) src/gperf-wordwrap.awk +$(GPERFFILES:.gperf=.c): $(srcdir)/src/gperf-wordwrap.awk atlocal: config.status $(AM_V_GEN) :; { \ diff --git a/src/gperf-wordwrap.awk b/src/gperf-wordwrap.awk new file mode 100755 index 0000000..0588507 --- /dev/null +++ b/src/gperf-wordwrap.awk @@ -0,0 +1,105 @@ +#!/bin/awk -f +# +# Copyright © 2023 Nick Bowler +# +# Hackjob to replace gperf's generated string table with a custom function. +# +# Sometimes it is not desired to use a gperf-generated string table, as this +# can be quite large and is mostly wasted space if we have some other means +# of creating strings.
This script replaces the "wordlist" initializer with +# one that does not include any strings, and replaces the reference to the +# ".name" member of the wordlist with a function call which can be supplied +# by the user. +# +# This transformation only occurs if the %define word-array-name is used to +# define an identifier that ends in _wrapped. If this option is not used, +# no modifications are made to the gperf output, so that this script can be +# used in generic build recipes and selectively enabled/disabled. +# +# To work, the following gperf options are assumed: +# +# %struct-type +# %null-strings +# +# Do not use %pic. Since this script removes all keyword strings from the +# table initializer it is not required to avoid relocations in PIC code. +# +# The user must supply a function with the following signature in the gperf file: +# +# const char *wordlist_func(const struct tag *); +# +# where "wordlist" is the same identifier provided to word-array-name, without +# the _wrapped suffix, and "tag" is the identifier used for the gperf struct-type +# declaration. This function returns the string corresponding to a given table +# entry. +# +# The structure declaration should not include the first "name" member which is +# ordinarily required. All struct members for nonempty table entries are set +# exactly as specified in the gperf input, without the keyword name. All empty +# table entries are initialized with {0}. + +BEGIN { + wl_tag = wl_name = ""; + in_wordlist = 0; + linecount = 0; +} + +NF == 1 && $1 == "};" { + in_wordlist = 0; +} + +!in_wordlist && wl_name { + # Convert wordlist "name" member references to function call. + re = wl_name "_wrapped\\[key\\]\\.name"; + gsub(re, wl_name "_func(" wl_name "_wrapped+key)"); +} + +in_wordlist && $1 ~ /^[^#]/ { + # Convert empty wordlist entries to {0}. + gsub(/[{][(]char\*[)]0[}]/, "{0}"); + + # Remove string portion of populated entry initializers. + gsub(/\\"/, "\1"); + sub(/"[^"]*",?
*/, ""); + gsub("\1", "\\\""); +} + +# Locate the wordlist array definition, which is identified by a magic +# name ending in "_wrapped" +$NF == "=" && $(NF-1) ~ /._wrapped\[\]$/ { + wl_tag = $(NF-2); + wl_name = $(NF-1); + sub(/_[^_]*$/, "", wl_name); + + in_wordlist = 1; + dump_lines(); +} + +!wl_name { + # Buffer lines until we know the structure and wordlist names + lines[linecount++] = $0; + next; +} + +{ print; } + +END { dump_lines(); } + +function dump_lines(i, flag) { + flag = 0; + for (i = 0; i < linecount; i++) { + if (wl_name && !(flag > 0 || lines[i] ~ /^\//)) { + print "/* Postprocessed by gperf-wordwrap.awk */"; + flag = 1; + } + + if (flag == 1 && (lines[i] ~ "struct *" wl_tag " *[{]")) { + print "static const char *" wl_name "_func(const struct " wl_tag " *);"; + flag = 2; + } + + print lines[i]; + delete lines[i]; + } + linecount = 0; +} diff --git a/src/keywords.gperf b/src/keywords.gperf index 88b2476..22566aa 100644 --- a/src/keywords.gperf +++ b/src/keywords.gperf @@ -25,14 +25,15 @@ static const struct keyword *in_word_set(); %} -%struct-type %readonly-tables %language=ANSI-C -%global-table -%pic + +/* Note: the following options enable gperf-wordwrap.awk to do its thing */ +%define word-array-name wordlist_wrapped +%null-strings +%struct-type struct keyword { - int_least16_t name; uint_least8_t token; }; %% @@ -88,3 +89,16 @@ int cdecl__to_keyword(const char *s, int len, int english_mode) return T_IDENT; } + +static const char *wordlist_func(const struct keyword *k) +{ + unsigned x = k->token & 0x7f; + + if (!x) + return NULL; + + if (T_VOID >= 256) + x += 256; + + return cdecl__token_name(x); +} -- 2.43.2