3 # Copyright © 2021 Nick Bowler
5 # Generate a C string table based on an input string specification file.
7 # A string table is a single large char array containing all of
8 # the specified (0-terminated) strings, which is then offset to obtain
9 # the desired string. By storing these offsets instead of string pointers
10 # into read-only data structures, this can reduce the need for relocation
11 # processing at startup when programs are built in PIC mode.
13 # The string specification file is processed line by line. Comment
14 # lines may be included by beginning the line with a # character, which
15 # must be the very first character on the line. If a comment is encountered,
16 # processing immediately moves on to the next line and the result is as if
17 # the comment line were omitted from the input.
19 # A string is defined by beginning a line with an & character, which must
20 # be immediately followed by a C identifier. A nonempty sequence of
21 # whitespace (with at most one newline) separates the identifier from the
22 # beginning of the string itself. This whitespace is never included in the
25 # The string is then interpreted as follows:
27 # - Leading blanks on each line are ignored.
28 # - The sequences \\, \a, \b, \t, \n, \v, \f and \r can be entered and
29 # mean the same as they do in C string literals. The "\\" sequence
30 # prevents any special interpretation of the second backslash.
31 # - Newlines in the input are included in the output, except for the
32 # case where the entire string (including its identifier) is on one line.
33 # - If this is not desired, a newline which is immediately preceded by an
34 # unescaped backslash will be deleted, along with the backslash.
35 # - All other backslashes are deleted. This can be used to prevent special
36 # handling of whitespace, # or & characters at the beginning of a line.
38 # The output defines a variable, strtab, which contains all of the strings,
39 # and each identifier in the input is declared as an enumeration constant
40 # whose value is the offset of the associated string within strtab.
42 # The object-like macro STRTAB_MAX_OFFSET is defined and expands to the
43 # greatest string offset, suitable for use in #if preprocessing directives.
48 print " * Automatically generated by gen-strtab.awk from " FILENAME
50 print " * Automatically generated by gen-strtab.awk"
52 print " * Do not edit."
57 collected = ident = ""
58 startline = endline = 0
66 finish_string_input(strings, ident, collected)
67 vars[num_vars++] = ident
81 collected = collected "\n" $0
91 finish_string_input(strings, ident, collected)
92 vars[num_vars++] = ident
99 count = bucketsort(sorted_strings, strings)
102 print "\n#define STR_L10N_(x)"
104 print "# define N_(x) x"
106 print "\nstatic const char strtab[] ="
108 for (i = 0; i < count; i++) {
109 s = sorted_strings[i]
110 gsub(/\\\\/, "\2", s)
111 if ((n = index(strtab "\1", s "\1")) > 0) {
112 offsets[sorted_strings[i]] = real_length(substr(strtab, 1, n-1))
113 print "\tSTR_L10N_(N_(\"" sorted_strings[i] "\"))"
115 strtab = strtab "\1" s
116 offsets[sorted_strings[i]] = strtab_len + 1
117 strtab_len += real_length(s) + 1
120 offsets[sorted_strings[i]] = 0
121 strtab_len += real_length(s)
125 gsub(/\2/, "\\\\", strtab)
126 gsub(/\1/, "\")\"\\0\"\n\tN_(\"", strtab)
127 print "\tN_(\"" strtab "\")"
131 for (i = 0; i < num_vars; i++) {
132 sep = (i+1) != num_vars ? "," : ""
134 o = offsets[strings[s]]
135 print "\t" s " = " o sep
141 print "\n#define STRTAB_MAX_OFFSET " max
144 # finish_string_input(strings, ident, val)
146 # Deal with backslash-escapes and special characters in val, then set
147 # strings[ident] = val.
148 function finish_string_input(strings, ident, val, n, tmpval)
150 gsub(/\\\\/, "\1", val)
151 val = val (endline > startline ? "\n" : "")
152 gsub(/\\\n/, "", val)
155 while ((n = match(val, /\\[^abtnvfr]/)) > 0) {
156 tmpval = tmpval substr(val, 1, n-1)
157 val = substr(val, n+1)
161 # Escape special characters
162 gsub(/"/, "\\\"", tmpval)
163 gsub(/\t/, "\\t", tmpval)
164 gsub(/\n/, "\\n", tmpval)
165 gsub(/\1/, "\\\\", tmpval)
167 strings[ident] = tmpval
170 function real_length(s, t)
173 return t - gsub(/\\./, "&", s)
176 # bucketsort(dst, src)
178 # Sort the elements of src by descending string length,
179 # placing them into dst[0] ... dst[n-1], where n is the number of elements.
181 # Returns the number of elements.
182 function bucketsort(dst, src, buckets, max, count, i, t)
186 if (i > max) { max = i }
190 for (i = max; i > 0; i--) {
199 i = length(t = src[t])
200 dst[buckets[i]++] = t