Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.5.6.
  1// SPDX-License-Identifier: GPL-2.0
  2#include <string.h>
  3#include "util.h"
  4#include "debug.h"
  5
  6#include "demangle-rust.h"
  7
  8/*
  9 * Mangled Rust symbols look like this:
 10 *
 11 *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
 12 *
 13 * The original symbol is:
 14 *
 15 *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
 16 *
 17 * The last component of the path is a 64-bit hash in lowercase hex, prefixed
 18 * with "h". Rust does not have a global namespace between crates, an illusion
 19 * which Rust maintains by using the hash to distinguish things that would
 20 * otherwise have the same symbol.
 21 *
 22 * Any path component not starting with a XID_Start character is prefixed with
 23 * "_".
 24 *
 25 * The following escape sequences are used:
 26 *
 27 *     ","  =>  $C$
 28 *     "@"  =>  $SP$
 29 *     "*"  =>  $BP$
 30 *     "&"  =>  $RF$
 31 *     "<"  =>  $LT$
 32 *     ">"  =>  $GT$
 33 *     "("  =>  $LP$
 34 *     ")"  =>  $RP$
 35 *     " "  =>  $u20$
 36 *     "'"  =>  $u27$
 37 *     "["  =>  $u5b$
 38 *     "]"  =>  $u5d$
 39 *     "~"  =>  $u7e$
 40 *
 41 * A double ".." means "::" and a single "." means "-".
 42 *
 43 * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
 44 */
 45
/* Path separator plus the "h" marker that introduces the trailing hash. */
static const char *hash_prefix = "::h";
/* Number of characters in hash_prefix, not counting the terminating NUL. */
static const size_t hash_prefix_len = 3;
/* Number of hex digits expected after hash_prefix. */
static const size_t hash_len = 16;

/* Forward declarations of the file-local helpers defined further down. */
static bool is_prefixed_hash(const char *start);
static bool looks_like_rust(const char *sym, size_t len);
static bool unescape(const char **in, char **out, const char *seq, char value);
 53
 54/*
 55 * INPUT:
 56 *     sym: symbol that has been through BFD-demangling
 57 *
 58 * This function looks for the following indicators:
 59 *
 60 *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
 61 *
 62 *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
 63 *     hex digits. This is true of 99.9998% of hashes so once in your life you
 64 *     may see a false negative. The point is to notice path components that
 65 *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
 66 *     this case a false positive (non-Rust symbol has an important path
 67 *     component removed because it looks like a Rust hash) is worse than a
 68 *     false negative (the rare Rust symbol is not demangled) so this sets the
 69 *     balance in favor of false negatives.
 70 *
 71 *  3. There must be no characters other than a-zA-Z0-9 and _.:$
 72 *
 73 *  4. There must be no unrecognized $-sign sequences.
 74 *
 75 *  5. There must be no sequence of three or more dots in a row ("...").
 76 */
 77bool
 78rust_is_mangled(const char *sym)
 79{
 80	size_t len, len_without_hash;
 81
 82	if (!sym)
 83		return false;
 84
 85	len = strlen(sym);
 86	if (len <= hash_prefix_len + hash_len)
 87		/* Not long enough to contain "::h" + hash + something else */
 88		return false;
 89
 90	len_without_hash = len - (hash_prefix_len + hash_len);
 91	if (!is_prefixed_hash(sym + len_without_hash))
 92		return false;
 93
 94	return looks_like_rust(sym, len_without_hash);
 95}
 96
 97/*
 98 * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
 99 * digits must comprise between 5 and 15 (inclusive) distinct digits.
100 */
101static bool is_prefixed_hash(const char *str)
102{
103	const char *end;
104	bool seen[16];
105	size_t i;
106	int count;
107
108	if (strncmp(str, hash_prefix, hash_prefix_len))
109		return false;
110	str += hash_prefix_len;
111
112	memset(seen, false, sizeof(seen));
113	for (end = str + hash_len; str < end; str++)
114		if (*str >= '0' && *str <= '9')
115			seen[*str - '0'] = true;
116		else if (*str >= 'a' && *str <= 'f')
117			seen[*str - 'a' + 10] = true;
118		else
119			return false;
120
121	/* Count how many distinct digits seen */
122	count = 0;
123	for (i = 0; i < 16; i++)
124		if (seen[i])
125			count++;
126
127	return count >= 5 && count <= 15;
128}
129
/*
 * Scan the first len characters of str and report whether they could be the
 * path part of a mangled Rust symbol:
 *
 *   - only a-z, A-Z, 0-9 and "_", ":", ".", "$" may appear;
 *   - every "$" must start one of the known escape sequences;
 *   - three dots in a row ("...") are rejected.
 *
 * Look-ahead is done with strncmp(), so str must be NUL-terminated even
 * though only len characters are being validated.
 */
static bool looks_like_rust(const char *str, size_t len)
{
	static const char * const escapes[] = {
		"$C$",
		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
	};
	const size_t nr_escapes = sizeof(escapes) / sizeof(escapes[0]);
	const char *cur = str;
	const char *limit = str + len;

	while (cur < limit) {
		const char c = *cur;

		if (c == '$') {
			size_t matched = 0;
			size_t i;

			/* No escape is a prefix of another, so order is irrelevant. */
			for (i = 0; i < nr_escapes; i++) {
				size_t n = strlen(escapes[i]);

				if (strncmp(cur, escapes[i], n) == 0) {
					matched = n;
					break;
				}
			}
			if (matched == 0)
				return false;

			cur += matched;
			continue;
		}

		if (c == '.') {
			/* Do not allow three or more consecutive dots */
			if (strncmp(cur, "...", 3) == 0)
				return false;
		} else if (!((c >= 'a' && c <= 'z') ||
			     (c >= 'A' && c <= 'Z') ||
			     (c >= '0' && c <= '9') ||
			     c == '_' || c == ':')) {
			return false;
		}

		cur++;
	}

	return true;
}
174
175/*
176 * INPUT:
177 *     sym: symbol for which rust_is_mangled(sym) returns true
178 *
179 * The input is demangled in-place because the mangled name is always longer
180 * than the demangled one.
181 */
182void
183rust_demangle_sym(char *sym)
184{
185	const char *in;
186	char *out;
187	const char *end;
188
189	if (!sym)
190		return;
191
192	in = sym;
193	out = sym;
194	end = sym + strlen(sym) - (hash_prefix_len + hash_len);
195
196	while (in < end)
197		switch (*in) {
198		case '$':
199			if (!(unescape(&in, &out, "$C$", ',')
200					|| unescape(&in, &out, "$SP$", '@')
201					|| unescape(&in, &out, "$BP$", '*')
202					|| unescape(&in, &out, "$RF$", '&')
203					|| unescape(&in, &out, "$LT$", '<')
204					|| unescape(&in, &out, "$GT$", '>')
205					|| unescape(&in, &out, "$LP$", '(')
206					|| unescape(&in, &out, "$RP$", ')')
207					|| unescape(&in, &out, "$u20$", ' ')
208					|| unescape(&in, &out, "$u27$", '\'')
209					|| unescape(&in, &out, "$u5b$", '[')
210					|| unescape(&in, &out, "$u5d$", ']')
211					|| unescape(&in, &out, "$u7e$", '~'))) {
212				pr_err("demangle-rust: unexpected escape sequence");
213				goto done;
214			}
215			break;
216		case '_':
217			/*
218			 * If this is the start of a path component and the next
219			 * character is an escape sequence, ignore the
220			 * underscore. The mangler inserts an underscore to make
221			 * sure the path component begins with a XID_Start
222			 * character.
223			 */
224			if ((in == sym || in[-1] == ':') && in[1] == '$')
225				in++;
226			else
227				*out++ = *in++;
228			break;
229		case '.':
230			if (in[1] == '.') {
231				/* ".." becomes "::" */
232				*out++ = ':';
233				*out++ = ':';
234				in += 2;
235			} else {
236				/* "." becomes "-" */
237				*out++ = '-';
238				in++;
239			}
240			break;
241		case 'a' ... 'z':
242		case 'A' ... 'Z':
243		case '0' ... '9':
244		case ':':
245			*out++ = *in++;
246			break;
247		default:
248			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
249				*in);
250			goto done;
251		}
252
253done:
254	*out = '\0';
255}
256
/*
 * Try to consume one escape sequence.
 *
 * If the text at *in starts with seq, store value at *out, advance *in past
 * the sequence and *out by one character, and return true. Otherwise leave
 * both cursors and the output untouched and return false.
 */
static bool unescape(const char **in, char **out, const char *seq, char value)
{
	const char *src = *in;
	char *dst = *out;
	const size_t seq_len = strlen(seq);

	if (strncmp(src, seq, seq_len) != 0)
		return false;

	*dst = value;
	*out = dst + 1;
	*in = src + seq_len;

	return true;
}