Linux Audio

Check our new training course

Loading...
v5.4
  1// SPDX-License-Identifier: GPL-2.0
  2#include <string.h>
  3#include "debug.h"
  4
  5#include "demangle-rust.h"
  6
  7/*
  8 * Mangled Rust symbols look like this:
  9 *
 10 *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
 11 *
 12 * The original symbol is:
 13 *
 14 *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
 15 *
 16 * The last component of the path is a 64-bit hash in lowercase hex, prefixed
 17 * with "h". Rust does not have a global namespace between crates, an illusion
 18 * which Rust maintains by using the hash to distinguish things that would
 19 * otherwise have the same symbol.
 20 *
 21 * Any path component not starting with a XID_Start character is prefixed with
 22 * "_".
 23 *
 24 * The following escape sequences are used:
 25 *
 26 *     ","  =>  $C$
 27 *     "@"  =>  $SP$
 28 *     "*"  =>  $BP$
 29 *     "&"  =>  $RF$
 30 *     "<"  =>  $LT$
 31 *     ">"  =>  $GT$
 32 *     "("  =>  $LP$
 33 *     ")"  =>  $RP$
 34 *     " "  =>  $u20$
 35 *     "'"  =>  $u27$
 36 *     "["  =>  $u5b$
 37 *     "]"  =>  $u5d$
 38 *     "~"  =>  $u7e$
 39 *
 40 * A double ".." means "::" and a single "." means "-".
 41 *
 42 * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
 43 */
 44
 45static const char *hash_prefix = "::h";
 46static const size_t hash_prefix_len = 3;
 47static const size_t hash_len = 16;
 48
 49static bool is_prefixed_hash(const char *start);
 50static bool looks_like_rust(const char *sym, size_t len);
 51static bool unescape(const char **in, char **out, const char *seq, char value);
 52
 53/*
 54 * INPUT:
 55 *     sym: symbol that has been through BFD-demangling
 56 *
 57 * This function looks for the following indicators:
 58 *
 59 *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
 60 *
 61 *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
 62 *     hex digits. This is true of 99.9998% of hashes so once in your life you
 63 *     may see a false negative. The point is to notice path components that
 64 *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
 65 *     this case a false positive (non-Rust symbol has an important path
 66 *     component removed because it looks like a Rust hash) is worse than a
 67 *     false negative (the rare Rust symbol is not demangled) so this sets the
 68 *     balance in favor of false negatives.
 69 *
 70 *  3. There must be no characters other than a-zA-Z0-9 and _.:$
 71 *
 72 *  4. There must be no unrecognized $-sign sequences.
 73 *
 74 *  5. There must be no sequence of three or more dots in a row ("...").
 75 */
 76bool
 77rust_is_mangled(const char *sym)
 78{
 79	size_t len, len_without_hash;
 80
 81	if (!sym)
 82		return false;
 83
 84	len = strlen(sym);
 85	if (len <= hash_prefix_len + hash_len)
 86		/* Not long enough to contain "::h" + hash + something else */
 87		return false;
 88
 89	len_without_hash = len - (hash_prefix_len + hash_len);
 90	if (!is_prefixed_hash(sym + len_without_hash))
 91		return false;
 92
 93	return looks_like_rust(sym, len_without_hash);
 94}
 95
 96/*
 97 * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
 98 * digits must comprise between 5 and 15 (inclusive) distinct digits.
 99 */
100static bool is_prefixed_hash(const char *str)
101{
102	const char *end;
103	bool seen[16];
104	size_t i;
105	int count;
106
107	if (strncmp(str, hash_prefix, hash_prefix_len))
108		return false;
109	str += hash_prefix_len;
110
111	memset(seen, false, sizeof(seen));
112	for (end = str + hash_len; str < end; str++)
113		if (*str >= '0' && *str <= '9')
114			seen[*str - '0'] = true;
115		else if (*str >= 'a' && *str <= 'f')
116			seen[*str - 'a' + 10] = true;
117		else
118			return false;
119
120	/* Count how many distinct digits seen */
121	count = 0;
122	for (i = 0; i < 16; i++)
123		if (seen[i])
124			count++;
125
126	return count >= 5 && count <= 15;
127}
128
/*
 * Scan the first len characters of str and report whether they consist only
 * of a-zA-Z0-9, '_', ':', '.', and recognized $-escape sequences, with no run
 * of three or more dots. Returns false at the first offending character.
 */
static bool looks_like_rust(const char *str, size_t len)
{
	/* Every $-escape the mangled form may contain. */
	static const char * const escapes[] = {
		"$C$",
		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
	};
	const size_t nr_escapes = sizeof(escapes) / sizeof(escapes[0]);
	const char *cur = str;
	const char *limit = str + len;

	while (cur < limit) {
		char c = *cur;

		if (c == '$') {
			size_t skip = 0;
			size_t i;

			for (i = 0; i < nr_escapes; i++) {
				size_t n = strlen(escapes[i]);

				if (!strncmp(cur, escapes[i], n)) {
					skip = n;
					break;
				}
			}

			/* A '$' that starts no known escape is disqualifying. */
			if (!skip)
				return false;

			cur += skip;
			continue;
		}

		if (c == '.') {
			/* Do not allow three or more consecutive dots */
			if (!strncmp(cur, "...", 3))
				return false;
		} else if (!((c >= 'a' && c <= 'z') ||
			     (c >= 'A' && c <= 'Z') ||
			     (c >= '0' && c <= '9') ||
			     c == '_' || c == ':')) {
			return false;
		}

		cur++;
	}

	return true;
}
173
174/*
175 * INPUT:
176 *     sym: symbol for which rust_is_mangled(sym) returns true
177 *
178 * The input is demangled in-place because the mangled name is always longer
179 * than the demangled one.
180 */
181void
182rust_demangle_sym(char *sym)
183{
184	const char *in;
185	char *out;
186	const char *end;
187
188	if (!sym)
189		return;
190
191	in = sym;
192	out = sym;
193	end = sym + strlen(sym) - (hash_prefix_len + hash_len);
194
195	while (in < end)
196		switch (*in) {
197		case '$':
198			if (!(unescape(&in, &out, "$C$", ',')
199					|| unescape(&in, &out, "$SP$", '@')
200					|| unescape(&in, &out, "$BP$", '*')
201					|| unescape(&in, &out, "$RF$", '&')
202					|| unescape(&in, &out, "$LT$", '<')
203					|| unescape(&in, &out, "$GT$", '>')
204					|| unescape(&in, &out, "$LP$", '(')
205					|| unescape(&in, &out, "$RP$", ')')
206					|| unescape(&in, &out, "$u20$", ' ')
207					|| unescape(&in, &out, "$u27$", '\'')
208					|| unescape(&in, &out, "$u5b$", '[')
209					|| unescape(&in, &out, "$u5d$", ']')
210					|| unescape(&in, &out, "$u7e$", '~'))) {
211				pr_err("demangle-rust: unexpected escape sequence");
212				goto done;
213			}
214			break;
215		case '_':
216			/*
217			 * If this is the start of a path component and the next
218			 * character is an escape sequence, ignore the
219			 * underscore. The mangler inserts an underscore to make
220			 * sure the path component begins with a XID_Start
221			 * character.
222			 */
223			if ((in == sym || in[-1] == ':') && in[1] == '$')
224				in++;
225			else
226				*out++ = *in++;
227			break;
228		case '.':
229			if (in[1] == '.') {
230				/* ".." becomes "::" */
231				*out++ = ':';
232				*out++ = ':';
233				in += 2;
234			} else {
235				/* "." becomes "-" */
236				*out++ = '-';
237				in++;
238			}
239			break;
240		case 'a' ... 'z':
241		case 'A' ... 'Z':
242		case '0' ... '9':
243		case ':':
244			*out++ = *in++;
245			break;
246		default:
247			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
248				*in);
249			goto done;
250		}
251
252done:
253	*out = '\0';
254}
255
/*
 * If the input at *in starts with the escape sequence seq, store value at
 * *out, step *in past the sequence and *out past the stored character, and
 * return true. Otherwise return false and leave both cursors unchanged.
 */
static bool unescape(const char **in, char **out, const char *seq, char value)
{
	const char *src = *in;
	char *dst = *out;
	size_t seq_len = strlen(seq);

	if (strncmp(src, seq, seq_len) != 0)
		return false;

	/* Emit the decoded character, then publish both advanced cursors. */
	*dst = value;
	*in = src + seq_len;
	*out = dst + 1;

	return true;
}
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2#include <string.h>
  3#include "debug.h"
  4
  5#include "demangle-rust.h"
  6
  7/*
  8 * Mangled Rust symbols look like this:
  9 *
 10 *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
 11 *
 12 * The original symbol is:
 13 *
 14 *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
 15 *
 16 * The last component of the path is a 64-bit hash in lowercase hex, prefixed
 17 * with "h". Rust does not have a global namespace between crates, an illusion
 18 * which Rust maintains by using the hash to distinguish things that would
 19 * otherwise have the same symbol.
 20 *
 21 * Any path component not starting with a XID_Start character is prefixed with
 22 * "_".
 23 *
 24 * The following escape sequences are used:
 25 *
 26 *     ","  =>  $C$
 27 *     "@"  =>  $SP$
 28 *     "*"  =>  $BP$
 29 *     "&"  =>  $RF$
 30 *     "<"  =>  $LT$
 31 *     ">"  =>  $GT$
 32 *     "("  =>  $LP$
 33 *     ")"  =>  $RP$
 34 *     " "  =>  $u20$
 35 *     "'"  =>  $u27$
 36 *     "["  =>  $u5b$
 37 *     "]"  =>  $u5d$
 38 *     "~"  =>  $u7e$
 39 *
 40 * A double ".." means "::" and a single "." means "-".
 41 *
 42 * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
 43 */
 44
 45static const char *hash_prefix = "::h";
 46static const size_t hash_prefix_len = 3;
 47static const size_t hash_len = 16;
 48
 49static bool is_prefixed_hash(const char *start);
 50static bool looks_like_rust(const char *sym, size_t len);
 51static bool unescape(const char **in, char **out, const char *seq, char value);
 52
 53/*
 54 * INPUT:
 55 *     sym: symbol that has been through BFD-demangling
 56 *
 57 * This function looks for the following indicators:
 58 *
 59 *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
 60 *
 61 *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
 62 *     hex digits. This is true of 99.9998% of hashes so once in your life you
 63 *     may see a false negative. The point is to notice path components that
 64 *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
 65 *     this case a false positive (non-Rust symbol has an important path
 66 *     component removed because it looks like a Rust hash) is worse than a
 67 *     false negative (the rare Rust symbol is not demangled) so this sets the
 68 *     balance in favor of false negatives.
 69 *
 70 *  3. There must be no characters other than a-zA-Z0-9 and _.:$
 71 *
 72 *  4. There must be no unrecognized $-sign sequences.
 73 *
 74 *  5. There must be no sequence of three or more dots in a row ("...").
 75 */
 76bool
 77rust_is_mangled(const char *sym)
 78{
 79	size_t len, len_without_hash;
 80
 81	if (!sym)
 82		return false;
 83
 84	len = strlen(sym);
 85	if (len <= hash_prefix_len + hash_len)
 86		/* Not long enough to contain "::h" + hash + something else */
 87		return false;
 88
 89	len_without_hash = len - (hash_prefix_len + hash_len);
 90	if (!is_prefixed_hash(sym + len_without_hash))
 91		return false;
 92
 93	return looks_like_rust(sym, len_without_hash);
 94}
 95
 96/*
 97 * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
 98 * digits must comprise between 5 and 15 (inclusive) distinct digits.
 99 */
100static bool is_prefixed_hash(const char *str)
101{
102	const char *end;
103	bool seen[16];
104	size_t i;
105	int count;
106
107	if (strncmp(str, hash_prefix, hash_prefix_len))
108		return false;
109	str += hash_prefix_len;
110
111	memset(seen, false, sizeof(seen));
112	for (end = str + hash_len; str < end; str++)
113		if (*str >= '0' && *str <= '9')
114			seen[*str - '0'] = true;
115		else if (*str >= 'a' && *str <= 'f')
116			seen[*str - 'a' + 10] = true;
117		else
118			return false;
119
120	/* Count how many distinct digits seen */
121	count = 0;
122	for (i = 0; i < 16; i++)
123		if (seen[i])
124			count++;
125
126	return count >= 5 && count <= 15;
127}
128
/*
 * Scan the first len characters of str and report whether they consist only
 * of a-zA-Z0-9, '_', ':', '.', and recognized $-escape sequences, with no run
 * of three or more dots. Returns false at the first offending character.
 */
static bool looks_like_rust(const char *str, size_t len)
{
	/* Every $-escape the mangled form may contain. */
	static const char * const escapes[] = {
		"$C$",
		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
	};
	const size_t nr_escapes = sizeof(escapes) / sizeof(escapes[0]);
	const char *cur = str;
	const char *limit = str + len;

	while (cur < limit) {
		char c = *cur;

		if (c == '$') {
			size_t skip = 0;
			size_t i;

			for (i = 0; i < nr_escapes; i++) {
				size_t n = strlen(escapes[i]);

				if (!strncmp(cur, escapes[i], n)) {
					skip = n;
					break;
				}
			}

			/* A '$' that starts no known escape is disqualifying. */
			if (!skip)
				return false;

			cur += skip;
			continue;
		}

		if (c == '.') {
			/* Do not allow three or more consecutive dots */
			if (!strncmp(cur, "...", 3))
				return false;
		} else if (!((c >= 'a' && c <= 'z') ||
			     (c >= 'A' && c <= 'Z') ||
			     (c >= '0' && c <= '9') ||
			     c == '_' || c == ':')) {
			return false;
		}

		cur++;
	}

	return true;
}
173
174/*
175 * INPUT:
176 *     sym: symbol for which rust_is_mangled(sym) returns true
177 *
178 * The input is demangled in-place because the mangled name is always longer
179 * than the demangled one.
180 */
181void
182rust_demangle_sym(char *sym)
183{
184	const char *in;
185	char *out;
186	const char *end;
187
188	if (!sym)
189		return;
190
191	in = sym;
192	out = sym;
193	end = sym + strlen(sym) - (hash_prefix_len + hash_len);
194
195	while (in < end)
196		switch (*in) {
197		case '$':
198			if (!(unescape(&in, &out, "$C$", ',')
199					|| unescape(&in, &out, "$SP$", '@')
200					|| unescape(&in, &out, "$BP$", '*')
201					|| unescape(&in, &out, "$RF$", '&')
202					|| unescape(&in, &out, "$LT$", '<')
203					|| unescape(&in, &out, "$GT$", '>')
204					|| unescape(&in, &out, "$LP$", '(')
205					|| unescape(&in, &out, "$RP$", ')')
206					|| unescape(&in, &out, "$u20$", ' ')
207					|| unescape(&in, &out, "$u27$", '\'')
208					|| unescape(&in, &out, "$u5b$", '[')
209					|| unescape(&in, &out, "$u5d$", ']')
210					|| unescape(&in, &out, "$u7e$", '~'))) {
211				pr_err("demangle-rust: unexpected escape sequence");
212				goto done;
213			}
214			break;
215		case '_':
216			/*
217			 * If this is the start of a path component and the next
218			 * character is an escape sequence, ignore the
219			 * underscore. The mangler inserts an underscore to make
220			 * sure the path component begins with a XID_Start
221			 * character.
222			 */
223			if ((in == sym || in[-1] == ':') && in[1] == '$')
224				in++;
225			else
226				*out++ = *in++;
227			break;
228		case '.':
229			if (in[1] == '.') {
230				/* ".." becomes "::" */
231				*out++ = ':';
232				*out++ = ':';
233				in += 2;
234			} else {
235				/* "." becomes "-" */
236				*out++ = '-';
237				in++;
238			}
239			break;
240		case 'a' ... 'z':
241		case 'A' ... 'Z':
242		case '0' ... '9':
243		case ':':
244			*out++ = *in++;
245			break;
246		default:
247			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
248				*in);
249			goto done;
250		}
251
252done:
253	*out = '\0';
254}
255
/*
 * If the input at *in starts with the escape sequence seq, store value at
 * *out, step *in past the sequence and *out past the stored character, and
 * return true. Otherwise return false and leave both cursors unchanged.
 */
static bool unescape(const char **in, char **out, const char *seq, char value)
{
	const char *src = *in;
	char *dst = *out;
	size_t seq_len = strlen(seq);

	if (strncmp(src, seq, seq_len) != 0)
		return false;

	/* Emit the decoded character, then publish both advanced cursors. */
	*dst = value;
	*in = src + seq_len;
	*out = dst + 1;

	return true;
}