Loading...
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2/* Copyright (c) 2018 Facebook */
3
4#include <endian.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <fcntl.h>
9#include <unistd.h>
10#include <errno.h>
11#include <linux/err.h>
12#include <linux/btf.h>
13#include <gelf.h>
14#include "btf.h"
15#include "bpf.h"
16#include "libbpf.h"
17#include "libbpf_internal.h"
18#include "hashmap.h"
19
20#define BTF_MAX_NR_TYPES 0x7fffffff
21#define BTF_MAX_STR_OFFSET 0x7fffffff
22
23static struct btf_type btf_void;
24
25struct btf {
26 union {
27 struct btf_header *hdr;
28 void *data;
29 };
30 struct btf_type **types;
31 const char *strings;
32 void *nohdr_data;
33 __u32 nr_types;
34 __u32 types_size;
35 __u32 data_size;
36 int fd;
37};
38
39static inline __u64 ptr_to_u64(const void *ptr)
40{
41 return (__u64) (unsigned long) ptr;
42}
43
44static int btf_add_type(struct btf *btf, struct btf_type *t)
45{
46 if (btf->types_size - btf->nr_types < 2) {
47 struct btf_type **new_types;
48 __u32 expand_by, new_size;
49
50 if (btf->types_size == BTF_MAX_NR_TYPES)
51 return -E2BIG;
52
53 expand_by = max(btf->types_size >> 2, 16);
54 new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
55
56 new_types = realloc(btf->types, sizeof(*new_types) * new_size);
57 if (!new_types)
58 return -ENOMEM;
59
60 if (btf->nr_types == 0)
61 new_types[0] = &btf_void;
62
63 btf->types = new_types;
64 btf->types_size = new_size;
65 }
66
67 btf->types[++(btf->nr_types)] = t;
68
69 return 0;
70}
71
72static int btf_parse_hdr(struct btf *btf)
73{
74 const struct btf_header *hdr = btf->hdr;
75 __u32 meta_left;
76
77 if (btf->data_size < sizeof(struct btf_header)) {
78 pr_debug("BTF header not found\n");
79 return -EINVAL;
80 }
81
82 if (hdr->magic != BTF_MAGIC) {
83 pr_debug("Invalid BTF magic:%x\n", hdr->magic);
84 return -EINVAL;
85 }
86
87 if (hdr->version != BTF_VERSION) {
88 pr_debug("Unsupported BTF version:%u\n", hdr->version);
89 return -ENOTSUP;
90 }
91
92 if (hdr->flags) {
93 pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
94 return -ENOTSUP;
95 }
96
97 meta_left = btf->data_size - sizeof(*hdr);
98 if (!meta_left) {
99 pr_debug("BTF has no data\n");
100 return -EINVAL;
101 }
102
103 if (meta_left < hdr->type_off) {
104 pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
105 return -EINVAL;
106 }
107
108 if (meta_left < hdr->str_off) {
109 pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
110 return -EINVAL;
111 }
112
113 if (hdr->type_off >= hdr->str_off) {
114 pr_debug("BTF type section offset >= string section offset. No type?\n");
115 return -EINVAL;
116 }
117
118 if (hdr->type_off & 0x02) {
119 pr_debug("BTF type section is not aligned to 4 bytes\n");
120 return -EINVAL;
121 }
122
123 btf->nohdr_data = btf->hdr + 1;
124
125 return 0;
126}
127
128static int btf_parse_str_sec(struct btf *btf)
129{
130 const struct btf_header *hdr = btf->hdr;
131 const char *start = btf->nohdr_data + hdr->str_off;
132 const char *end = start + btf->hdr->str_len;
133
134 if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
135 start[0] || end[-1]) {
136 pr_debug("Invalid BTF string section\n");
137 return -EINVAL;
138 }
139
140 btf->strings = start;
141
142 return 0;
143}
144
145static int btf_type_size(struct btf_type *t)
146{
147 int base_size = sizeof(struct btf_type);
148 __u16 vlen = btf_vlen(t);
149
150 switch (btf_kind(t)) {
151 case BTF_KIND_FWD:
152 case BTF_KIND_CONST:
153 case BTF_KIND_VOLATILE:
154 case BTF_KIND_RESTRICT:
155 case BTF_KIND_PTR:
156 case BTF_KIND_TYPEDEF:
157 case BTF_KIND_FUNC:
158 return base_size;
159 case BTF_KIND_INT:
160 return base_size + sizeof(__u32);
161 case BTF_KIND_ENUM:
162 return base_size + vlen * sizeof(struct btf_enum);
163 case BTF_KIND_ARRAY:
164 return base_size + sizeof(struct btf_array);
165 case BTF_KIND_STRUCT:
166 case BTF_KIND_UNION:
167 return base_size + vlen * sizeof(struct btf_member);
168 case BTF_KIND_FUNC_PROTO:
169 return base_size + vlen * sizeof(struct btf_param);
170 case BTF_KIND_VAR:
171 return base_size + sizeof(struct btf_var);
172 case BTF_KIND_DATASEC:
173 return base_size + vlen * sizeof(struct btf_var_secinfo);
174 default:
175 pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
176 return -EINVAL;
177 }
178}
179
180static int btf_parse_type_sec(struct btf *btf)
181{
182 struct btf_header *hdr = btf->hdr;
183 void *nohdr_data = btf->nohdr_data;
184 void *next_type = nohdr_data + hdr->type_off;
185 void *end_type = nohdr_data + hdr->str_off;
186
187 while (next_type < end_type) {
188 struct btf_type *t = next_type;
189 int type_size;
190 int err;
191
192 type_size = btf_type_size(t);
193 if (type_size < 0)
194 return type_size;
195 next_type += type_size;
196 err = btf_add_type(btf, t);
197 if (err)
198 return err;
199 }
200
201 return 0;
202}
203
204__u32 btf__get_nr_types(const struct btf *btf)
205{
206 return btf->nr_types;
207}
208
209const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
210{
211 if (type_id > btf->nr_types)
212 return NULL;
213
214 return btf->types[type_id];
215}
216
217static bool btf_type_is_void(const struct btf_type *t)
218{
219 return t == &btf_void || btf_is_fwd(t);
220}
221
/* True when @t is NULL or satisfies btf_type_is_void(). */
static bool btf_type_is_void_or_null(const struct btf_type *t)
{
	if (!t)
		return true;

	return btf_type_is_void(t);
}
226
227#define MAX_RESOLVE_DEPTH 32
228
229__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
230{
231 const struct btf_array *array;
232 const struct btf_type *t;
233 __u32 nelems = 1;
234 __s64 size = -1;
235 int i;
236
237 t = btf__type_by_id(btf, type_id);
238 for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
239 i++) {
240 switch (btf_kind(t)) {
241 case BTF_KIND_INT:
242 case BTF_KIND_STRUCT:
243 case BTF_KIND_UNION:
244 case BTF_KIND_ENUM:
245 case BTF_KIND_DATASEC:
246 size = t->size;
247 goto done;
248 case BTF_KIND_PTR:
249 size = sizeof(void *);
250 goto done;
251 case BTF_KIND_TYPEDEF:
252 case BTF_KIND_VOLATILE:
253 case BTF_KIND_CONST:
254 case BTF_KIND_RESTRICT:
255 case BTF_KIND_VAR:
256 type_id = t->type;
257 break;
258 case BTF_KIND_ARRAY:
259 array = btf_array(t);
260 if (nelems && array->nelems > UINT32_MAX / nelems)
261 return -E2BIG;
262 nelems *= array->nelems;
263 type_id = array->type;
264 break;
265 default:
266 return -EINVAL;
267 }
268
269 t = btf__type_by_id(btf, type_id);
270 }
271
272 if (size < 0)
273 return -EINVAL;
274
275done:
276 if (nelems && size > UINT32_MAX / nelems)
277 return -E2BIG;
278
279 return nelems * size;
280}
281
282int btf__resolve_type(const struct btf *btf, __u32 type_id)
283{
284 const struct btf_type *t;
285 int depth = 0;
286
287 t = btf__type_by_id(btf, type_id);
288 while (depth < MAX_RESOLVE_DEPTH &&
289 !btf_type_is_void_or_null(t) &&
290 (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) {
291 type_id = t->type;
292 t = btf__type_by_id(btf, type_id);
293 depth++;
294 }
295
296 if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
297 return -EINVAL;
298
299 return type_id;
300}
301
302__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
303{
304 __u32 i;
305
306 if (!strcmp(type_name, "void"))
307 return 0;
308
309 for (i = 1; i <= btf->nr_types; i++) {
310 const struct btf_type *t = btf->types[i];
311 const char *name = btf__name_by_offset(btf, t->name_off);
312
313 if (name && !strcmp(type_name, name))
314 return i;
315 }
316
317 return -ENOENT;
318}
319
320void btf__free(struct btf *btf)
321{
322 if (!btf)
323 return;
324
325 if (btf->fd != -1)
326 close(btf->fd);
327
328 free(btf->data);
329 free(btf->types);
330 free(btf);
331}
332
333struct btf *btf__new(__u8 *data, __u32 size)
334{
335 struct btf *btf;
336 int err;
337
338 btf = calloc(1, sizeof(struct btf));
339 if (!btf)
340 return ERR_PTR(-ENOMEM);
341
342 btf->fd = -1;
343
344 btf->data = malloc(size);
345 if (!btf->data) {
346 err = -ENOMEM;
347 goto done;
348 }
349
350 memcpy(btf->data, data, size);
351 btf->data_size = size;
352
353 err = btf_parse_hdr(btf);
354 if (err)
355 goto done;
356
357 err = btf_parse_str_sec(btf);
358 if (err)
359 goto done;
360
361 err = btf_parse_type_sec(btf);
362
363done:
364 if (err) {
365 btf__free(btf);
366 return ERR_PTR(err);
367 }
368
369 return btf;
370}
371
372static bool btf_check_endianness(const GElf_Ehdr *ehdr)
373{
374#if __BYTE_ORDER == __LITTLE_ENDIAN
375 return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
376#elif __BYTE_ORDER == __BIG_ENDIAN
377 return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
378#else
379# error "Unrecognized __BYTE_ORDER__"
380#endif
381}
382
383struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
384{
385 Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
386 int err = 0, fd = -1, idx = 0;
387 struct btf *btf = NULL;
388 Elf_Scn *scn = NULL;
389 Elf *elf = NULL;
390 GElf_Ehdr ehdr;
391
392 if (elf_version(EV_CURRENT) == EV_NONE) {
393 pr_warning("failed to init libelf for %s\n", path);
394 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
395 }
396
397 fd = open(path, O_RDONLY);
398 if (fd < 0) {
399 err = -errno;
400 pr_warning("failed to open %s: %s\n", path, strerror(errno));
401 return ERR_PTR(err);
402 }
403
404 err = -LIBBPF_ERRNO__FORMAT;
405
406 elf = elf_begin(fd, ELF_C_READ, NULL);
407 if (!elf) {
408 pr_warning("failed to open %s as ELF file\n", path);
409 goto done;
410 }
411 if (!gelf_getehdr(elf, &ehdr)) {
412 pr_warning("failed to get EHDR from %s\n", path);
413 goto done;
414 }
415 if (!btf_check_endianness(&ehdr)) {
416 pr_warning("non-native ELF endianness is not supported\n");
417 goto done;
418 }
419 if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
420 pr_warning("failed to get e_shstrndx from %s\n", path);
421 goto done;
422 }
423
424 while ((scn = elf_nextscn(elf, scn)) != NULL) {
425 GElf_Shdr sh;
426 char *name;
427
428 idx++;
429 if (gelf_getshdr(scn, &sh) != &sh) {
430 pr_warning("failed to get section(%d) header from %s\n",
431 idx, path);
432 goto done;
433 }
434 name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
435 if (!name) {
436 pr_warning("failed to get section(%d) name from %s\n",
437 idx, path);
438 goto done;
439 }
440 if (strcmp(name, BTF_ELF_SEC) == 0) {
441 btf_data = elf_getdata(scn, 0);
442 if (!btf_data) {
443 pr_warning("failed to get section(%d, %s) data from %s\n",
444 idx, name, path);
445 goto done;
446 }
447 continue;
448 } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
449 btf_ext_data = elf_getdata(scn, 0);
450 if (!btf_ext_data) {
451 pr_warning("failed to get section(%d, %s) data from %s\n",
452 idx, name, path);
453 goto done;
454 }
455 continue;
456 }
457 }
458
459 err = 0;
460
461 if (!btf_data) {
462 err = -ENOENT;
463 goto done;
464 }
465 btf = btf__new(btf_data->d_buf, btf_data->d_size);
466 if (IS_ERR(btf))
467 goto done;
468
469 if (btf_ext && btf_ext_data) {
470 *btf_ext = btf_ext__new(btf_ext_data->d_buf,
471 btf_ext_data->d_size);
472 if (IS_ERR(*btf_ext))
473 goto done;
474 } else if (btf_ext) {
475 *btf_ext = NULL;
476 }
477done:
478 if (elf)
479 elf_end(elf);
480 close(fd);
481
482 if (err)
483 return ERR_PTR(err);
484 /*
485 * btf is always parsed before btf_ext, so no need to clean up
486 * btf_ext, if btf loading failed
487 */
488 if (IS_ERR(btf))
489 return btf;
490 if (btf_ext && IS_ERR(*btf_ext)) {
491 btf__free(btf);
492 err = PTR_ERR(*btf_ext);
493 return ERR_PTR(err);
494 }
495 return btf;
496}
497
498static int compare_vsi_off(const void *_a, const void *_b)
499{
500 const struct btf_var_secinfo *a = _a;
501 const struct btf_var_secinfo *b = _b;
502
503 return a->offset - b->offset;
504}
505
506static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
507 struct btf_type *t)
508{
509 __u32 size = 0, off = 0, i, vars = btf_vlen(t);
510 const char *name = btf__name_by_offset(btf, t->name_off);
511 const struct btf_type *t_var;
512 struct btf_var_secinfo *vsi;
513 const struct btf_var *var;
514 int ret;
515
516 if (!name) {
517 pr_debug("No name found in string section for DATASEC kind.\n");
518 return -ENOENT;
519 }
520
521 ret = bpf_object__section_size(obj, name, &size);
522 if (ret || !size || (t->size && t->size != size)) {
523 pr_debug("Invalid size for section %s: %u bytes\n", name, size);
524 return -ENOENT;
525 }
526
527 t->size = size;
528
529 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
530 t_var = btf__type_by_id(btf, vsi->type);
531 var = btf_var(t_var);
532
533 if (!btf_is_var(t_var)) {
534 pr_debug("Non-VAR type seen in section %s\n", name);
535 return -EINVAL;
536 }
537
538 if (var->linkage == BTF_VAR_STATIC)
539 continue;
540
541 name = btf__name_by_offset(btf, t_var->name_off);
542 if (!name) {
543 pr_debug("No name found in string section for VAR kind\n");
544 return -ENOENT;
545 }
546
547 ret = bpf_object__variable_offset(obj, name, &off);
548 if (ret) {
549 pr_debug("No offset found in symbol table for VAR %s\n",
550 name);
551 return -ENOENT;
552 }
553
554 vsi->offset = off;
555 }
556
557 qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
558 return 0;
559}
560
561int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
562{
563 int err = 0;
564 __u32 i;
565
566 for (i = 1; i <= btf->nr_types; i++) {
567 struct btf_type *t = btf->types[i];
568
569 /* Loader needs to fix up some of the things compiler
570 * couldn't get its hands on while emitting BTF. This
571 * is section size and global variable offset. We use
572 * the info from the ELF itself for this purpose.
573 */
574 if (btf_is_datasec(t)) {
575 err = btf_fixup_datasec(obj, btf, t);
576 if (err)
577 break;
578 }
579 }
580
581 return err;
582}
583
584int btf__load(struct btf *btf)
585{
586 __u32 log_buf_size = BPF_LOG_BUF_SIZE;
587 char *log_buf = NULL;
588 int err = 0;
589
590 if (btf->fd >= 0)
591 return -EEXIST;
592
593 log_buf = malloc(log_buf_size);
594 if (!log_buf)
595 return -ENOMEM;
596
597 *log_buf = 0;
598
599 btf->fd = bpf_load_btf(btf->data, btf->data_size,
600 log_buf, log_buf_size, false);
601 if (btf->fd < 0) {
602 err = -errno;
603 pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
604 if (*log_buf)
605 pr_warning("%s\n", log_buf);
606 goto done;
607 }
608
609done:
610 free(log_buf);
611 return err;
612}
613
614int btf__fd(const struct btf *btf)
615{
616 return btf->fd;
617}
618
619const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
620{
621 *size = btf->data_size;
622 return btf->data;
623}
624
625const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
626{
627 if (offset < btf->hdr->str_len)
628 return &btf->strings[offset];
629 else
630 return NULL;
631}
632
633int btf__get_from_id(__u32 id, struct btf **btf)
634{
635 struct bpf_btf_info btf_info = { 0 };
636 __u32 len = sizeof(btf_info);
637 __u32 last_size;
638 int btf_fd;
639 void *ptr;
640 int err;
641
642 err = 0;
643 *btf = NULL;
644 btf_fd = bpf_btf_get_fd_by_id(id);
645 if (btf_fd < 0)
646 return 0;
647
648 /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
649 * let's start with a sane default - 4KiB here - and resize it only if
650 * bpf_obj_get_info_by_fd() needs a bigger buffer.
651 */
652 btf_info.btf_size = 4096;
653 last_size = btf_info.btf_size;
654 ptr = malloc(last_size);
655 if (!ptr) {
656 err = -ENOMEM;
657 goto exit_free;
658 }
659
660 memset(ptr, 0, last_size);
661 btf_info.btf = ptr_to_u64(ptr);
662 err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
663
664 if (!err && btf_info.btf_size > last_size) {
665 void *temp_ptr;
666
667 last_size = btf_info.btf_size;
668 temp_ptr = realloc(ptr, last_size);
669 if (!temp_ptr) {
670 err = -ENOMEM;
671 goto exit_free;
672 }
673 ptr = temp_ptr;
674 memset(ptr, 0, last_size);
675 btf_info.btf = ptr_to_u64(ptr);
676 err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
677 }
678
679 if (err || btf_info.btf_size > last_size) {
680 err = errno;
681 goto exit_free;
682 }
683
684 *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
685 if (IS_ERR(*btf)) {
686 err = PTR_ERR(*btf);
687 *btf = NULL;
688 }
689
690exit_free:
691 close(btf_fd);
692 free(ptr);
693
694 return err;
695}
696
697int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
698 __u32 expected_key_size, __u32 expected_value_size,
699 __u32 *key_type_id, __u32 *value_type_id)
700{
701 const struct btf_type *container_type;
702 const struct btf_member *key, *value;
703 const size_t max_name = 256;
704 char container_name[max_name];
705 __s64 key_size, value_size;
706 __s32 container_id;
707
708 if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
709 max_name) {
710 pr_warning("map:%s length of '____btf_map_%s' is too long\n",
711 map_name, map_name);
712 return -EINVAL;
713 }
714
715 container_id = btf__find_by_name(btf, container_name);
716 if (container_id < 0) {
717 pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
718 map_name, container_name);
719 return container_id;
720 }
721
722 container_type = btf__type_by_id(btf, container_id);
723 if (!container_type) {
724 pr_warning("map:%s cannot find BTF type for container_id:%u\n",
725 map_name, container_id);
726 return -EINVAL;
727 }
728
729 if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
730 pr_warning("map:%s container_name:%s is an invalid container struct\n",
731 map_name, container_name);
732 return -EINVAL;
733 }
734
735 key = btf_members(container_type);
736 value = key + 1;
737
738 key_size = btf__resolve_size(btf, key->type);
739 if (key_size < 0) {
740 pr_warning("map:%s invalid BTF key_type_size\n", map_name);
741 return key_size;
742 }
743
744 if (expected_key_size != key_size) {
745 pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
746 map_name, (__u32)key_size, expected_key_size);
747 return -EINVAL;
748 }
749
750 value_size = btf__resolve_size(btf, value->type);
751 if (value_size < 0) {
752 pr_warning("map:%s invalid BTF value_type_size\n", map_name);
753 return value_size;
754 }
755
756 if (expected_value_size != value_size) {
757 pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
758 map_name, (__u32)value_size, expected_value_size);
759 return -EINVAL;
760 }
761
762 *key_type_id = key->type;
763 *value_type_id = value->type;
764
765 return 0;
766}
767
768struct btf_ext_sec_setup_param {
769 __u32 off;
770 __u32 len;
771 __u32 min_rec_size;
772 struct btf_ext_info *ext_info;
773 const char *desc;
774};
775
776static int btf_ext_setup_info(struct btf_ext *btf_ext,
777 struct btf_ext_sec_setup_param *ext_sec)
778{
779 const struct btf_ext_info_sec *sinfo;
780 struct btf_ext_info *ext_info;
781 __u32 info_left, record_size;
782 /* The start of the info sec (including the __u32 record_size). */
783 void *info;
784
785 if (ext_sec->len == 0)
786 return 0;
787
788 if (ext_sec->off & 0x03) {
789 pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
790 ext_sec->desc);
791 return -EINVAL;
792 }
793
794 info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
795 info_left = ext_sec->len;
796
797 if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
798 pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
799 ext_sec->desc, ext_sec->off, ext_sec->len);
800 return -EINVAL;
801 }
802
803 /* At least a record size */
804 if (info_left < sizeof(__u32)) {
805 pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
806 return -EINVAL;
807 }
808
809 /* The record size needs to meet the minimum standard */
810 record_size = *(__u32 *)info;
811 if (record_size < ext_sec->min_rec_size ||
812 record_size & 0x03) {
813 pr_debug("%s section in .BTF.ext has invalid record size %u\n",
814 ext_sec->desc, record_size);
815 return -EINVAL;
816 }
817
818 sinfo = info + sizeof(__u32);
819 info_left -= sizeof(__u32);
820
821 /* If no records, return failure now so .BTF.ext won't be used. */
822 if (!info_left) {
823 pr_debug("%s section in .BTF.ext has no records", ext_sec->desc);
824 return -EINVAL;
825 }
826
827 while (info_left) {
828 unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
829 __u64 total_record_size;
830 __u32 num_records;
831
832 if (info_left < sec_hdrlen) {
833 pr_debug("%s section header is not found in .BTF.ext\n",
834 ext_sec->desc);
835 return -EINVAL;
836 }
837
838 num_records = sinfo->num_info;
839 if (num_records == 0) {
840 pr_debug("%s section has incorrect num_records in .BTF.ext\n",
841 ext_sec->desc);
842 return -EINVAL;
843 }
844
845 total_record_size = sec_hdrlen +
846 (__u64)num_records * record_size;
847 if (info_left < total_record_size) {
848 pr_debug("%s section has incorrect num_records in .BTF.ext\n",
849 ext_sec->desc);
850 return -EINVAL;
851 }
852
853 info_left -= total_record_size;
854 sinfo = (void *)sinfo + total_record_size;
855 }
856
857 ext_info = ext_sec->ext_info;
858 ext_info->len = ext_sec->len - sizeof(__u32);
859 ext_info->rec_size = record_size;
860 ext_info->info = info + sizeof(__u32);
861
862 return 0;
863}
864
865static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
866{
867 struct btf_ext_sec_setup_param param = {
868 .off = btf_ext->hdr->func_info_off,
869 .len = btf_ext->hdr->func_info_len,
870 .min_rec_size = sizeof(struct bpf_func_info_min),
871 .ext_info = &btf_ext->func_info,
872 .desc = "func_info"
873 };
874
875 return btf_ext_setup_info(btf_ext, ¶m);
876}
877
878static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
879{
880 struct btf_ext_sec_setup_param param = {
881 .off = btf_ext->hdr->line_info_off,
882 .len = btf_ext->hdr->line_info_len,
883 .min_rec_size = sizeof(struct bpf_line_info_min),
884 .ext_info = &btf_ext->line_info,
885 .desc = "line_info",
886 };
887
888 return btf_ext_setup_info(btf_ext, ¶m);
889}
890
891static int btf_ext_setup_offset_reloc(struct btf_ext *btf_ext)
892{
893 struct btf_ext_sec_setup_param param = {
894 .off = btf_ext->hdr->offset_reloc_off,
895 .len = btf_ext->hdr->offset_reloc_len,
896 .min_rec_size = sizeof(struct bpf_offset_reloc),
897 .ext_info = &btf_ext->offset_reloc_info,
898 .desc = "offset_reloc",
899 };
900
901 return btf_ext_setup_info(btf_ext, ¶m);
902}
903
904static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
905{
906 const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
907
908 if (data_size < offsetofend(struct btf_ext_header, hdr_len) ||
909 data_size < hdr->hdr_len) {
910 pr_debug("BTF.ext header not found");
911 return -EINVAL;
912 }
913
914 if (hdr->magic != BTF_MAGIC) {
915 pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
916 return -EINVAL;
917 }
918
919 if (hdr->version != BTF_VERSION) {
920 pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
921 return -ENOTSUP;
922 }
923
924 if (hdr->flags) {
925 pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
926 return -ENOTSUP;
927 }
928
929 if (data_size == hdr->hdr_len) {
930 pr_debug("BTF.ext has no data\n");
931 return -EINVAL;
932 }
933
934 return 0;
935}
936
937void btf_ext__free(struct btf_ext *btf_ext)
938{
939 if (!btf_ext)
940 return;
941 free(btf_ext->data);
942 free(btf_ext);
943}
944
945struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
946{
947 struct btf_ext *btf_ext;
948 int err;
949
950 err = btf_ext_parse_hdr(data, size);
951 if (err)
952 return ERR_PTR(err);
953
954 btf_ext = calloc(1, sizeof(struct btf_ext));
955 if (!btf_ext)
956 return ERR_PTR(-ENOMEM);
957
958 btf_ext->data_size = size;
959 btf_ext->data = malloc(size);
960 if (!btf_ext->data) {
961 err = -ENOMEM;
962 goto done;
963 }
964 memcpy(btf_ext->data, data, size);
965
966 if (btf_ext->hdr->hdr_len <
967 offsetofend(struct btf_ext_header, line_info_len))
968 goto done;
969 err = btf_ext_setup_func_info(btf_ext);
970 if (err)
971 goto done;
972
973 err = btf_ext_setup_line_info(btf_ext);
974 if (err)
975 goto done;
976
977 if (btf_ext->hdr->hdr_len <
978 offsetofend(struct btf_ext_header, offset_reloc_len))
979 goto done;
980 err = btf_ext_setup_offset_reloc(btf_ext);
981 if (err)
982 goto done;
983
984done:
985 if (err) {
986 btf_ext__free(btf_ext);
987 return ERR_PTR(err);
988 }
989
990 return btf_ext;
991}
992
993const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
994{
995 *size = btf_ext->data_size;
996 return btf_ext->data;
997}
998
999static int btf_ext_reloc_info(const struct btf *btf,
1000 const struct btf_ext_info *ext_info,
1001 const char *sec_name, __u32 insns_cnt,
1002 void **info, __u32 *cnt)
1003{
1004 __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
1005 __u32 i, record_size, existing_len, records_len;
1006 struct btf_ext_info_sec *sinfo;
1007 const char *info_sec_name;
1008 __u64 remain_len;
1009 void *data;
1010
1011 record_size = ext_info->rec_size;
1012 sinfo = ext_info->info;
1013 remain_len = ext_info->len;
1014 while (remain_len > 0) {
1015 records_len = sinfo->num_info * record_size;
1016 info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
1017 if (strcmp(info_sec_name, sec_name)) {
1018 remain_len -= sec_hdrlen + records_len;
1019 sinfo = (void *)sinfo + sec_hdrlen + records_len;
1020 continue;
1021 }
1022
1023 existing_len = (*cnt) * record_size;
1024 data = realloc(*info, existing_len + records_len);
1025 if (!data)
1026 return -ENOMEM;
1027
1028 memcpy(data + existing_len, sinfo->data, records_len);
1029 /* adjust insn_off only, the rest data will be passed
1030 * to the kernel.
1031 */
1032 for (i = 0; i < sinfo->num_info; i++) {
1033 __u32 *insn_off;
1034
1035 insn_off = data + existing_len + (i * record_size);
1036 *insn_off = *insn_off / sizeof(struct bpf_insn) +
1037 insns_cnt;
1038 }
1039 *info = data;
1040 *cnt += sinfo->num_info;
1041 return 0;
1042 }
1043
1044 return -ENOENT;
1045}
1046
1047int btf_ext__reloc_func_info(const struct btf *btf,
1048 const struct btf_ext *btf_ext,
1049 const char *sec_name, __u32 insns_cnt,
1050 void **func_info, __u32 *cnt)
1051{
1052 return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
1053 insns_cnt, func_info, cnt);
1054}
1055
1056int btf_ext__reloc_line_info(const struct btf *btf,
1057 const struct btf_ext *btf_ext,
1058 const char *sec_name, __u32 insns_cnt,
1059 void **line_info, __u32 *cnt)
1060{
1061 return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
1062 insns_cnt, line_info, cnt);
1063}
1064
1065__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
1066{
1067 return btf_ext->func_info.rec_size;
1068}
1069
1070__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
1071{
1072 return btf_ext->line_info.rec_size;
1073}
1074
1075struct btf_dedup;
1076
1077static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
1078 const struct btf_dedup_opts *opts);
1079static void btf_dedup_free(struct btf_dedup *d);
1080static int btf_dedup_strings(struct btf_dedup *d);
1081static int btf_dedup_prim_types(struct btf_dedup *d);
1082static int btf_dedup_struct_types(struct btf_dedup *d);
1083static int btf_dedup_ref_types(struct btf_dedup *d);
1084static int btf_dedup_compact_types(struct btf_dedup *d);
1085static int btf_dedup_remap_types(struct btf_dedup *d);
1086
1087/*
1088 * Deduplicate BTF types and strings.
1089 *
1090 * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
1091 * section with all BTF type descriptors and string data. It overwrites that
1092 * memory in-place with deduplicated types and strings without any loss of
1093 * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
1094 * is provided, all the strings referenced from .BTF.ext section are honored
1095 * and updated to point to the right offsets after deduplication.
1096 *
1097 * If function returns with error, type/string data might be garbled and should
1098 * be discarded.
1099 *
1100 * More verbose and detailed description of both problem btf_dedup is solving,
1101 * as well as solution could be found at:
1102 * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
1103 *
1104 * Problem description and justification
1105 * =====================================
1106 *
1107 * BTF type information is typically emitted either as a result of conversion
1108 * from DWARF to BTF or directly by compiler. In both cases, each compilation
1109 * unit contains information about a subset of all the types that are used
1110 * in an application. These subsets are frequently overlapping and contain a lot
1111 * of duplicated information when later concatenated together into a single
1112 * binary. This algorithm ensures that each unique type is represented by single
1113 * BTF type descriptor, greatly reducing resulting size of BTF data.
1114 *
1115 * Compilation unit isolation and subsequent duplication of data is not the only
1116 * problem. The same type hierarchy (e.g., struct and all the type that struct
1117 * references) in different compilation units can be represented in BTF to
1118 * various degrees of completeness (or, rather, incompleteness) due to
1119 * struct/union forward declarations.
1120 *
1121 * Let's take a look at an example, that we'll use to better understand the
1122 * problem (and solution). Suppose we have two compilation units, each using
1123 * same `struct S`, but each of them having incomplete type information about
1124 * struct's fields:
1125 *
1126 * // CU #1:
1127 * struct S;
1128 * struct A {
1129 * int a;
1130 * struct A* self;
1131 * struct S* parent;
1132 * };
1133 * struct B;
1134 * struct S {
1135 * struct A* a_ptr;
1136 * struct B* b_ptr;
1137 * };
1138 *
1139 * // CU #2:
1140 * struct S;
1141 * struct A;
1142 * struct B {
1143 * int b;
1144 * struct B* self;
1145 * struct S* parent;
1146 * };
1147 * struct S {
1148 * struct A* a_ptr;
1149 * struct B* b_ptr;
1150 * };
1151 *
1152 * In case of CU #1, BTF data will know only that `struct B` exist (but no
1153 * more), but will know the complete type information about `struct A`. While
1154 * for CU #2, it will know full type information about `struct B`, but will
1155 * only know about forward declaration of `struct A` (in BTF terms, it will
 * have `BTF_KIND_FWD` type descriptor with name `A`).
1157 *
1158 * This compilation unit isolation means that it's possible that there is no
1159 * single CU with complete type information describing structs `S`, `A`, and
1160 * `B`. Also, we might get tons of duplicated and redundant type information.
1161 *
1162 * Additional complication we need to keep in mind comes from the fact that
1163 * types, in general, can form graphs containing cycles, not just DAGs.
1164 *
1165 * While algorithm does deduplication, it also merges and resolves type
 * information (unless disabled through `struct btf_dedup_opts`), whenever possible.
1167 * E.g., in the example above with two compilation units having partial type
1168 * information for structs `A` and `B`, the output of algorithm will emit
1169 * a single copy of each BTF type that describes structs `A`, `B`, and `S`
1170 * (as well as type information for `int` and pointers), as if they were defined
1171 * in a single compilation unit as:
1172 *
1173 * struct A {
1174 * int a;
1175 * struct A* self;
1176 * struct S* parent;
1177 * };
1178 * struct B {
1179 * int b;
1180 * struct B* self;
1181 * struct S* parent;
1182 * };
1183 * struct S {
1184 * struct A* a_ptr;
1185 * struct B* b_ptr;
1186 * };
1187 *
1188 * Algorithm summary
1189 * =================
1190 *
1191 * Algorithm completes its work in 6 separate passes:
1192 *
1193 * 1. Strings deduplication.
1194 * 2. Primitive types deduplication (int, enum, fwd).
1195 * 3. Struct/union types deduplication.
1196 * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
1197 * protos, and const/volatile/restrict modifiers).
1198 * 5. Types compaction.
1199 * 6. Types remapping.
1200 *
1201 * Algorithm determines canonical type descriptor, which is a single
1202 * representative type for each truly unique type. This canonical type is the
1203 * one that will go into final deduplicated BTF type information. For
1204 * struct/unions, it is also the type that algorithm will merge additional type
1205 * information into (while resolving FWDs), as it discovers it from data in
1206 * other CUs. Each input BTF type eventually gets either mapped to itself, if
1207 * that type is canonical, or to some other type, if that type is equivalent
1208 * and was chosen as canonical representative. This mapping is stored in
1209 * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
1210 * FWD type got resolved to.
1211 *
1212 * To facilitate fast discovery of canonical types, we also maintain canonical
1213 * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash
1214 * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types
1215 * that match that signature. With sufficiently good choice of type signature
1216 * hashing function, we can limit number of canonical types for each unique type
1217 * signature to a very small number, allowing to find canonical type for any
1218 * duplicated type very quickly.
1219 *
1220 * Struct/union deduplication is the most critical part and algorithm for
1221 * deduplicating structs/unions is described in greater details in comments for
1222 * `btf_dedup_is_equiv` function.
1223 */
/*
 * Entry point of BTF deduplication: runs the six passes described above, in
 * order, over `btf` (and, for string handling, the optional `btf_ext`).
 *
 * Returns -EINVAL if the dedup state cannot be created. Otherwise returns the
 * negative error of the first pass that fails, or the result of the last pass
 * if none fails. The temporary dedup state is always released before return.
 */
int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
	       const struct btf_dedup_opts *opts)
{
	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
	int err;

	if (IS_ERR(d)) {
		/* newline-terminated like every other debug message here */
		pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d));
		return -EINVAL;
	}

	/* pass 1: strings */
	err = btf_dedup_strings(d);
	if (err < 0) {
		pr_debug("btf_dedup_strings failed:%d\n", err);
		goto done;
	}
	/* pass 2: primitive types */
	err = btf_dedup_prim_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_prim_types failed:%d\n", err);
		goto done;
	}
	/* pass 3: structs/unions */
	err = btf_dedup_struct_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_struct_types failed:%d\n", err);
		goto done;
	}
	/* pass 4: reference types */
	err = btf_dedup_ref_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_ref_types failed:%d\n", err);
		goto done;
	}
	/* pass 5: compaction */
	err = btf_dedup_compact_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_compact_types failed:%d\n", err);
		goto done;
	}
	/* pass 6: remapping of type IDs */
	err = btf_dedup_remap_types(d);
	if (err < 0) {
		pr_debug("btf_dedup_remap_types failed:%d\n", err);
		goto done;
	}

done:
	btf_dedup_free(d);
	return err;
}
1270
1271#define BTF_UNPROCESSED_ID ((__u32)-1)
1272#define BTF_IN_PROGRESS_ID ((__u32)-2)
1273
1274struct btf_dedup {
1275 /* .BTF section to be deduped in-place */
1276 struct btf *btf;
1277 /*
1278 * Optional .BTF.ext section. When provided, any strings referenced
1279 * from it will be taken into account when deduping strings
1280 */
1281 struct btf_ext *btf_ext;
1282 /*
1283 * This is a map from any type's signature hash to a list of possible
1284 * canonical representative type candidates. Hash collisions are
1285 * ignored, so even types of various kinds can share same list of
1286 * candidates, which is fine because we rely on subsequent
1287 * btf_xxx_equal() checks to authoritatively verify type equality.
1288 */
1289 struct hashmap *dedup_table;
1290 /* Canonical types map */
1291 __u32 *map;
1292 /* Hypothetical mapping, used during type graph equivalence checks */
1293 __u32 *hypot_map;
1294 __u32 *hypot_list;
1295 size_t hypot_cnt;
1296 size_t hypot_cap;
1297 /* Various option modifying behavior of algorithm */
1298 struct btf_dedup_opts opts;
1299};
1300
1301struct btf_str_ptr {
1302 const char *str;
1303 __u32 new_off;
1304 bool used;
1305};
1306
1307struct btf_str_ptrs {
1308 struct btf_str_ptr *ptrs;
1309 const char *data;
1310 __u32 cnt;
1311 __u32 cap;
1312};
1313
/*
 * Fold `value` into the running hash `h` as `h * 31 + value`.
 *
 * The arithmetic is carried out on unsigned long so that wrap-around is well
 * defined; doing it directly on signed long is undefined behaviour as soon as
 * the product or sum overflows, which is routine for hash accumulation.
 */
static long hash_combine(long h, long value)
{
	unsigned long mixed = (unsigned long)h * 31UL + (unsigned long)value;

	return (long)mixed;
}
1318
/*
 * Iterate over all dedup table entries stored under key `hash`; `node` is the
 * iteration cursor. `d` and `hash` are parenthesized so that arbitrary
 * expressions can be passed without precedence surprises.
 */
#define for_each_dedup_cand(d, node, hash) \
	hashmap__for_each_key_entry((d)->dedup_table, node, (void *)(hash))
1321
1322static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
1323{
1324 return hashmap__append(d->dedup_table,
1325 (void *)hash, (void *)(long)type_id);
1326}
1327
1328static int btf_dedup_hypot_map_add(struct btf_dedup *d,
1329 __u32 from_id, __u32 to_id)
1330{
1331 if (d->hypot_cnt == d->hypot_cap) {
1332 __u32 *new_list;
1333
1334 d->hypot_cap += max(16, d->hypot_cap / 2);
1335 new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
1336 if (!new_list)
1337 return -ENOMEM;
1338 d->hypot_list = new_list;
1339 }
1340 d->hypot_list[d->hypot_cnt++] = from_id;
1341 d->hypot_map[from_id] = to_id;
1342 return 0;
1343}
1344
1345static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
1346{
1347 int i;
1348
1349 for (i = 0; i < d->hypot_cnt; i++)
1350 d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
1351 d->hypot_cnt = 0;
1352}
1353
1354static void btf_dedup_free(struct btf_dedup *d)
1355{
1356 hashmap__free(d->dedup_table);
1357 d->dedup_table = NULL;
1358
1359 free(d->map);
1360 d->map = NULL;
1361
1362 free(d->hypot_map);
1363 d->hypot_map = NULL;
1364
1365 free(d->hypot_list);
1366 d->hypot_list = NULL;
1367
1368 free(d);
1369}
1370
/*
 * Hash callback for the dedup table: the key already is a hash value stored
 * in a pointer, so it is returned unchanged. `ctx` is unused.
 */
static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
{
	size_t hash = (size_t)key;

	return hash;
}
1375
/*
 * Hash callback that maps every key to 0, forcing all entries to collide.
 * Both arguments are ignored.
 */
static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
{
	const size_t always_same = 0;

	return always_same;
}
1380
/*
 * Key equality callback for the dedup table: keys are compared by pointer
 * value only. `ctx` is unused.
 */
static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
{
	if (k1 != k2)
		return false;
	return true;
}
1385
1386static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
1387 const struct btf_dedup_opts *opts)
1388{
1389 struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
1390 hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
1391 int i, err = 0;
1392
1393 if (!d)
1394 return ERR_PTR(-ENOMEM);
1395
1396 d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
1397 /* dedup_table_size is now used only to force collisions in tests */
1398 if (opts && opts->dedup_table_size == 1)
1399 hash_fn = btf_dedup_collision_hash_fn;
1400
1401 d->btf = btf;
1402 d->btf_ext = btf_ext;
1403
1404 d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
1405 if (IS_ERR(d->dedup_table)) {
1406 err = PTR_ERR(d->dedup_table);
1407 d->dedup_table = NULL;
1408 goto done;
1409 }
1410
1411 d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
1412 if (!d->map) {
1413 err = -ENOMEM;
1414 goto done;
1415 }
1416 /* special BTF "void" type is made canonical immediately */
1417 d->map[0] = 0;
1418 for (i = 1; i <= btf->nr_types; i++) {
1419 struct btf_type *t = d->btf->types[i];
1420
1421 /* VAR and DATASEC are never deduped and are self-canonical */
1422 if (btf_is_var(t) || btf_is_datasec(t))
1423 d->map[i] = i;
1424 else
1425 d->map[i] = BTF_UNPROCESSED_ID;
1426 }
1427
1428 d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
1429 if (!d->hypot_map) {
1430 err = -ENOMEM;
1431 goto done;
1432 }
1433 for (i = 0; i <= btf->nr_types; i++)
1434 d->hypot_map[i] = BTF_UNPROCESSED_ID;
1435
1436done:
1437 if (err) {
1438 btf_dedup_free(d);
1439 return ERR_PTR(err);
1440 }
1441
1442 return d;
1443}
1444
1445typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
1446
1447/*
1448 * Iterate over all possible places in .BTF and .BTF.ext that can reference
1449 * string and pass pointer to it to a provided callback `fn`.
1450 */
1451static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
1452{
1453 void *line_data_cur, *line_data_end;
1454 int i, j, r, rec_size;
1455 struct btf_type *t;
1456
1457 for (i = 1; i <= d->btf->nr_types; i++) {
1458 t = d->btf->types[i];
1459 r = fn(&t->name_off, ctx);
1460 if (r)
1461 return r;
1462
1463 switch (btf_kind(t)) {
1464 case BTF_KIND_STRUCT:
1465 case BTF_KIND_UNION: {
1466 struct btf_member *m = btf_members(t);
1467 __u16 vlen = btf_vlen(t);
1468
1469 for (j = 0; j < vlen; j++) {
1470 r = fn(&m->name_off, ctx);
1471 if (r)
1472 return r;
1473 m++;
1474 }
1475 break;
1476 }
1477 case BTF_KIND_ENUM: {
1478 struct btf_enum *m = btf_enum(t);
1479 __u16 vlen = btf_vlen(t);
1480
1481 for (j = 0; j < vlen; j++) {
1482 r = fn(&m->name_off, ctx);
1483 if (r)
1484 return r;
1485 m++;
1486 }
1487 break;
1488 }
1489 case BTF_KIND_FUNC_PROTO: {
1490 struct btf_param *m = btf_params(t);
1491 __u16 vlen = btf_vlen(t);
1492
1493 for (j = 0; j < vlen; j++) {
1494 r = fn(&m->name_off, ctx);
1495 if (r)
1496 return r;
1497 m++;
1498 }
1499 break;
1500 }
1501 default:
1502 break;
1503 }
1504 }
1505
1506 if (!d->btf_ext)
1507 return 0;
1508
1509 line_data_cur = d->btf_ext->line_info.info;
1510 line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
1511 rec_size = d->btf_ext->line_info.rec_size;
1512
1513 while (line_data_cur < line_data_end) {
1514 struct btf_ext_info_sec *sec = line_data_cur;
1515 struct bpf_line_info_min *line_info;
1516 __u32 num_info = sec->num_info;
1517
1518 r = fn(&sec->sec_name_off, ctx);
1519 if (r)
1520 return r;
1521
1522 line_data_cur += sizeof(struct btf_ext_info_sec);
1523 for (i = 0; i < num_info; i++) {
1524 line_info = line_data_cur;
1525 r = fn(&line_info->file_name_off, ctx);
1526 if (r)
1527 return r;
1528 r = fn(&line_info->line_off, ctx);
1529 if (r)
1530 return r;
1531 line_data_cur += rec_size;
1532 }
1533 }
1534
1535 return 0;
1536}
1537
1538static int str_sort_by_content(const void *a1, const void *a2)
1539{
1540 const struct btf_str_ptr *p1 = a1;
1541 const struct btf_str_ptr *p2 = a2;
1542
1543 return strcmp(p1->str, p2->str);
1544}
1545
1546static int str_sort_by_offset(const void *a1, const void *a2)
1547{
1548 const struct btf_str_ptr *p1 = a1;
1549 const struct btf_str_ptr *p2 = a2;
1550
1551 if (p1->str != p2->str)
1552 return p1->str < p2->str ? -1 : 1;
1553 return 0;
1554}
1555
1556static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
1557{
1558 const struct btf_str_ptr *p = pelem;
1559
1560 if (str_ptr != p->str)
1561 return (const char *)str_ptr < p->str ? -1 : 1;
1562 return 0;
1563}
1564
1565static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
1566{
1567 struct btf_str_ptrs *strs;
1568 struct btf_str_ptr *s;
1569
1570 if (*str_off_ptr == 0)
1571 return 0;
1572
1573 strs = ctx;
1574 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
1575 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
1576 if (!s)
1577 return -EINVAL;
1578 s->used = true;
1579 return 0;
1580}
1581
1582static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
1583{
1584 struct btf_str_ptrs *strs;
1585 struct btf_str_ptr *s;
1586
1587 if (*str_off_ptr == 0)
1588 return 0;
1589
1590 strs = ctx;
1591 s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
1592 sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
1593 if (!s)
1594 return -EINVAL;
1595 *str_off_ptr = s->new_off;
1596 return 0;
1597}
1598
1599/*
1600 * Dedup string and filter out those that are not referenced from either .BTF
1601 * or .BTF.ext (if provided) sections.
1602 *
1603 * This is done by building index of all strings in BTF's string section,
1604 * then iterating over all entities that can reference strings (e.g., type
1605 * names, struct field names, .BTF.ext line info, etc) and marking corresponding
1606 * strings as used. After that all used strings are deduped and compacted into
1607 * sequential blob of memory and new offsets are calculated. Then all the string
1608 * references are iterated again and rewritten using new offsets.
1609 */
1610static int btf_dedup_strings(struct btf_dedup *d)
1611{
1612 const struct btf_header *hdr = d->btf->hdr;
1613 char *start = (char *)d->btf->nohdr_data + hdr->str_off;
1614 char *end = start + d->btf->hdr->str_len;
1615 char *p = start, *tmp_strs = NULL;
1616 struct btf_str_ptrs strs = {
1617 .cnt = 0,
1618 .cap = 0,
1619 .ptrs = NULL,
1620 .data = start,
1621 };
1622 int i, j, err = 0, grp_idx;
1623 bool grp_used;
1624
1625 /* build index of all strings */
1626 while (p < end) {
1627 if (strs.cnt + 1 > strs.cap) {
1628 struct btf_str_ptr *new_ptrs;
1629
1630 strs.cap += max(strs.cnt / 2, 16);
1631 new_ptrs = realloc(strs.ptrs,
1632 sizeof(strs.ptrs[0]) * strs.cap);
1633 if (!new_ptrs) {
1634 err = -ENOMEM;
1635 goto done;
1636 }
1637 strs.ptrs = new_ptrs;
1638 }
1639
1640 strs.ptrs[strs.cnt].str = p;
1641 strs.ptrs[strs.cnt].used = false;
1642
1643 p += strlen(p) + 1;
1644 strs.cnt++;
1645 }
1646
1647 /* temporary storage for deduplicated strings */
1648 tmp_strs = malloc(d->btf->hdr->str_len);
1649 if (!tmp_strs) {
1650 err = -ENOMEM;
1651 goto done;
1652 }
1653
1654 /* mark all used strings */
1655 strs.ptrs[0].used = true;
1656 err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
1657 if (err)
1658 goto done;
1659
1660 /* sort strings by context, so that we can identify duplicates */
1661 qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
1662
1663 /*
1664 * iterate groups of equal strings and if any instance in a group was
1665 * referenced, emit single instance and remember new offset
1666 */
1667 p = tmp_strs;
1668 grp_idx = 0;
1669 grp_used = strs.ptrs[0].used;
1670 /* iterate past end to avoid code duplication after loop */
1671 for (i = 1; i <= strs.cnt; i++) {
1672 /*
1673 * when i == strs.cnt, we want to skip string comparison and go
1674 * straight to handling last group of strings (otherwise we'd
1675 * need to handle last group after the loop w/ duplicated code)
1676 */
1677 if (i < strs.cnt &&
1678 !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
1679 grp_used = grp_used || strs.ptrs[i].used;
1680 continue;
1681 }
1682
1683 /*
1684 * this check would have been required after the loop to handle
1685 * last group of strings, but due to <= condition in a loop
1686 * we avoid that duplication
1687 */
1688 if (grp_used) {
1689 int new_off = p - tmp_strs;
1690 __u32 len = strlen(strs.ptrs[grp_idx].str);
1691
1692 memmove(p, strs.ptrs[grp_idx].str, len + 1);
1693 for (j = grp_idx; j < i; j++)
1694 strs.ptrs[j].new_off = new_off;
1695 p += len + 1;
1696 }
1697
1698 if (i < strs.cnt) {
1699 grp_idx = i;
1700 grp_used = strs.ptrs[i].used;
1701 }
1702 }
1703
1704 /* replace original strings with deduped ones */
1705 d->btf->hdr->str_len = p - tmp_strs;
1706 memmove(start, tmp_strs, d->btf->hdr->str_len);
1707 end = start + d->btf->hdr->str_len;
1708
1709 /* restore original order for further binary search lookups */
1710 qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
1711
1712 /* remap string offsets */
1713 err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
1714 if (err)
1715 goto done;
1716
1717 d->btf->hdr->str_len = end - start;
1718
1719done:
1720 free(tmp_strs);
1721 free(strs.ptrs);
1722 return err;
1723}
1724
1725static long btf_hash_common(struct btf_type *t)
1726{
1727 long h;
1728
1729 h = hash_combine(0, t->name_off);
1730 h = hash_combine(h, t->info);
1731 h = hash_combine(h, t->size);
1732 return h;
1733}
1734
1735static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
1736{
1737 return t1->name_off == t2->name_off &&
1738 t1->info == t2->info &&
1739 t1->size == t2->size;
1740}
1741
1742/* Calculate type signature hash of INT. */
1743static long btf_hash_int(struct btf_type *t)
1744{
1745 __u32 info = *(__u32 *)(t + 1);
1746 long h;
1747
1748 h = btf_hash_common(t);
1749 h = hash_combine(h, info);
1750 return h;
1751}
1752
1753/* Check structural equality of two INTs. */
1754static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
1755{
1756 __u32 info1, info2;
1757
1758 if (!btf_equal_common(t1, t2))
1759 return false;
1760 info1 = *(__u32 *)(t1 + 1);
1761 info2 = *(__u32 *)(t2 + 1);
1762 return info1 == info2;
1763}
1764
1765/* Calculate type signature hash of ENUM. */
1766static long btf_hash_enum(struct btf_type *t)
1767{
1768 long h;
1769
1770 /* don't hash vlen and enum members to support enum fwd resolving */
1771 h = hash_combine(0, t->name_off);
1772 h = hash_combine(h, t->info & ~0xffff);
1773 h = hash_combine(h, t->size);
1774 return h;
1775}
1776
1777/* Check structural equality of two ENUMs. */
1778static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
1779{
1780 const struct btf_enum *m1, *m2;
1781 __u16 vlen;
1782 int i;
1783
1784 if (!btf_equal_common(t1, t2))
1785 return false;
1786
1787 vlen = btf_vlen(t1);
1788 m1 = btf_enum(t1);
1789 m2 = btf_enum(t2);
1790 for (i = 0; i < vlen; i++) {
1791 if (m1->name_off != m2->name_off || m1->val != m2->val)
1792 return false;
1793 m1++;
1794 m2++;
1795 }
1796 return true;
1797}
1798
/* An ENUM with no enumerators is treated as an enum forward declaration. */
static inline bool btf_is_enum_fwd(struct btf_type *t)
{
	if (!btf_is_enum(t))
		return false;
	return btf_vlen(t) == 0;
}
1803
1804static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
1805{
1806 if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
1807 return btf_equal_enum(t1, t2);
1808 /* ignore vlen when comparing */
1809 return t1->name_off == t2->name_off &&
1810 (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
1811 t1->size == t2->size;
1812}
1813
1814/*
1815 * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
1816 * as referenced type IDs equivalence is established separately during type
1817 * graph equivalence check algorithm.
1818 */
1819static long btf_hash_struct(struct btf_type *t)
1820{
1821 const struct btf_member *member = btf_members(t);
1822 __u32 vlen = btf_vlen(t);
1823 long h = btf_hash_common(t);
1824 int i;
1825
1826 for (i = 0; i < vlen; i++) {
1827 h = hash_combine(h, member->name_off);
1828 h = hash_combine(h, member->offset);
1829 /* no hashing of referenced type ID, it can be unresolved yet */
1830 member++;
1831 }
1832 return h;
1833}
1834
1835/*
1836 * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
1837 * IDs. This check is performed during type graph equivalence check and
1838 * referenced types equivalence is checked separately.
1839 */
1840static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
1841{
1842 const struct btf_member *m1, *m2;
1843 __u16 vlen;
1844 int i;
1845
1846 if (!btf_equal_common(t1, t2))
1847 return false;
1848
1849 vlen = btf_vlen(t1);
1850 m1 = btf_members(t1);
1851 m2 = btf_members(t2);
1852 for (i = 0; i < vlen; i++) {
1853 if (m1->name_off != m2->name_off || m1->offset != m2->offset)
1854 return false;
1855 m1++;
1856 m2++;
1857 }
1858 return true;
1859}
1860
1861/*
1862 * Calculate type signature hash of ARRAY, including referenced type IDs,
1863 * under assumption that they were already resolved to canonical type IDs and
1864 * are not going to change.
1865 */
1866static long btf_hash_array(struct btf_type *t)
1867{
1868 const struct btf_array *info = btf_array(t);
1869 long h = btf_hash_common(t);
1870
1871 h = hash_combine(h, info->type);
1872 h = hash_combine(h, info->index_type);
1873 h = hash_combine(h, info->nelems);
1874 return h;
1875}
1876
1877/*
1878 * Check exact equality of two ARRAYs, taking into account referenced
1879 * type IDs, under assumption that they were already resolved to canonical
1880 * type IDs and are not going to change.
1881 * This function is called during reference types deduplication to compare
1882 * ARRAY to potential canonical representative.
1883 */
1884static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
1885{
1886 const struct btf_array *info1, *info2;
1887
1888 if (!btf_equal_common(t1, t2))
1889 return false;
1890
1891 info1 = btf_array(t1);
1892 info2 = btf_array(t2);
1893 return info1->type == info2->type &&
1894 info1->index_type == info2->index_type &&
1895 info1->nelems == info2->nelems;
1896}
1897
1898/*
1899 * Check structural compatibility of two ARRAYs, ignoring referenced type
1900 * IDs. This check is performed during type graph equivalence check and
1901 * referenced types equivalence is checked separately.
1902 */
1903static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
1904{
1905 if (!btf_equal_common(t1, t2))
1906 return false;
1907
1908 return btf_array(t1)->nelems == btf_array(t2)->nelems;
1909}
1910
1911/*
1912 * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
1913 * under assumption that they were already resolved to canonical type IDs and
1914 * are not going to change.
1915 */
1916static long btf_hash_fnproto(struct btf_type *t)
1917{
1918 const struct btf_param *member = btf_params(t);
1919 __u16 vlen = btf_vlen(t);
1920 long h = btf_hash_common(t);
1921 int i;
1922
1923 for (i = 0; i < vlen; i++) {
1924 h = hash_combine(h, member->name_off);
1925 h = hash_combine(h, member->type);
1926 member++;
1927 }
1928 return h;
1929}
1930
1931/*
1932 * Check exact equality of two FUNC_PROTOs, taking into account referenced
1933 * type IDs, under assumption that they were already resolved to canonical
1934 * type IDs and are not going to change.
1935 * This function is called during reference types deduplication to compare
1936 * FUNC_PROTO to potential canonical representative.
1937 */
1938static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
1939{
1940 const struct btf_param *m1, *m2;
1941 __u16 vlen;
1942 int i;
1943
1944 if (!btf_equal_common(t1, t2))
1945 return false;
1946
1947 vlen = btf_vlen(t1);
1948 m1 = btf_params(t1);
1949 m2 = btf_params(t2);
1950 for (i = 0; i < vlen; i++) {
1951 if (m1->name_off != m2->name_off || m1->type != m2->type)
1952 return false;
1953 m1++;
1954 m2++;
1955 }
1956 return true;
1957}
1958
1959/*
1960 * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
1961 * IDs. This check is performed during type graph equivalence check and
1962 * referenced types equivalence is checked separately.
1963 */
1964static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
1965{
1966 const struct btf_param *m1, *m2;
1967 __u16 vlen;
1968 int i;
1969
1970 /* skip return type ID */
1971 if (t1->name_off != t2->name_off || t1->info != t2->info)
1972 return false;
1973
1974 vlen = btf_vlen(t1);
1975 m1 = btf_params(t1);
1976 m2 = btf_params(t2);
1977 for (i = 0; i < vlen; i++) {
1978 if (m1->name_off != m2->name_off)
1979 return false;
1980 m1++;
1981 m2++;
1982 }
1983 return true;
1984}
1985
1986/*
1987 * Deduplicate primitive types, that can't reference other types, by calculating
1988 * their type signature hash and comparing them with any possible canonical
1989 * candidate. If no canonical candidate matches, type itself is marked as
1990 * canonical and is added into `btf_dedup->dedup_table` as another candidate.
1991 */
1992static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
1993{
1994 struct btf_type *t = d->btf->types[type_id];
1995 struct hashmap_entry *hash_entry;
1996 struct btf_type *cand;
1997 /* if we don't find equivalent type, then we are canonical */
1998 __u32 new_id = type_id;
1999 __u32 cand_id;
2000 long h;
2001
2002 switch (btf_kind(t)) {
2003 case BTF_KIND_CONST:
2004 case BTF_KIND_VOLATILE:
2005 case BTF_KIND_RESTRICT:
2006 case BTF_KIND_PTR:
2007 case BTF_KIND_TYPEDEF:
2008 case BTF_KIND_ARRAY:
2009 case BTF_KIND_STRUCT:
2010 case BTF_KIND_UNION:
2011 case BTF_KIND_FUNC:
2012 case BTF_KIND_FUNC_PROTO:
2013 case BTF_KIND_VAR:
2014 case BTF_KIND_DATASEC:
2015 return 0;
2016
2017 case BTF_KIND_INT:
2018 h = btf_hash_int(t);
2019 for_each_dedup_cand(d, hash_entry, h) {
2020 cand_id = (__u32)(long)hash_entry->value;
2021 cand = d->btf->types[cand_id];
2022 if (btf_equal_int(t, cand)) {
2023 new_id = cand_id;
2024 break;
2025 }
2026 }
2027 break;
2028
2029 case BTF_KIND_ENUM:
2030 h = btf_hash_enum(t);
2031 for_each_dedup_cand(d, hash_entry, h) {
2032 cand_id = (__u32)(long)hash_entry->value;
2033 cand = d->btf->types[cand_id];
2034 if (btf_equal_enum(t, cand)) {
2035 new_id = cand_id;
2036 break;
2037 }
2038 if (d->opts.dont_resolve_fwds)
2039 continue;
2040 if (btf_compat_enum(t, cand)) {
2041 if (btf_is_enum_fwd(t)) {
2042 /* resolve fwd to full enum */
2043 new_id = cand_id;
2044 break;
2045 }
2046 /* resolve canonical enum fwd to full enum */
2047 d->map[cand_id] = type_id;
2048 }
2049 }
2050 break;
2051
2052 case BTF_KIND_FWD:
2053 h = btf_hash_common(t);
2054 for_each_dedup_cand(d, hash_entry, h) {
2055 cand_id = (__u32)(long)hash_entry->value;
2056 cand = d->btf->types[cand_id];
2057 if (btf_equal_common(t, cand)) {
2058 new_id = cand_id;
2059 break;
2060 }
2061 }
2062 break;
2063
2064 default:
2065 return -EINVAL;
2066 }
2067
2068 d->map[type_id] = new_id;
2069 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2070 return -ENOMEM;
2071
2072 return 0;
2073}
2074
2075static int btf_dedup_prim_types(struct btf_dedup *d)
2076{
2077 int i, err;
2078
2079 for (i = 1; i <= d->btf->nr_types; i++) {
2080 err = btf_dedup_prim_type(d, i);
2081 if (err)
2082 return err;
2083 }
2084 return 0;
2085}
2086
2087/*
2088 * Check whether type is already mapped into canonical one (could be to itself).
2089 */
2090static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
2091{
2092 return d->map[type_id] <= BTF_MAX_NR_TYPES;
2093}
2094
2095/*
2096 * Resolve type ID into its canonical type ID, if any; otherwise return original
2097 * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
2098 * STRUCT/UNION link and resolve it into canonical type ID as well.
2099 */
2100static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
2101{
2102 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
2103 type_id = d->map[type_id];
2104 return type_id;
2105}
2106
2107/*
2108 * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
2109 * type ID.
2110 */
2111static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
2112{
2113 __u32 orig_type_id = type_id;
2114
2115 if (!btf_is_fwd(d->btf->types[type_id]))
2116 return type_id;
2117
2118 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
2119 type_id = d->map[type_id];
2120
2121 if (!btf_is_fwd(d->btf->types[type_id]))
2122 return type_id;
2123
2124 return orig_type_id;
2125}
2126
2127
2128static inline __u16 btf_fwd_kind(struct btf_type *t)
2129{
2130 return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
2131}
2132
2133/*
2134 * Check equivalence of BTF type graph formed by candidate struct/union (we'll
2135 * call it "candidate graph" in this description for brevity) to a type graph
2136 * formed by (potential) canonical struct/union ("canonical graph" for brevity
2137 * here, though keep in mind that not all types in canonical graph are
2138 * necessarily canonical representatives themselves, some of them might be
2139 * duplicates or its uniqueness might not have been established yet).
2140 * Returns:
2141 * - >0, if type graphs are equivalent;
2142 * - 0, if not equivalent;
2143 * - <0, on error.
2144 *
2145 * Algorithm performs side-by-side DFS traversal of both type graphs and checks
2146 * equivalence of BTF types at each step. If at any point BTF types in candidate
2147 * and canonical graphs are not compatible structurally, whole graphs are
2148 * incompatible. If types are structurally equivalent (i.e., all information
2149 * except referenced type IDs is exactly the same), a mapping from `canon_id` to
 * a `cand_id` is recorded in hypothetical mapping (`btf_dedup->hypot_map`).
2151 * If a type references other types, then those referenced types are checked
2152 * for equivalence recursively.
2153 *
2154 * During DFS traversal, if we find that for current `canon_id` type we
2155 * already have some mapping in hypothetical map, we check for two possible
2156 * situations:
2157 * - `canon_id` is mapped to exactly the same type as `cand_id`. This will
2158 * happen when type graphs have cycles. In this case we assume those two
2159 * types are equivalent.
2160 * - `canon_id` is mapped to different type. This is contradiction in our
 * hypothetical mapping, because same type in canonical graph corresponds
2162 * to two different types in candidate graph, which for equivalent type
2163 * graphs shouldn't happen. This condition terminates equivalence check
2164 * with negative result.
2165 *
2166 * If type graphs traversal exhausts types to check and find no contradiction,
2167 * then type graphs are equivalent.
2168 *
2169 * When checking types for equivalence, there is one special case: FWD types.
2170 * If FWD type resolution is allowed and one of the types (either from canonical
2171 * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
2172 * flag) and their names match, hypothetical mapping is updated to point from
2173 * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
2174 * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
2175 *
2176 * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
2177 * if there are two exactly named (or anonymous) structs/unions that are
2178 * compatible structurally, one of which has FWD field, while other is concrete
2179 * STRUCT/UNION, but according to C sources they are different structs/unions
2180 * that are referencing different types with the same name. This is extremely
2181 * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
2182 * this logic is causing problems.
2183 *
 * Doing FWD resolution means that both candidate and/or canonical graphs can
 * consist of portions of the graph that come from multiple compilation units.
 * This is due to the fact that types within single compilation unit are always
 * deduplicated and FWDs are already resolved, if referenced struct/union
 * definition is available. So, if we had unresolved FWD and found corresponding
 * STRUCT/UNION, they will be from different compilation units. This
2190 * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
2191 * type graph will likely have at least two different BTF types that describe
2192 * same type (e.g., most probably there will be two different BTF types for the
2193 * same 'int' primitive type) and could even have "overlapping" parts of type
2194 * graph that describe same subset of types.
2195 *
2196 * This in turn means that our assumption that each type in canonical graph
2197 * must correspond to exactly one type in candidate graph might not hold
2198 * anymore and will make it harder to detect contradictions using hypothetical
2199 * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
2200 * resolution only in canonical graph. FWDs in candidate graphs are never
2201 * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
2202 * that can occur:
2203 * - Both types in canonical and candidate graphs are FWDs. If they are
2204 * structurally equivalent, then they can either be both resolved to the
2205 * same STRUCT/UNION or not resolved at all. In both cases they are
2206 * equivalent and there is no need to resolve FWD on candidate side.
2207 * - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
2208 * so nothing to resolve as well, algorithm will check equivalence anyway.
2209 * - Type in canonical graph is FWD, while type in candidate is concrete
2210 * STRUCT/UNION. In this case candidate graph comes from single compilation
2211 * unit, so there is exactly one BTF type for each unique C type. After
2212 * resolving FWD into STRUCT/UNION, there might be more than one BTF type
2213 * in canonical graph mapping to single BTF type in candidate graph, but
2214 * because hypothetical mapping maps from canonical to candidate types, it's
2215 * alright, and we still maintain the property of having single `canon_id`
2216 * mapping to single `cand_id` (there could be two different `canon_id`
2217 * mapped to the same `cand_id`, but it's not contradictory).
2218 * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
2219 * graph is FWD. In this case we are just going to check compatibility of
2220 * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
2221 * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
2222 * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
2223 * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
2224 * canonical graph.
2225 */
2226static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
2227 __u32 canon_id)
2228{
2229 struct btf_type *cand_type;
2230 struct btf_type *canon_type;
2231 __u32 hypot_type_id;
2232 __u16 cand_kind;
2233 __u16 canon_kind;
2234 int i, eq;
2235
2236 /* if both resolve to the same canonical, they must be equivalent */
2237 if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
2238 return 1;
2239
2240 canon_id = resolve_fwd_id(d, canon_id);
2241
2242 hypot_type_id = d->hypot_map[canon_id];
2243 if (hypot_type_id <= BTF_MAX_NR_TYPES)
2244 return hypot_type_id == cand_id;
2245
2246 if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
2247 return -ENOMEM;
2248
2249 cand_type = d->btf->types[cand_id];
2250 canon_type = d->btf->types[canon_id];
2251 cand_kind = btf_kind(cand_type);
2252 canon_kind = btf_kind(canon_type);
2253
2254 if (cand_type->name_off != canon_type->name_off)
2255 return 0;
2256
2257 /* FWD <--> STRUCT/UNION equivalence check, if enabled */
2258 if (!d->opts.dont_resolve_fwds
2259 && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
2260 && cand_kind != canon_kind) {
2261 __u16 real_kind;
2262 __u16 fwd_kind;
2263
2264 if (cand_kind == BTF_KIND_FWD) {
2265 real_kind = canon_kind;
2266 fwd_kind = btf_fwd_kind(cand_type);
2267 } else {
2268 real_kind = cand_kind;
2269 fwd_kind = btf_fwd_kind(canon_type);
2270 }
2271 return fwd_kind == real_kind;
2272 }
2273
2274 if (cand_kind != canon_kind)
2275 return 0;
2276
2277 switch (cand_kind) {
2278 case BTF_KIND_INT:
2279 return btf_equal_int(cand_type, canon_type);
2280
2281 case BTF_KIND_ENUM:
2282 if (d->opts.dont_resolve_fwds)
2283 return btf_equal_enum(cand_type, canon_type);
2284 else
2285 return btf_compat_enum(cand_type, canon_type);
2286
2287 case BTF_KIND_FWD:
2288 return btf_equal_common(cand_type, canon_type);
2289
2290 case BTF_KIND_CONST:
2291 case BTF_KIND_VOLATILE:
2292 case BTF_KIND_RESTRICT:
2293 case BTF_KIND_PTR:
2294 case BTF_KIND_TYPEDEF:
2295 case BTF_KIND_FUNC:
2296 if (cand_type->info != canon_type->info)
2297 return 0;
2298 return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2299
2300 case BTF_KIND_ARRAY: {
2301 const struct btf_array *cand_arr, *canon_arr;
2302
2303 if (!btf_compat_array(cand_type, canon_type))
2304 return 0;
2305 cand_arr = btf_array(cand_type);
2306 canon_arr = btf_array(canon_type);
2307 eq = btf_dedup_is_equiv(d,
2308 cand_arr->index_type, canon_arr->index_type);
2309 if (eq <= 0)
2310 return eq;
2311 return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
2312 }
2313
2314 case BTF_KIND_STRUCT:
2315 case BTF_KIND_UNION: {
2316 const struct btf_member *cand_m, *canon_m;
2317 __u16 vlen;
2318
2319 if (!btf_shallow_equal_struct(cand_type, canon_type))
2320 return 0;
2321 vlen = btf_vlen(cand_type);
2322 cand_m = btf_members(cand_type);
2323 canon_m = btf_members(canon_type);
2324 for (i = 0; i < vlen; i++) {
2325 eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
2326 if (eq <= 0)
2327 return eq;
2328 cand_m++;
2329 canon_m++;
2330 }
2331
2332 return 1;
2333 }
2334
2335 case BTF_KIND_FUNC_PROTO: {
2336 const struct btf_param *cand_p, *canon_p;
2337 __u16 vlen;
2338
2339 if (!btf_compat_fnproto(cand_type, canon_type))
2340 return 0;
2341 eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
2342 if (eq <= 0)
2343 return eq;
2344 vlen = btf_vlen(cand_type);
2345 cand_p = btf_params(cand_type);
2346 canon_p = btf_params(canon_type);
2347 for (i = 0; i < vlen; i++) {
2348 eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
2349 if (eq <= 0)
2350 return eq;
2351 cand_p++;
2352 canon_p++;
2353 }
2354 return 1;
2355 }
2356
2357 default:
2358 return -EINVAL;
2359 }
2360 return 0;
2361}
2362
/*
 * Use hypothetical mapping, produced by successful type graph equivalence
 * check, to augment existing struct/union canonical mapping, where possible.
 *
 * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
 * FWD -> STRUCT/UNION correspondence. FWD resolution is bidirectional:
 * it doesn't matter if FWD type was part of canonical graph or candidate one,
 * we are recording the mapping anyway. As opposed to the carefulness required
 * for struct/union correspondence mapping (described below), for FWD resolution
 * it's not important, as by the time that FWD type (reference type) will be
 * deduplicated all structs/unions will be deduped already anyway.
 *
 * Recording STRUCT/UNION mapping is purely a performance optimization and is
 * not required for correctness. It needs to be done carefully to ensure that
 * struct/union from candidate's type graph is not mapped into corresponding
 * struct/union from canonical type graph that itself hasn't been resolved into
 * canonical representative. The only guarantee we have is that canonical
 * struct/union was determined as canonical and that won't change. But any
 * types referenced through that struct/union fields could have been not yet
 * resolved, so in a case like that it's too early to establish any kind of
 * correspondence between structs/unions.
 *
 * No canonical correspondence is derived for primitive types (they are
 * completely deduplicated already anyway) or reference types (they rely on
 * stability of struct/union canonical relationship for equivalence checks).
 */
2389static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
2390{
2391 __u32 cand_type_id, targ_type_id;
2392 __u16 t_kind, c_kind;
2393 __u32 t_id, c_id;
2394 int i;
2395
2396 for (i = 0; i < d->hypot_cnt; i++) {
2397 cand_type_id = d->hypot_list[i];
2398 targ_type_id = d->hypot_map[cand_type_id];
2399 t_id = resolve_type_id(d, targ_type_id);
2400 c_id = resolve_type_id(d, cand_type_id);
2401 t_kind = btf_kind(d->btf->types[t_id]);
2402 c_kind = btf_kind(d->btf->types[c_id]);
2403 /*
2404 * Resolve FWD into STRUCT/UNION.
2405 * It's ok to resolve FWD into STRUCT/UNION that's not yet
2406 * mapped to canonical representative (as opposed to
2407 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
2408 * eventually that struct is going to be mapped and all resolved
2409 * FWDs will automatically resolve to correct canonical
2410 * representative. This will happen before ref type deduping,
2411 * which critically depends on stability of these mapping. This
2412 * stability is not a requirement for STRUCT/UNION equivalence
2413 * checks, though.
2414 */
2415 if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
2416 d->map[c_id] = t_id;
2417 else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
2418 d->map[t_id] = c_id;
2419
2420 if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
2421 c_kind != BTF_KIND_FWD &&
2422 is_type_mapped(d, c_id) &&
2423 !is_type_mapped(d, t_id)) {
2424 /*
2425 * as a perf optimization, we can map struct/union
2426 * that's part of type graph we just verified for
2427 * equivalence. We can do that for struct/union that has
2428 * canonical representative only, though.
2429 */
2430 d->map[t_id] = c_id;
2431 }
2432 }
2433}
2434
2435/*
2436 * Deduplicate struct/union types.
2437 *
2438 * For each struct/union type its type signature hash is calculated, taking
2439 * into account type's name, size, number, order and names of fields, but
2440 * ignoring type ID's referenced from fields, because they might not be deduped
2441 * completely until after reference types deduplication phase. This type hash
2442 * is used to iterate over all potential canonical types, sharing same hash.
2443 * For each canonical candidate we check whether type graphs that they form
2444 * (through referenced types in fields and so on) are equivalent using algorithm
2445 * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
2446 * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
2447 * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
2448 * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
2449 * potentially map other structs/unions to their canonical representatives,
2450 * if such relationship hasn't yet been established. This speeds up algorithm
2451 * by eliminating some of the duplicate work.
2452 *
2453 * If no matching canonical representative was found, struct/union is marked
2454 * as canonical for itself and is added into btf_dedup->dedup_table hash map
2455 * for further look ups.
2456 */
2457static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
2458{
2459 struct btf_type *cand_type, *t;
2460 struct hashmap_entry *hash_entry;
2461 /* if we don't find equivalent type, then we are canonical */
2462 __u32 new_id = type_id;
2463 __u16 kind;
2464 long h;
2465
2466 /* already deduped or is in process of deduping (loop detected) */
2467 if (d->map[type_id] <= BTF_MAX_NR_TYPES)
2468 return 0;
2469
2470 t = d->btf->types[type_id];
2471 kind = btf_kind(t);
2472
2473 if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
2474 return 0;
2475
2476 h = btf_hash_struct(t);
2477 for_each_dedup_cand(d, hash_entry, h) {
2478 __u32 cand_id = (__u32)(long)hash_entry->value;
2479 int eq;
2480
2481 /*
2482 * Even though btf_dedup_is_equiv() checks for
2483 * btf_shallow_equal_struct() internally when checking two
2484 * structs (unions) for equivalence, we need to guard here
2485 * from picking matching FWD type as a dedup candidate.
2486 * This can happen due to hash collision. In such case just
2487 * relying on btf_dedup_is_equiv() would lead to potentially
2488 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
2489 * FWD and compatible STRUCT/UNION are considered equivalent.
2490 */
2491 cand_type = d->btf->types[cand_id];
2492 if (!btf_shallow_equal_struct(t, cand_type))
2493 continue;
2494
2495 btf_dedup_clear_hypot_map(d);
2496 eq = btf_dedup_is_equiv(d, type_id, cand_id);
2497 if (eq < 0)
2498 return eq;
2499 if (!eq)
2500 continue;
2501 new_id = cand_id;
2502 btf_dedup_merge_hypot_map(d);
2503 break;
2504 }
2505
2506 d->map[type_id] = new_id;
2507 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2508 return -ENOMEM;
2509
2510 return 0;
2511}
2512
2513static int btf_dedup_struct_types(struct btf_dedup *d)
2514{
2515 int i, err;
2516
2517 for (i = 1; i <= d->btf->nr_types; i++) {
2518 err = btf_dedup_struct_type(d, i);
2519 if (err)
2520 return err;
2521 }
2522 return 0;
2523}
2524
2525/*
2526 * Deduplicate reference type.
2527 *
2528 * Once all primitive and struct/union types got deduplicated, we can easily
2529 * deduplicate all other (reference) BTF types. This is done in two steps:
2530 *
2531 * 1. Resolve all referenced type IDs into their canonical type IDs. This
2532 * resolution can be done either immediately for primitive or struct/union types
2533 * (because they were deduped in previous two phases) or recursively for
2534 * reference types. Recursion will always terminate at either primitive or
2535 * struct/union type, at which point we can "unwind" chain of reference types
2536 * one by one. There is no danger of encountering cycles because in C type
2537 * system the only way to form type cycle is through struct/union, so any chain
2538 * of reference types, even those taking part in a type cycle, will inevitably
2539 * reach struct/union at some point.
2540 *
2541 * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
2542 * becomes "stable", in the sense that no further deduplication will cause
2543 * any changes to it. With that, it's now possible to calculate type's signature
2544 * hash (this time taking into account referenced type IDs) and loop over all
2545 * potential canonical representatives. If no match was found, current type
2546 * will become canonical representative of itself and will be added into
2547 * btf_dedup->dedup_table as another possible canonical representative.
2548 */
2549static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
2550{
2551 struct hashmap_entry *hash_entry;
2552 __u32 new_id = type_id, cand_id;
2553 struct btf_type *t, *cand;
2554 /* if we don't find equivalent type, then we are representative type */
2555 int ref_type_id;
2556 long h;
2557
2558 if (d->map[type_id] == BTF_IN_PROGRESS_ID)
2559 return -ELOOP;
2560 if (d->map[type_id] <= BTF_MAX_NR_TYPES)
2561 return resolve_type_id(d, type_id);
2562
2563 t = d->btf->types[type_id];
2564 d->map[type_id] = BTF_IN_PROGRESS_ID;
2565
2566 switch (btf_kind(t)) {
2567 case BTF_KIND_CONST:
2568 case BTF_KIND_VOLATILE:
2569 case BTF_KIND_RESTRICT:
2570 case BTF_KIND_PTR:
2571 case BTF_KIND_TYPEDEF:
2572 case BTF_KIND_FUNC:
2573 ref_type_id = btf_dedup_ref_type(d, t->type);
2574 if (ref_type_id < 0)
2575 return ref_type_id;
2576 t->type = ref_type_id;
2577
2578 h = btf_hash_common(t);
2579 for_each_dedup_cand(d, hash_entry, h) {
2580 cand_id = (__u32)(long)hash_entry->value;
2581 cand = d->btf->types[cand_id];
2582 if (btf_equal_common(t, cand)) {
2583 new_id = cand_id;
2584 break;
2585 }
2586 }
2587 break;
2588
2589 case BTF_KIND_ARRAY: {
2590 struct btf_array *info = btf_array(t);
2591
2592 ref_type_id = btf_dedup_ref_type(d, info->type);
2593 if (ref_type_id < 0)
2594 return ref_type_id;
2595 info->type = ref_type_id;
2596
2597 ref_type_id = btf_dedup_ref_type(d, info->index_type);
2598 if (ref_type_id < 0)
2599 return ref_type_id;
2600 info->index_type = ref_type_id;
2601
2602 h = btf_hash_array(t);
2603 for_each_dedup_cand(d, hash_entry, h) {
2604 cand_id = (__u32)(long)hash_entry->value;
2605 cand = d->btf->types[cand_id];
2606 if (btf_equal_array(t, cand)) {
2607 new_id = cand_id;
2608 break;
2609 }
2610 }
2611 break;
2612 }
2613
2614 case BTF_KIND_FUNC_PROTO: {
2615 struct btf_param *param;
2616 __u16 vlen;
2617 int i;
2618
2619 ref_type_id = btf_dedup_ref_type(d, t->type);
2620 if (ref_type_id < 0)
2621 return ref_type_id;
2622 t->type = ref_type_id;
2623
2624 vlen = btf_vlen(t);
2625 param = btf_params(t);
2626 for (i = 0; i < vlen; i++) {
2627 ref_type_id = btf_dedup_ref_type(d, param->type);
2628 if (ref_type_id < 0)
2629 return ref_type_id;
2630 param->type = ref_type_id;
2631 param++;
2632 }
2633
2634 h = btf_hash_fnproto(t);
2635 for_each_dedup_cand(d, hash_entry, h) {
2636 cand_id = (__u32)(long)hash_entry->value;
2637 cand = d->btf->types[cand_id];
2638 if (btf_equal_fnproto(t, cand)) {
2639 new_id = cand_id;
2640 break;
2641 }
2642 }
2643 break;
2644 }
2645
2646 default:
2647 return -EINVAL;
2648 }
2649
2650 d->map[type_id] = new_id;
2651 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
2652 return -ENOMEM;
2653
2654 return new_id;
2655}
2656
2657static int btf_dedup_ref_types(struct btf_dedup *d)
2658{
2659 int i, err;
2660
2661 for (i = 1; i <= d->btf->nr_types; i++) {
2662 err = btf_dedup_ref_type(d, i);
2663 if (err < 0)
2664 return err;
2665 }
2666 /* we won't need d->dedup_table anymore */
2667 hashmap__free(d->dedup_table);
2668 d->dedup_table = NULL;
2669 return 0;
2670}
2671
/*
 * Compact types.
 *
 * After we established for each type its corresponding canonical representative
 * type, we now can eliminate types that are not canonical and leave only
 * canonical ones laid out sequentially in memory by copying them over
 * duplicates. During compaction btf_dedup->hypot_map array is reused to store
 * a map from original type ID to a new compacted type ID, which will be used
 * during next phase to "fix up" type IDs, referenced from struct/union and
 * reference types.
 */
2683static int btf_dedup_compact_types(struct btf_dedup *d)
2684{
2685 struct btf_type **new_types;
2686 __u32 next_type_id = 1;
2687 char *types_start, *p;
2688 int i, len;
2689
2690 /* we are going to reuse hypot_map to store compaction remapping */
2691 d->hypot_map[0] = 0;
2692 for (i = 1; i <= d->btf->nr_types; i++)
2693 d->hypot_map[i] = BTF_UNPROCESSED_ID;
2694
2695 types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
2696 p = types_start;
2697
2698 for (i = 1; i <= d->btf->nr_types; i++) {
2699 if (d->map[i] != i)
2700 continue;
2701
2702 len = btf_type_size(d->btf->types[i]);
2703 if (len < 0)
2704 return len;
2705
2706 memmove(p, d->btf->types[i], len);
2707 d->hypot_map[i] = next_type_id;
2708 d->btf->types[next_type_id] = (struct btf_type *)p;
2709 p += len;
2710 next_type_id++;
2711 }
2712
2713 /* shrink struct btf's internal types index and update btf_header */
2714 d->btf->nr_types = next_type_id - 1;
2715 d->btf->types_size = d->btf->nr_types;
2716 d->btf->hdr->type_len = p - types_start;
2717 new_types = realloc(d->btf->types,
2718 (1 + d->btf->nr_types) * sizeof(struct btf_type *));
2719 if (!new_types)
2720 return -ENOMEM;
2721 d->btf->types = new_types;
2722
2723 /* make sure string section follows type information without gaps */
2724 d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
2725 memmove(p, d->btf->strings, d->btf->hdr->str_len);
2726 d->btf->strings = p;
2727 p += d->btf->hdr->str_len;
2728
2729 d->btf->data_size = p - (char *)d->btf->data;
2730 return 0;
2731}
2732
2733/*
2734 * Figure out final (deduplicated and compacted) type ID for provided original
2735 * `type_id` by first resolving it into corresponding canonical type ID and
2736 * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
2737 * which is populated during compaction phase.
2738 */
2739static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
2740{
2741 __u32 resolved_type_id, new_type_id;
2742
2743 resolved_type_id = resolve_type_id(d, type_id);
2744 new_type_id = d->hypot_map[resolved_type_id];
2745 if (new_type_id > BTF_MAX_NR_TYPES)
2746 return -EINVAL;
2747 return new_type_id;
2748}
2749
2750/*
2751 * Remap referenced type IDs into deduped type IDs.
2752 *
2753 * After BTF types are deduplicated and compacted, their final type IDs may
2754 * differ from original ones. The map from original to a corresponding
2755 * deduped type ID is stored in btf_dedup->hypot_map and is populated during
2756 * compaction phase. During remapping phase we are rewriting all type IDs
2757 * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
2758 * their final deduped type IDs.
2759 */
2760static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
2761{
2762 struct btf_type *t = d->btf->types[type_id];
2763 int i, r;
2764
2765 switch (btf_kind(t)) {
2766 case BTF_KIND_INT:
2767 case BTF_KIND_ENUM:
2768 break;
2769
2770 case BTF_KIND_FWD:
2771 case BTF_KIND_CONST:
2772 case BTF_KIND_VOLATILE:
2773 case BTF_KIND_RESTRICT:
2774 case BTF_KIND_PTR:
2775 case BTF_KIND_TYPEDEF:
2776 case BTF_KIND_FUNC:
2777 case BTF_KIND_VAR:
2778 r = btf_dedup_remap_type_id(d, t->type);
2779 if (r < 0)
2780 return r;
2781 t->type = r;
2782 break;
2783
2784 case BTF_KIND_ARRAY: {
2785 struct btf_array *arr_info = btf_array(t);
2786
2787 r = btf_dedup_remap_type_id(d, arr_info->type);
2788 if (r < 0)
2789 return r;
2790 arr_info->type = r;
2791 r = btf_dedup_remap_type_id(d, arr_info->index_type);
2792 if (r < 0)
2793 return r;
2794 arr_info->index_type = r;
2795 break;
2796 }
2797
2798 case BTF_KIND_STRUCT:
2799 case BTF_KIND_UNION: {
2800 struct btf_member *member = btf_members(t);
2801 __u16 vlen = btf_vlen(t);
2802
2803 for (i = 0; i < vlen; i++) {
2804 r = btf_dedup_remap_type_id(d, member->type);
2805 if (r < 0)
2806 return r;
2807 member->type = r;
2808 member++;
2809 }
2810 break;
2811 }
2812
2813 case BTF_KIND_FUNC_PROTO: {
2814 struct btf_param *param = btf_params(t);
2815 __u16 vlen = btf_vlen(t);
2816
2817 r = btf_dedup_remap_type_id(d, t->type);
2818 if (r < 0)
2819 return r;
2820 t->type = r;
2821
2822 for (i = 0; i < vlen; i++) {
2823 r = btf_dedup_remap_type_id(d, param->type);
2824 if (r < 0)
2825 return r;
2826 param->type = r;
2827 param++;
2828 }
2829 break;
2830 }
2831
2832 case BTF_KIND_DATASEC: {
2833 struct btf_var_secinfo *var = btf_var_secinfos(t);
2834 __u16 vlen = btf_vlen(t);
2835
2836 for (i = 0; i < vlen; i++) {
2837 r = btf_dedup_remap_type_id(d, var->type);
2838 if (r < 0)
2839 return r;
2840 var->type = r;
2841 var++;
2842 }
2843 break;
2844 }
2845
2846 default:
2847 return -EINVAL;
2848 }
2849
2850 return 0;
2851}
2852
2853static int btf_dedup_remap_types(struct btf_dedup *d)
2854{
2855 int i, r;
2856
2857 for (i = 1; i <= d->btf->nr_types; i++) {
2858 r = btf_dedup_remap_type(d, i);
2859 if (r < 0)
2860 return r;
2861 }
2862 return 0;
2863}
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2/* Copyright (c) 2018 Facebook */
3
4#include <byteswap.h>
5#include <endian.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9#include <fcntl.h>
10#include <unistd.h>
11#include <errno.h>
12#include <sys/utsname.h>
13#include <sys/param.h>
14#include <sys/stat.h>
15#include <linux/kernel.h>
16#include <linux/err.h>
17#include <linux/btf.h>
18#include <gelf.h>
19#include "btf.h"
20#include "bpf.h"
21#include "libbpf.h"
22#include "libbpf_internal.h"
23#include "hashmap.h"
24#include "strset.h"
25#include "str_error.h"
26
27#define BTF_MAX_NR_TYPES 0x7fffffffU
28#define BTF_MAX_STR_OFFSET 0x7fffffffU
29
30static struct btf_type btf_void;
31
/* In-memory representation of one BTF object (base or split). */
struct btf {
	/* raw BTF data in native endianness */
	void *raw_data;
	/* raw BTF data in non-native endianness */
	void *raw_data_swapped;
	/* size of the raw BTF data, in bytes */
	__u32 raw_size;
	/* whether target endianness differs from the native one */
	bool swapped_endian;

	/*
	 * When BTF is loaded from an ELF or raw memory it is stored
	 * in a contiguous memory block. The hdr, types_data, and strs_data
	 * pointers point inside that memory region to their respective parts
	 * of the BTF representation:
	 *
	 * +--------------------------------+
	 * |  Header  |  Types  |  Strings  |
	 * +--------------------------------+
	 * ^          ^         ^
	 * |          |         |
	 * hdr        |         |
	 * types_data-+         |
	 * strs_data------------+
	 *
	 * If BTF data is later modified, e.g., due to types added or
	 * removed, BTF deduplication performed, etc, this contiguous
	 * representation is broken up into three independently allocated
	 * memory regions to be able to modify them independently.
	 * raw_data is nulled out at that point, but can be later allocated
	 * and cached again if user calls btf__raw_data(), at which point
	 * raw_data will contain a contiguous copy of header, types, and
	 * strings:
	 *
	 * +----------+  +---------+  +-----------+
	 * |  Header  |  |  Types  |  |  Strings  |
	 * +----------+  +---------+  +-----------+
	 * ^             ^            ^
	 * |             |            |
	 * hdr           |            |
	 * types_data----+            |
	 * strset__data(strs_set)-----+
	 *
	 *               +----------+---------+-----------+
	 *               |  Header  |  Types  |  Strings  |
	 * raw_data----->+----------+---------+-----------+
	 */
	struct btf_header *hdr;

	void *types_data;
	size_t types_data_cap; /* used size stored in hdr->type_len */

	/* type ID to `struct btf_type *` lookup index
	 * type_offs[0] corresponds to the first non-VOID type:
	 *   - for base BTF it's type [1];
	 *   - for split BTF it's the first non-base BTF type.
	 */
	__u32 *type_offs;
	/* allocated capacity of type_offs, counted in elements */
	size_t type_offs_cap;
	/* number of types in this BTF instance:
	 *   - doesn't include special [0] void type;
	 *   - for split BTF counts number of types added on top of base BTF.
	 */
	__u32 nr_types;
	/* if not NULL, points to the base BTF on top of which the current
	 * split BTF is based
	 */
	struct btf *base_btf;
	/* BTF type ID of the first type in this BTF instance:
	 *   - for base BTF it's equal to 1;
	 *   - for split BTF it's equal to biggest type ID of base BTF plus 1.
	 */
	int start_id;
	/* logical string offset of this BTF instance:
	 *   - for base BTF it's equal to 0;
	 *   - for split BTF it's equal to the total size of base BTF's string
	 *     section.
	 */
	int start_str_off;

	/* only one of strs_data or strs_set can be non-NULL, depending on
	 * whether BTF is in a modifiable state (strs_set is used) or not
	 * (strs_data points inside raw_data)
	 */
	void *strs_data;
	/* a set of unique strings */
	struct strset *strs_set;
	/* whether strings are already deduplicated */
	bool strs_deduped;

	/* whether base_btf should be freed in btf_free for this instance */
	bool owns_base;

	/* BTF object FD, if loaded into kernel */
	int fd;

	/* Pointer size (in bytes) for a target architecture of this BTF */
	int ptr_sz;
};
129
130static inline __u64 ptr_to_u64(const void *ptr)
131{
132 return (__u64) (unsigned long) ptr;
133}
134
/* Ensure given dynamically allocated memory region pointed to by *data* with
 * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough
 * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements
 * are already used. At most *max_cnt* elements can be ever allocated.
 * If necessary, memory is reallocated and all existing data is copied over,
 * new pointer to the memory region is stored at *data, new memory region
 * capacity (in number of elements) is stored in *cap_cnt, and the newly
 * added part of the region is zeroed.
 * On success, memory pointer to the beginning of unused memory is returned.
 * On error, NULL is returned: the request exceeds *max_cnt*, the element
 * count *cur_cnt* + *add_cnt* does not fit in size_t, or reallocation failed.
 * *data and *cap_cnt are left unchanged on error.
 */
void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
		     size_t cur_cnt, size_t max_cnt, size_t add_cnt)
{
	size_t new_cnt;
	void *new_data;

	/* if cur_cnt + add_cnt wrapped around, the small sum would pass the
	 * capacity check below and return a pointer to too little memory
	 */
	if (add_cnt > SIZE_MAX - cur_cnt)
		return NULL;

	if (cur_cnt + add_cnt <= *cap_cnt)
		return *data + cur_cnt * elem_sz;

	/* requested more than the set limit */
	if (cur_cnt + add_cnt > max_cnt)
		return NULL;

	new_cnt = *cap_cnt;
	new_cnt += new_cnt / 4;		  /* expand by 25% */
	if (new_cnt < 16)		  /* but at least 16 elements */
		new_cnt = 16;
	if (new_cnt > max_cnt)		  /* but not exceeding a set limit */
		new_cnt = max_cnt;
	if (new_cnt < cur_cnt + add_cnt)  /* also ensure we have enough memory */
		new_cnt = cur_cnt + add_cnt;

	new_data = libbpf_reallocarray(*data, new_cnt, elem_sz);
	if (!new_data)
		return NULL;

	/* zero out newly allocated portion of memory */
	memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz);

	*data = new_data;
	*cap_cnt = new_cnt;
	return new_data + cur_cnt * elem_sz;
}
178
/* Make sure the dynamically allocated region at *data* can hold at least
 * *need_cnt* elements of *elem_sz* bytes each, growing it through
 * libbpf_add_mem() when the current capacity *cap_cnt* is too small.
 * Returns 0 on success and -ENOMEM if the region could not be grown.
 */
int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
{
	size_t have_cnt = *cap_cnt;

	/* treat the whole current capacity as used and ask for the shortfall */
	if (need_cnt > have_cnt &&
	    !libbpf_add_mem(data, cap_cnt, elem_sz, have_cnt, SIZE_MAX,
			    need_cnt - have_cnt))
		return -ENOMEM;

	return 0;
}
195
196static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt)
197{
198 return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
199 btf->nr_types, BTF_MAX_NR_TYPES, add_cnt);
200}
201
202static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
203{
204 __u32 *p;
205
206 p = btf_add_type_offs_mem(btf, 1);
207 if (!p)
208 return -ENOMEM;
209
210 *p = type_off;
211 return 0;
212}
213
214static void btf_bswap_hdr(struct btf_header *h)
215{
216 h->magic = bswap_16(h->magic);
217 h->hdr_len = bswap_32(h->hdr_len);
218 h->type_off = bswap_32(h->type_off);
219 h->type_len = bswap_32(h->type_len);
220 h->str_off = bswap_32(h->str_off);
221 h->str_len = bswap_32(h->str_len);
222}
223
224static int btf_parse_hdr(struct btf *btf)
225{
226 struct btf_header *hdr = btf->hdr;
227 __u32 meta_left;
228
229 if (btf->raw_size < sizeof(struct btf_header)) {
230 pr_debug("BTF header not found\n");
231 return -EINVAL;
232 }
233
234 if (hdr->magic == bswap_16(BTF_MAGIC)) {
235 btf->swapped_endian = true;
236 if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) {
237 pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n",
238 bswap_32(hdr->hdr_len));
239 return -ENOTSUP;
240 }
241 btf_bswap_hdr(hdr);
242 } else if (hdr->magic != BTF_MAGIC) {
243 pr_debug("Invalid BTF magic: %x\n", hdr->magic);
244 return -EINVAL;
245 }
246
247 if (btf->raw_size < hdr->hdr_len) {
248 pr_debug("BTF header len %u larger than data size %u\n",
249 hdr->hdr_len, btf->raw_size);
250 return -EINVAL;
251 }
252
253 meta_left = btf->raw_size - hdr->hdr_len;
254 if (meta_left < (long long)hdr->str_off + hdr->str_len) {
255 pr_debug("Invalid BTF total size: %u\n", btf->raw_size);
256 return -EINVAL;
257 }
258
259 if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) {
260 pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
261 hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
262 return -EINVAL;
263 }
264
265 if (hdr->type_off % 4) {
266 pr_debug("BTF type section is not aligned to 4 bytes\n");
267 return -EINVAL;
268 }
269
270 return 0;
271}
272
273static int btf_parse_str_sec(struct btf *btf)
274{
275 const struct btf_header *hdr = btf->hdr;
276 const char *start = btf->strs_data;
277 const char *end = start + btf->hdr->str_len;
278
279 if (btf->base_btf && hdr->str_len == 0)
280 return 0;
281 if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
282 pr_debug("Invalid BTF string section\n");
283 return -EINVAL;
284 }
285 if (!btf->base_btf && start[0]) {
286 pr_debug("Invalid BTF string section\n");
287 return -EINVAL;
288 }
289 return 0;
290}
291
292static int btf_type_size(const struct btf_type *t)
293{
294 const int base_size = sizeof(struct btf_type);
295 __u16 vlen = btf_vlen(t);
296
297 switch (btf_kind(t)) {
298 case BTF_KIND_FWD:
299 case BTF_KIND_CONST:
300 case BTF_KIND_VOLATILE:
301 case BTF_KIND_RESTRICT:
302 case BTF_KIND_PTR:
303 case BTF_KIND_TYPEDEF:
304 case BTF_KIND_FUNC:
305 case BTF_KIND_FLOAT:
306 case BTF_KIND_TYPE_TAG:
307 return base_size;
308 case BTF_KIND_INT:
309 return base_size + sizeof(__u32);
310 case BTF_KIND_ENUM:
311 return base_size + vlen * sizeof(struct btf_enum);
312 case BTF_KIND_ENUM64:
313 return base_size + vlen * sizeof(struct btf_enum64);
314 case BTF_KIND_ARRAY:
315 return base_size + sizeof(struct btf_array);
316 case BTF_KIND_STRUCT:
317 case BTF_KIND_UNION:
318 return base_size + vlen * sizeof(struct btf_member);
319 case BTF_KIND_FUNC_PROTO:
320 return base_size + vlen * sizeof(struct btf_param);
321 case BTF_KIND_VAR:
322 return base_size + sizeof(struct btf_var);
323 case BTF_KIND_DATASEC:
324 return base_size + vlen * sizeof(struct btf_var_secinfo);
325 case BTF_KIND_DECL_TAG:
326 return base_size + sizeof(struct btf_decl_tag);
327 default:
328 pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
329 return -EINVAL;
330 }
331}
332
333static void btf_bswap_type_base(struct btf_type *t)
334{
335 t->name_off = bswap_32(t->name_off);
336 t->info = bswap_32(t->info);
337 t->type = bswap_32(t->type);
338}
339
340static int btf_bswap_type_rest(struct btf_type *t)
341{
342 struct btf_var_secinfo *v;
343 struct btf_enum64 *e64;
344 struct btf_member *m;
345 struct btf_array *a;
346 struct btf_param *p;
347 struct btf_enum *e;
348 __u16 vlen = btf_vlen(t);
349 int i;
350
351 switch (btf_kind(t)) {
352 case BTF_KIND_FWD:
353 case BTF_KIND_CONST:
354 case BTF_KIND_VOLATILE:
355 case BTF_KIND_RESTRICT:
356 case BTF_KIND_PTR:
357 case BTF_KIND_TYPEDEF:
358 case BTF_KIND_FUNC:
359 case BTF_KIND_FLOAT:
360 case BTF_KIND_TYPE_TAG:
361 return 0;
362 case BTF_KIND_INT:
363 *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
364 return 0;
365 case BTF_KIND_ENUM:
366 for (i = 0, e = btf_enum(t); i < vlen; i++, e++) {
367 e->name_off = bswap_32(e->name_off);
368 e->val = bswap_32(e->val);
369 }
370 return 0;
371 case BTF_KIND_ENUM64:
372 for (i = 0, e64 = btf_enum64(t); i < vlen; i++, e64++) {
373 e64->name_off = bswap_32(e64->name_off);
374 e64->val_lo32 = bswap_32(e64->val_lo32);
375 e64->val_hi32 = bswap_32(e64->val_hi32);
376 }
377 return 0;
378 case BTF_KIND_ARRAY:
379 a = btf_array(t);
380 a->type = bswap_32(a->type);
381 a->index_type = bswap_32(a->index_type);
382 a->nelems = bswap_32(a->nelems);
383 return 0;
384 case BTF_KIND_STRUCT:
385 case BTF_KIND_UNION:
386 for (i = 0, m = btf_members(t); i < vlen; i++, m++) {
387 m->name_off = bswap_32(m->name_off);
388 m->type = bswap_32(m->type);
389 m->offset = bswap_32(m->offset);
390 }
391 return 0;
392 case BTF_KIND_FUNC_PROTO:
393 for (i = 0, p = btf_params(t); i < vlen; i++, p++) {
394 p->name_off = bswap_32(p->name_off);
395 p->type = bswap_32(p->type);
396 }
397 return 0;
398 case BTF_KIND_VAR:
399 btf_var(t)->linkage = bswap_32(btf_var(t)->linkage);
400 return 0;
401 case BTF_KIND_DATASEC:
402 for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) {
403 v->type = bswap_32(v->type);
404 v->offset = bswap_32(v->offset);
405 v->size = bswap_32(v->size);
406 }
407 return 0;
408 case BTF_KIND_DECL_TAG:
409 btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx);
410 return 0;
411 default:
412 pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
413 return -EINVAL;
414 }
415}
416
417static int btf_parse_type_sec(struct btf *btf)
418{
419 struct btf_header *hdr = btf->hdr;
420 void *next_type = btf->types_data;
421 void *end_type = next_type + hdr->type_len;
422 int err, type_size;
423
424 while (next_type + sizeof(struct btf_type) <= end_type) {
425 if (btf->swapped_endian)
426 btf_bswap_type_base(next_type);
427
428 type_size = btf_type_size(next_type);
429 if (type_size < 0)
430 return type_size;
431 if (next_type + type_size > end_type) {
432 pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types);
433 return -EINVAL;
434 }
435
436 if (btf->swapped_endian && btf_bswap_type_rest(next_type))
437 return -EINVAL;
438
439 err = btf_add_type_idx_entry(btf, next_type - btf->types_data);
440 if (err)
441 return err;
442
443 next_type += type_size;
444 btf->nr_types++;
445 }
446
447 if (next_type != end_type) {
448 pr_warn("BTF types data is malformed\n");
449 return -EINVAL;
450 }
451
452 return 0;
453}
454
455static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id)
456{
457 const char *s;
458
459 s = btf__str_by_offset(btf, str_off);
460 if (!s) {
461 pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off);
462 return -EINVAL;
463 }
464
465 return 0;
466}
467
468static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id)
469{
470 const struct btf_type *t;
471
472 t = btf__type_by_id(btf, id);
473 if (!t) {
474 pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id);
475 return -EINVAL;
476 }
477
478 return 0;
479}
480
481static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id)
482{
483 __u32 kind = btf_kind(t);
484 int err, i, n;
485
486 err = btf_validate_str(btf, t->name_off, "type name", id);
487 if (err)
488 return err;
489
490 switch (kind) {
491 case BTF_KIND_UNKN:
492 case BTF_KIND_INT:
493 case BTF_KIND_FWD:
494 case BTF_KIND_FLOAT:
495 break;
496 case BTF_KIND_PTR:
497 case BTF_KIND_TYPEDEF:
498 case BTF_KIND_VOLATILE:
499 case BTF_KIND_CONST:
500 case BTF_KIND_RESTRICT:
501 case BTF_KIND_VAR:
502 case BTF_KIND_DECL_TAG:
503 case BTF_KIND_TYPE_TAG:
504 err = btf_validate_id(btf, t->type, id);
505 if (err)
506 return err;
507 break;
508 case BTF_KIND_ARRAY: {
509 const struct btf_array *a = btf_array(t);
510
511 err = btf_validate_id(btf, a->type, id);
512 err = err ?: btf_validate_id(btf, a->index_type, id);
513 if (err)
514 return err;
515 break;
516 }
517 case BTF_KIND_STRUCT:
518 case BTF_KIND_UNION: {
519 const struct btf_member *m = btf_members(t);
520
521 n = btf_vlen(t);
522 for (i = 0; i < n; i++, m++) {
523 err = btf_validate_str(btf, m->name_off, "field name", id);
524 err = err ?: btf_validate_id(btf, m->type, id);
525 if (err)
526 return err;
527 }
528 break;
529 }
530 case BTF_KIND_ENUM: {
531 const struct btf_enum *m = btf_enum(t);
532
533 n = btf_vlen(t);
534 for (i = 0; i < n; i++, m++) {
535 err = btf_validate_str(btf, m->name_off, "enum name", id);
536 if (err)
537 return err;
538 }
539 break;
540 }
541 case BTF_KIND_ENUM64: {
542 const struct btf_enum64 *m = btf_enum64(t);
543
544 n = btf_vlen(t);
545 for (i = 0; i < n; i++, m++) {
546 err = btf_validate_str(btf, m->name_off, "enum name", id);
547 if (err)
548 return err;
549 }
550 break;
551 }
552 case BTF_KIND_FUNC: {
553 const struct btf_type *ft;
554
555 err = btf_validate_id(btf, t->type, id);
556 if (err)
557 return err;
558 ft = btf__type_by_id(btf, t->type);
559 if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) {
560 pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type);
561 return -EINVAL;
562 }
563 break;
564 }
565 case BTF_KIND_FUNC_PROTO: {
566 const struct btf_param *m = btf_params(t);
567
568 n = btf_vlen(t);
569 for (i = 0; i < n; i++, m++) {
570 err = btf_validate_str(btf, m->name_off, "param name", id);
571 err = err ?: btf_validate_id(btf, m->type, id);
572 if (err)
573 return err;
574 }
575 break;
576 }
577 case BTF_KIND_DATASEC: {
578 const struct btf_var_secinfo *m = btf_var_secinfos(t);
579
580 n = btf_vlen(t);
581 for (i = 0; i < n; i++, m++) {
582 err = btf_validate_id(btf, m->type, id);
583 if (err)
584 return err;
585 }
586 break;
587 }
588 default:
589 pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
590 return -EINVAL;
591 }
592 return 0;
593}
594
595/* Validate basic sanity of BTF. It's intentionally less thorough than
596 * kernel's validation and validates only properties of BTF that libbpf relies
597 * on to be correct (e.g., valid type IDs, valid string offsets, etc)
598 */
599static int btf_sanity_check(const struct btf *btf)
600{
601 const struct btf_type *t;
602 __u32 i, n = btf__type_cnt(btf);
603 int err;
604
605 for (i = btf->start_id; i < n; i++) {
606 t = btf_type_by_id(btf, i);
607 err = btf_validate_type(btf, t, i);
608 if (err)
609 return err;
610 }
611 return 0;
612}
613
614__u32 btf__type_cnt(const struct btf *btf)
615{
616 return btf->start_id + btf->nr_types;
617}
618
619const struct btf *btf__base_btf(const struct btf *btf)
620{
621 return btf->base_btf;
622}
623
624/* internal helper returning non-const pointer to a type */
625struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id)
626{
627 if (type_id == 0)
628 return &btf_void;
629 if (type_id < btf->start_id)
630 return btf_type_by_id(btf->base_btf, type_id);
631 return btf->types_data + btf->type_offs[type_id - btf->start_id];
632}
633
634const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
635{
636 if (type_id >= btf->start_id + btf->nr_types)
637 return errno = EINVAL, NULL;
638 return btf_type_by_id((struct btf *)btf, type_id);
639}
640
641static int determine_ptr_size(const struct btf *btf)
642{
643 static const char * const long_aliases[] = {
644 "long",
645 "long int",
646 "int long",
647 "unsigned long",
648 "long unsigned",
649 "unsigned long int",
650 "unsigned int long",
651 "long unsigned int",
652 "long int unsigned",
653 "int unsigned long",
654 "int long unsigned",
655 };
656 const struct btf_type *t;
657 const char *name;
658 int i, j, n;
659
660 if (btf->base_btf && btf->base_btf->ptr_sz > 0)
661 return btf->base_btf->ptr_sz;
662
663 n = btf__type_cnt(btf);
664 for (i = 1; i < n; i++) {
665 t = btf__type_by_id(btf, i);
666 if (!btf_is_int(t))
667 continue;
668
669 if (t->size != 4 && t->size != 8)
670 continue;
671
672 name = btf__name_by_offset(btf, t->name_off);
673 if (!name)
674 continue;
675
676 for (j = 0; j < ARRAY_SIZE(long_aliases); j++) {
677 if (strcmp(name, long_aliases[j]) == 0)
678 return t->size;
679 }
680 }
681
682 return -1;
683}
684
685static size_t btf_ptr_sz(const struct btf *btf)
686{
687 if (!btf->ptr_sz)
688 ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
689 return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz;
690}
691
692/* Return pointer size this BTF instance assumes. The size is heuristically
693 * determined by looking for 'long' or 'unsigned long' integer type and
694 * recording its size in bytes. If BTF type information doesn't have any such
695 * type, this function returns 0. In the latter case, native architecture's
696 * pointer size is assumed, so will be either 4 or 8, depending on
697 * architecture that libbpf was compiled for. It's possible to override
698 * guessed value by using btf__set_pointer_size() API.
699 */
700size_t btf__pointer_size(const struct btf *btf)
701{
702 if (!btf->ptr_sz)
703 ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
704
705 if (btf->ptr_sz < 0)
706 /* not enough BTF type info to guess */
707 return 0;
708
709 return btf->ptr_sz;
710}
711
712/* Override or set pointer size in bytes. Only values of 4 and 8 are
713 * supported.
714 */
715int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
716{
717 if (ptr_sz != 4 && ptr_sz != 8)
718 return libbpf_err(-EINVAL);
719 btf->ptr_sz = ptr_sz;
720 return 0;
721}
722
/* Report whether the host the library was compiled for is big-endian,
 * based on the compiler-provided __BYTE_ORDER__ macro. Compilation fails
 * if the byte order is neither little- nor big-endian.
 */
static bool is_host_big_endian(void)
{
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
# error "Unrecognized __BYTE_ORDER__"
#endif
	return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__;
}
733
734enum btf_endianness btf__endianness(const struct btf *btf)
735{
736 if (is_host_big_endian())
737 return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN;
738 else
739 return btf->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN;
740}
741
742int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
743{
744 if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
745 return libbpf_err(-EINVAL);
746
747 btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
748 if (!btf->swapped_endian) {
749 free(btf->raw_data_swapped);
750 btf->raw_data_swapped = NULL;
751 }
752 return 0;
753}
754
755static bool btf_type_is_void(const struct btf_type *t)
756{
757 return t == &btf_void || btf_is_fwd(t);
758}
759
/* True if t is NULL or satisfies btf_type_is_void(). */
static bool btf_type_is_void_or_null(const struct btf_type *t)
{
	return t ? btf_type_is_void(t) : true;
}
764
765#define MAX_RESOLVE_DEPTH 32
766
767__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
768{
769 const struct btf_array *array;
770 const struct btf_type *t;
771 __u32 nelems = 1;
772 __s64 size = -1;
773 int i;
774
775 t = btf__type_by_id(btf, type_id);
776 for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) {
777 switch (btf_kind(t)) {
778 case BTF_KIND_INT:
779 case BTF_KIND_STRUCT:
780 case BTF_KIND_UNION:
781 case BTF_KIND_ENUM:
782 case BTF_KIND_ENUM64:
783 case BTF_KIND_DATASEC:
784 case BTF_KIND_FLOAT:
785 size = t->size;
786 goto done;
787 case BTF_KIND_PTR:
788 size = btf_ptr_sz(btf);
789 goto done;
790 case BTF_KIND_TYPEDEF:
791 case BTF_KIND_VOLATILE:
792 case BTF_KIND_CONST:
793 case BTF_KIND_RESTRICT:
794 case BTF_KIND_VAR:
795 case BTF_KIND_DECL_TAG:
796 case BTF_KIND_TYPE_TAG:
797 type_id = t->type;
798 break;
799 case BTF_KIND_ARRAY:
800 array = btf_array(t);
801 if (nelems && array->nelems > UINT32_MAX / nelems)
802 return libbpf_err(-E2BIG);
803 nelems *= array->nelems;
804 type_id = array->type;
805 break;
806 default:
807 return libbpf_err(-EINVAL);
808 }
809
810 t = btf__type_by_id(btf, type_id);
811 }
812
813done:
814 if (size < 0)
815 return libbpf_err(-EINVAL);
816 if (nelems && size > UINT32_MAX / nelems)
817 return libbpf_err(-E2BIG);
818
819 return nelems * size;
820}
821
822int btf__align_of(const struct btf *btf, __u32 id)
823{
824 const struct btf_type *t = btf__type_by_id(btf, id);
825 __u16 kind = btf_kind(t);
826
827 switch (kind) {
828 case BTF_KIND_INT:
829 case BTF_KIND_ENUM:
830 case BTF_KIND_ENUM64:
831 case BTF_KIND_FLOAT:
832 return min(btf_ptr_sz(btf), (size_t)t->size);
833 case BTF_KIND_PTR:
834 return btf_ptr_sz(btf);
835 case BTF_KIND_TYPEDEF:
836 case BTF_KIND_VOLATILE:
837 case BTF_KIND_CONST:
838 case BTF_KIND_RESTRICT:
839 case BTF_KIND_TYPE_TAG:
840 return btf__align_of(btf, t->type);
841 case BTF_KIND_ARRAY:
842 return btf__align_of(btf, btf_array(t)->type);
843 case BTF_KIND_STRUCT:
844 case BTF_KIND_UNION: {
845 const struct btf_member *m = btf_members(t);
846 __u16 vlen = btf_vlen(t);
847 int i, max_align = 1, align;
848
849 for (i = 0; i < vlen; i++, m++) {
850 align = btf__align_of(btf, m->type);
851 if (align <= 0)
852 return libbpf_err(align);
853 max_align = max(max_align, align);
854
855 /* if field offset isn't aligned according to field
856 * type's alignment, then struct must be packed
857 */
858 if (btf_member_bitfield_size(t, i) == 0 &&
859 (m->offset % (8 * align)) != 0)
860 return 1;
861 }
862
863 /* if struct/union size isn't a multiple of its alignment,
864 * then struct must be packed
865 */
866 if ((t->size % max_align) != 0)
867 return 1;
868
869 return max_align;
870 }
871 default:
872 pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
873 return errno = EINVAL, 0;
874 }
875}
876
877int btf__resolve_type(const struct btf *btf, __u32 type_id)
878{
879 const struct btf_type *t;
880 int depth = 0;
881
882 t = btf__type_by_id(btf, type_id);
883 while (depth < MAX_RESOLVE_DEPTH &&
884 !btf_type_is_void_or_null(t) &&
885 (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) {
886 type_id = t->type;
887 t = btf__type_by_id(btf, type_id);
888 depth++;
889 }
890
891 if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
892 return libbpf_err(-EINVAL);
893
894 return type_id;
895}
896
897__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
898{
899 __u32 i, nr_types = btf__type_cnt(btf);
900
901 if (!strcmp(type_name, "void"))
902 return 0;
903
904 for (i = 1; i < nr_types; i++) {
905 const struct btf_type *t = btf__type_by_id(btf, i);
906 const char *name = btf__name_by_offset(btf, t->name_off);
907
908 if (name && !strcmp(type_name, name))
909 return i;
910 }
911
912 return libbpf_err(-ENOENT);
913}
914
915static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
916 const char *type_name, __u32 kind)
917{
918 __u32 i, nr_types = btf__type_cnt(btf);
919
920 if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
921 return 0;
922
923 for (i = start_id; i < nr_types; i++) {
924 const struct btf_type *t = btf__type_by_id(btf, i);
925 const char *name;
926
927 if (btf_kind(t) != kind)
928 continue;
929 name = btf__name_by_offset(btf, t->name_off);
930 if (name && !strcmp(type_name, name))
931 return i;
932 }
933
934 return libbpf_err(-ENOENT);
935}
936
937__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
938 __u32 kind)
939{
940 return btf_find_by_name_kind(btf, btf->start_id, type_name, kind);
941}
942
943__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
944 __u32 kind)
945{
946 return btf_find_by_name_kind(btf, 1, type_name, kind);
947}
948
949static bool btf_is_modifiable(const struct btf *btf)
950{
951 return (void *)btf->hdr != btf->raw_data;
952}
953
954void btf__free(struct btf *btf)
955{
956 if (IS_ERR_OR_NULL(btf))
957 return;
958
959 if (btf->fd >= 0)
960 close(btf->fd);
961
962 if (btf_is_modifiable(btf)) {
963 /* if BTF was modified after loading, it will have a split
964 * in-memory representation for header, types, and strings
965 * sections, so we need to free all of them individually. It
966 * might still have a cached contiguous raw data present,
967 * which will be unconditionally freed below.
968 */
969 free(btf->hdr);
970 free(btf->types_data);
971 strset__free(btf->strs_set);
972 }
973 free(btf->raw_data);
974 free(btf->raw_data_swapped);
975 free(btf->type_offs);
976 if (btf->owns_base)
977 btf__free(btf->base_btf);
978 free(btf);
979}
980
981static struct btf *btf_new_empty(struct btf *base_btf)
982{
983 struct btf *btf;
984
985 btf = calloc(1, sizeof(*btf));
986 if (!btf)
987 return ERR_PTR(-ENOMEM);
988
989 btf->nr_types = 0;
990 btf->start_id = 1;
991 btf->start_str_off = 0;
992 btf->fd = -1;
993 btf->ptr_sz = sizeof(void *);
994 btf->swapped_endian = false;
995
996 if (base_btf) {
997 btf->base_btf = base_btf;
998 btf->start_id = btf__type_cnt(base_btf);
999 btf->start_str_off = base_btf->hdr->str_len;
1000 btf->swapped_endian = base_btf->swapped_endian;
1001 }
1002
1003 /* +1 for empty string at offset 0 */
1004 btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1);
1005 btf->raw_data = calloc(1, btf->raw_size);
1006 if (!btf->raw_data) {
1007 free(btf);
1008 return ERR_PTR(-ENOMEM);
1009 }
1010
1011 btf->hdr = btf->raw_data;
1012 btf->hdr->hdr_len = sizeof(struct btf_header);
1013 btf->hdr->magic = BTF_MAGIC;
1014 btf->hdr->version = BTF_VERSION;
1015
1016 btf->types_data = btf->raw_data + btf->hdr->hdr_len;
1017 btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
1018 btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
1019
1020 return btf;
1021}
1022
/* Create an empty BTF object with no base BTF; the result is passed
 * through libbpf_ptr().
 */
struct btf *btf__new_empty(void)
{
	struct btf *btf = btf_new_empty(NULL);

	return libbpf_ptr(btf);
}
1027
/* Create an empty BTF object on top of base_btf; the result is passed
 * through libbpf_ptr().
 */
struct btf *btf__new_empty_split(struct btf *base_btf)
{
	struct btf *btf = btf_new_empty(base_btf);

	return libbpf_ptr(btf);
}
1032
1033static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
1034{
1035 struct btf *btf;
1036 int err;
1037
1038 btf = calloc(1, sizeof(struct btf));
1039 if (!btf)
1040 return ERR_PTR(-ENOMEM);
1041
1042 btf->nr_types = 0;
1043 btf->start_id = 1;
1044 btf->start_str_off = 0;
1045 btf->fd = -1;
1046
1047 if (base_btf) {
1048 btf->base_btf = base_btf;
1049 btf->start_id = btf__type_cnt(base_btf);
1050 btf->start_str_off = base_btf->hdr->str_len;
1051 }
1052
1053 btf->raw_data = malloc(size);
1054 if (!btf->raw_data) {
1055 err = -ENOMEM;
1056 goto done;
1057 }
1058 memcpy(btf->raw_data, data, size);
1059 btf->raw_size = size;
1060
1061 btf->hdr = btf->raw_data;
1062 err = btf_parse_hdr(btf);
1063 if (err)
1064 goto done;
1065
1066 btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off;
1067 btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off;
1068
1069 err = btf_parse_str_sec(btf);
1070 err = err ?: btf_parse_type_sec(btf);
1071 err = err ?: btf_sanity_check(btf);
1072 if (err)
1073 goto done;
1074
1075done:
1076 if (err) {
1077 btf__free(btf);
1078 return ERR_PTR(err);
1079 }
1080
1081 return btf;
1082}
1083
1084struct btf *btf__new(const void *data, __u32 size)
1085{
1086 return libbpf_ptr(btf_new(data, size, NULL));
1087}
1088
1089struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
1090{
1091 return libbpf_ptr(btf_new(data, size, base_btf));
1092}
1093
1094struct btf_elf_secs {
1095 Elf_Data *btf_data;
1096 Elf_Data *btf_ext_data;
1097 Elf_Data *btf_base_data;
1098};
1099
1100static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs *secs)
1101{
1102 Elf_Scn *scn = NULL;
1103 Elf_Data *data;
1104 GElf_Ehdr ehdr;
1105 size_t shstrndx;
1106 int idx = 0;
1107
1108 if (!gelf_getehdr(elf, &ehdr)) {
1109 pr_warn("failed to get EHDR from %s\n", path);
1110 goto err;
1111 }
1112
1113 if (elf_getshdrstrndx(elf, &shstrndx)) {
1114 pr_warn("failed to get section names section index for %s\n",
1115 path);
1116 goto err;
1117 }
1118
1119 if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) {
1120 pr_warn("failed to get e_shstrndx from %s\n", path);
1121 goto err;
1122 }
1123
1124 while ((scn = elf_nextscn(elf, scn)) != NULL) {
1125 Elf_Data **field;
1126 GElf_Shdr sh;
1127 char *name;
1128
1129 idx++;
1130 if (gelf_getshdr(scn, &sh) != &sh) {
1131 pr_warn("failed to get section(%d) header from %s\n",
1132 idx, path);
1133 goto err;
1134 }
1135 name = elf_strptr(elf, shstrndx, sh.sh_name);
1136 if (!name) {
1137 pr_warn("failed to get section(%d) name from %s\n",
1138 idx, path);
1139 goto err;
1140 }
1141
1142 if (strcmp(name, BTF_ELF_SEC) == 0)
1143 field = &secs->btf_data;
1144 else if (strcmp(name, BTF_EXT_ELF_SEC) == 0)
1145 field = &secs->btf_ext_data;
1146 else if (strcmp(name, BTF_BASE_ELF_SEC) == 0)
1147 field = &secs->btf_base_data;
1148 else
1149 continue;
1150
1151 data = elf_getdata(scn, 0);
1152 if (!data) {
1153 pr_warn("failed to get section(%d, %s) data from %s\n",
1154 idx, name, path);
1155 goto err;
1156 }
1157 *field = data;
1158 }
1159
1160 return 0;
1161
1162err:
1163 return -LIBBPF_ERRNO__FORMAT;
1164}
1165
1166static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
1167 struct btf_ext **btf_ext)
1168{
1169 struct btf_elf_secs secs = {};
1170 struct btf *dist_base_btf = NULL;
1171 struct btf *btf = NULL;
1172 int err = 0, fd = -1;
1173 Elf *elf = NULL;
1174
1175 if (elf_version(EV_CURRENT) == EV_NONE) {
1176 pr_warn("failed to init libelf for %s\n", path);
1177 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
1178 }
1179
1180 fd = open(path, O_RDONLY | O_CLOEXEC);
1181 if (fd < 0) {
1182 err = -errno;
1183 pr_warn("failed to open %s: %s\n", path, errstr(err));
1184 return ERR_PTR(err);
1185 }
1186
1187 elf = elf_begin(fd, ELF_C_READ, NULL);
1188 if (!elf) {
1189 err = -LIBBPF_ERRNO__FORMAT;
1190 pr_warn("failed to open %s as ELF file\n", path);
1191 goto done;
1192 }
1193
1194 err = btf_find_elf_sections(elf, path, &secs);
1195 if (err)
1196 goto done;
1197
1198 if (!secs.btf_data) {
1199 pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path);
1200 err = -ENODATA;
1201 goto done;
1202 }
1203
1204 if (secs.btf_base_data) {
1205 dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
1206 NULL);
1207 if (IS_ERR(dist_base_btf)) {
1208 err = PTR_ERR(dist_base_btf);
1209 dist_base_btf = NULL;
1210 goto done;
1211 }
1212 }
1213
1214 btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
1215 dist_base_btf ?: base_btf);
1216 if (IS_ERR(btf)) {
1217 err = PTR_ERR(btf);
1218 goto done;
1219 }
1220 if (dist_base_btf && base_btf) {
1221 err = btf__relocate(btf, base_btf);
1222 if (err)
1223 goto done;
1224 btf__free(dist_base_btf);
1225 dist_base_btf = NULL;
1226 }
1227
1228 if (dist_base_btf)
1229 btf->owns_base = true;
1230
1231 switch (gelf_getclass(elf)) {
1232 case ELFCLASS32:
1233 btf__set_pointer_size(btf, 4);
1234 break;
1235 case ELFCLASS64:
1236 btf__set_pointer_size(btf, 8);
1237 break;
1238 default:
1239 pr_warn("failed to get ELF class (bitness) for %s\n", path);
1240 break;
1241 }
1242
1243 if (btf_ext && secs.btf_ext_data) {
1244 *btf_ext = btf_ext__new(secs.btf_ext_data->d_buf, secs.btf_ext_data->d_size);
1245 if (IS_ERR(*btf_ext)) {
1246 err = PTR_ERR(*btf_ext);
1247 goto done;
1248 }
1249 } else if (btf_ext) {
1250 *btf_ext = NULL;
1251 }
1252done:
1253 if (elf)
1254 elf_end(elf);
1255 close(fd);
1256
1257 if (!err)
1258 return btf;
1259
1260 if (btf_ext)
1261 btf_ext__free(*btf_ext);
1262 btf__free(dist_base_btf);
1263 btf__free(btf);
1264
1265 return ERR_PTR(err);
1266}
1267
/* Parse BTF from an ELF file with no base BTF, optionally returning
 * .BTF.ext data through btf_ext; the result is passed through libbpf_ptr().
 */
struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
{
	struct btf *btf = btf_parse_elf(path, NULL, btf_ext);

	return libbpf_ptr(btf);
}
1272
/* Parse BTF from an ELF file on top of base_btf, without .BTF.ext; the
 * result is passed through libbpf_ptr().
 */
struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
{
	struct btf *btf = btf_parse_elf(path, base_btf, NULL);

	return libbpf_ptr(btf);
}
1277
1278static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
1279{
1280 struct btf *btf = NULL;
1281 void *data = NULL;
1282 FILE *f = NULL;
1283 __u16 magic;
1284 int err = 0;
1285 long sz;
1286
1287 f = fopen(path, "rbe");
1288 if (!f) {
1289 err = -errno;
1290 goto err_out;
1291 }
1292
1293 /* check BTF magic */
1294 if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
1295 err = -EIO;
1296 goto err_out;
1297 }
1298 if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {
1299 /* definitely not a raw BTF */
1300 err = -EPROTO;
1301 goto err_out;
1302 }
1303
1304 /* get file size */
1305 if (fseek(f, 0, SEEK_END)) {
1306 err = -errno;
1307 goto err_out;
1308 }
1309 sz = ftell(f);
1310 if (sz < 0) {
1311 err = -errno;
1312 goto err_out;
1313 }
1314 /* rewind to the start */
1315 if (fseek(f, 0, SEEK_SET)) {
1316 err = -errno;
1317 goto err_out;
1318 }
1319
1320 /* pre-alloc memory and read all of BTF data */
1321 data = malloc(sz);
1322 if (!data) {
1323 err = -ENOMEM;
1324 goto err_out;
1325 }
1326 if (fread(data, 1, sz, f) < sz) {
1327 err = -EIO;
1328 goto err_out;
1329 }
1330
1331 /* finally parse BTF data */
1332 btf = btf_new(data, sz, base_btf);
1333
1334err_out:
1335 free(data);
1336 if (f)
1337 fclose(f);
1338 return err ? ERR_PTR(err) : btf;
1339}
1340
/* Parse a raw BTF file with no base BTF; the result is passed through
 * libbpf_ptr().
 */
struct btf *btf__parse_raw(const char *path)
{
	struct btf *btf = btf_parse_raw(path, NULL);

	return libbpf_ptr(btf);
}
1345
/* Parse a raw BTF file on top of base_btf; the result is passed through
 * libbpf_ptr().
 */
struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
{
	struct btf *btf = btf_parse_raw(path, base_btf);

	return libbpf_ptr(btf);
}
1350
/* Parse BTF from 'path', trying the raw BTF format first. Only when the raw
 * parser reports -EPROTO (file does not start with the BTF magic) is the
 * file parsed as ELF instead. *btf_ext, if requested, is reset to NULL up
 * front.
 */
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
	struct btf *raw_btf;
	int err;

	if (btf_ext)
		*btf_ext = NULL;

	raw_btf = btf_parse_raw(path, base_btf);
	err = libbpf_get_error(raw_btf);
	if (err == 0)
		return raw_btf;
	if (err == -EPROTO)
		return btf_parse_elf(path, base_btf, btf_ext);

	return ERR_PTR(err);
}
1367
/* Parse BTF (raw or ELF) from 'path' with no base BTF; the result is passed
 * through libbpf_ptr().
 */
struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
{
	struct btf *btf = btf_parse(path, NULL, btf_ext);

	return libbpf_ptr(btf);
}
1372
/* Parse BTF (raw or ELF) from 'path' on top of base_btf, without .BTF.ext;
 * the result is passed through libbpf_ptr().
 */
struct btf *btf__parse_split(const char *path, struct btf *base_btf)
{
	struct btf *btf = btf_parse(path, base_btf, NULL);

	return libbpf_ptr(btf);
}
1377
1378static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
1379
1380int btf_load_into_kernel(struct btf *btf,
1381 char *log_buf, size_t log_sz, __u32 log_level,
1382 int token_fd)
1383{
1384 LIBBPF_OPTS(bpf_btf_load_opts, opts);
1385 __u32 buf_sz = 0, raw_size;
1386 char *buf = NULL, *tmp;
1387 void *raw_data;
1388 int err = 0;
1389
1390 if (btf->fd >= 0)
1391 return libbpf_err(-EEXIST);
1392 if (log_sz && !log_buf)
1393 return libbpf_err(-EINVAL);
1394
1395 /* cache native raw data representation */
1396 raw_data = btf_get_raw_data(btf, &raw_size, false);
1397 if (!raw_data) {
1398 err = -ENOMEM;
1399 goto done;
1400 }
1401 btf->raw_size = raw_size;
1402 btf->raw_data = raw_data;
1403
1404retry_load:
1405 /* if log_level is 0, we won't provide log_buf/log_size to the kernel,
1406 * initially. Only if BTF loading fails, we bump log_level to 1 and
1407 * retry, using either auto-allocated or custom log_buf. This way
1408 * non-NULL custom log_buf provides a buffer just in case, but hopes
1409 * for successful load and no need for log_buf.
1410 */
1411 if (log_level) {
1412 /* if caller didn't provide custom log_buf, we'll keep
1413 * allocating our own progressively bigger buffers for BTF
1414 * verification log
1415 */
1416 if (!log_buf) {
1417 buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2);
1418 tmp = realloc(buf, buf_sz);
1419 if (!tmp) {
1420 err = -ENOMEM;
1421 goto done;
1422 }
1423 buf = tmp;
1424 buf[0] = '\0';
1425 }
1426
1427 opts.log_buf = log_buf ? log_buf : buf;
1428 opts.log_size = log_buf ? log_sz : buf_sz;
1429 opts.log_level = log_level;
1430 }
1431
1432 opts.token_fd = token_fd;
1433 if (token_fd)
1434 opts.btf_flags |= BPF_F_TOKEN_FD;
1435
1436 btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
1437 if (btf->fd < 0) {
1438 /* time to turn on verbose mode and try again */
1439 if (log_level == 0) {
1440 log_level = 1;
1441 goto retry_load;
1442 }
1443 /* only retry if caller didn't provide custom log_buf, but
1444 * make sure we can never overflow buf_sz
1445 */
1446 if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2)
1447 goto retry_load;
1448
1449 err = -errno;
1450 pr_warn("BTF loading error: %s\n", errstr(err));
1451 /* don't print out contents of custom log_buf */
1452 if (!log_buf && buf[0])
1453 pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf);
1454 }
1455
1456done:
1457 free(buf);
1458 return libbpf_err(err);
1459}
1460
/* Load BTF into the kernel with no custom log buffer, log level 0 and no
 * token FD.
 */
int btf__load_into_kernel(struct btf *btf)
{
	char *no_log_buf = NULL;

	return btf_load_into_kernel(btf, no_log_buf, 0, 0, 0);
}
1465
1466int btf__fd(const struct btf *btf)
1467{
1468 return btf->fd;
1469}
1470
1471void btf__set_fd(struct btf *btf, int fd)
1472{
1473 btf->fd = fd;
1474}
1475
1476static const void *btf_strs_data(const struct btf *btf)
1477{
1478 return btf->strs_data ? btf->strs_data : strset__data(btf->strs_set);
1479}
1480
1481static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
1482{
1483 struct btf_header *hdr = btf->hdr;
1484 struct btf_type *t;
1485 void *data, *p;
1486 __u32 data_sz;
1487 int i;
1488
1489 data = swap_endian ? btf->raw_data_swapped : btf->raw_data;
1490 if (data) {
1491 *size = btf->raw_size;
1492 return data;
1493 }
1494
1495 data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len;
1496 data = calloc(1, data_sz);
1497 if (!data)
1498 return NULL;
1499 p = data;
1500
1501 memcpy(p, hdr, hdr->hdr_len);
1502 if (swap_endian)
1503 btf_bswap_hdr(p);
1504 p += hdr->hdr_len;
1505
1506 memcpy(p, btf->types_data, hdr->type_len);
1507 if (swap_endian) {
1508 for (i = 0; i < btf->nr_types; i++) {
1509 t = p + btf->type_offs[i];
1510 /* btf_bswap_type_rest() relies on native t->info, so
1511 * we swap base type info after we swapped all the
1512 * additional information
1513 */
1514 if (btf_bswap_type_rest(t))
1515 goto err_out;
1516 btf_bswap_type_base(t);
1517 }
1518 }
1519 p += hdr->type_len;
1520
1521 memcpy(p, btf_strs_data(btf), hdr->str_len);
1522 p += hdr->str_len;
1523
1524 *size = data_sz;
1525 return data;
1526err_out:
1527 free(data);
1528 return NULL;
1529}
1530
1531const void *btf__raw_data(const struct btf *btf_ro, __u32 *size)
1532{
1533 struct btf *btf = (struct btf *)btf_ro;
1534 __u32 data_sz;
1535 void *data;
1536
1537 data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
1538 if (!data)
1539 return errno = ENOMEM, NULL;
1540
1541 btf->raw_size = data_sz;
1542 if (btf->swapped_endian)
1543 btf->raw_data_swapped = data;
1544 else
1545 btf->raw_data = data;
1546 *size = data_sz;
1547 return data;
1548}
1549
1550__attribute__((alias("btf__raw_data")))
1551const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
1552
1553const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
1554{
1555 if (offset < btf->start_str_off)
1556 return btf__str_by_offset(btf->base_btf, offset);
1557 else if (offset - btf->start_str_off < btf->hdr->str_len)
1558 return btf_strs_data(btf) + (offset - btf->start_str_off);
1559 else
1560 return errno = EINVAL, NULL;
1561}
1562
1563const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
1564{
1565 return btf__str_by_offset(btf, offset);
1566}
1567
1568struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
1569{
1570 struct bpf_btf_info btf_info;
1571 __u32 len = sizeof(btf_info);
1572 __u32 last_size;
1573 struct btf *btf;
1574 void *ptr;
1575 int err;
1576
1577 /* we won't know btf_size until we call bpf_btf_get_info_by_fd(). so
1578 * let's start with a sane default - 4KiB here - and resize it only if
1579 * bpf_btf_get_info_by_fd() needs a bigger buffer.
1580 */
1581 last_size = 4096;
1582 ptr = malloc(last_size);
1583 if (!ptr)
1584 return ERR_PTR(-ENOMEM);
1585
1586 memset(&btf_info, 0, sizeof(btf_info));
1587 btf_info.btf = ptr_to_u64(ptr);
1588 btf_info.btf_size = last_size;
1589 err = bpf_btf_get_info_by_fd(btf_fd, &btf_info, &len);
1590
1591 if (!err && btf_info.btf_size > last_size) {
1592 void *temp_ptr;
1593
1594 last_size = btf_info.btf_size;
1595 temp_ptr = realloc(ptr, last_size);
1596 if (!temp_ptr) {
1597 btf = ERR_PTR(-ENOMEM);
1598 goto exit_free;
1599 }
1600 ptr = temp_ptr;
1601
1602 len = sizeof(btf_info);
1603 memset(&btf_info, 0, sizeof(btf_info));
1604 btf_info.btf = ptr_to_u64(ptr);
1605 btf_info.btf_size = last_size;
1606
1607 err = bpf_btf_get_info_by_fd(btf_fd, &btf_info, &len);
1608 }
1609
1610 if (err || btf_info.btf_size > last_size) {
1611 btf = err ? ERR_PTR(-errno) : ERR_PTR(-E2BIG);
1612 goto exit_free;
1613 }
1614
1615 btf = btf_new(ptr, btf_info.btf_size, base_btf);
1616
1617exit_free:
1618 free(ptr);
1619 return btf;
1620}
1621
1622struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
1623{
1624 struct btf *btf;
1625 int btf_fd;
1626
1627 btf_fd = bpf_btf_get_fd_by_id(id);
1628 if (btf_fd < 0)
1629 return libbpf_err_ptr(-errno);
1630
1631 btf = btf_get_from_fd(btf_fd, base_btf);
1632 close(btf_fd);
1633
1634 return libbpf_ptr(btf);
1635}
1636
1637struct btf *btf__load_from_kernel_by_id(__u32 id)
1638{
1639 return btf__load_from_kernel_by_id_split(id, NULL);
1640}
1641
1642static void btf_invalidate_raw_data(struct btf *btf)
1643{
1644 if (btf->raw_data) {
1645 free(btf->raw_data);
1646 btf->raw_data = NULL;
1647 }
1648 if (btf->raw_data_swapped) {
1649 free(btf->raw_data_swapped);
1650 btf->raw_data_swapped = NULL;
1651 }
1652}
1653
1654/* Ensure BTF is ready to be modified (by splitting into a three memory
1655 * regions for header, types, and strings). Also invalidate cached
1656 * raw_data, if any.
1657 */
1658static int btf_ensure_modifiable(struct btf *btf)
1659{
1660 void *hdr, *types;
1661 struct strset *set = NULL;
1662 int err = -ENOMEM;
1663
1664 if (btf_is_modifiable(btf)) {
1665 /* any BTF modification invalidates raw_data */
1666 btf_invalidate_raw_data(btf);
1667 return 0;
1668 }
1669
1670 /* split raw data into three memory regions */
1671 hdr = malloc(btf->hdr->hdr_len);
1672 types = malloc(btf->hdr->type_len);
1673 if (!hdr || !types)
1674 goto err_out;
1675
1676 memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
1677 memcpy(types, btf->types_data, btf->hdr->type_len);
1678
1679 /* build lookup index for all strings */
1680 set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len);
1681 if (IS_ERR(set)) {
1682 err = PTR_ERR(set);
1683 goto err_out;
1684 }
1685
1686 /* only when everything was successful, update internal state */
1687 btf->hdr = hdr;
1688 btf->types_data = types;
1689 btf->types_data_cap = btf->hdr->type_len;
1690 btf->strs_data = NULL;
1691 btf->strs_set = set;
1692 /* if BTF was created from scratch, all strings are guaranteed to be
1693 * unique and deduplicated
1694 */
1695 if (btf->hdr->str_len == 0)
1696 btf->strs_deduped = true;
1697 if (!btf->base_btf && btf->hdr->str_len == 1)
1698 btf->strs_deduped = true;
1699
1700 /* invalidate raw_data representation */
1701 btf_invalidate_raw_data(btf);
1702
1703 return 0;
1704
1705err_out:
1706 strset__free(set);
1707 free(hdr);
1708 free(types);
1709 return err;
1710}
1711
1712/* Find an offset in BTF string section that corresponds to a given string *s*.
1713 * Returns:
1714 * - >0 offset into string section, if string is found;
1715 * - -ENOENT, if string is not in the string section;
1716 * - <0, on any other error.
1717 */
1718int btf__find_str(struct btf *btf, const char *s)
1719{
1720 int off;
1721
1722 if (btf->base_btf) {
1723 off = btf__find_str(btf->base_btf, s);
1724 if (off != -ENOENT)
1725 return off;
1726 }
1727
1728 /* BTF needs to be in a modifiable state to build string lookup index */
1729 if (btf_ensure_modifiable(btf))
1730 return libbpf_err(-ENOMEM);
1731
1732 off = strset__find_str(btf->strs_set, s);
1733 if (off < 0)
1734 return libbpf_err(off);
1735
1736 return btf->start_str_off + off;
1737}
1738
1739/* Add a string s to the BTF string section.
1740 * Returns:
1741 * - > 0 offset into string section, on success;
1742 * - < 0, on error.
1743 */
1744int btf__add_str(struct btf *btf, const char *s)
1745{
1746 int off;
1747
1748 if (btf->base_btf) {
1749 off = btf__find_str(btf->base_btf, s);
1750 if (off != -ENOENT)
1751 return off;
1752 }
1753
1754 if (btf_ensure_modifiable(btf))
1755 return libbpf_err(-ENOMEM);
1756
1757 off = strset__add_str(btf->strs_set, s);
1758 if (off < 0)
1759 return libbpf_err(off);
1760
1761 btf->hdr->str_len = strset__data_size(btf->strs_set);
1762
1763 return btf->start_str_off + off;
1764}
1765
1766static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
1767{
1768 return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
1769 btf->hdr->type_len, UINT_MAX, add_sz);
1770}
1771
1772static void btf_type_inc_vlen(struct btf_type *t)
1773{
1774 t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
1775}
1776
1777static int btf_commit_type(struct btf *btf, int data_sz)
1778{
1779 int err;
1780
1781 err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
1782 if (err)
1783 return libbpf_err(err);
1784
1785 btf->hdr->type_len += data_sz;
1786 btf->hdr->str_off += data_sz;
1787 btf->nr_types++;
1788 return btf->start_id + btf->nr_types - 1;
1789}
1790
/* Context for copying type data from one BTF object into another. */
struct btf_pipe {
	const struct btf *src; /* BTF whose strings are looked up by offset */
	struct btf *dst; /* BTF that receives the copied types and strings */
	struct hashmap *str_off_map; /* map string offsets from src to dst */
};
1796
1797static int btf_rewrite_str(struct btf_pipe *p, __u32 *str_off)
1798{
1799 long mapped_off;
1800 int off, err;
1801
1802 if (!*str_off) /* nothing to do for empty strings */
1803 return 0;
1804
1805 if (p->str_off_map &&
1806 hashmap__find(p->str_off_map, *str_off, &mapped_off)) {
1807 *str_off = mapped_off;
1808 return 0;
1809 }
1810
1811 off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
1812 if (off < 0)
1813 return off;
1814
1815 /* Remember string mapping from src to dst. It avoids
1816 * performing expensive string comparisons.
1817 */
1818 if (p->str_off_map) {
1819 err = hashmap__append(p->str_off_map, *str_off, off);
1820 if (err)
1821 return err;
1822 }
1823
1824 *str_off = off;
1825 return 0;
1826}
1827
1828static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
1829{
1830 struct btf_field_iter it;
1831 struct btf_type *t;
1832 __u32 *str_off;
1833 int sz, err;
1834
1835 sz = btf_type_size(src_type);
1836 if (sz < 0)
1837 return libbpf_err(sz);
1838
1839 /* deconstruct BTF, if necessary, and invalidate raw_data */
1840 if (btf_ensure_modifiable(p->dst))
1841 return libbpf_err(-ENOMEM);
1842
1843 t = btf_add_type_mem(p->dst, sz);
1844 if (!t)
1845 return libbpf_err(-ENOMEM);
1846
1847 memcpy(t, src_type, sz);
1848
1849 err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS);
1850 if (err)
1851 return libbpf_err(err);
1852
1853 while ((str_off = btf_field_iter_next(&it))) {
1854 err = btf_rewrite_str(p, str_off);
1855 if (err)
1856 return libbpf_err(err);
1857 }
1858
1859 return btf_commit_type(p->dst, sz);
1860}
1861
1862int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
1863{
1864 struct btf_pipe p = { .src = src_btf, .dst = btf };
1865
1866 return btf_add_type(&p, src_type);
1867}
1868
1869static size_t btf_dedup_identity_hash_fn(long key, void *ctx);
1870static bool btf_dedup_equal_fn(long k1, long k2, void *ctx);
1871
1872int btf__add_btf(struct btf *btf, const struct btf *src_btf)
1873{
1874 struct btf_pipe p = { .src = src_btf, .dst = btf };
1875 int data_sz, sz, cnt, i, err, old_strs_len;
1876 __u32 *off;
1877 void *t;
1878
1879 /* appending split BTF isn't supported yet */
1880 if (src_btf->base_btf)
1881 return libbpf_err(-ENOTSUP);
1882
1883 /* deconstruct BTF, if necessary, and invalidate raw_data */
1884 if (btf_ensure_modifiable(btf))
1885 return libbpf_err(-ENOMEM);
1886
1887 /* remember original strings section size if we have to roll back
1888 * partial strings section changes
1889 */
1890 old_strs_len = btf->hdr->str_len;
1891
1892 data_sz = src_btf->hdr->type_len;
1893 cnt = btf__type_cnt(src_btf) - 1;
1894
1895 /* pre-allocate enough memory for new types */
1896 t = btf_add_type_mem(btf, data_sz);
1897 if (!t)
1898 return libbpf_err(-ENOMEM);
1899
1900 /* pre-allocate enough memory for type offset index for new types */
1901 off = btf_add_type_offs_mem(btf, cnt);
1902 if (!off)
1903 return libbpf_err(-ENOMEM);
1904
1905 /* Map the string offsets from src_btf to the offsets from btf to improve performance */
1906 p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
1907 if (IS_ERR(p.str_off_map))
1908 return libbpf_err(-ENOMEM);
1909
1910 /* bulk copy types data for all types from src_btf */
1911 memcpy(t, src_btf->types_data, data_sz);
1912
1913 for (i = 0; i < cnt; i++) {
1914 struct btf_field_iter it;
1915 __u32 *type_id, *str_off;
1916
1917 sz = btf_type_size(t);
1918 if (sz < 0) {
1919 /* unlikely, has to be corrupted src_btf */
1920 err = sz;
1921 goto err_out;
1922 }
1923
1924 /* fill out type ID to type offset mapping for lookups by type ID */
1925 *off = t - btf->types_data;
1926
1927 /* add, dedup, and remap strings referenced by this BTF type */
1928 err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS);
1929 if (err)
1930 goto err_out;
1931 while ((str_off = btf_field_iter_next(&it))) {
1932 err = btf_rewrite_str(&p, str_off);
1933 if (err)
1934 goto err_out;
1935 }
1936
1937 /* remap all type IDs referenced from this BTF type */
1938 err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
1939 if (err)
1940 goto err_out;
1941
1942 while ((type_id = btf_field_iter_next(&it))) {
1943 if (!*type_id) /* nothing to do for VOID references */
1944 continue;
1945
1946 /* we haven't updated btf's type count yet, so
1947 * btf->start_id + btf->nr_types - 1 is the type ID offset we should
1948 * add to all newly added BTF types
1949 */
1950 *type_id += btf->start_id + btf->nr_types - 1;
1951 }
1952
1953 /* go to next type data and type offset index entry */
1954 t += sz;
1955 off++;
1956 }
1957
1958 /* Up until now any of the copied type data was effectively invisible,
1959 * so if we exited early before this point due to error, BTF would be
1960 * effectively unmodified. There would be extra internal memory
1961 * pre-allocated, but it would not be available for querying. But now
1962 * that we've copied and rewritten all the data successfully, we can
1963 * update type count and various internal offsets and sizes to
1964 * "commit" the changes and made them visible to the outside world.
1965 */
1966 btf->hdr->type_len += data_sz;
1967 btf->hdr->str_off += data_sz;
1968 btf->nr_types += cnt;
1969
1970 hashmap__free(p.str_off_map);
1971
1972 /* return type ID of the first added BTF type */
1973 return btf->start_id + btf->nr_types - cnt;
1974err_out:
1975 /* zero out preallocated memory as if it was just allocated with
1976 * libbpf_add_mem()
1977 */
1978 memset(btf->types_data + btf->hdr->type_len, 0, data_sz);
1979 memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len);
1980
1981 /* and now restore original strings section size; types data size
1982 * wasn't modified, so doesn't need restoring, see big comment above
1983 */
1984 btf->hdr->str_len = old_strs_len;
1985
1986 hashmap__free(p.str_off_map);
1987
1988 return libbpf_err(err);
1989}
1990
1991/*
1992 * Append new BTF_KIND_INT type with:
1993 * - *name* - non-empty, non-NULL type name;
1994 * - *sz* - power-of-2 (1, 2, 4, ..) size of the type, in bytes;
1995 * - encoding is a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL.
1996 * Returns:
1997 * - >0, type ID of newly added BTF type;
1998 * - <0, on error.
1999 */
2000int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
2001{
2002 struct btf_type *t;
2003 int sz, name_off;
2004
2005 /* non-empty name */
2006 if (!name || !name[0])
2007 return libbpf_err(-EINVAL);
2008 /* byte_sz must be power of 2 */
2009 if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
2010 return libbpf_err(-EINVAL);
2011 if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
2012 return libbpf_err(-EINVAL);
2013
2014 /* deconstruct BTF, if necessary, and invalidate raw_data */
2015 if (btf_ensure_modifiable(btf))
2016 return libbpf_err(-ENOMEM);
2017
2018 sz = sizeof(struct btf_type) + sizeof(int);
2019 t = btf_add_type_mem(btf, sz);
2020 if (!t)
2021 return libbpf_err(-ENOMEM);
2022
2023 /* if something goes wrong later, we might end up with an extra string,
2024 * but that shouldn't be a problem, because BTF can't be constructed
2025 * completely anyway and will most probably be just discarded
2026 */
2027 name_off = btf__add_str(btf, name);
2028 if (name_off < 0)
2029 return name_off;
2030
2031 t->name_off = name_off;
2032 t->info = btf_type_info(BTF_KIND_INT, 0, 0);
2033 t->size = byte_sz;
2034 /* set INT info, we don't allow setting legacy bit offset/size */
2035 *(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
2036
2037 return btf_commit_type(btf, sz);
2038}
2039
2040/*
2041 * Append new BTF_KIND_FLOAT type with:
2042 * - *name* - non-empty, non-NULL type name;
2043 * - *sz* - size of the type, in bytes;
2044 * Returns:
2045 * - >0, type ID of newly added BTF type;
2046 * - <0, on error.
2047 */
2048int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
2049{
2050 struct btf_type *t;
2051 int sz, name_off;
2052
2053 /* non-empty name */
2054 if (!name || !name[0])
2055 return libbpf_err(-EINVAL);
2056
2057 /* byte_sz must be one of the explicitly allowed values */
2058 if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
2059 byte_sz != 16)
2060 return libbpf_err(-EINVAL);
2061
2062 if (btf_ensure_modifiable(btf))
2063 return libbpf_err(-ENOMEM);
2064
2065 sz = sizeof(struct btf_type);
2066 t = btf_add_type_mem(btf, sz);
2067 if (!t)
2068 return libbpf_err(-ENOMEM);
2069
2070 name_off = btf__add_str(btf, name);
2071 if (name_off < 0)
2072 return name_off;
2073
2074 t->name_off = name_off;
2075 t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
2076 t->size = byte_sz;
2077
2078 return btf_commit_type(btf, sz);
2079}
2080
2081/* it's completely legal to append BTF types with type IDs pointing forward to
2082 * types that haven't been appended yet, so we only make sure that id looks
2083 * sane, we can't guarantee that ID will always be valid
2084 */
2085static int validate_type_id(int id)
2086{
2087 if (id < 0 || id > BTF_MAX_NR_TYPES)
2088 return -EINVAL;
2089 return 0;
2090}
2091
2092/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
2093static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
2094{
2095 struct btf_type *t;
2096 int sz, name_off = 0;
2097
2098 if (validate_type_id(ref_type_id))
2099 return libbpf_err(-EINVAL);
2100
2101 if (btf_ensure_modifiable(btf))
2102 return libbpf_err(-ENOMEM);
2103
2104 sz = sizeof(struct btf_type);
2105 t = btf_add_type_mem(btf, sz);
2106 if (!t)
2107 return libbpf_err(-ENOMEM);
2108
2109 if (name && name[0]) {
2110 name_off = btf__add_str(btf, name);
2111 if (name_off < 0)
2112 return name_off;
2113 }
2114
2115 t->name_off = name_off;
2116 t->info = btf_type_info(kind, 0, 0);
2117 t->type = ref_type_id;
2118
2119 return btf_commit_type(btf, sz);
2120}
2121
2122/*
2123 * Append new BTF_KIND_PTR type with:
2124 * - *ref_type_id* - referenced type ID, it might not exist yet;
2125 * Returns:
2126 * - >0, type ID of newly added BTF type;
2127 * - <0, on error.
2128 */
2129int btf__add_ptr(struct btf *btf, int ref_type_id)
2130{
2131 return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
2132}
2133
2134/*
2135 * Append new BTF_KIND_ARRAY type with:
2136 * - *index_type_id* - type ID of the type describing array index;
2137 * - *elem_type_id* - type ID of the type describing array element;
2138 * - *nr_elems* - the size of the array;
2139 * Returns:
2140 * - >0, type ID of newly added BTF type;
2141 * - <0, on error.
2142 */
2143int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems)
2144{
2145 struct btf_type *t;
2146 struct btf_array *a;
2147 int sz;
2148
2149 if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
2150 return libbpf_err(-EINVAL);
2151
2152 if (btf_ensure_modifiable(btf))
2153 return libbpf_err(-ENOMEM);
2154
2155 sz = sizeof(struct btf_type) + sizeof(struct btf_array);
2156 t = btf_add_type_mem(btf, sz);
2157 if (!t)
2158 return libbpf_err(-ENOMEM);
2159
2160 t->name_off = 0;
2161 t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
2162 t->size = 0;
2163
2164 a = btf_array(t);
2165 a->type = elem_type_id;
2166 a->index_type = index_type_id;
2167 a->nelems = nr_elems;
2168
2169 return btf_commit_type(btf, sz);
2170}
2171
2172/* generic STRUCT/UNION append function */
2173static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
2174{
2175 struct btf_type *t;
2176 int sz, name_off = 0;
2177
2178 if (btf_ensure_modifiable(btf))
2179 return libbpf_err(-ENOMEM);
2180
2181 sz = sizeof(struct btf_type);
2182 t = btf_add_type_mem(btf, sz);
2183 if (!t)
2184 return libbpf_err(-ENOMEM);
2185
2186 if (name && name[0]) {
2187 name_off = btf__add_str(btf, name);
2188 if (name_off < 0)
2189 return name_off;
2190 }
2191
2192 /* start out with vlen=0 and no kflag; this will be adjusted when
2193 * adding each member
2194 */
2195 t->name_off = name_off;
2196 t->info = btf_type_info(kind, 0, 0);
2197 t->size = bytes_sz;
2198
2199 return btf_commit_type(btf, sz);
2200}
2201
2202/*
2203 * Append new BTF_KIND_STRUCT type with:
2204 * - *name* - name of the struct, can be NULL or empty for anonymous structs;
2205 * - *byte_sz* - size of the struct, in bytes;
2206 *
2207 * Struct initially has no fields in it. Fields can be added by
2208 * btf__add_field() right after btf__add_struct() succeeds.
2209 *
2210 * Returns:
2211 * - >0, type ID of newly added BTF type;
2212 * - <0, on error.
2213 */
2214int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz)
2215{
2216 return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz);
2217}
2218
2219/*
2220 * Append new BTF_KIND_UNION type with:
2221 * - *name* - name of the union, can be NULL or empty for anonymous union;
2222 * - *byte_sz* - size of the union, in bytes;
2223 *
2224 * Union initially has no fields in it. Fields can be added by
2225 * btf__add_field() right after btf__add_union() succeeds. All fields
2226 * should have *bit_offset* of 0.
2227 *
2228 * Returns:
2229 * - >0, type ID of newly added BTF type;
2230 * - <0, on error.
2231 */
2232int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
2233{
2234 return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
2235}
2236
2237static struct btf_type *btf_last_type(struct btf *btf)
2238{
2239 return btf_type_by_id(btf, btf__type_cnt(btf) - 1);
2240}
2241
2242/*
2243 * Append new field for the current STRUCT/UNION type with:
2244 * - *name* - name of the field, can be NULL or empty for anonymous field;
2245 * - *type_id* - type ID for the type describing field type;
2246 * - *bit_offset* - bit offset of the start of the field within struct/union;
2247 * - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields;
2248 * Returns:
2249 * - 0, on success;
2250 * - <0, on error.
2251 */
2252int btf__add_field(struct btf *btf, const char *name, int type_id,
2253 __u32 bit_offset, __u32 bit_size)
2254{
2255 struct btf_type *t;
2256 struct btf_member *m;
2257 bool is_bitfield;
2258 int sz, name_off = 0;
2259
2260 /* last type should be union/struct */
2261 if (btf->nr_types == 0)
2262 return libbpf_err(-EINVAL);
2263 t = btf_last_type(btf);
2264 if (!btf_is_composite(t))
2265 return libbpf_err(-EINVAL);
2266
2267 if (validate_type_id(type_id))
2268 return libbpf_err(-EINVAL);
2269 /* best-effort bit field offset/size enforcement */
2270 is_bitfield = bit_size || (bit_offset % 8 != 0);
2271 if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
2272 return libbpf_err(-EINVAL);
2273
2274 /* only offset 0 is allowed for unions */
2275 if (btf_is_union(t) && bit_offset)
2276 return libbpf_err(-EINVAL);
2277
2278 /* decompose and invalidate raw data */
2279 if (btf_ensure_modifiable(btf))
2280 return libbpf_err(-ENOMEM);
2281
2282 sz = sizeof(struct btf_member);
2283 m = btf_add_type_mem(btf, sz);
2284 if (!m)
2285 return libbpf_err(-ENOMEM);
2286
2287 if (name && name[0]) {
2288 name_off = btf__add_str(btf, name);
2289 if (name_off < 0)
2290 return name_off;
2291 }
2292
2293 m->name_off = name_off;
2294 m->type = type_id;
2295 m->offset = bit_offset | (bit_size << 24);
2296
2297 /* btf_add_type_mem can invalidate t pointer */
2298 t = btf_last_type(btf);
2299 /* update parent type's vlen and kflag */
2300 t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
2301
2302 btf->hdr->type_len += sz;
2303 btf->hdr->str_off += sz;
2304 return 0;
2305}
2306
2307static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz,
2308 bool is_signed, __u8 kind)
2309{
2310 struct btf_type *t;
2311 int sz, name_off = 0;
2312
2313 /* byte_sz must be power of 2 */
2314 if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
2315 return libbpf_err(-EINVAL);
2316
2317 if (btf_ensure_modifiable(btf))
2318 return libbpf_err(-ENOMEM);
2319
2320 sz = sizeof(struct btf_type);
2321 t = btf_add_type_mem(btf, sz);
2322 if (!t)
2323 return libbpf_err(-ENOMEM);
2324
2325 if (name && name[0]) {
2326 name_off = btf__add_str(btf, name);
2327 if (name_off < 0)
2328 return name_off;
2329 }
2330
2331 /* start out with vlen=0; it will be adjusted when adding enum values */
2332 t->name_off = name_off;
2333 t->info = btf_type_info(kind, 0, is_signed);
2334 t->size = byte_sz;
2335
2336 return btf_commit_type(btf, sz);
2337}
2338
2339/*
2340 * Append new BTF_KIND_ENUM type with:
2341 * - *name* - name of the enum, can be NULL or empty for anonymous enums;
2342 * - *byte_sz* - size of the enum, in bytes.
2343 *
2344 * Enum initially has no enum values in it (and corresponds to enum forward
2345 * declaration). Enumerator values can be added by btf__add_enum_value()
2346 * immediately after btf__add_enum() succeeds.
2347 *
2348 * Returns:
2349 * - >0, type ID of newly added BTF type;
2350 * - <0, on error.
2351 */
2352int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
2353{
2354 /*
2355 * set the signedness to be unsigned, it will change to signed
2356 * if any later enumerator is negative.
2357 */
2358 return btf_add_enum_common(btf, name, byte_sz, false, BTF_KIND_ENUM);
2359}
2360
2361/*
2362 * Append new enum value for the current ENUM type with:
2363 * - *name* - name of the enumerator value, can't be NULL or empty;
2364 * - *value* - integer value corresponding to enum value *name*;
2365 * Returns:
2366 * - 0, on success;
2367 * - <0, on error.
2368 */
2369int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
2370{
2371 struct btf_type *t;
2372 struct btf_enum *v;
2373 int sz, name_off;
2374
2375 /* last type should be BTF_KIND_ENUM */
2376 if (btf->nr_types == 0)
2377 return libbpf_err(-EINVAL);
2378 t = btf_last_type(btf);
2379 if (!btf_is_enum(t))
2380 return libbpf_err(-EINVAL);
2381
2382 /* non-empty name */
2383 if (!name || !name[0])
2384 return libbpf_err(-EINVAL);
2385 if (value < INT_MIN || value > UINT_MAX)
2386 return libbpf_err(-E2BIG);
2387
2388 /* decompose and invalidate raw data */
2389 if (btf_ensure_modifiable(btf))
2390 return libbpf_err(-ENOMEM);
2391
2392 sz = sizeof(struct btf_enum);
2393 v = btf_add_type_mem(btf, sz);
2394 if (!v)
2395 return libbpf_err(-ENOMEM);
2396
2397 name_off = btf__add_str(btf, name);
2398 if (name_off < 0)
2399 return name_off;
2400
2401 v->name_off = name_off;
2402 v->val = value;
2403
2404 /* update parent type's vlen */
2405 t = btf_last_type(btf);
2406 btf_type_inc_vlen(t);
2407
2408 /* if negative value, set signedness to signed */
2409 if (value < 0)
2410 t->info = btf_type_info(btf_kind(t), btf_vlen(t), true);
2411
2412 btf->hdr->type_len += sz;
2413 btf->hdr->str_off += sz;
2414 return 0;
2415}
2416
2417/*
2418 * Append new BTF_KIND_ENUM64 type with:
2419 * - *name* - name of the enum, can be NULL or empty for anonymous enums;
2420 * - *byte_sz* - size of the enum, in bytes.
2421 * - *is_signed* - whether the enum values are signed or not;
2422 *
2423 * Enum initially has no enum values in it (and corresponds to enum forward
2424 * declaration). Enumerator values can be added by btf__add_enum64_value()
2425 * immediately after btf__add_enum64() succeeds.
2426 *
2427 * Returns:
2428 * - >0, type ID of newly added BTF type;
2429 * - <0, on error.
2430 */
2431int btf__add_enum64(struct btf *btf, const char *name, __u32 byte_sz,
2432 bool is_signed)
2433{
2434 return btf_add_enum_common(btf, name, byte_sz, is_signed,
2435 BTF_KIND_ENUM64);
2436}
2437
2438/*
2439 * Append new enum value for the current ENUM64 type with:
2440 * - *name* - name of the enumerator value, can't be NULL or empty;
2441 * - *value* - integer value corresponding to enum value *name*;
2442 * Returns:
2443 * - 0, on success;
2444 * - <0, on error.
2445 */
2446int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
2447{
2448 struct btf_enum64 *v;
2449 struct btf_type *t;
2450 int sz, name_off;
2451
2452 /* last type should be BTF_KIND_ENUM64 */
2453 if (btf->nr_types == 0)
2454 return libbpf_err(-EINVAL);
2455 t = btf_last_type(btf);
2456 if (!btf_is_enum64(t))
2457 return libbpf_err(-EINVAL);
2458
2459 /* non-empty name */
2460 if (!name || !name[0])
2461 return libbpf_err(-EINVAL);
2462
2463 /* decompose and invalidate raw data */
2464 if (btf_ensure_modifiable(btf))
2465 return libbpf_err(-ENOMEM);
2466
2467 sz = sizeof(struct btf_enum64);
2468 v = btf_add_type_mem(btf, sz);
2469 if (!v)
2470 return libbpf_err(-ENOMEM);
2471
2472 name_off = btf__add_str(btf, name);
2473 if (name_off < 0)
2474 return name_off;
2475
2476 v->name_off = name_off;
2477 v->val_lo32 = (__u32)value;
2478 v->val_hi32 = value >> 32;
2479
2480 /* update parent type's vlen */
2481 t = btf_last_type(btf);
2482 btf_type_inc_vlen(t);
2483
2484 btf->hdr->type_len += sz;
2485 btf->hdr->str_off += sz;
2486 return 0;
2487}
2488
2489/*
2490 * Append new BTF_KIND_FWD type with:
2491 * - *name*, non-empty/non-NULL name;
2492 * - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT,
2493 * BTF_FWD_UNION, or BTF_FWD_ENUM;
2494 * Returns:
2495 * - >0, type ID of newly added BTF type;
2496 * - <0, on error.
2497 */
2498int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
2499{
2500 if (!name || !name[0])
2501 return libbpf_err(-EINVAL);
2502
2503 switch (fwd_kind) {
2504 case BTF_FWD_STRUCT:
2505 case BTF_FWD_UNION: {
2506 struct btf_type *t;
2507 int id;
2508
2509 id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
2510 if (id <= 0)
2511 return id;
2512 t = btf_type_by_id(btf, id);
2513 t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION);
2514 return id;
2515 }
2516 case BTF_FWD_ENUM:
2517 /* enum forward in BTF currently is just an enum with no enum
2518 * values; we also assume a standard 4-byte size for it
2519 */
2520 return btf__add_enum(btf, name, sizeof(int));
2521 default:
2522 return libbpf_err(-EINVAL);
2523 }
2524}
2525
2526/*
2527 * Append new BTF_KING_TYPEDEF type with:
2528 * - *name*, non-empty/non-NULL name;
2529 * - *ref_type_id* - referenced type ID, it might not exist yet;
2530 * Returns:
2531 * - >0, type ID of newly added BTF type;
2532 * - <0, on error.
2533 */
2534int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
2535{
2536 if (!name || !name[0])
2537 return libbpf_err(-EINVAL);
2538
2539 return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
2540}
2541
2542/*
2543 * Append new BTF_KIND_VOLATILE type with:
2544 * - *ref_type_id* - referenced type ID, it might not exist yet;
2545 * Returns:
2546 * - >0, type ID of newly added BTF type;
2547 * - <0, on error.
2548 */
2549int btf__add_volatile(struct btf *btf, int ref_type_id)
2550{
2551 return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
2552}
2553
2554/*
2555 * Append new BTF_KIND_CONST type with:
2556 * - *ref_type_id* - referenced type ID, it might not exist yet;
2557 * Returns:
2558 * - >0, type ID of newly added BTF type;
2559 * - <0, on error.
2560 */
2561int btf__add_const(struct btf *btf, int ref_type_id)
2562{
2563 return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
2564}
2565
2566/*
2567 * Append new BTF_KIND_RESTRICT type with:
2568 * - *ref_type_id* - referenced type ID, it might not exist yet;
2569 * Returns:
2570 * - >0, type ID of newly added BTF type;
2571 * - <0, on error.
2572 */
2573int btf__add_restrict(struct btf *btf, int ref_type_id)
2574{
2575 return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
2576}
2577
2578/*
2579 * Append new BTF_KIND_TYPE_TAG type with:
2580 * - *value*, non-empty/non-NULL tag value;
2581 * - *ref_type_id* - referenced type ID, it might not exist yet;
2582 * Returns:
2583 * - >0, type ID of newly added BTF type;
2584 * - <0, on error.
2585 */
2586int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
2587{
2588 if (!value || !value[0])
2589 return libbpf_err(-EINVAL);
2590
2591 return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
2592}
2593
2594/*
2595 * Append new BTF_KIND_FUNC type with:
2596 * - *name*, non-empty/non-NULL name;
2597 * - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet;
2598 * Returns:
2599 * - >0, type ID of newly added BTF type;
2600 * - <0, on error.
2601 */
2602int btf__add_func(struct btf *btf, const char *name,
2603 enum btf_func_linkage linkage, int proto_type_id)
2604{
2605 int id;
2606
2607 if (!name || !name[0])
2608 return libbpf_err(-EINVAL);
2609 if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
2610 linkage != BTF_FUNC_EXTERN)
2611 return libbpf_err(-EINVAL);
2612
2613 id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
2614 if (id > 0) {
2615 struct btf_type *t = btf_type_by_id(btf, id);
2616
2617 t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
2618 }
2619 return libbpf_err(id);
2620}
2621
2622/*
2623 * Append new BTF_KIND_FUNC_PROTO with:
2624 * - *ret_type_id* - type ID for return result of a function.
2625 *
2626 * Function prototype initially has no arguments, but they can be added by
2627 * btf__add_func_param() one by one, immediately after
2628 * btf__add_func_proto() succeeded.
2629 *
2630 * Returns:
2631 * - >0, type ID of newly added BTF type;
2632 * - <0, on error.
2633 */
2634int btf__add_func_proto(struct btf *btf, int ret_type_id)
2635{
2636 struct btf_type *t;
2637 int sz;
2638
2639 if (validate_type_id(ret_type_id))
2640 return libbpf_err(-EINVAL);
2641
2642 if (btf_ensure_modifiable(btf))
2643 return libbpf_err(-ENOMEM);
2644
2645 sz = sizeof(struct btf_type);
2646 t = btf_add_type_mem(btf, sz);
2647 if (!t)
2648 return libbpf_err(-ENOMEM);
2649
2650 /* start out with vlen=0; this will be adjusted when adding enum
2651 * values, if necessary
2652 */
2653 t->name_off = 0;
2654 t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
2655 t->type = ret_type_id;
2656
2657 return btf_commit_type(btf, sz);
2658}
2659
2660/*
2661 * Append new function parameter for current FUNC_PROTO type with:
2662 * - *name* - parameter name, can be NULL or empty;
2663 * - *type_id* - type ID describing the type of the parameter.
2664 * Returns:
2665 * - 0, on success;
2666 * - <0, on error.
2667 */
2668int btf__add_func_param(struct btf *btf, const char *name, int type_id)
2669{
2670 struct btf_type *t;
2671 struct btf_param *p;
2672 int sz, name_off = 0;
2673
2674 if (validate_type_id(type_id))
2675 return libbpf_err(-EINVAL);
2676
2677 /* last type should be BTF_KIND_FUNC_PROTO */
2678 if (btf->nr_types == 0)
2679 return libbpf_err(-EINVAL);
2680 t = btf_last_type(btf);
2681 if (!btf_is_func_proto(t))
2682 return libbpf_err(-EINVAL);
2683
2684 /* decompose and invalidate raw data */
2685 if (btf_ensure_modifiable(btf))
2686 return libbpf_err(-ENOMEM);
2687
2688 sz = sizeof(struct btf_param);
2689 p = btf_add_type_mem(btf, sz);
2690 if (!p)
2691 return libbpf_err(-ENOMEM);
2692
2693 if (name && name[0]) {
2694 name_off = btf__add_str(btf, name);
2695 if (name_off < 0)
2696 return name_off;
2697 }
2698
2699 p->name_off = name_off;
2700 p->type = type_id;
2701
2702 /* update parent type's vlen */
2703 t = btf_last_type(btf);
2704 btf_type_inc_vlen(t);
2705
2706 btf->hdr->type_len += sz;
2707 btf->hdr->str_off += sz;
2708 return 0;
2709}
2710
2711/*
2712 * Append new BTF_KIND_VAR type with:
2713 * - *name* - non-empty/non-NULL name;
2714 * - *linkage* - variable linkage, one of BTF_VAR_STATIC,
2715 * BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN;
2716 * - *type_id* - type ID of the type describing the type of the variable.
2717 * Returns:
2718 * - >0, type ID of newly added BTF type;
2719 * - <0, on error.
2720 */
2721int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
2722{
2723 struct btf_type *t;
2724 struct btf_var *v;
2725 int sz, name_off;
2726
2727 /* non-empty name */
2728 if (!name || !name[0])
2729 return libbpf_err(-EINVAL);
2730 if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
2731 linkage != BTF_VAR_GLOBAL_EXTERN)
2732 return libbpf_err(-EINVAL);
2733 if (validate_type_id(type_id))
2734 return libbpf_err(-EINVAL);
2735
2736 /* deconstruct BTF, if necessary, and invalidate raw_data */
2737 if (btf_ensure_modifiable(btf))
2738 return libbpf_err(-ENOMEM);
2739
2740 sz = sizeof(struct btf_type) + sizeof(struct btf_var);
2741 t = btf_add_type_mem(btf, sz);
2742 if (!t)
2743 return libbpf_err(-ENOMEM);
2744
2745 name_off = btf__add_str(btf, name);
2746 if (name_off < 0)
2747 return name_off;
2748
2749 t->name_off = name_off;
2750 t->info = btf_type_info(BTF_KIND_VAR, 0, 0);
2751 t->type = type_id;
2752
2753 v = btf_var(t);
2754 v->linkage = linkage;
2755
2756 return btf_commit_type(btf, sz);
2757}
2758
2759/*
2760 * Append new BTF_KIND_DATASEC type with:
2761 * - *name* - non-empty/non-NULL name;
2762 * - *byte_sz* - data section size, in bytes.
2763 *
2764 * Data section is initially empty. Variables info can be added with
2765 * btf__add_datasec_var_info() calls, after btf__add_datasec() succeeds.
2766 *
2767 * Returns:
2768 * - >0, type ID of newly added BTF type;
2769 * - <0, on error.
2770 */
2771int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
2772{
2773 struct btf_type *t;
2774 int sz, name_off;
2775
2776 /* non-empty name */
2777 if (!name || !name[0])
2778 return libbpf_err(-EINVAL);
2779
2780 if (btf_ensure_modifiable(btf))
2781 return libbpf_err(-ENOMEM);
2782
2783 sz = sizeof(struct btf_type);
2784 t = btf_add_type_mem(btf, sz);
2785 if (!t)
2786 return libbpf_err(-ENOMEM);
2787
2788 name_off = btf__add_str(btf, name);
2789 if (name_off < 0)
2790 return name_off;
2791
2792 /* start with vlen=0, which will be update as var_secinfos are added */
2793 t->name_off = name_off;
2794 t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
2795 t->size = byte_sz;
2796
2797 return btf_commit_type(btf, sz);
2798}
2799
2800/*
2801 * Append new data section variable information entry for current DATASEC type:
2802 * - *var_type_id* - type ID, describing type of the variable;
2803 * - *offset* - variable offset within data section, in bytes;
2804 * - *byte_sz* - variable size, in bytes.
2805 *
2806 * Returns:
2807 * - 0, on success;
2808 * - <0, on error.
2809 */
2810int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz)
2811{
2812 struct btf_type *t;
2813 struct btf_var_secinfo *v;
2814 int sz;
2815
2816 /* last type should be BTF_KIND_DATASEC */
2817 if (btf->nr_types == 0)
2818 return libbpf_err(-EINVAL);
2819 t = btf_last_type(btf);
2820 if (!btf_is_datasec(t))
2821 return libbpf_err(-EINVAL);
2822
2823 if (validate_type_id(var_type_id))
2824 return libbpf_err(-EINVAL);
2825
2826 /* decompose and invalidate raw data */
2827 if (btf_ensure_modifiable(btf))
2828 return libbpf_err(-ENOMEM);
2829
2830 sz = sizeof(struct btf_var_secinfo);
2831 v = btf_add_type_mem(btf, sz);
2832 if (!v)
2833 return libbpf_err(-ENOMEM);
2834
2835 v->type = var_type_id;
2836 v->offset = offset;
2837 v->size = byte_sz;
2838
2839 /* update parent type's vlen */
2840 t = btf_last_type(btf);
2841 btf_type_inc_vlen(t);
2842
2843 btf->hdr->type_len += sz;
2844 btf->hdr->str_off += sz;
2845 return 0;
2846}
2847
2848/*
2849 * Append new BTF_KIND_DECL_TAG type with:
2850 * - *value* - non-empty/non-NULL string;
2851 * - *ref_type_id* - referenced type ID, it might not exist yet;
2852 * - *component_idx* - -1 for tagging reference type, otherwise struct/union
2853 * member or function argument index;
2854 * Returns:
2855 * - >0, type ID of newly added BTF type;
2856 * - <0, on error.
2857 */
2858int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
2859 int component_idx)
2860{
2861 struct btf_type *t;
2862 int sz, value_off;
2863
2864 if (!value || !value[0] || component_idx < -1)
2865 return libbpf_err(-EINVAL);
2866
2867 if (validate_type_id(ref_type_id))
2868 return libbpf_err(-EINVAL);
2869
2870 if (btf_ensure_modifiable(btf))
2871 return libbpf_err(-ENOMEM);
2872
2873 sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag);
2874 t = btf_add_type_mem(btf, sz);
2875 if (!t)
2876 return libbpf_err(-ENOMEM);
2877
2878 value_off = btf__add_str(btf, value);
2879 if (value_off < 0)
2880 return value_off;
2881
2882 t->name_off = value_off;
2883 t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false);
2884 t->type = ref_type_id;
2885 btf_decl_tag(t)->component_idx = component_idx;
2886
2887 return btf_commit_type(btf, sz);
2888}
2889
2890struct btf_ext_sec_info_param {
2891 __u32 off;
2892 __u32 len;
2893 __u32 min_rec_size;
2894 struct btf_ext_info *ext_info;
2895 const char *desc;
2896};
2897
2898/*
2899 * Parse a single info subsection of the BTF.ext info data:
2900 * - validate subsection structure and elements
2901 * - save info subsection start and sizing details in struct btf_ext
2902 * - endian-independent operation, for calling before byte-swapping
2903 */
2904static int btf_ext_parse_sec_info(struct btf_ext *btf_ext,
2905 struct btf_ext_sec_info_param *ext_sec,
2906 bool is_native)
2907{
2908 const struct btf_ext_info_sec *sinfo;
2909 struct btf_ext_info *ext_info;
2910 __u32 info_left, record_size;
2911 size_t sec_cnt = 0;
2912 void *info;
2913
2914 if (ext_sec->len == 0)
2915 return 0;
2916
2917 if (ext_sec->off & 0x03) {
2918 pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
2919 ext_sec->desc);
2920 return -EINVAL;
2921 }
2922
2923 /* The start of the info sec (including the __u32 record_size). */
2924 info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
2925 info_left = ext_sec->len;
2926
2927 if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
2928 pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
2929 ext_sec->desc, ext_sec->off, ext_sec->len);
2930 return -EINVAL;
2931 }
2932
2933 /* At least a record size */
2934 if (info_left < sizeof(__u32)) {
2935 pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
2936 return -EINVAL;
2937 }
2938
2939 /* The record size needs to meet either the minimum standard or, when
2940 * handling non-native endianness data, the exact standard so as
2941 * to allow safe byte-swapping.
2942 */
2943 record_size = is_native ? *(__u32 *)info : bswap_32(*(__u32 *)info);
2944 if (record_size < ext_sec->min_rec_size ||
2945 (!is_native && record_size != ext_sec->min_rec_size) ||
2946 record_size & 0x03) {
2947 pr_debug("%s section in .BTF.ext has invalid record size %u\n",
2948 ext_sec->desc, record_size);
2949 return -EINVAL;
2950 }
2951
2952 sinfo = info + sizeof(__u32);
2953 info_left -= sizeof(__u32);
2954
2955 /* If no records, return failure now so .BTF.ext won't be used. */
2956 if (!info_left) {
2957 pr_debug("%s section in .BTF.ext has no records\n", ext_sec->desc);
2958 return -EINVAL;
2959 }
2960
2961 while (info_left) {
2962 unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
2963 __u64 total_record_size;
2964 __u32 num_records;
2965
2966 if (info_left < sec_hdrlen) {
2967 pr_debug("%s section header is not found in .BTF.ext\n",
2968 ext_sec->desc);
2969 return -EINVAL;
2970 }
2971
2972 num_records = is_native ? sinfo->num_info : bswap_32(sinfo->num_info);
2973 if (num_records == 0) {
2974 pr_debug("%s section has incorrect num_records in .BTF.ext\n",
2975 ext_sec->desc);
2976 return -EINVAL;
2977 }
2978
2979 total_record_size = sec_hdrlen + (__u64)num_records * record_size;
2980 if (info_left < total_record_size) {
2981 pr_debug("%s section has incorrect num_records in .BTF.ext\n",
2982 ext_sec->desc);
2983 return -EINVAL;
2984 }
2985
2986 info_left -= total_record_size;
2987 sinfo = (void *)sinfo + total_record_size;
2988 sec_cnt++;
2989 }
2990
2991 ext_info = ext_sec->ext_info;
2992 ext_info->len = ext_sec->len - sizeof(__u32);
2993 ext_info->rec_size = record_size;
2994 ext_info->info = info + sizeof(__u32);
2995 ext_info->sec_cnt = sec_cnt;
2996
2997 return 0;
2998}
2999
3000/* Parse all info secs in the BTF.ext info data */
3001static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native)
3002{
3003 struct btf_ext_sec_info_param func_info = {
3004 .off = btf_ext->hdr->func_info_off,
3005 .len = btf_ext->hdr->func_info_len,
3006 .min_rec_size = sizeof(struct bpf_func_info_min),
3007 .ext_info = &btf_ext->func_info,
3008 .desc = "func_info"
3009 };
3010 struct btf_ext_sec_info_param line_info = {
3011 .off = btf_ext->hdr->line_info_off,
3012 .len = btf_ext->hdr->line_info_len,
3013 .min_rec_size = sizeof(struct bpf_line_info_min),
3014 .ext_info = &btf_ext->line_info,
3015 .desc = "line_info",
3016 };
3017 struct btf_ext_sec_info_param core_relo = {
3018 .off = btf_ext->hdr->core_relo_off,
3019 .len = btf_ext->hdr->core_relo_len,
3020 .min_rec_size = sizeof(struct bpf_core_relo),
3021 .ext_info = &btf_ext->core_relo_info,
3022 .desc = "core_relo",
3023 };
3024 int err;
3025
3026 err = btf_ext_parse_sec_info(btf_ext, &func_info, is_native);
3027 if (err)
3028 return err;
3029
3030 err = btf_ext_parse_sec_info(btf_ext, &line_info, is_native);
3031 if (err)
3032 return err;
3033
3034 if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
3035 return 0; /* skip core relos parsing */
3036
3037 err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native);
3038 if (err)
3039 return err;
3040
3041 return 0;
3042}
3043
3044/* Swap byte-order of BTF.ext header with any endianness */
3045static void btf_ext_bswap_hdr(struct btf_ext_header *h)
3046{
3047 bool is_native = h->magic == BTF_MAGIC;
3048 __u32 hdr_len;
3049
3050 hdr_len = is_native ? h->hdr_len : bswap_32(h->hdr_len);
3051
3052 h->magic = bswap_16(h->magic);
3053 h->hdr_len = bswap_32(h->hdr_len);
3054 h->func_info_off = bswap_32(h->func_info_off);
3055 h->func_info_len = bswap_32(h->func_info_len);
3056 h->line_info_off = bswap_32(h->line_info_off);
3057 h->line_info_len = bswap_32(h->line_info_len);
3058
3059 if (hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
3060 return;
3061
3062 h->core_relo_off = bswap_32(h->core_relo_off);
3063 h->core_relo_len = bswap_32(h->core_relo_len);
3064}
3065
3066/* Swap byte-order of generic info subsection */
3067static void btf_ext_bswap_info_sec(void *info, __u32 len, bool is_native,
3068 info_rec_bswap_fn bswap_fn)
3069{
3070 struct btf_ext_info_sec *sec;
3071 __u32 info_left, rec_size, *rs;
3072
3073 if (len == 0)
3074 return;
3075
3076 rs = info; /* info record size */
3077 rec_size = is_native ? *rs : bswap_32(*rs);
3078 *rs = bswap_32(*rs);
3079
3080 sec = info + sizeof(__u32); /* info sec #1 */
3081 info_left = len - sizeof(__u32);
3082 while (info_left) {
3083 unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
3084 __u32 i, num_recs;
3085 void *p;
3086
3087 num_recs = is_native ? sec->num_info : bswap_32(sec->num_info);
3088 sec->sec_name_off = bswap_32(sec->sec_name_off);
3089 sec->num_info = bswap_32(sec->num_info);
3090 p = sec->data; /* info rec #1 */
3091 for (i = 0; i < num_recs; i++, p += rec_size)
3092 bswap_fn(p);
3093 sec = p;
3094 info_left -= sec_hdrlen + (__u64)rec_size * num_recs;
3095 }
3096}
3097
3098/*
3099 * Swap byte-order of all info data in a BTF.ext section
3100 * - requires BTF.ext hdr in native endianness
3101 */
3102static void btf_ext_bswap_info(struct btf_ext *btf_ext, void *data)
3103{
3104 const bool is_native = btf_ext->swapped_endian;
3105 const struct btf_ext_header *h = data;
3106 void *info;
3107
3108 /* Swap func_info subsection byte-order */
3109 info = data + h->hdr_len + h->func_info_off;
3110 btf_ext_bswap_info_sec(info, h->func_info_len, is_native,
3111 (info_rec_bswap_fn)bpf_func_info_bswap);
3112
3113 /* Swap line_info subsection byte-order */
3114 info = data + h->hdr_len + h->line_info_off;
3115 btf_ext_bswap_info_sec(info, h->line_info_len, is_native,
3116 (info_rec_bswap_fn)bpf_line_info_bswap);
3117
3118 /* Swap core_relo subsection byte-order (if present) */
3119 if (h->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
3120 return;
3121
3122 info = data + h->hdr_len + h->core_relo_off;
3123 btf_ext_bswap_info_sec(info, h->core_relo_len, is_native,
3124 (info_rec_bswap_fn)bpf_core_relo_bswap);
3125}
3126
3127/* Parse hdr data and info sections: check and convert to native endianness */
3128static int btf_ext_parse(struct btf_ext *btf_ext)
3129{
3130 __u32 hdr_len, data_size = btf_ext->data_size;
3131 struct btf_ext_header *hdr = btf_ext->hdr;
3132 bool swapped_endian = false;
3133 int err;
3134
3135 if (data_size < offsetofend(struct btf_ext_header, hdr_len)) {
3136 pr_debug("BTF.ext header too short\n");
3137 return -EINVAL;
3138 }
3139
3140 hdr_len = hdr->hdr_len;
3141 if (hdr->magic == bswap_16(BTF_MAGIC)) {
3142 swapped_endian = true;
3143 hdr_len = bswap_32(hdr_len);
3144 } else if (hdr->magic != BTF_MAGIC) {
3145 pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
3146 return -EINVAL;
3147 }
3148
3149 /* Ensure known version of structs, current BTF_VERSION == 1 */
3150 if (hdr->version != 1) {
3151 pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
3152 return -ENOTSUP;
3153 }
3154
3155 if (hdr->flags) {
3156 pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
3157 return -ENOTSUP;
3158 }
3159
3160 if (data_size < hdr_len) {
3161 pr_debug("BTF.ext header not found\n");
3162 return -EINVAL;
3163 } else if (data_size == hdr_len) {
3164 pr_debug("BTF.ext has no data\n");
3165 return -EINVAL;
3166 }
3167
3168 /* Verify mandatory hdr info details present */
3169 if (hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
3170 pr_warn("BTF.ext header missing func_info, line_info\n");
3171 return -EINVAL;
3172 }
3173
3174 /* Keep hdr native byte-order in memory for introspection */
3175 if (swapped_endian)
3176 btf_ext_bswap_hdr(btf_ext->hdr);
3177
3178 /* Validate info subsections and cache key metadata */
3179 err = btf_ext_parse_info(btf_ext, !swapped_endian);
3180 if (err)
3181 return err;
3182
3183 /* Keep infos native byte-order in memory for introspection */
3184 if (swapped_endian)
3185 btf_ext_bswap_info(btf_ext, btf_ext->data);
3186
3187 /*
3188 * Set btf_ext->swapped_endian only after all header and info data has
3189 * been swapped, helping bswap functions determine if their data are
3190 * in native byte-order when called.
3191 */
3192 btf_ext->swapped_endian = swapped_endian;
3193 return 0;
3194}
3195
3196void btf_ext__free(struct btf_ext *btf_ext)
3197{
3198 if (IS_ERR_OR_NULL(btf_ext))
3199 return;
3200 free(btf_ext->func_info.sec_idxs);
3201 free(btf_ext->line_info.sec_idxs);
3202 free(btf_ext->core_relo_info.sec_idxs);
3203 free(btf_ext->data);
3204 free(btf_ext->data_swapped);
3205 free(btf_ext);
3206}
3207
3208struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
3209{
3210 struct btf_ext *btf_ext;
3211 int err;
3212
3213 btf_ext = calloc(1, sizeof(struct btf_ext));
3214 if (!btf_ext)
3215 return libbpf_err_ptr(-ENOMEM);
3216
3217 btf_ext->data_size = size;
3218 btf_ext->data = malloc(size);
3219 if (!btf_ext->data) {
3220 err = -ENOMEM;
3221 goto done;
3222 }
3223 memcpy(btf_ext->data, data, size);
3224
3225 err = btf_ext_parse(btf_ext);
3226
3227done:
3228 if (err) {
3229 btf_ext__free(btf_ext);
3230 return libbpf_err_ptr(err);
3231 }
3232
3233 return btf_ext;
3234}
3235
3236static void *btf_ext_raw_data(const struct btf_ext *btf_ext_ro, bool swap_endian)
3237{
3238 struct btf_ext *btf_ext = (struct btf_ext *)btf_ext_ro;
3239 const __u32 data_sz = btf_ext->data_size;
3240 void *data;
3241
3242 /* Return native data (always present) or swapped data if present */
3243 if (!swap_endian)
3244 return btf_ext->data;
3245 else if (btf_ext->data_swapped)
3246 return btf_ext->data_swapped;
3247
3248 /* Recreate missing swapped data, then cache and return */
3249 data = calloc(1, data_sz);
3250 if (!data)
3251 return NULL;
3252 memcpy(data, btf_ext->data, data_sz);
3253
3254 btf_ext_bswap_info(btf_ext, data);
3255 btf_ext_bswap_hdr(data);
3256 btf_ext->data_swapped = data;
3257 return data;
3258}
3259
3260const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size)
3261{
3262 void *data;
3263
3264 data = btf_ext_raw_data(btf_ext, btf_ext->swapped_endian);
3265 if (!data)
3266 return errno = ENOMEM, NULL;
3267
3268 *size = btf_ext->data_size;
3269 return data;
3270}
3271
3272__attribute__((alias("btf_ext__raw_data")))
3273const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size);
3274
3275enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext)
3276{
3277 if (is_host_big_endian())
3278 return btf_ext->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN;
3279 else
3280 return btf_ext->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN;
3281}
3282
3283int btf_ext__set_endianness(struct btf_ext *btf_ext, enum btf_endianness endian)
3284{
3285 if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
3286 return libbpf_err(-EINVAL);
3287
3288 btf_ext->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
3289
3290 if (!btf_ext->swapped_endian) {
3291 free(btf_ext->data_swapped);
3292 btf_ext->data_swapped = NULL;
3293 }
3294 return 0;
3295}
3296
/* Dedup state; the full definition appears further down in this file */
struct btf_dedup;

/* creation and destruction of the dedup state */
static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
static void btf_dedup_free(struct btf_dedup *d);

/* individual steps, listed in the order btf__dedup() invokes them */
static int btf_dedup_prep(struct btf_dedup *d);
static int btf_dedup_strings(struct btf_dedup *d);
static int btf_dedup_prim_types(struct btf_dedup *d);
static int btf_dedup_struct_types(struct btf_dedup *d);
static int btf_dedup_resolve_fwds(struct btf_dedup *d);
static int btf_dedup_ref_types(struct btf_dedup *d);
static int btf_dedup_compact_types(struct btf_dedup *d);
static int btf_dedup_remap_types(struct btf_dedup *d);
3309
3310/*
3311 * Deduplicate BTF types and strings.
3312 *
3313 * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
3314 * section with all BTF type descriptors and string data. It overwrites that
3315 * memory in-place with deduplicated types and strings without any loss of
3316 * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
3317 * is provided, all the strings referenced from .BTF.ext section are honored
3318 * and updated to point to the right offsets after deduplication.
3319 *
3320 * If function returns with error, type/string data might be garbled and should
3321 * be discarded.
3322 *
3323 * More verbose and detailed description of both problem btf_dedup is solving,
3324 * as well as solution could be found at:
3325 * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
3326 *
3327 * Problem description and justification
3328 * =====================================
3329 *
3330 * BTF type information is typically emitted either as a result of conversion
3331 * from DWARF to BTF or directly by compiler. In both cases, each compilation
3332 * unit contains information about a subset of all the types that are used
3333 * in an application. These subsets are frequently overlapping and contain a lot
3334 * of duplicated information when later concatenated together into a single
3335 * binary. This algorithm ensures that each unique type is represented by single
3336 * BTF type descriptor, greatly reducing resulting size of BTF data.
3337 *
3338 * Compilation unit isolation and subsequent duplication of data is not the only
3339 * problem. The same type hierarchy (e.g., struct and all the types that struct
3340 * references) in different compilation units can be represented in BTF to
3341 * various degrees of completeness (or, rather, incompleteness) due to
3342 * struct/union forward declarations.
3343 *
3344 * Let's take a look at an example, that we'll use to better understand the
3345 * problem (and solution). Suppose we have two compilation units, each using
3346 * same `struct S`, but each of them having incomplete type information about
3347 * struct's fields:
3348 *
3349 * // CU #1:
3350 * struct S;
3351 * struct A {
3352 * int a;
3353 * struct A* self;
3354 * struct S* parent;
3355 * };
3356 * struct B;
3357 * struct S {
3358 * struct A* a_ptr;
3359 * struct B* b_ptr;
3360 * };
3361 *
3362 * // CU #2:
3363 * struct S;
3364 * struct A;
3365 * struct B {
3366 * int b;
3367 * struct B* self;
3368 * struct S* parent;
3369 * };
3370 * struct S {
3371 * struct A* a_ptr;
3372 * struct B* b_ptr;
3373 * };
3374 *
3375 * In case of CU #1, BTF data will know only that `struct B` exists (but no
3376 * more), but will know the complete type information about `struct A`. While
3377 * for CU #2, it will know full type information about `struct B`, but will
3378 * only know about forward declaration of `struct A` (in BTF terms, it will
3379 * have `BTF_KIND_FWD` type descriptor with name `A`).
3380 *
3381 * This compilation unit isolation means that it's possible that there is no
3382 * single CU with complete type information describing structs `S`, `A`, and
3383 * `B`. Also, we might get tons of duplicated and redundant type information.
3384 *
3385 * Additional complication we need to keep in mind comes from the fact that
3386 * types, in general, can form graphs containing cycles, not just DAGs.
3387 *
3388 * While algorithm does deduplication, it also merges and resolves type
3389 * information (unless disabled through `struct btf_dedup_opts`), whenever possible.
3390 * E.g., in the example above with two compilation units having partial type
3391 * information for structs `A` and `B`, the output of algorithm will emit
3392 * a single copy of each BTF type that describes structs `A`, `B`, and `S`
3393 * (as well as type information for `int` and pointers), as if they were defined
3394 * in a single compilation unit as:
3395 *
3396 * struct A {
3397 * int a;
3398 * struct A* self;
3399 * struct S* parent;
3400 * };
3401 * struct B {
3402 * int b;
3403 * struct B* self;
3404 * struct S* parent;
3405 * };
3406 * struct S {
3407 * struct A* a_ptr;
3408 * struct B* b_ptr;
3409 * };
3410 *
3411 * Algorithm summary
3412 * =================
3413 *
3414 * Algorithm completes its work in 7 separate passes:
3415 *
3416 * 1. Strings deduplication.
3417 * 2. Primitive types deduplication (int, enum, fwd).
3418 * 3. Struct/union types deduplication.
3419 * 4. Resolve unambiguous forward declarations.
3420 * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func
3421 * protos, and const/volatile/restrict modifiers).
3422 * 6. Types compaction.
3423 * 7. Types remapping.
3424 *
3425 * Algorithm determines canonical type descriptor, which is a single
3426 * representative type for each truly unique type. This canonical type is the
3427 * one that will go into final deduplicated BTF type information. For
3428 * struct/unions, it is also the type that algorithm will merge additional type
3429 * information into (while resolving FWDs), as it discovers it from data in
3430 * other CUs. Each input BTF type eventually gets either mapped to itself, if
3431 * that type is canonical, or to some other type, if that type is equivalent
3432 * and was chosen as canonical representative. This mapping is stored in
3433 * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
3434 * FWD type got resolved to.
3435 *
3436 * To facilitate fast discovery of canonical types, we also maintain canonical
3437 * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash
3438 * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types
3439 * that match that signature. With sufficiently good choice of type signature
3440 * hashing function, we can limit number of canonical types for each unique type
3441 * signature to a very small number, allowing to find canonical type for any
3442 * duplicated type very quickly.
3443 *
3444 * Struct/union deduplication is the most critical part and algorithm for
3445 * deduplicating structs/unions is described in greater details in comments for
3446 * `btf_dedup_is_equiv` function.
3447 */
3448int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
3449{
3450 struct btf_dedup *d;
3451 int err;
3452
3453 if (!OPTS_VALID(opts, btf_dedup_opts))
3454 return libbpf_err(-EINVAL);
3455
3456 d = btf_dedup_new(btf, opts);
3457 if (IS_ERR(d)) {
3458 pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d));
3459 return libbpf_err(-EINVAL);
3460 }
3461
3462 if (btf_ensure_modifiable(btf)) {
3463 err = -ENOMEM;
3464 goto done;
3465 }
3466
3467 err = btf_dedup_prep(d);
3468 if (err) {
3469 pr_debug("btf_dedup_prep failed: %s\n", errstr(err));
3470 goto done;
3471 }
3472 err = btf_dedup_strings(d);
3473 if (err < 0) {
3474 pr_debug("btf_dedup_strings failed: %s\n", errstr(err));
3475 goto done;
3476 }
3477 err = btf_dedup_prim_types(d);
3478 if (err < 0) {
3479 pr_debug("btf_dedup_prim_types failed: %s\n", errstr(err));
3480 goto done;
3481 }
3482 err = btf_dedup_struct_types(d);
3483 if (err < 0) {
3484 pr_debug("btf_dedup_struct_types failed: %s\n", errstr(err));
3485 goto done;
3486 }
3487 err = btf_dedup_resolve_fwds(d);
3488 if (err < 0) {
3489 pr_debug("btf_dedup_resolve_fwds failed: %s\n", errstr(err));
3490 goto done;
3491 }
3492 err = btf_dedup_ref_types(d);
3493 if (err < 0) {
3494 pr_debug("btf_dedup_ref_types failed: %s\n", errstr(err));
3495 goto done;
3496 }
3497 err = btf_dedup_compact_types(d);
3498 if (err < 0) {
3499 pr_debug("btf_dedup_compact_types failed: %s\n", errstr(err));
3500 goto done;
3501 }
3502 err = btf_dedup_remap_types(d);
3503 if (err < 0) {
3504 pr_debug("btf_dedup_remap_types failed: %s\n", errstr(err));
3505 goto done;
3506 }
3507
3508done:
3509 btf_dedup_free(d);
3510 return libbpf_err(err);
3511}
3512
/* Marker stored in the dedup maps for a type ID with no mapping yet */
#define BTF_UNPROCESSED_ID ((__u32)-1)
/* Second reserved marker value, distinct from every regular type ID */
#define BTF_IN_PROGRESS_ID ((__u32)-2)
3515
3516struct btf_dedup {
3517 /* .BTF section to be deduped in-place */
3518 struct btf *btf;
3519 /*
3520 * Optional .BTF.ext section. When provided, any strings referenced
3521 * from it will be taken into account when deduping strings
3522 */
3523 struct btf_ext *btf_ext;
3524 /*
3525 * This is a map from any type's signature hash to a list of possible
3526 * canonical representative type candidates. Hash collisions are
3527 * ignored, so even types of various kinds can share same list of
3528 * candidates, which is fine because we rely on subsequent
3529 * btf_xxx_equal() checks to authoritatively verify type equality.
3530 */
3531 struct hashmap *dedup_table;
3532 /* Canonical types map */
3533 __u32 *map;
3534 /* Hypothetical mapping, used during type graph equivalence checks */
3535 __u32 *hypot_map;
3536 __u32 *hypot_list;
3537 size_t hypot_cnt;
3538 size_t hypot_cap;
3539 /* Whether hypothetical mapping, if successful, would need to adjust
3540 * already canonicalized types (due to a new forward declaration to
3541 * concrete type resolution). In such case, during split BTF dedup
3542 * candidate type would still be considered as different, because base
3543 * BTF is considered to be immutable.
3544 */
3545 bool hypot_adjust_canon;
3546 /* Various option modifying behavior of algorithm */
3547 struct btf_dedup_opts opts;
3548 /* temporary strings deduplication state */
3549 struct strset *strs_set;
3550};
3551
/* Fold *value* into the running hash *h*: the hash is multiplied by 31 and
 * the value is added (unsigned arithmetic, so it wraps on overflow).
 */
static unsigned long hash_combine(unsigned long h, unsigned long value)
{
	const unsigned long scaled = 31UL * h;

	return scaled + value;
}
3556
/* Iterate over every dedup_table entry stored under key *hash*, with *node*
 * as the loop cursor
 */
#define for_each_dedup_cand(d, node, hash) \
	hashmap__for_each_key_entry((d)->dedup_table, node, hash)
3559
3560static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
3561{
3562 return hashmap__append(d->dedup_table, hash, type_id);
3563}
3564
3565static int btf_dedup_hypot_map_add(struct btf_dedup *d,
3566 __u32 from_id, __u32 to_id)
3567{
3568 if (d->hypot_cnt == d->hypot_cap) {
3569 __u32 *new_list;
3570
3571 d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
3572 new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
3573 if (!new_list)
3574 return -ENOMEM;
3575 d->hypot_list = new_list;
3576 }
3577 d->hypot_list[d->hypot_cnt++] = from_id;
3578 d->hypot_map[from_id] = to_id;
3579 return 0;
3580}
3581
3582static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
3583{
3584 int i;
3585
3586 for (i = 0; i < d->hypot_cnt; i++)
3587 d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
3588 d->hypot_cnt = 0;
3589 d->hypot_adjust_canon = false;
3590}
3591
3592static void btf_dedup_free(struct btf_dedup *d)
3593{
3594 hashmap__free(d->dedup_table);
3595 d->dedup_table = NULL;
3596
3597 free(d->map);
3598 d->map = NULL;
3599
3600 free(d->hypot_map);
3601 d->hypot_map = NULL;
3602
3603 free(d->hypot_list);
3604 d->hypot_list = NULL;
3605
3606 free(d);
3607}
3608
/* Hash callback that uses the key itself as the hash value; *ctx* is unused */
static size_t btf_dedup_identity_hash_fn(long key, void *ctx)
{
	(void)ctx;

	return (size_t)key;
}
3613
/* Hash callback that maps every key to 0, so all keys collide; selected by
 * the force_collisions option. Both arguments are unused.
 */
static size_t btf_dedup_collision_hash_fn(long key, void *ctx)
{
	(void)key;
	(void)ctx;

	return 0;
}
3618
/* Key-equality callback: two keys match only when they are the same number;
 * *ctx* is unused
 */
static bool btf_dedup_equal_fn(long k1, long k2, void *ctx)
{
	(void)ctx;

	if (k1 != k2)
		return false;
	return true;
}
3623
3624static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts)
3625{
3626 struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
3627 hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
3628 int i, err = 0, type_cnt;
3629
3630 if (!d)
3631 return ERR_PTR(-ENOMEM);
3632
3633 if (OPTS_GET(opts, force_collisions, false))
3634 hash_fn = btf_dedup_collision_hash_fn;
3635
3636 d->btf = btf;
3637 d->btf_ext = OPTS_GET(opts, btf_ext, NULL);
3638
3639 d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
3640 if (IS_ERR(d->dedup_table)) {
3641 err = PTR_ERR(d->dedup_table);
3642 d->dedup_table = NULL;
3643 goto done;
3644 }
3645
3646 type_cnt = btf__type_cnt(btf);
3647 d->map = malloc(sizeof(__u32) * type_cnt);
3648 if (!d->map) {
3649 err = -ENOMEM;
3650 goto done;
3651 }
3652 /* special BTF "void" type is made canonical immediately */
3653 d->map[0] = 0;
3654 for (i = 1; i < type_cnt; i++) {
3655 struct btf_type *t = btf_type_by_id(d->btf, i);
3656
3657 /* VAR and DATASEC are never deduped and are self-canonical */
3658 if (btf_is_var(t) || btf_is_datasec(t))
3659 d->map[i] = i;
3660 else
3661 d->map[i] = BTF_UNPROCESSED_ID;
3662 }
3663
3664 d->hypot_map = malloc(sizeof(__u32) * type_cnt);
3665 if (!d->hypot_map) {
3666 err = -ENOMEM;
3667 goto done;
3668 }
3669 for (i = 0; i < type_cnt; i++)
3670 d->hypot_map[i] = BTF_UNPROCESSED_ID;
3671
3672done:
3673 if (err) {
3674 btf_dedup_free(d);
3675 return ERR_PTR(err);
3676 }
3677
3678 return d;
3679}
3680
3681/*
3682 * Iterate over all possible places in .BTF and .BTF.ext that can reference
3683 * string and pass pointer to it to a provided callback `fn`.
3684 */
3685static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void *ctx)
3686{
3687 int i, r;
3688
3689 for (i = 0; i < d->btf->nr_types; i++) {
3690 struct btf_field_iter it;
3691 struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
3692 __u32 *str_off;
3693
3694 r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS);
3695 if (r)
3696 return r;
3697
3698 while ((str_off = btf_field_iter_next(&it))) {
3699 r = fn(str_off, ctx);
3700 if (r)
3701 return r;
3702 }
3703 }
3704
3705 if (!d->btf_ext)
3706 return 0;
3707
3708 r = btf_ext_visit_str_offs(d->btf_ext, fn, ctx);
3709 if (r)
3710 return r;
3711
3712 return 0;
3713}
3714
3715static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
3716{
3717 struct btf_dedup *d = ctx;
3718 __u32 str_off = *str_off_ptr;
3719 const char *s;
3720 int off, err;
3721
3722 /* don't touch empty string or string in main BTF */
3723 if (str_off == 0 || str_off < d->btf->start_str_off)
3724 return 0;
3725
3726 s = btf__str_by_offset(d->btf, str_off);
3727 if (d->btf->base_btf) {
3728 err = btf__find_str(d->btf->base_btf, s);
3729 if (err >= 0) {
3730 *str_off_ptr = err;
3731 return 0;
3732 }
3733 if (err != -ENOENT)
3734 return err;
3735 }
3736
3737 off = strset__add_str(d->strs_set, s);
3738 if (off < 0)
3739 return off;
3740
3741 *str_off_ptr = d->btf->start_str_off + off;
3742 return 0;
3743}
3744
3745/*
3746 * Dedup string and filter out those that are not referenced from either .BTF
3747 * or .BTF.ext (if provided) sections.
3748 *
3749 * This is done by building index of all strings in BTF's string section,
3750 * then iterating over all entities that can reference strings (e.g., type
3751 * names, struct field names, .BTF.ext line info, etc) and marking corresponding
3752 * strings as used. After that all used strings are deduped and compacted into
3753 * sequential blob of memory and new offsets are calculated. Then all the string
3754 * references are iterated again and rewritten using new offsets.
3755 */
3756static int btf_dedup_strings(struct btf_dedup *d)
3757{
3758 int err;
3759
3760 if (d->btf->strs_deduped)
3761 return 0;
3762
3763 d->strs_set = strset__new(BTF_MAX_STR_OFFSET, NULL, 0);
3764 if (IS_ERR(d->strs_set)) {
3765 err = PTR_ERR(d->strs_set);
3766 goto err_out;
3767 }
3768
3769 if (!d->btf->base_btf) {
3770 /* insert empty string; we won't be looking it up during strings
3771 * dedup, but it's good to have it for generic BTF string lookups
3772 */
3773 err = strset__add_str(d->strs_set, "");
3774 if (err < 0)
3775 goto err_out;
3776 }
3777
3778 /* remap string offsets */
3779 err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
3780 if (err)
3781 goto err_out;
3782
3783 /* replace BTF string data and hash with deduped ones */
3784 strset__free(d->btf->strs_set);
3785 d->btf->hdr->str_len = strset__data_size(d->strs_set);
3786 d->btf->strs_set = d->strs_set;
3787 d->strs_set = NULL;
3788 d->btf->strs_deduped = true;
3789 return 0;
3790
3791err_out:
3792 strset__free(d->strs_set);
3793 d->strs_set = NULL;
3794
3795 return err;
3796}
3797
3798static long btf_hash_common(struct btf_type *t)
3799{
3800 long h;
3801
3802 h = hash_combine(0, t->name_off);
3803 h = hash_combine(h, t->info);
3804 h = hash_combine(h, t->size);
3805 return h;
3806}
3807
3808static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
3809{
3810 return t1->name_off == t2->name_off &&
3811 t1->info == t2->info &&
3812 t1->size == t2->size;
3813}
3814
3815/* Calculate type signature hash of INT or TAG. */
3816static long btf_hash_int_decl_tag(struct btf_type *t)
3817{
3818 __u32 info = *(__u32 *)(t + 1);
3819 long h;
3820
3821 h = btf_hash_common(t);
3822 h = hash_combine(h, info);
3823 return h;
3824}
3825
3826/* Check structural equality of two INTs or TAGs. */
3827static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2)
3828{
3829 __u32 info1, info2;
3830
3831 if (!btf_equal_common(t1, t2))
3832 return false;
3833 info1 = *(__u32 *)(t1 + 1);
3834 info2 = *(__u32 *)(t2 + 1);
3835 return info1 == info2;
3836}
3837
3838/* Calculate type signature hash of ENUM/ENUM64. */
3839static long btf_hash_enum(struct btf_type *t)
3840{
3841 long h;
3842
3843 /* don't hash vlen, enum members and size to support enum fwd resolving */
3844 h = hash_combine(0, t->name_off);
3845 return h;
3846}
3847
3848static bool btf_equal_enum_members(struct btf_type *t1, struct btf_type *t2)
3849{
3850 const struct btf_enum *m1, *m2;
3851 __u16 vlen;
3852 int i;
3853
3854 vlen = btf_vlen(t1);
3855 m1 = btf_enum(t1);
3856 m2 = btf_enum(t2);
3857 for (i = 0; i < vlen; i++) {
3858 if (m1->name_off != m2->name_off || m1->val != m2->val)
3859 return false;
3860 m1++;
3861 m2++;
3862 }
3863 return true;
3864}
3865
3866static bool btf_equal_enum64_members(struct btf_type *t1, struct btf_type *t2)
3867{
3868 const struct btf_enum64 *m1, *m2;
3869 __u16 vlen;
3870 int i;
3871
3872 vlen = btf_vlen(t1);
3873 m1 = btf_enum64(t1);
3874 m2 = btf_enum64(t2);
3875 for (i = 0; i < vlen; i++) {
3876 if (m1->name_off != m2->name_off || m1->val_lo32 != m2->val_lo32 ||
3877 m1->val_hi32 != m2->val_hi32)
3878 return false;
3879 m1++;
3880 m2++;
3881 }
3882 return true;
3883}
3884
3885/* Check structural equality of two ENUMs or ENUM64s. */
3886static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
3887{
3888 if (!btf_equal_common(t1, t2))
3889 return false;
3890
3891 /* t1 & t2 kinds are identical because of btf_equal_common */
3892 if (btf_kind(t1) == BTF_KIND_ENUM)
3893 return btf_equal_enum_members(t1, t2);
3894 else
3895 return btf_equal_enum64_members(t1, t2);
3896}
3897
/* An enum forward declaration is an ENUM/ENUM64 with no members (vlen 0). */
static inline bool btf_is_enum_fwd(struct btf_type *t)
{
	if (!btf_is_any_enum(t))
		return false;
	return btf_vlen(t) == 0;
}
3902
3903static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
3904{
3905 if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
3906 return btf_equal_enum(t1, t2);
3907 /* At this point either t1 or t2 or both are forward declarations, thus:
3908 * - skip comparing vlen because it is zero for forward declarations;
3909 * - skip comparing size to allow enum forward declarations
3910 * to be compatible with enum64 full declarations;
3911 * - skip comparing kind for the same reason.
3912 */
3913 return t1->name_off == t2->name_off &&
3914 btf_is_any_enum(t1) && btf_is_any_enum(t2);
3915}
3916
3917/*
3918 * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
3919 * as referenced type IDs equivalence is established separately during type
3920 * graph equivalence check algorithm.
3921 */
3922static long btf_hash_struct(struct btf_type *t)
3923{
3924 const struct btf_member *member = btf_members(t);
3925 __u32 vlen = btf_vlen(t);
3926 long h = btf_hash_common(t);
3927 int i;
3928
3929 for (i = 0; i < vlen; i++) {
3930 h = hash_combine(h, member->name_off);
3931 h = hash_combine(h, member->offset);
3932 /* no hashing of referenced type ID, it can be unresolved yet */
3933 member++;
3934 }
3935 return h;
3936}
3937
3938/*
3939 * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
3940 * type IDs. This check is performed during type graph equivalence check and
3941 * referenced types equivalence is checked separately.
3942 */
3943static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
3944{
3945 const struct btf_member *m1, *m2;
3946 __u16 vlen;
3947 int i;
3948
3949 if (!btf_equal_common(t1, t2))
3950 return false;
3951
3952 vlen = btf_vlen(t1);
3953 m1 = btf_members(t1);
3954 m2 = btf_members(t2);
3955 for (i = 0; i < vlen; i++) {
3956 if (m1->name_off != m2->name_off || m1->offset != m2->offset)
3957 return false;
3958 m1++;
3959 m2++;
3960 }
3961 return true;
3962}
3963
3964/*
3965 * Calculate type signature hash of ARRAY, including referenced type IDs,
3966 * under assumption that they were already resolved to canonical type IDs and
3967 * are not going to change.
3968 */
3969static long btf_hash_array(struct btf_type *t)
3970{
3971 const struct btf_array *info = btf_array(t);
3972 long h = btf_hash_common(t);
3973
3974 h = hash_combine(h, info->type);
3975 h = hash_combine(h, info->index_type);
3976 h = hash_combine(h, info->nelems);
3977 return h;
3978}
3979
3980/*
3981 * Check exact equality of two ARRAYs, taking into account referenced
3982 * type IDs, under assumption that they were already resolved to canonical
3983 * type IDs and are not going to change.
3984 * This function is called during reference types deduplication to compare
3985 * ARRAY to potential canonical representative.
3986 */
3987static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
3988{
3989 const struct btf_array *info1, *info2;
3990
3991 if (!btf_equal_common(t1, t2))
3992 return false;
3993
3994 info1 = btf_array(t1);
3995 info2 = btf_array(t2);
3996 return info1->type == info2->type &&
3997 info1->index_type == info2->index_type &&
3998 info1->nelems == info2->nelems;
3999}
4000
4001/*
4002 * Check structural compatibility of two ARRAYs, ignoring referenced type
4003 * IDs. This check is performed during type graph equivalence check and
4004 * referenced types equivalence is checked separately.
4005 */
4006static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
4007{
4008 if (!btf_equal_common(t1, t2))
4009 return false;
4010
4011 return btf_array(t1)->nelems == btf_array(t2)->nelems;
4012}
4013
4014/*
4015 * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
4016 * under assumption that they were already resolved to canonical type IDs and
4017 * are not going to change.
4018 */
4019static long btf_hash_fnproto(struct btf_type *t)
4020{
4021 const struct btf_param *member = btf_params(t);
4022 __u16 vlen = btf_vlen(t);
4023 long h = btf_hash_common(t);
4024 int i;
4025
4026 for (i = 0; i < vlen; i++) {
4027 h = hash_combine(h, member->name_off);
4028 h = hash_combine(h, member->type);
4029 member++;
4030 }
4031 return h;
4032}
4033
4034/*
4035 * Check exact equality of two FUNC_PROTOs, taking into account referenced
4036 * type IDs, under assumption that they were already resolved to canonical
4037 * type IDs and are not going to change.
4038 * This function is called during reference types deduplication to compare
4039 * FUNC_PROTO to potential canonical representative.
4040 */
4041static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
4042{
4043 const struct btf_param *m1, *m2;
4044 __u16 vlen;
4045 int i;
4046
4047 if (!btf_equal_common(t1, t2))
4048 return false;
4049
4050 vlen = btf_vlen(t1);
4051 m1 = btf_params(t1);
4052 m2 = btf_params(t2);
4053 for (i = 0; i < vlen; i++) {
4054 if (m1->name_off != m2->name_off || m1->type != m2->type)
4055 return false;
4056 m1++;
4057 m2++;
4058 }
4059 return true;
4060}
4061
4062/*
4063 * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
4064 * IDs. This check is performed during type graph equivalence check and
4065 * referenced types equivalence is checked separately.
4066 */
4067static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
4068{
4069 const struct btf_param *m1, *m2;
4070 __u16 vlen;
4071 int i;
4072
4073 /* skip return type ID */
4074 if (t1->name_off != t2->name_off || t1->info != t2->info)
4075 return false;
4076
4077 vlen = btf_vlen(t1);
4078 m1 = btf_params(t1);
4079 m2 = btf_params(t2);
4080 for (i = 0; i < vlen; i++) {
4081 if (m1->name_off != m2->name_off)
4082 return false;
4083 m1++;
4084 m2++;
4085 }
4086 return true;
4087}
4088
4089/* Prepare split BTF for deduplication by calculating hashes of base BTF's
4090 * types and initializing the rest of the state (canonical type mapping) for
4091 * the fixed base BTF part.
4092 */
4093static int btf_dedup_prep(struct btf_dedup *d)
4094{
4095 struct btf_type *t;
4096 int type_id;
4097 long h;
4098
4099 if (!d->btf->base_btf)
4100 return 0;
4101
4102 for (type_id = 1; type_id < d->btf->start_id; type_id++) {
4103 t = btf_type_by_id(d->btf, type_id);
4104
4105 /* all base BTF types are self-canonical by definition */
4106 d->map[type_id] = type_id;
4107
4108 switch (btf_kind(t)) {
4109 case BTF_KIND_VAR:
4110 case BTF_KIND_DATASEC:
4111 /* VAR and DATASEC are never hash/deduplicated */
4112 continue;
4113 case BTF_KIND_CONST:
4114 case BTF_KIND_VOLATILE:
4115 case BTF_KIND_RESTRICT:
4116 case BTF_KIND_PTR:
4117 case BTF_KIND_FWD:
4118 case BTF_KIND_TYPEDEF:
4119 case BTF_KIND_FUNC:
4120 case BTF_KIND_FLOAT:
4121 case BTF_KIND_TYPE_TAG:
4122 h = btf_hash_common(t);
4123 break;
4124 case BTF_KIND_INT:
4125 case BTF_KIND_DECL_TAG:
4126 h = btf_hash_int_decl_tag(t);
4127 break;
4128 case BTF_KIND_ENUM:
4129 case BTF_KIND_ENUM64:
4130 h = btf_hash_enum(t);
4131 break;
4132 case BTF_KIND_STRUCT:
4133 case BTF_KIND_UNION:
4134 h = btf_hash_struct(t);
4135 break;
4136 case BTF_KIND_ARRAY:
4137 h = btf_hash_array(t);
4138 break;
4139 case BTF_KIND_FUNC_PROTO:
4140 h = btf_hash_fnproto(t);
4141 break;
4142 default:
4143 pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id);
4144 return -EINVAL;
4145 }
4146 if (btf_dedup_table_add(d, h, type_id))
4147 return -ENOMEM;
4148 }
4149
4150 return 0;
4151}
4152
4153/*
4154 * Deduplicate primitive types, that can't reference other types, by calculating
4155 * their type signature hash and comparing them with any possible canonical
4156 * candidate. If no canonical candidate matches, type itself is marked as
4157 * canonical and is added into `btf_dedup->dedup_table` as another candidate.
4158 */
4159static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
4160{
4161 struct btf_type *t = btf_type_by_id(d->btf, type_id);
4162 struct hashmap_entry *hash_entry;
4163 struct btf_type *cand;
4164 /* if we don't find equivalent type, then we are canonical */
4165 __u32 new_id = type_id;
4166 __u32 cand_id;
4167 long h;
4168
4169 switch (btf_kind(t)) {
4170 case BTF_KIND_CONST:
4171 case BTF_KIND_VOLATILE:
4172 case BTF_KIND_RESTRICT:
4173 case BTF_KIND_PTR:
4174 case BTF_KIND_TYPEDEF:
4175 case BTF_KIND_ARRAY:
4176 case BTF_KIND_STRUCT:
4177 case BTF_KIND_UNION:
4178 case BTF_KIND_FUNC:
4179 case BTF_KIND_FUNC_PROTO:
4180 case BTF_KIND_VAR:
4181 case BTF_KIND_DATASEC:
4182 case BTF_KIND_DECL_TAG:
4183 case BTF_KIND_TYPE_TAG:
4184 return 0;
4185
4186 case BTF_KIND_INT:
4187 h = btf_hash_int_decl_tag(t);
4188 for_each_dedup_cand(d, hash_entry, h) {
4189 cand_id = hash_entry->value;
4190 cand = btf_type_by_id(d->btf, cand_id);
4191 if (btf_equal_int_tag(t, cand)) {
4192 new_id = cand_id;
4193 break;
4194 }
4195 }
4196 break;
4197
4198 case BTF_KIND_ENUM:
4199 case BTF_KIND_ENUM64:
4200 h = btf_hash_enum(t);
4201 for_each_dedup_cand(d, hash_entry, h) {
4202 cand_id = hash_entry->value;
4203 cand = btf_type_by_id(d->btf, cand_id);
4204 if (btf_equal_enum(t, cand)) {
4205 new_id = cand_id;
4206 break;
4207 }
4208 if (btf_compat_enum(t, cand)) {
4209 if (btf_is_enum_fwd(t)) {
4210 /* resolve fwd to full enum */
4211 new_id = cand_id;
4212 break;
4213 }
4214 /* resolve canonical enum fwd to full enum */
4215 d->map[cand_id] = type_id;
4216 }
4217 }
4218 break;
4219
4220 case BTF_KIND_FWD:
4221 case BTF_KIND_FLOAT:
4222 h = btf_hash_common(t);
4223 for_each_dedup_cand(d, hash_entry, h) {
4224 cand_id = hash_entry->value;
4225 cand = btf_type_by_id(d->btf, cand_id);
4226 if (btf_equal_common(t, cand)) {
4227 new_id = cand_id;
4228 break;
4229 }
4230 }
4231 break;
4232
4233 default:
4234 return -EINVAL;
4235 }
4236
4237 d->map[type_id] = new_id;
4238 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
4239 return -ENOMEM;
4240
4241 return 0;
4242}
4243
4244static int btf_dedup_prim_types(struct btf_dedup *d)
4245{
4246 int i, err;
4247
4248 for (i = 0; i < d->btf->nr_types; i++) {
4249 err = btf_dedup_prim_type(d, d->btf->start_id + i);
4250 if (err)
4251 return err;
4252 }
4253 return 0;
4254}
4255
4256/*
4257 * Check whether type is already mapped into canonical one (could be to itself).
4258 */
4259static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
4260{
4261 return d->map[type_id] <= BTF_MAX_NR_TYPES;
4262}
4263
4264/*
4265 * Resolve type ID into its canonical type ID, if any; otherwise return original
4266 * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
4267 * STRUCT/UNION link and resolve it into canonical type ID as well.
4268 */
4269static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
4270{
4271 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
4272 type_id = d->map[type_id];
4273 return type_id;
4274}
4275
4276/*
4277 * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
4278 * type ID.
4279 */
4280static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
4281{
4282 __u32 orig_type_id = type_id;
4283
4284 if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
4285 return type_id;
4286
4287 while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
4288 type_id = d->map[type_id];
4289
4290 if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))
4291 return type_id;
4292
4293 return orig_type_id;
4294}
4295
4296
4297static inline __u16 btf_fwd_kind(struct btf_type *t)
4298{
4299 return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
4300}
4301
4302/* Check if given two types are identical ARRAY definitions */
4303static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
4304{
4305 struct btf_type *t1, *t2;
4306
4307 t1 = btf_type_by_id(d->btf, id1);
4308 t2 = btf_type_by_id(d->btf, id2);
4309 if (!btf_is_array(t1) || !btf_is_array(t2))
4310 return false;
4311
4312 return btf_equal_array(t1, t2);
4313}
4314
4315/* Check if given two types are identical STRUCT/UNION definitions */
4316static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
4317{
4318 const struct btf_member *m1, *m2;
4319 struct btf_type *t1, *t2;
4320 int n, i;
4321
4322 t1 = btf_type_by_id(d->btf, id1);
4323 t2 = btf_type_by_id(d->btf, id2);
4324
4325 if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
4326 return false;
4327
4328 if (!btf_shallow_equal_struct(t1, t2))
4329 return false;
4330
4331 m1 = btf_members(t1);
4332 m2 = btf_members(t2);
4333 for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
4334 if (m1->type != m2->type &&
4335 !btf_dedup_identical_arrays(d, m1->type, m2->type) &&
4336 !btf_dedup_identical_structs(d, m1->type, m2->type))
4337 return false;
4338 }
4339 return true;
4340}
4341
4342/*
4343 * Check equivalence of BTF type graph formed by candidate struct/union (we'll
4344 * call it "candidate graph" in this description for brevity) to a type graph
4345 * formed by (potential) canonical struct/union ("canonical graph" for brevity
4346 * here, though keep in mind that not all types in canonical graph are
4347 * necessarily canonical representatives themselves, some of them might be
4348 * duplicates or its uniqueness might not have been established yet).
4349 * Returns:
4350 * - >0, if type graphs are equivalent;
4351 * - 0, if not equivalent;
4352 * - <0, on error.
4353 *
4354 * Algorithm performs side-by-side DFS traversal of both type graphs and checks
4355 * equivalence of BTF types at each step. If at any point BTF types in candidate
4356 * and canonical graphs are not compatible structurally, whole graphs are
4357 * incompatible. If types are structurally equivalent (i.e., all information
4358 * except referenced type IDs is exactly the same), a mapping from `canon_id` to
4359 * a `cand_id` is recoded in hypothetical mapping (`btf_dedup->hypot_map`).
4360 * If a type references other types, then those referenced types are checked
4361 * for equivalence recursively.
4362 *
4363 * During DFS traversal, if we find that for current `canon_id` type we
4364 * already have some mapping in hypothetical map, we check for two possible
4365 * situations:
4366 * - `canon_id` is mapped to exactly the same type as `cand_id`. This will
4367 * happen when type graphs have cycles. In this case we assume those two
4368 * types are equivalent.
4369 * - `canon_id` is mapped to different type. This is contradiction in our
4370 * hypothetical mapping, because same graph in canonical graph corresponds
4371 * to two different types in candidate graph, which for equivalent type
4372 * graphs shouldn't happen. This condition terminates equivalence check
4373 * with negative result.
4374 *
4375 * If type graphs traversal exhausts types to check and find no contradiction,
4376 * then type graphs are equivalent.
4377 *
4378 * When checking types for equivalence, there is one special case: FWD types.
4379 * If FWD type resolution is allowed and one of the types (either from canonical
4380 * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
4381 * flag) and their names match, hypothetical mapping is updated to point from
4382 * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
4383 * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
4384 *
4385 * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
4386 * if there are two exactly named (or anonymous) structs/unions that are
4387 * compatible structurally, one of which has FWD field, while other is concrete
4388 * STRUCT/UNION, but according to C sources they are different structs/unions
4389 * that are referencing different types with the same name. This is extremely
4390 * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
4391 * this logic is causing problems.
4392 *
4393 * Doing FWD resolution means that both candidate and/or canonical graphs can
4394 * consists of portions of the graph that come from multiple compilation units.
4395 * This is due to the fact that types within single compilation unit are always
4396 * deduplicated and FWDs are already resolved, if referenced struct/union
4397 * definition is available. So, if we had unresolved FWD and found corresponding
4398 * STRUCT/UNION, they will be from different compilation units. This
4399 * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
4400 * type graph will likely have at least two different BTF types that describe
4401 * same type (e.g., most probably there will be two different BTF types for the
4402 * same 'int' primitive type) and could even have "overlapping" parts of type
4403 * graph that describe same subset of types.
4404 *
4405 * This in turn means that our assumption that each type in canonical graph
4406 * must correspond to exactly one type in candidate graph might not hold
4407 * anymore and will make it harder to detect contradictions using hypothetical
4408 * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
4409 * resolution only in canonical graph. FWDs in candidate graphs are never
4410 * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
4411 * that can occur:
4412 * - Both types in canonical and candidate graphs are FWDs. If they are
4413 * structurally equivalent, then they can either be both resolved to the
4414 * same STRUCT/UNION or not resolved at all. In both cases they are
4415 * equivalent and there is no need to resolve FWD on candidate side.
4416 * - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
4417 * so nothing to resolve as well, algorithm will check equivalence anyway.
4418 * - Type in canonical graph is FWD, while type in candidate is concrete
4419 * STRUCT/UNION. In this case candidate graph comes from single compilation
4420 * unit, so there is exactly one BTF type for each unique C type. After
4421 * resolving FWD into STRUCT/UNION, there might be more than one BTF type
4422 * in canonical graph mapping to single BTF type in candidate graph, but
4423 * because hypothetical mapping maps from canonical to candidate types, it's
4424 * alright, and we still maintain the property of having single `canon_id`
4425 * mapping to single `cand_id` (there could be two different `canon_id`
4426 * mapped to the same `cand_id`, but it's not contradictory).
4427 * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
4428 * graph is FWD. In this case we are just going to check compatibility of
4429 * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
4430 * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
4431 * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
4432 * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
4433 * canonical graph.
4434 */
4435static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
4436 __u32 canon_id)
4437{
4438 struct btf_type *cand_type;
4439 struct btf_type *canon_type;
4440 __u32 hypot_type_id;
4441 __u16 cand_kind;
4442 __u16 canon_kind;
4443 int i, eq;
4444
4445 /* if both resolve to the same canonical, they must be equivalent */
4446 if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
4447 return 1;
4448
4449 canon_id = resolve_fwd_id(d, canon_id);
4450
4451 hypot_type_id = d->hypot_map[canon_id];
4452 if (hypot_type_id <= BTF_MAX_NR_TYPES) {
4453 if (hypot_type_id == cand_id)
4454 return 1;
4455 /* In some cases compiler will generate different DWARF types
4456 * for *identical* array type definitions and use them for
4457 * different fields within the *same* struct. This breaks type
4458 * equivalence check, which makes an assumption that candidate
4459 * types sub-graph has a consistent and deduped-by-compiler
4460 * types within a single CU. So work around that by explicitly
4461 * allowing identical array types here.
4462 */
4463 if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
4464 return 1;
4465 /* It turns out that similar situation can happen with
4466 * struct/union sometimes, sigh... Handle the case where
4467 * structs/unions are exactly the same, down to the referenced
4468 * type IDs. Anything more complicated (e.g., if referenced
4469 * types are different, but equivalent) is *way more*
4470 * complicated and requires a many-to-many equivalence mapping.
4471 */
4472 if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
4473 return 1;
4474 return 0;
4475 }
4476
4477 if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
4478 return -ENOMEM;
4479
4480 cand_type = btf_type_by_id(d->btf, cand_id);
4481 canon_type = btf_type_by_id(d->btf, canon_id);
4482 cand_kind = btf_kind(cand_type);
4483 canon_kind = btf_kind(canon_type);
4484
4485 if (cand_type->name_off != canon_type->name_off)
4486 return 0;
4487
4488 /* FWD <--> STRUCT/UNION equivalence check, if enabled */
4489 if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
4490 && cand_kind != canon_kind) {
4491 __u16 real_kind;
4492 __u16 fwd_kind;
4493
4494 if (cand_kind == BTF_KIND_FWD) {
4495 real_kind = canon_kind;
4496 fwd_kind = btf_fwd_kind(cand_type);
4497 } else {
4498 real_kind = cand_kind;
4499 fwd_kind = btf_fwd_kind(canon_type);
4500 /* we'd need to resolve base FWD to STRUCT/UNION */
4501 if (fwd_kind == real_kind && canon_id < d->btf->start_id)
4502 d->hypot_adjust_canon = true;
4503 }
4504 return fwd_kind == real_kind;
4505 }
4506
4507 if (cand_kind != canon_kind)
4508 return 0;
4509
4510 switch (cand_kind) {
4511 case BTF_KIND_INT:
4512 return btf_equal_int_tag(cand_type, canon_type);
4513
4514 case BTF_KIND_ENUM:
4515 case BTF_KIND_ENUM64:
4516 return btf_compat_enum(cand_type, canon_type);
4517
4518 case BTF_KIND_FWD:
4519 case BTF_KIND_FLOAT:
4520 return btf_equal_common(cand_type, canon_type);
4521
4522 case BTF_KIND_CONST:
4523 case BTF_KIND_VOLATILE:
4524 case BTF_KIND_RESTRICT:
4525 case BTF_KIND_PTR:
4526 case BTF_KIND_TYPEDEF:
4527 case BTF_KIND_FUNC:
4528 case BTF_KIND_TYPE_TAG:
4529 if (cand_type->info != canon_type->info)
4530 return 0;
4531 return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
4532
4533 case BTF_KIND_ARRAY: {
4534 const struct btf_array *cand_arr, *canon_arr;
4535
4536 if (!btf_compat_array(cand_type, canon_type))
4537 return 0;
4538 cand_arr = btf_array(cand_type);
4539 canon_arr = btf_array(canon_type);
4540 eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type);
4541 if (eq <= 0)
4542 return eq;
4543 return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
4544 }
4545
4546 case BTF_KIND_STRUCT:
4547 case BTF_KIND_UNION: {
4548 const struct btf_member *cand_m, *canon_m;
4549 __u16 vlen;
4550
4551 if (!btf_shallow_equal_struct(cand_type, canon_type))
4552 return 0;
4553 vlen = btf_vlen(cand_type);
4554 cand_m = btf_members(cand_type);
4555 canon_m = btf_members(canon_type);
4556 for (i = 0; i < vlen; i++) {
4557 eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
4558 if (eq <= 0)
4559 return eq;
4560 cand_m++;
4561 canon_m++;
4562 }
4563
4564 return 1;
4565 }
4566
4567 case BTF_KIND_FUNC_PROTO: {
4568 const struct btf_param *cand_p, *canon_p;
4569 __u16 vlen;
4570
4571 if (!btf_compat_fnproto(cand_type, canon_type))
4572 return 0;
4573 eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
4574 if (eq <= 0)
4575 return eq;
4576 vlen = btf_vlen(cand_type);
4577 cand_p = btf_params(cand_type);
4578 canon_p = btf_params(canon_type);
4579 for (i = 0; i < vlen; i++) {
4580 eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
4581 if (eq <= 0)
4582 return eq;
4583 cand_p++;
4584 canon_p++;
4585 }
4586 return 1;
4587 }
4588
4589 default:
4590 return -EINVAL;
4591 }
4592 return 0;
4593}
4594
4595/*
4596 * Use hypothetical mapping, produced by successful type graph equivalence
4597 * check, to augment existing struct/union canonical mapping, where possible.
4598 *
4599 * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
4600 * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional:
4601 * it doesn't matter if FWD type was part of canonical graph or candidate one,
4602 * we are recording the mapping anyway. As opposed to carefulness required
4603 * for struct/union correspondence mapping (described below), for FWD resolution
4604 * it's not important, as by the time that FWD type (reference type) will be
4605 * deduplicated all structs/unions will be deduped already anyway.
4606 *
4607 * Recording STRUCT/UNION mapping is purely a performance optimization and is
4608 * not required for correctness. It needs to be done carefully to ensure that
4609 * struct/union from candidate's type graph is not mapped into corresponding
4610 * struct/union from canonical type graph that itself hasn't been resolved into
4611 * canonical representative. The only guarantee we have is that canonical
4612 * struct/union was determined as canonical and that won't change. But any
4613 * types referenced through that struct/union fields could have been not yet
4614 * resolved, so in case like that it's too early to establish any kind of
4615 * correspondence between structs/unions.
4616 *
4617 * No canonical correspondence is derived for primitive types (they are already
4618 * deduplicated completely already anyway) or reference types (they rely on
4619 * stability of struct/union canonical relationship for equivalence checks).
4620 */
4621static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
4622{
4623 __u32 canon_type_id, targ_type_id;
4624 __u16 t_kind, c_kind;
4625 __u32 t_id, c_id;
4626 int i;
4627
4628 for (i = 0; i < d->hypot_cnt; i++) {
4629 canon_type_id = d->hypot_list[i];
4630 targ_type_id = d->hypot_map[canon_type_id];
4631 t_id = resolve_type_id(d, targ_type_id);
4632 c_id = resolve_type_id(d, canon_type_id);
4633 t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
4634 c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
4635 /*
4636 * Resolve FWD into STRUCT/UNION.
4637 * It's ok to resolve FWD into STRUCT/UNION that's not yet
4638 * mapped to canonical representative (as opposed to
4639 * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
4640 * eventually that struct is going to be mapped and all resolved
4641 * FWDs will automatically resolve to correct canonical
4642 * representative. This will happen before ref type deduping,
4643 * which critically depends on stability of these mapping. This
4644 * stability is not a requirement for STRUCT/UNION equivalence
4645 * checks, though.
4646 */
4647
4648 /* if it's the split BTF case, we still need to point base FWD
4649 * to STRUCT/UNION in a split BTF, because FWDs from split BTF
4650 * will be resolved against base FWD. If we don't point base
4651 * canonical FWD to the resolved STRUCT/UNION, then all the
4652 * FWDs in split BTF won't be correctly resolved to a proper
4653 * STRUCT/UNION.
4654 */
4655 if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
4656 d->map[c_id] = t_id;
4657
4658 /* if graph equivalence determined that we'd need to adjust
4659 * base canonical types, then we need to only point base FWDs
4660 * to STRUCTs/UNIONs and do no more modifications. For all
4661 * other purposes the type graphs were not equivalent.
4662 */
4663 if (d->hypot_adjust_canon)
4664 continue;
4665
4666 if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
4667 d->map[t_id] = c_id;
4668
4669 if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
4670 c_kind != BTF_KIND_FWD &&
4671 is_type_mapped(d, c_id) &&
4672 !is_type_mapped(d, t_id)) {
4673 /*
4674 * as a perf optimization, we can map struct/union
4675 * that's part of type graph we just verified for
4676 * equivalence. We can do that for struct/union that has
4677 * canonical representative only, though.
4678 */
4679 d->map[t_id] = c_id;
4680 }
4681 }
4682}
4683
4684/*
4685 * Deduplicate struct/union types.
4686 *
4687 * For each struct/union type its type signature hash is calculated, taking
4688 * into account type's name, size, number, order and names of fields, but
4689 * ignoring type ID's referenced from fields, because they might not be deduped
4690 * completely until after reference types deduplication phase. This type hash
4691 * is used to iterate over all potential canonical types, sharing same hash.
4692 * For each canonical candidate we check whether type graphs that they form
4693 * (through referenced types in fields and so on) are equivalent using algorithm
4694 * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
4695 * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
4696 * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
4697 * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
4698 * potentially map other structs/unions to their canonical representatives,
4699 * if such relationship hasn't yet been established. This speeds up algorithm
4700 * by eliminating some of the duplicate work.
4701 *
4702 * If no matching canonical representative was found, struct/union is marked
4703 * as canonical for itself and is added into btf_dedup->dedup_table hash map
4704 * for further look ups.
4705 */
4706static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
4707{
4708 struct btf_type *cand_type, *t;
4709 struct hashmap_entry *hash_entry;
4710 /* if we don't find equivalent type, then we are canonical */
4711 __u32 new_id = type_id;
4712 __u16 kind;
4713 long h;
4714
4715 /* already deduped or is in process of deduping (loop detected) */
4716 if (d->map[type_id] <= BTF_MAX_NR_TYPES)
4717 return 0;
4718
4719 t = btf_type_by_id(d->btf, type_id);
4720 kind = btf_kind(t);
4721
4722 if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
4723 return 0;
4724
4725 h = btf_hash_struct(t);
4726 for_each_dedup_cand(d, hash_entry, h) {
4727 __u32 cand_id = hash_entry->value;
4728 int eq;
4729
4730 /*
4731 * Even though btf_dedup_is_equiv() checks for
4732 * btf_shallow_equal_struct() internally when checking two
4733 * structs (unions) for equivalence, we need to guard here
4734 * from picking matching FWD type as a dedup candidate.
4735 * This can happen due to hash collision. In such case just
4736 * relying on btf_dedup_is_equiv() would lead to potentially
4737 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
4738 * FWD and compatible STRUCT/UNION are considered equivalent.
4739 */
4740 cand_type = btf_type_by_id(d->btf, cand_id);
4741 if (!btf_shallow_equal_struct(t, cand_type))
4742 continue;
4743
4744 btf_dedup_clear_hypot_map(d);
4745 eq = btf_dedup_is_equiv(d, type_id, cand_id);
4746 if (eq < 0)
4747 return eq;
4748 if (!eq)
4749 continue;
4750 btf_dedup_merge_hypot_map(d);
4751 if (d->hypot_adjust_canon) /* not really equivalent */
4752 continue;
4753 new_id = cand_id;
4754 break;
4755 }
4756
4757 d->map[type_id] = new_id;
4758 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
4759 return -ENOMEM;
4760
4761 return 0;
4762}
4763
4764static int btf_dedup_struct_types(struct btf_dedup *d)
4765{
4766 int i, err;
4767
4768 for (i = 0; i < d->btf->nr_types; i++) {
4769 err = btf_dedup_struct_type(d, d->btf->start_id + i);
4770 if (err)
4771 return err;
4772 }
4773 return 0;
4774}
4775
4776/*
4777 * Deduplicate reference type.
4778 *
4779 * Once all primitive and struct/union types got deduplicated, we can easily
4780 * deduplicate all other (reference) BTF types. This is done in two steps:
4781 *
4782 * 1. Resolve all referenced type IDs into their canonical type IDs. This
4783 * resolution can be done either immediately for primitive or struct/union types
4784 * (because they were deduped in previous two phases) or recursively for
4785 * reference types. Recursion will always terminate at either primitive or
4786 * struct/union type, at which point we can "unwind" chain of reference types
4787 * one by one. There is no danger of encountering cycles because in C type
4788 * system the only way to form type cycle is through struct/union, so any chain
4789 * of reference types, even those taking part in a type cycle, will inevitably
4790 * reach struct/union at some point.
4791 *
4792 * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
4793 * becomes "stable", in the sense that no further deduplication will cause
4794 * any changes to it. With that, it's now possible to calculate type's signature
4795 * hash (this time taking into account referenced type IDs) and loop over all
4796 * potential canonical representatives. If no match was found, current type
4797 * will become canonical representative of itself and will be added into
4798 * btf_dedup->dedup_table as another possible canonical representative.
4799 */
4800static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
4801{
4802 struct hashmap_entry *hash_entry;
4803 __u32 new_id = type_id, cand_id;
4804 struct btf_type *t, *cand;
4805 /* if we don't find equivalent type, then we are representative type */
4806 int ref_type_id;
4807 long h;
4808
4809 if (d->map[type_id] == BTF_IN_PROGRESS_ID)
4810 return -ELOOP;
4811 if (d->map[type_id] <= BTF_MAX_NR_TYPES)
4812 return resolve_type_id(d, type_id);
4813
4814 t = btf_type_by_id(d->btf, type_id);
4815 d->map[type_id] = BTF_IN_PROGRESS_ID;
4816
4817 switch (btf_kind(t)) {
4818 case BTF_KIND_CONST:
4819 case BTF_KIND_VOLATILE:
4820 case BTF_KIND_RESTRICT:
4821 case BTF_KIND_PTR:
4822 case BTF_KIND_TYPEDEF:
4823 case BTF_KIND_FUNC:
4824 case BTF_KIND_TYPE_TAG:
4825 ref_type_id = btf_dedup_ref_type(d, t->type);
4826 if (ref_type_id < 0)
4827 return ref_type_id;
4828 t->type = ref_type_id;
4829
4830 h = btf_hash_common(t);
4831 for_each_dedup_cand(d, hash_entry, h) {
4832 cand_id = hash_entry->value;
4833 cand = btf_type_by_id(d->btf, cand_id);
4834 if (btf_equal_common(t, cand)) {
4835 new_id = cand_id;
4836 break;
4837 }
4838 }
4839 break;
4840
4841 case BTF_KIND_DECL_TAG:
4842 ref_type_id = btf_dedup_ref_type(d, t->type);
4843 if (ref_type_id < 0)
4844 return ref_type_id;
4845 t->type = ref_type_id;
4846
4847 h = btf_hash_int_decl_tag(t);
4848 for_each_dedup_cand(d, hash_entry, h) {
4849 cand_id = hash_entry->value;
4850 cand = btf_type_by_id(d->btf, cand_id);
4851 if (btf_equal_int_tag(t, cand)) {
4852 new_id = cand_id;
4853 break;
4854 }
4855 }
4856 break;
4857
4858 case BTF_KIND_ARRAY: {
4859 struct btf_array *info = btf_array(t);
4860
4861 ref_type_id = btf_dedup_ref_type(d, info->type);
4862 if (ref_type_id < 0)
4863 return ref_type_id;
4864 info->type = ref_type_id;
4865
4866 ref_type_id = btf_dedup_ref_type(d, info->index_type);
4867 if (ref_type_id < 0)
4868 return ref_type_id;
4869 info->index_type = ref_type_id;
4870
4871 h = btf_hash_array(t);
4872 for_each_dedup_cand(d, hash_entry, h) {
4873 cand_id = hash_entry->value;
4874 cand = btf_type_by_id(d->btf, cand_id);
4875 if (btf_equal_array(t, cand)) {
4876 new_id = cand_id;
4877 break;
4878 }
4879 }
4880 break;
4881 }
4882
4883 case BTF_KIND_FUNC_PROTO: {
4884 struct btf_param *param;
4885 __u16 vlen;
4886 int i;
4887
4888 ref_type_id = btf_dedup_ref_type(d, t->type);
4889 if (ref_type_id < 0)
4890 return ref_type_id;
4891 t->type = ref_type_id;
4892
4893 vlen = btf_vlen(t);
4894 param = btf_params(t);
4895 for (i = 0; i < vlen; i++) {
4896 ref_type_id = btf_dedup_ref_type(d, param->type);
4897 if (ref_type_id < 0)
4898 return ref_type_id;
4899 param->type = ref_type_id;
4900 param++;
4901 }
4902
4903 h = btf_hash_fnproto(t);
4904 for_each_dedup_cand(d, hash_entry, h) {
4905 cand_id = hash_entry->value;
4906 cand = btf_type_by_id(d->btf, cand_id);
4907 if (btf_equal_fnproto(t, cand)) {
4908 new_id = cand_id;
4909 break;
4910 }
4911 }
4912 break;
4913 }
4914
4915 default:
4916 return -EINVAL;
4917 }
4918
4919 d->map[type_id] = new_id;
4920 if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
4921 return -ENOMEM;
4922
4923 return new_id;
4924}
4925
4926static int btf_dedup_ref_types(struct btf_dedup *d)
4927{
4928 int i, err;
4929
4930 for (i = 0; i < d->btf->nr_types; i++) {
4931 err = btf_dedup_ref_type(d, d->btf->start_id + i);
4932 if (err < 0)
4933 return err;
4934 }
4935 /* we won't need d->dedup_table anymore */
4936 hashmap__free(d->dedup_table);
4937 d->dedup_table = NULL;
4938 return 0;
4939}
4940
4941/*
4942 * Collect a map from type names to type ids for all canonical structs
4943 * and unions. If the same name is shared by several canonical types
4944 * use a special value 0 to indicate this fact.
4945 */
4946static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map)
4947{
4948 __u32 nr_types = btf__type_cnt(d->btf);
4949 struct btf_type *t;
4950 __u32 type_id;
4951 __u16 kind;
4952 int err;
4953
4954 /*
4955 * Iterate over base and split module ids in order to get all
4956 * available structs in the map.
4957 */
4958 for (type_id = 1; type_id < nr_types; ++type_id) {
4959 t = btf_type_by_id(d->btf, type_id);
4960 kind = btf_kind(t);
4961
4962 if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
4963 continue;
4964
4965 /* Skip non-canonical types */
4966 if (type_id != d->map[type_id])
4967 continue;
4968
4969 err = hashmap__add(names_map, t->name_off, type_id);
4970 if (err == -EEXIST)
4971 err = hashmap__set(names_map, t->name_off, 0, NULL, NULL);
4972
4973 if (err)
4974 return err;
4975 }
4976
4977 return 0;
4978}
4979
4980static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id)
4981{
4982 struct btf_type *t = btf_type_by_id(d->btf, type_id);
4983 enum btf_fwd_kind fwd_kind = btf_kflag(t);
4984 __u16 cand_kind, kind = btf_kind(t);
4985 struct btf_type *cand_t;
4986 uintptr_t cand_id;
4987
4988 if (kind != BTF_KIND_FWD)
4989 return 0;
4990
4991 /* Skip if this FWD already has a mapping */
4992 if (type_id != d->map[type_id])
4993 return 0;
4994
4995 if (!hashmap__find(names_map, t->name_off, &cand_id))
4996 return 0;
4997
4998 /* Zero is a special value indicating that name is not unique */
4999 if (!cand_id)
5000 return 0;
5001
5002 cand_t = btf_type_by_id(d->btf, cand_id);
5003 cand_kind = btf_kind(cand_t);
5004 if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) ||
5005 (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION))
5006 return 0;
5007
5008 d->map[type_id] = cand_id;
5009
5010 return 0;
5011}
5012
5013/*
5014 * Resolve unambiguous forward declarations.
5015 *
5016 * The lion's share of all FWD declarations is resolved during
5017 * `btf_dedup_struct_types` phase when different type graphs are
5018 * compared against each other. However, if in some compilation unit a
5019 * FWD declaration is not a part of a type graph compared against
5020 * another type graph that declaration's canonical type would not be
5021 * changed. Example:
5022 *
5023 * CU #1:
5024 *
5025 * struct foo;
5026 * struct foo *some_global;
5027 *
5028 * CU #2:
5029 *
5030 * struct foo { int u; };
5031 * struct foo *another_global;
5032 *
5033 * After `btf_dedup_struct_types` the BTF looks as follows:
5034 *
5035 * [1] STRUCT 'foo' size=4 vlen=1 ...
5036 * [2] INT 'int' size=4 ...
5037 * [3] PTR '(anon)' type_id=1
5038 * [4] FWD 'foo' fwd_kind=struct
5039 * [5] PTR '(anon)' type_id=4
5040 *
5041 * This pass assumes that such FWD declarations should be mapped to
5042 * structs or unions with identical name in case if the name is not
5043 * ambiguous.
5044 */
5045static int btf_dedup_resolve_fwds(struct btf_dedup *d)
5046{
5047 int i, err;
5048 struct hashmap *names_map;
5049
5050 names_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
5051 if (IS_ERR(names_map))
5052 return PTR_ERR(names_map);
5053
5054 err = btf_dedup_fill_unique_names_map(d, names_map);
5055 if (err < 0)
5056 goto exit;
5057
5058 for (i = 0; i < d->btf->nr_types; i++) {
5059 err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i);
5060 if (err < 0)
5061 break;
5062 }
5063
5064exit:
5065 hashmap__free(names_map);
5066 return err;
5067}
5068
5069/*
5070 * Compact types.
5071 *
5072 * After we established for each type its corresponding canonical representative
5073 * type, we now can eliminate types that are not canonical and leave only
5074 * canonical ones layed out sequentially in memory by copying them over
5075 * duplicates. During compaction btf_dedup->hypot_map array is reused to store
5076 * a map from original type ID to a new compacted type ID, which will be used
5077 * during next phase to "fix up" type IDs, referenced from struct/union and
5078 * reference types.
5079 */
5080static int btf_dedup_compact_types(struct btf_dedup *d)
5081{
5082 __u32 *new_offs;
5083 __u32 next_type_id = d->btf->start_id;
5084 const struct btf_type *t;
5085 void *p;
5086 int i, id, len;
5087
5088 /* we are going to reuse hypot_map to store compaction remapping */
5089 d->hypot_map[0] = 0;
5090 /* base BTF types are not renumbered */
5091 for (id = 1; id < d->btf->start_id; id++)
5092 d->hypot_map[id] = id;
5093 for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++)
5094 d->hypot_map[id] = BTF_UNPROCESSED_ID;
5095
5096 p = d->btf->types_data;
5097
5098 for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) {
5099 if (d->map[id] != id)
5100 continue;
5101
5102 t = btf__type_by_id(d->btf, id);
5103 len = btf_type_size(t);
5104 if (len < 0)
5105 return len;
5106
5107 memmove(p, t, len);
5108 d->hypot_map[id] = next_type_id;
5109 d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data;
5110 p += len;
5111 next_type_id++;
5112 }
5113
5114 /* shrink struct btf's internal types index and update btf_header */
5115 d->btf->nr_types = next_type_id - d->btf->start_id;
5116 d->btf->type_offs_cap = d->btf->nr_types;
5117 d->btf->hdr->type_len = p - d->btf->types_data;
5118 new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
5119 sizeof(*new_offs));
5120 if (d->btf->type_offs_cap && !new_offs)
5121 return -ENOMEM;
5122 d->btf->type_offs = new_offs;
5123 d->btf->hdr->str_off = d->btf->hdr->type_len;
5124 d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
5125 return 0;
5126}
5127
5128/*
5129 * Figure out final (deduplicated and compacted) type ID for provided original
5130 * `type_id` by first resolving it into corresponding canonical type ID and
5131 * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
5132 * which is populated during compaction phase.
5133 */
5134static int btf_dedup_remap_type_id(__u32 *type_id, void *ctx)
5135{
5136 struct btf_dedup *d = ctx;
5137 __u32 resolved_type_id, new_type_id;
5138
5139 resolved_type_id = resolve_type_id(d, *type_id);
5140 new_type_id = d->hypot_map[resolved_type_id];
5141 if (new_type_id > BTF_MAX_NR_TYPES)
5142 return -EINVAL;
5143
5144 *type_id = new_type_id;
5145 return 0;
5146}
5147
5148/*
5149 * Remap referenced type IDs into deduped type IDs.
5150 *
5151 * After BTF types are deduplicated and compacted, their final type IDs may
5152 * differ from original ones. The map from original to a corresponding
5153 * deduped type ID is stored in btf_dedup->hypot_map and is populated during
5154 * compaction phase. During remapping phase we are rewriting all type IDs
5155 * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
5156 * their final deduped type IDs.
5157 */
5158static int btf_dedup_remap_types(struct btf_dedup *d)
5159{
5160 int i, r;
5161
5162 for (i = 0; i < d->btf->nr_types; i++) {
5163 struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
5164 struct btf_field_iter it;
5165 __u32 *type_id;
5166
5167 r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
5168 if (r)
5169 return r;
5170
5171 while ((type_id = btf_field_iter_next(&it))) {
5172 __u32 resolved_id, new_id;
5173
5174 resolved_id = resolve_type_id(d, *type_id);
5175 new_id = d->hypot_map[resolved_id];
5176 if (new_id > BTF_MAX_NR_TYPES)
5177 return -EINVAL;
5178
5179 *type_id = new_id;
5180 }
5181 }
5182
5183 if (!d->btf_ext)
5184 return 0;
5185
5186 r = btf_ext_visit_type_ids(d->btf_ext, btf_dedup_remap_type_id, d);
5187 if (r)
5188 return r;
5189
5190 return 0;
5191}
5192
5193/*
5194 * Probe few well-known locations for vmlinux kernel image and try to load BTF
5195 * data out of it to use for target BTF.
5196 */
5197struct btf *btf__load_vmlinux_btf(void)
5198{
5199 const char *sysfs_btf_path = "/sys/kernel/btf/vmlinux";
5200 /* fall back locations, trying to find vmlinux on disk */
5201 const char *locations[] = {
5202 "/boot/vmlinux-%1$s",
5203 "/lib/modules/%1$s/vmlinux-%1$s",
5204 "/lib/modules/%1$s/build/vmlinux",
5205 "/usr/lib/modules/%1$s/kernel/vmlinux",
5206 "/usr/lib/debug/boot/vmlinux-%1$s",
5207 "/usr/lib/debug/boot/vmlinux-%1$s.debug",
5208 "/usr/lib/debug/lib/modules/%1$s/vmlinux",
5209 };
5210 char path[PATH_MAX + 1];
5211 struct utsname buf;
5212 struct btf *btf;
5213 int i, err;
5214
5215 /* is canonical sysfs location accessible? */
5216 if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) {
5217 pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
5218 sysfs_btf_path);
5219 } else {
5220 btf = btf__parse(sysfs_btf_path, NULL);
5221 if (!btf) {
5222 err = -errno;
5223 pr_warn("failed to read kernel BTF from '%s': %s\n",
5224 sysfs_btf_path, errstr(err));
5225 return libbpf_err_ptr(err);
5226 }
5227 pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path);
5228 return btf;
5229 }
5230
5231 /* try fallback locations */
5232 uname(&buf);
5233 for (i = 0; i < ARRAY_SIZE(locations); i++) {
5234 snprintf(path, PATH_MAX, locations[i], buf.release);
5235
5236 if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS))
5237 continue;
5238
5239 btf = btf__parse(path, NULL);
5240 err = libbpf_get_error(btf);
5241 pr_debug("loading kernel BTF '%s': %s\n", path, errstr(err));
5242 if (err)
5243 continue;
5244
5245 return btf;
5246 }
5247
5248 pr_warn("failed to find valid kernel BTF\n");
5249 return libbpf_err_ptr(-ESRCH);
5250}
5251
5252struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
5253
/*
 * Load the BTF of kernel module `module_name` from /sys/kernel/btf/ as split
 * BTF on top of `vmlinux_btf`.
 *
 * Returns whatever btf__parse_split() returns for the module's sysfs path.
 * If the resulting path does not fit into the local buffer (or cannot be
 * formatted), nothing is opened and an -ENAMETOOLONG error pointer is
 * returned instead of silently parsing a truncated, and therefore wrong, path.
 */
struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
{
	char path[80];
	int len;

	len = snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name);
	/* snprintf() reports the length it wanted to write; >= size means truncation */
	if (len < 0 || (size_t)len >= sizeof(path))
		return libbpf_err_ptr(-ENAMETOOLONG);

	return btf__parse_split(path, vmlinux_btf);
}
5261
5262int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx)
5263{
5264 const struct btf_ext_info *seg;
5265 struct btf_ext_info_sec *sec;
5266 int i, err;
5267
5268 seg = &btf_ext->func_info;
5269 for_each_btf_ext_sec(seg, sec) {
5270 struct bpf_func_info_min *rec;
5271
5272 for_each_btf_ext_rec(seg, sec, i, rec) {
5273 err = visit(&rec->type_id, ctx);
5274 if (err < 0)
5275 return err;
5276 }
5277 }
5278
5279 seg = &btf_ext->core_relo_info;
5280 for_each_btf_ext_sec(seg, sec) {
5281 struct bpf_core_relo *rec;
5282
5283 for_each_btf_ext_rec(seg, sec, i, rec) {
5284 err = visit(&rec->type_id, ctx);
5285 if (err < 0)
5286 return err;
5287 }
5288 }
5289
5290 return 0;
5291}
5292
5293int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx)
5294{
5295 const struct btf_ext_info *seg;
5296 struct btf_ext_info_sec *sec;
5297 int i, err;
5298
5299 seg = &btf_ext->func_info;
5300 for_each_btf_ext_sec(seg, sec) {
5301 err = visit(&sec->sec_name_off, ctx);
5302 if (err)
5303 return err;
5304 }
5305
5306 seg = &btf_ext->line_info;
5307 for_each_btf_ext_sec(seg, sec) {
5308 struct bpf_line_info_min *rec;
5309
5310 err = visit(&sec->sec_name_off, ctx);
5311 if (err)
5312 return err;
5313
5314 for_each_btf_ext_rec(seg, sec, i, rec) {
5315 err = visit(&rec->file_name_off, ctx);
5316 if (err)
5317 return err;
5318 err = visit(&rec->line_off, ctx);
5319 if (err)
5320 return err;
5321 }
5322 }
5323
5324 seg = &btf_ext->core_relo_info;
5325 for_each_btf_ext_sec(seg, sec) {
5326 struct bpf_core_relo *rec;
5327
5328 err = visit(&sec->sec_name_off, ctx);
5329 if (err)
5330 return err;
5331
5332 for_each_btf_ext_rec(seg, sec, i, rec) {
5333 err = visit(&rec->access_str_off, ctx);
5334 if (err)
5335 return err;
5336 }
5337 }
5338
5339 return 0;
5340}
5341
5342struct btf_distill {
5343 struct btf_pipe pipe;
5344 int *id_map;
5345 unsigned int split_start_id;
5346 unsigned int split_start_str;
5347 int diff_id;
5348};
5349
5350static int btf_add_distilled_type_ids(struct btf_distill *dist, __u32 i)
5351{
5352 struct btf_type *split_t = btf_type_by_id(dist->pipe.src, i);
5353 struct btf_field_iter it;
5354 __u32 *id;
5355 int err;
5356
5357 err = btf_field_iter_init(&it, split_t, BTF_FIELD_ITER_IDS);
5358 if (err)
5359 return err;
5360 while ((id = btf_field_iter_next(&it))) {
5361 struct btf_type *base_t;
5362
5363 if (!*id)
5364 continue;
5365 /* split BTF id, not needed */
5366 if (*id >= dist->split_start_id)
5367 continue;
5368 /* already added ? */
5369 if (dist->id_map[*id] > 0)
5370 continue;
5371
5372 /* only a subset of base BTF types should be referenced from
5373 * split BTF; ensure nothing unexpected is referenced.
5374 */
5375 base_t = btf_type_by_id(dist->pipe.src, *id);
5376 switch (btf_kind(base_t)) {
5377 case BTF_KIND_INT:
5378 case BTF_KIND_FLOAT:
5379 case BTF_KIND_FWD:
5380 case BTF_KIND_ARRAY:
5381 case BTF_KIND_STRUCT:
5382 case BTF_KIND_UNION:
5383 case BTF_KIND_TYPEDEF:
5384 case BTF_KIND_ENUM:
5385 case BTF_KIND_ENUM64:
5386 case BTF_KIND_PTR:
5387 case BTF_KIND_CONST:
5388 case BTF_KIND_RESTRICT:
5389 case BTF_KIND_VOLATILE:
5390 case BTF_KIND_FUNC_PROTO:
5391 case BTF_KIND_TYPE_TAG:
5392 dist->id_map[*id] = *id;
5393 break;
5394 default:
5395 pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n",
5396 *id, btf_kind(base_t));
5397 return -EINVAL;
5398 }
5399 /* If a base type is used, ensure types it refers to are
5400 * marked as used also; so for example if we find a PTR to INT
5401 * we need both the PTR and INT.
5402 *
5403 * The only exception is named struct/unions, since distilled
5404 * base BTF composite types have no members.
5405 */
5406 if (btf_is_composite(base_t) && base_t->name_off)
5407 continue;
5408 err = btf_add_distilled_type_ids(dist, *id);
5409 if (err)
5410 return err;
5411 }
5412 return 0;
5413}
5414
5415static int btf_add_distilled_types(struct btf_distill *dist)
5416{
5417 bool adding_to_base = dist->pipe.dst->start_id == 1;
5418 int id = btf__type_cnt(dist->pipe.dst);
5419 struct btf_type *t;
5420 int i, err = 0;
5421
5422
5423 /* Add types for each of the required references to either distilled
5424 * base or split BTF, depending on type characteristics.
5425 */
5426 for (i = 1; i < dist->split_start_id; i++) {
5427 const char *name;
5428 int kind;
5429
5430 if (!dist->id_map[i])
5431 continue;
5432 t = btf_type_by_id(dist->pipe.src, i);
5433 kind = btf_kind(t);
5434 name = btf__name_by_offset(dist->pipe.src, t->name_off);
5435
5436 switch (kind) {
5437 case BTF_KIND_INT:
5438 case BTF_KIND_FLOAT:
5439 case BTF_KIND_FWD:
5440 /* Named int, float, fwd are added to base. */
5441 if (!adding_to_base)
5442 continue;
5443 err = btf_add_type(&dist->pipe, t);
5444 break;
5445 case BTF_KIND_STRUCT:
5446 case BTF_KIND_UNION:
5447 /* Named struct/union are added to base as 0-vlen
5448 * struct/union of same size. Anonymous struct/unions
5449 * are added to split BTF as-is.
5450 */
5451 if (adding_to_base) {
5452 if (!t->name_off)
5453 continue;
5454 err = btf_add_composite(dist->pipe.dst, kind, name, t->size);
5455 } else {
5456 if (t->name_off)
5457 continue;
5458 err = btf_add_type(&dist->pipe, t);
5459 }
5460 break;
5461 case BTF_KIND_ENUM:
5462 case BTF_KIND_ENUM64:
5463 /* Named enum[64]s are added to base as a sized
5464 * enum; relocation will match with appropriately-named
5465 * and sized enum or enum64.
5466 *
5467 * Anonymous enums are added to split BTF as-is.
5468 */
5469 if (adding_to_base) {
5470 if (!t->name_off)
5471 continue;
5472 err = btf__add_enum(dist->pipe.dst, name, t->size);
5473 } else {
5474 if (t->name_off)
5475 continue;
5476 err = btf_add_type(&dist->pipe, t);
5477 }
5478 break;
5479 case BTF_KIND_ARRAY:
5480 case BTF_KIND_TYPEDEF:
5481 case BTF_KIND_PTR:
5482 case BTF_KIND_CONST:
5483 case BTF_KIND_RESTRICT:
5484 case BTF_KIND_VOLATILE:
5485 case BTF_KIND_FUNC_PROTO:
5486 case BTF_KIND_TYPE_TAG:
5487 /* All other types are added to split BTF. */
5488 if (adding_to_base)
5489 continue;
5490 err = btf_add_type(&dist->pipe, t);
5491 break;
5492 default:
5493 pr_warn("unexpected kind when adding base type '%s'[%u] of kind [%u] to distilled base BTF.\n",
5494 name, i, kind);
5495 return -EINVAL;
5496
5497 }
5498 if (err < 0)
5499 break;
5500 dist->id_map[i] = id++;
5501 }
5502 return err;
5503}
5504
5505/* Split BTF ids without a mapping will be shifted downwards since distilled
5506 * base BTF is smaller than the original base BTF. For those that have a
5507 * mapping (either to base or updated split BTF), update the id based on
5508 * that mapping.
5509 */
5510static int btf_update_distilled_type_ids(struct btf_distill *dist, __u32 i)
5511{
5512 struct btf_type *t = btf_type_by_id(dist->pipe.dst, i);
5513 struct btf_field_iter it;
5514 __u32 *id;
5515 int err;
5516
5517 err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS);
5518 if (err)
5519 return err;
5520 while ((id = btf_field_iter_next(&it))) {
5521 if (dist->id_map[*id])
5522 *id = dist->id_map[*id];
5523 else if (*id >= dist->split_start_id)
5524 *id -= dist->diff_id;
5525 }
5526 return 0;
5527}
5528
5529/* Create updated split BTF with distilled base BTF; distilled base BTF
5530 * consists of BTF information required to clarify the types that split
5531 * BTF refers to, omitting unneeded details. Specifically it will contain
5532 * base types and memberless definitions of named structs, unions and enumerated
5533 * types. Associated reference types like pointers, arrays and anonymous
5534 * structs, unions and enumerated types will be added to split BTF.
5535 * Size is recorded for named struct/unions to help guide matching to the
5536 * target base BTF during later relocation.
5537 *
5538 * The only case where structs, unions or enumerated types are fully represented
5539 * is when they are anonymous; in such cases, the anonymous type is added to
5540 * split BTF in full.
5541 *
5542 * We return newly-created split BTF where the split BTF refers to a newly-created
5543 * distilled base BTF. Both must be freed separately by the caller.
5544 */
5545int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
5546 struct btf **new_split_btf)
5547{
5548 struct btf *new_base = NULL, *new_split = NULL;
5549 const struct btf *old_base;
5550 unsigned int n = btf__type_cnt(src_btf);
5551 struct btf_distill dist = {};
5552 struct btf_type *t;
5553 int i, err = 0;
5554
5555 /* src BTF must be split BTF. */
5556 old_base = btf__base_btf(src_btf);
5557 if (!new_base_btf || !new_split_btf || !old_base)
5558 return libbpf_err(-EINVAL);
5559
5560 new_base = btf__new_empty();
5561 if (!new_base)
5562 return libbpf_err(-ENOMEM);
5563
5564 btf__set_endianness(new_base, btf__endianness(src_btf));
5565
5566 dist.id_map = calloc(n, sizeof(*dist.id_map));
5567 if (!dist.id_map) {
5568 err = -ENOMEM;
5569 goto done;
5570 }
5571 dist.pipe.src = src_btf;
5572 dist.pipe.dst = new_base;
5573 dist.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
5574 if (IS_ERR(dist.pipe.str_off_map)) {
5575 err = -ENOMEM;
5576 goto done;
5577 }
5578 dist.split_start_id = btf__type_cnt(old_base);
5579 dist.split_start_str = old_base->hdr->str_len;
5580
5581 /* Pass over src split BTF; generate the list of base BTF type ids it
5582 * references; these will constitute our distilled BTF set to be
5583 * distributed over base and split BTF as appropriate.
5584 */
5585 for (i = src_btf->start_id; i < n; i++) {
5586 err = btf_add_distilled_type_ids(&dist, i);
5587 if (err < 0)
5588 goto done;
5589 }
5590 /* Next add types for each of the required references to base BTF and split BTF
5591 * in turn.
5592 */
5593 err = btf_add_distilled_types(&dist);
5594 if (err < 0)
5595 goto done;
5596
5597 /* Create new split BTF with distilled base BTF as its base; the final
5598 * state is split BTF with distilled base BTF that represents enough
5599 * about its base references to allow it to be relocated with the base
5600 * BTF available.
5601 */
5602 new_split = btf__new_empty_split(new_base);
5603 if (!new_split) {
5604 err = -errno;
5605 goto done;
5606 }
5607 dist.pipe.dst = new_split;
5608 /* First add all split types */
5609 for (i = src_btf->start_id; i < n; i++) {
5610 t = btf_type_by_id(src_btf, i);
5611 err = btf_add_type(&dist.pipe, t);
5612 if (err < 0)
5613 goto done;
5614 }
5615 /* Now add distilled types to split BTF that are not added to base. */
5616 err = btf_add_distilled_types(&dist);
5617 if (err < 0)
5618 goto done;
5619
5620 /* All split BTF ids will be shifted downwards since there are less base
5621 * BTF ids in distilled base BTF.
5622 */
5623 dist.diff_id = dist.split_start_id - btf__type_cnt(new_base);
5624
5625 n = btf__type_cnt(new_split);
5626 /* Now update base/split BTF ids. */
5627 for (i = 1; i < n; i++) {
5628 err = btf_update_distilled_type_ids(&dist, i);
5629 if (err < 0)
5630 break;
5631 }
5632done:
5633 free(dist.id_map);
5634 hashmap__free(dist.pipe.str_off_map);
5635 if (err) {
5636 btf__free(new_split);
5637 btf__free(new_base);
5638 return libbpf_err(err);
5639 }
5640 *new_base_btf = new_base;
5641 *new_split_btf = new_split;
5642
5643 return 0;
5644}
5645
5646const struct btf_header *btf_header(const struct btf *btf)
5647{
5648 return btf->hdr;
5649}
5650
5651void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
5652{
5653 btf->base_btf = (struct btf *)base_btf;
5654 btf->start_id = btf__type_cnt(base_btf);
5655 btf->start_str_off = base_btf->hdr->str_len;
5656}
5657
5658int btf__relocate(struct btf *btf, const struct btf *base_btf)
5659{
5660 int err = btf_relocate(btf, base_btf, NULL);
5661
5662 if (!err)
5663 btf->owns_base = false;
5664 return libbpf_err(err);
5665}