Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.5.6.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
  4 *
  5 * Test it with:
  6 *
  7 * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
  8 *
 * This exactly matches what is marshalled into the raw_syscalls:sys_enter
 * payload expected by the 'perf trace' beautifiers.
 11 *
 12 * For now it just uses the existing tracepoint augmentation code in 'perf
 13 * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
 14 * code that will combine entry/exit in a strace like way.
 15 */
 16
 17#include <linux/bpf.h>
 18#include <bpf/bpf_helpers.h>
 19#include <linux/limits.h>
 20
// FIXME: These should come from system headers
// Local stand-ins for types used further down in this file: bool and pid_t
// are the value/key types of the pids_filtered map, time64_t is used by
// struct timespec64.
typedef char bool;
typedef int pid_t;
typedef long long int __s64;
typedef __s64 time64_t;
 26
/*
 * Local definition of struct timespec64. sys_enter_clock_nanosleep copies
 * sizeof(struct timespec64) bytes from the syscall's third argument.
 */
struct timespec64 {
	time64_t	tv_sec;
	long int	tv_nsec;
};
 31
/*
 * bpf-output associated map.
 *
 * Perf event array that augmented__output() passes to
 * bpf_perf_event_output() together with BPF_F_CURRENT_CPU.
 * __NR_CPUS__ is not defined in this file; presumably it is supplied at
 * build time -- TODO confirm.
 */
struct __augmented_syscalls__ {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__type(key, int);
	__type(value, __u32);
	__uint(max_entries, __NR_CPUS__);
} __augmented_syscalls__ SEC(".maps");
 39
/*
 * What to augment at entry?
 *
 * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
 *
 * Program array that sys_enter tail-calls into, using the syscall number
 * found in the raw tracepoint payload as the index.
 */
struct syscalls_sys_enter {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__type(key, __u32);
	__type(value, __u32);
	__uint(max_entries, 512);
} syscalls_sys_enter SEC(".maps");
 51
/*
 * What to augment at exit?
 *
 * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
 *
 * Program array that sys_exit tail-calls into, using the syscall number
 * found in the raw tracepoint payload as the index.
 */
struct syscalls_sys_exit {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__type(key, __u32);
	__type(value, __u32);
	__uint(max_entries, 512);
} syscalls_sys_exit SEC(".maps");
 63
/*
 * Argument type of the sys_enter program and of the programs it tail-calls.
 * It is also the leading, non-augmented member of struct
 * augmented_args_payload.
 */
struct syscall_enter_args {
	unsigned long long common_tp_fields;
	long		   syscall_nr;	/* index used for the syscalls_sys_enter tail call */
	unsigned long	   args[6];	/* syscall arguments; pointer ones are read by the augmenters */
};
 69
/*
 * Argument type of the sys_exit program; sys_exit copies it to the stack to
 * get at syscall_nr.
 */
struct syscall_exit_args {
	unsigned long long common_tp_fields;
	long		   syscall_nr;	/* index used for the syscalls_sys_exit tail call */
	long		   ret;
};
 75
/*
 * One string argument as filled in by augmented_arg__read_str(): a small
 * header followed by the string bytes.
 */
struct augmented_arg {
	unsigned int	size;	/* positive bpf_probe_read_str() result, else 0 */
	int		err;	/* non-positive bpf_probe_read_str() result, else 0 */
	char		value[PATH_MAX];	/* destination of bpf_probe_read_str() */
};
 81
/*
 * Ids whose syscalls are dropped: sys_enter and sys_exit return 0 right away
 * when pid_filter__has() finds the current id here. Only the presence of the
 * key is tested, the bool value is never read in this file.
 */
struct pids_filtered {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, pid_t);
	__type(value, bool);
	__uint(max_entries, 64);
} pids_filtered SEC(".maps");
 88
/*
 * Desired design of maximum size and alignment (see RFC2553)
 */
#define SS_MAXSIZE   128     /* Implementation specific max size */

/* Type of the ss_family member of struct sockaddr_storage */
typedef unsigned short sa_family_t;
 95
/*
 * FIXME: Should come from system headers
 *
 * The definition uses anonymous union and struct in order to control the
 * default alignment.
 *
 * In this file it is the destination buffer, and the upper bound on the
 * length, for the sockaddr copied by sys_enter_connect and sys_enter_sendto.
 */
struct sockaddr_storage {
	union {
		struct {
			sa_family_t    ss_family; /* address family */
			/* Following field(s) are implementation specific */
			char __data[SS_MAXSIZE - sizeof(unsigned short)];
				/* space to achieve desired size, */
				/* SS_MAXSIZE value minus size of ss_family */
		};
		void *__align; /* implementation specific desired alignment */
	};
};
114
/*
 * What gets handed to bpf_perf_event_output(): the raw syscall payload
 * followed by one of the augmented views below. The views share storage, each
 * augmenter uses the one matching the pointer argument(s) it copies.
 */
struct augmented_args_payload {
       struct syscall_enter_args args;	/* filled in by sys_enter */
       union {
		struct {
			/* string arguments, e.g. open's filename or rename's two paths */
			struct augmented_arg arg, arg2;
		};
		struct sockaddr_storage saddr;	/* connect, sendto */
		char   __data[sizeof(struct augmented_arg)];	/* perf_event_open, clock_nanosleep */
	};
};
125
// We need more tmp space than the BPF stack can give us
// Single-entry per-CPU array holding one struct augmented_args_payload,
// looked up with key 0 by augmented_args_payload().
struct augmented_args_tmp {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, int);
	__type(value, struct augmented_args_payload);
	__uint(max_entries, 1);
} augmented_args_tmp SEC(".maps");
133
134static inline struct augmented_args_payload *augmented_args_payload(void)
135{
136	int key = 0;
137	return bpf_map_lookup_elem(&augmented_args_tmp, &key);
138}
139
140static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
141{
142	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
143	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
144}
145
/*
 * Read the string at @arg into @augmented_arg->value, at most @arg_len bytes,
 * using bpf_probe_read_str(), and fill in the size/err header.
 *
 * Returns the number of bytes of @augmented_arg to emit: computed from the
 * header and the string length on success, offsetof(struct augmented_arg,
 * value), i.e. just the header, when nothing was read.
 */
static inline
unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
{
	unsigned int augmented_len = sizeof(*augmented_arg);
	int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);

	augmented_arg->size = augmented_arg->err = 0;
	/*
	 * probe_read_str may return < 0, e.g. -EFAULT.
	 * In that case augmented_arg->size stays at zero and the error is
	 * left in augmented_arg->err, where userspace will find it.
	 */
	if (string_len > 0) {
		/* Don't emit the part of value[] past the string */
		augmented_len -= sizeof(augmented_arg->value) - string_len;
		/*
		 * NOTE(review): presumably this mask is here to give the BPF
		 * verifier a bounded length. It assumes sizeof(value) is a
		 * power of two, and the result wraps to a small number once
		 * header + string_len reaches sizeof(value) -- confirm that
		 * userspace copes with that case.
		 */
		augmented_len &= sizeof(augmented_arg->value) - 1;
		augmented_arg->size = string_len;
	} else {
		/*
		 * So that userspace notices the error while still being able
		 * to skip this augmented arg record
		 */
		augmented_arg->err = string_len;
		augmented_len = offsetof(struct augmented_arg, value);
	}

	return augmented_len;
}
172
173SEC("!raw_syscalls:unaugmented")
174int syscall_unaugmented(struct syscall_enter_args *args)
175{
176	return 1;
177}
178
179/*
180 * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in
181 * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go
182 * on from there, reading the first syscall arg as a string, i.e. open's
183 * filename.
184 */
185SEC("!syscalls:sys_enter_connect")
186int sys_enter_connect(struct syscall_enter_args *args)
187{
188	struct augmented_args_payload *augmented_args = augmented_args_payload();
189	const void *sockaddr_arg = (const void *)args->args[1];
190	unsigned int socklen = args->args[2];
191	unsigned int len = sizeof(augmented_args->args);
192
193        if (augmented_args == NULL)
194                return 1; /* Failure: don't filter */
195
196	if (socklen > sizeof(augmented_args->saddr))
197		socklen = sizeof(augmented_args->saddr);
198
199	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
200
201	return augmented__output(args, augmented_args, len + socklen);
202}
203
204SEC("!syscalls:sys_enter_sendto")
205int sys_enter_sendto(struct syscall_enter_args *args)
206{
207	struct augmented_args_payload *augmented_args = augmented_args_payload();
208	const void *sockaddr_arg = (const void *)args->args[4];
209	unsigned int socklen = args->args[5];
210	unsigned int len = sizeof(augmented_args->args);
211
212        if (augmented_args == NULL)
213                return 1; /* Failure: don't filter */
214
215	if (socklen > sizeof(augmented_args->saddr))
216		socklen = sizeof(augmented_args->saddr);
217
218	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
219
220	return augmented__output(args, augmented_args, len + socklen);
221}
222
223SEC("!syscalls:sys_enter_open")
224int sys_enter_open(struct syscall_enter_args *args)
225{
226	struct augmented_args_payload *augmented_args = augmented_args_payload();
227	const void *filename_arg = (const void *)args->args[0];
228	unsigned int len = sizeof(augmented_args->args);
229
230        if (augmented_args == NULL)
231                return 1; /* Failure: don't filter */
232
233	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
234
235	return augmented__output(args, augmented_args, len);
236}
237
238SEC("!syscalls:sys_enter_openat")
239int sys_enter_openat(struct syscall_enter_args *args)
240{
241	struct augmented_args_payload *augmented_args = augmented_args_payload();
242	const void *filename_arg = (const void *)args->args[1];
243	unsigned int len = sizeof(augmented_args->args);
244
245        if (augmented_args == NULL)
246                return 1; /* Failure: don't filter */
247
248	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
249
250	return augmented__output(args, augmented_args, len);
251}
252
253SEC("!syscalls:sys_enter_rename")
254int sys_enter_rename(struct syscall_enter_args *args)
255{
256	struct augmented_args_payload *augmented_args = augmented_args_payload();
257	const void *oldpath_arg = (const void *)args->args[0],
258		   *newpath_arg = (const void *)args->args[1];
259	unsigned int len = sizeof(augmented_args->args), oldpath_len;
260
261        if (augmented_args == NULL)
262                return 1; /* Failure: don't filter */
263
264	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
265	len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
266
267	return augmented__output(args, augmented_args, len);
268}
269
270SEC("!syscalls:sys_enter_renameat")
271int sys_enter_renameat(struct syscall_enter_args *args)
272{
273	struct augmented_args_payload *augmented_args = augmented_args_payload();
274	const void *oldpath_arg = (const void *)args->args[1],
275		   *newpath_arg = (const void *)args->args[3];
276	unsigned int len = sizeof(augmented_args->args), oldpath_len;
277
278        if (augmented_args == NULL)
279                return 1; /* Failure: don't filter */
280
281	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
282	len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value));
283
284	return augmented__output(args, augmented_args, len);
285}
286
/* Size assumed by sys_enter_perf_event_open when the attr's size field is zero */
#define PERF_ATTR_SIZE_VER0     64      /* sizeof first published struct */

// we need just the start, get the size to then copy it
// (sys_enter_perf_event_open reads this header first, then reads 'size' bytes)
struct perf_event_attr_size {
        __u32                   type;
        /*
         * Size of the attr structure, for fwd/bwd compat.
         */
        __u32                   size;
};
297
298SEC("!syscalls:sys_enter_perf_event_open")
299int sys_enter_perf_event_open(struct syscall_enter_args *args)
300{
301	struct augmented_args_payload *augmented_args = augmented_args_payload();
302	const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
303	unsigned int len = sizeof(augmented_args->args);
304
305        if (augmented_args == NULL)
306		goto failure;
307
308	if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
309		goto failure;
310
311	attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
312
313	__u32 size = attr_read->size;
314
315	if (!size)
316		size = PERF_ATTR_SIZE_VER0;
317
318	if (size > sizeof(augmented_args->__data))
319                goto failure;
320
321	// Now that we read attr->size and tested it against the size limits, read it completely
322	if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
323		goto failure;
324
325	return augmented__output(args, augmented_args, len + size);
326failure:
327	return 1; /* Failure: don't filter */
328}
329
330SEC("!syscalls:sys_enter_clock_nanosleep")
331int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
332{
333	struct augmented_args_payload *augmented_args = augmented_args_payload();
334	const void *rqtp_arg = (const void *)args->args[2];
335	unsigned int len = sizeof(augmented_args->args);
336	__u32 size = sizeof(struct timespec64);
337
338        if (augmented_args == NULL)
339		goto failure;
340
341	if (size > sizeof(augmented_args->__data))
342                goto failure;
343
344	bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
345
346	return augmented__output(args, augmented_args, len + size);
347failure:
348	return 1; /* Failure: don't filter */
349}
350
351static pid_t getpid(void)
352{
353	return bpf_get_current_pid_tgid();
354}
355
356static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
357{
358	return bpf_map_lookup_elem(pids, &pid) != NULL;
359}
360
361SEC("raw_syscalls:sys_enter")
362int sys_enter(struct syscall_enter_args *args)
363{
364	struct augmented_args_payload *augmented_args;
365	/*
366	 * We start len, the amount of data that will be in the perf ring
367	 * buffer, if this is not filtered out by one of pid_filter__has(),
368	 * syscall->enabled, etc, with the non-augmented raw syscall payload,
369	 * i.e. sizeof(augmented_args->args).
370	 *
371	 * We'll add to this as we add augmented syscalls right after that
372	 * initial, non-augmented raw_syscalls:sys_enter payload.
373	 */
374	unsigned int len = sizeof(augmented_args->args);
375
376	if (pid_filter__has(&pids_filtered, getpid()))
377		return 0;
378
379	augmented_args = augmented_args_payload();
380	if (augmented_args == NULL)
381		return 1;
382
383	bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
384
385	/*
386	 * Jump to syscall specific augmenter, even if the default one,
387	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
388	 * unaugmented tracepoint payload.
389	 */
390	bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
391
392	// If not found on the PROG_ARRAY syscalls map, then we're filtering it:
393	return 0;
394}
395
396SEC("raw_syscalls:sys_exit")
397int sys_exit(struct syscall_exit_args *args)
398{
399	struct syscall_exit_args exit_args;
400
401	if (pid_filter__has(&pids_filtered, getpid()))
402		return 0;
403
404	bpf_probe_read(&exit_args, sizeof(exit_args), args);
405	/*
406	 * Jump to syscall specific return augmenter, even if the default one,
407	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
408	 * unaugmented tracepoint payload.
409	 */
410	bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
411	/*
412	 * If not found on the PROG_ARRAY syscalls map, then we're filtering it:
413	 */
414	return 0;
415}
416
/* License string, placed in the "license" section of the object file */
char _license[] SEC("license") = "GPL";