Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * event probes
   4 *
   5 * Part of this code was copied from kernel/trace/trace_kprobe.c written by
   6 * Masami Hiramatsu <mhiramat@kernel.org>
   7 *
   8 * Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
   9 * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
  10 *
  11 */
  12#include <linux/module.h>
  13#include <linux/mutex.h>
  14#include <linux/ftrace.h>
  15
  16#include "trace_dynevent.h"
  17#include "trace_probe.h"
  18#include "trace_probe_tmpl.h"
  19#include "trace_probe_kernel.h"
  20
  21#define EPROBE_EVENT_SYSTEM "eprobes"
  22
  23struct trace_eprobe {
  24	/* tracepoint system */
  25	const char *event_system;
  26
  27	/* tracepoint event */
  28	const char *event_name;
  29
  30	/* filter string for the tracepoint */
  31	char *filter_str;
  32
  33	struct trace_event_call *event;
  34
  35	struct dyn_event	devent;
  36	struct trace_probe	tp;
  37};
  38
  39struct eprobe_data {
  40	struct trace_event_file	*file;
  41	struct trace_eprobe	*ep;
  42};
  43
 
 
 
 
  44static int __trace_eprobe_create(int argc, const char *argv[]);
  45
  46static void trace_event_probe_cleanup(struct trace_eprobe *ep)
  47{
  48	if (!ep)
  49		return;
  50	trace_probe_cleanup(&ep->tp);
  51	kfree(ep->event_name);
  52	kfree(ep->event_system);
  53	if (ep->event)
  54		trace_event_put_ref(ep->event);
  55	kfree(ep->filter_str);
  56	kfree(ep);
  57}
  58
  59static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
  60{
  61	return container_of(ev, struct trace_eprobe, devent);
  62}
  63
  64static int eprobe_dyn_event_create(const char *raw_command)
  65{
  66	return trace_probe_create(raw_command, __trace_eprobe_create);
  67}
  68
  69static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
  70{
  71	struct trace_eprobe *ep = to_trace_eprobe(ev);
  72	int i;
  73
  74	seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
  75				trace_probe_name(&ep->tp));
  76	seq_printf(m, " %s.%s", ep->event_system, ep->event_name);
  77
  78	for (i = 0; i < ep->tp.nr_args; i++)
  79		seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
  80	seq_putc(m, '\n');
  81
  82	return 0;
  83}
  84
  85static int unregister_trace_eprobe(struct trace_eprobe *ep)
  86{
  87	/* If other probes are on the event, just unregister eprobe */
  88	if (trace_probe_has_sibling(&ep->tp))
  89		goto unreg;
  90
  91	/* Enabled event can not be unregistered */
  92	if (trace_probe_is_enabled(&ep->tp))
  93		return -EBUSY;
  94
  95	/* Will fail if probe is being used by ftrace or perf */
  96	if (trace_probe_unregister_event_call(&ep->tp))
  97		return -EBUSY;
  98
  99unreg:
 100	dyn_event_remove(&ep->devent);
 101	trace_probe_unlink(&ep->tp);
 102
 103	return 0;
 104}
 105
 106static int eprobe_dyn_event_release(struct dyn_event *ev)
 107{
 108	struct trace_eprobe *ep = to_trace_eprobe(ev);
 109	int ret = unregister_trace_eprobe(ep);
 110
 111	if (!ret)
 112		trace_event_probe_cleanup(ep);
 113	return ret;
 114}
 115
 116static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
 117{
 118	struct trace_eprobe *ep = to_trace_eprobe(ev);
 119
 120	return trace_probe_is_enabled(&ep->tp);
 121}
 122
 123static bool eprobe_dyn_event_match(const char *system, const char *event,
 124			int argc, const char **argv, struct dyn_event *ev)
 125{
 126	struct trace_eprobe *ep = to_trace_eprobe(ev);
 127	const char *slash;
 128
 129	/*
 130	 * We match the following:
 131	 *  event only			- match all eprobes with event name
 132	 *  system and event only	- match all system/event probes
 133	 *  system only			- match all system probes
 134	 *
 135	 * The below has the above satisfied with more arguments:
 136	 *
 137	 *  attached system/event	- If the arg has the system and event
 138	 *				  the probe is attached to, match
 139	 *				  probes with the attachment.
 140	 *
 141	 *  If any more args are given, then it requires a full match.
 142	 */
 143
 144	/*
 145	 * If system exists, but this probe is not part of that system
 146	 * do not match.
 147	 */
 148	if (system && strcmp(trace_probe_group_name(&ep->tp), system) != 0)
 149		return false;
 150
 151	/* Must match the event name */
 152	if (event[0] != '\0' && strcmp(trace_probe_name(&ep->tp), event) != 0)
 153		return false;
 154
 155	/* No arguments match all */
 156	if (argc < 1)
 157		return true;
 158
 159	/* First argument is the system/event the probe is attached to */
 160
 161	slash = strchr(argv[0], '/');
 162	if (!slash)
 163		slash = strchr(argv[0], '.');
 164	if (!slash)
 165		return false;
 166
 167	if (strncmp(ep->event_system, argv[0], slash - argv[0]))
 168		return false;
 169	if (strcmp(ep->event_name, slash + 1))
 170		return false;
 171
 172	argc--;
 173	argv++;
 174
 175	/* If there are no other args, then match */
 176	if (argc < 1)
 177		return true;
 178
 179	return trace_probe_match_command_args(&ep->tp, argc, argv);
 180}
 181
 182static struct dyn_event_operations eprobe_dyn_event_ops = {
 183	.create = eprobe_dyn_event_create,
 184	.show = eprobe_dyn_event_show,
 185	.is_busy = eprobe_dyn_event_is_busy,
 186	.free = eprobe_dyn_event_release,
 187	.match = eprobe_dyn_event_match,
 188};
 189
 190static struct trace_eprobe *alloc_event_probe(const char *group,
 191					      const char *this_event,
 192					      struct trace_event_call *event,
 193					      int nargs)
 194{
 195	struct trace_eprobe *ep;
 196	const char *event_name;
 197	const char *sys_name;
 198	int ret = -ENOMEM;
 199
 200	if (!event)
 201		return ERR_PTR(-ENODEV);
 202
 203	sys_name = event->class->system;
 204	event_name = trace_event_name(event);
 205
 206	ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
 207	if (!ep) {
 208		trace_event_put_ref(event);
 209		goto error;
 210	}
 211	ep->event = event;
 212	ep->event_name = kstrdup(event_name, GFP_KERNEL);
 213	if (!ep->event_name)
 214		goto error;
 215	ep->event_system = kstrdup(sys_name, GFP_KERNEL);
 216	if (!ep->event_system)
 217		goto error;
 218
 219	ret = trace_probe_init(&ep->tp, this_event, group, false);
 220	if (ret < 0)
 221		goto error;
 222
 223	dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
 224	return ep;
 225error:
 226	trace_event_probe_cleanup(ep);
 227	return ERR_PTR(ret);
 228}
 229
 230static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
 231{
 232	struct probe_arg *parg = &ep->tp.args[i];
 233	struct ftrace_event_field *field;
 234	struct list_head *head;
 235	int ret = -ENOENT;
 236
 237	head = trace_get_fields(ep->event);
 238	list_for_each_entry(field, head, link) {
 239		if (!strcmp(parg->code->data, field->name)) {
 240			kfree(parg->code->data);
 241			parg->code->data = field;
 242			return 0;
 243		}
 244	}
 245
 246	/*
 247	 * Argument not found on event. But allow for comm and COMM
 248	 * to be used to get the current->comm.
 249	 */
 250	if (strcmp(parg->code->data, "COMM") == 0 ||
 251	    strcmp(parg->code->data, "comm") == 0) {
 252		parg->code->op = FETCH_OP_COMM;
 253		ret = 0;
 254	}
 255
 256	kfree(parg->code->data);
 257	parg->code->data = NULL;
 258	return ret;
 259}
 260
 261static int eprobe_event_define_fields(struct trace_event_call *event_call)
 262{
 263	struct eprobe_trace_entry_head field;
 264	struct trace_probe *tp;
 265
 266	tp = trace_probe_primary_from_call(event_call);
 267	if (WARN_ON_ONCE(!tp))
 268		return -ENOENT;
 269
 270	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
 271}
 272
 273static struct trace_event_fields eprobe_fields_array[] = {
 274	{ .type = TRACE_FUNCTION_TYPE,
 275	  .define_fields = eprobe_event_define_fields },
 276	{}
 277};
 278
 279/* Event entry printers */
 280static enum print_line_t
 281print_eprobe_event(struct trace_iterator *iter, int flags,
 282		   struct trace_event *event)
 283{
 284	struct eprobe_trace_entry_head *field;
 285	struct trace_event_call *pevent;
 286	struct trace_event *probed_event;
 287	struct trace_seq *s = &iter->seq;
 288	struct trace_eprobe *ep;
 289	struct trace_probe *tp;
 290	unsigned int type;
 291
 292	field = (struct eprobe_trace_entry_head *)iter->ent;
 293	tp = trace_probe_primary_from_call(
 294		container_of(event, struct trace_event_call, event));
 295	if (WARN_ON_ONCE(!tp))
 296		goto out;
 297
 298	ep = container_of(tp, struct trace_eprobe, tp);
 299	type = ep->event->event.type;
 300
 301	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
 302
 303	probed_event = ftrace_find_event(type);
 304	if (probed_event) {
 305		pevent = container_of(probed_event, struct trace_event_call, event);
 306		trace_seq_printf(s, "%s.%s", pevent->class->system,
 307				 trace_event_name(pevent));
 308	} else {
 309		trace_seq_printf(s, "%u", type);
 310	}
 311
 312	trace_seq_putc(s, ')');
 313
 314	if (print_probe_args(s, tp->args, tp->nr_args,
 315			     (u8 *)&field[1], field) < 0)
 316		goto out;
 317
 318	trace_seq_putc(s, '\n');
 319 out:
 320	return trace_handle_return(s);
 321}
 322
 323static unsigned long get_event_field(struct fetch_insn *code, void *rec)
 
 324{
 325	struct ftrace_event_field *field = code->data;
 326	unsigned long val;
 327	void *addr;
 328
 329	addr = rec + field->offset;
 330
 331	if (is_string_field(field)) {
 332		switch (field->filter_type) {
 333		case FILTER_DYN_STRING:
 334			val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
 335			break;
 336		case FILTER_RDYN_STRING:
 337			val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
 338			break;
 339		case FILTER_STATIC_STRING:
 340			val = (unsigned long)addr;
 341			break;
 342		case FILTER_PTR_STRING:
 343			val = (unsigned long)(*(char *)addr);
 344			break;
 345		default:
 346			WARN_ON_ONCE(1);
 347			return 0;
 348		}
 349		return val;
 350	}
 351
 352	switch (field->size) {
 353	case 1:
 354		if (field->is_signed)
 355			val = *(char *)addr;
 356		else
 357			val = *(unsigned char *)addr;
 358		break;
 359	case 2:
 360		if (field->is_signed)
 361			val = *(short *)addr;
 362		else
 363			val = *(unsigned short *)addr;
 364		break;
 365	case 4:
 366		if (field->is_signed)
 367			val = *(int *)addr;
 368		else
 369			val = *(unsigned int *)addr;
 370		break;
 371	default:
 372		if (field->is_signed)
 373			val = *(long *)addr;
 374		else
 375			val = *(unsigned long *)addr;
 376		break;
 377	}
 378	return val;
 379}
 380
 381static int get_eprobe_size(struct trace_probe *tp, void *rec)
 382{
 383	struct fetch_insn *code;
 384	struct probe_arg *arg;
 385	int i, len, ret = 0;
 386
 387	for (i = 0; i < tp->nr_args; i++) {
 388		arg = tp->args + i;
 389		if (arg->dynamic) {
 390			unsigned long val;
 391
 392			code = arg->code;
 393 retry:
 394			switch (code->op) {
 395			case FETCH_OP_TP_ARG:
 396				val = get_event_field(code, rec);
 397				break;
 398			case FETCH_OP_IMM:
 399				val = code->immediate;
 400				break;
 401			case FETCH_OP_COMM:
 402				val = (unsigned long)current->comm;
 403				break;
 404			case FETCH_OP_DATA:
 405				val = (unsigned long)code->data;
 406				break;
 407			case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
 408				code++;
 409				goto retry;
 410			default:
 411				continue;
 
 412			}
 413			code++;
 414			len = process_fetch_insn_bottom(code, val, NULL, NULL);
 415			if (len > 0)
 416				ret += len;
 417		}
 418	}
 419
 420	return ret;
 421}
 422
 423/* Kprobe specific fetch functions */
 424
 425/* Note that we don't verify it, since the code does not come from user space */
 426static int
 427process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
 428		   void *base)
 429{
 430	unsigned long val;
 
 431
 432 retry:
 433	switch (code->op) {
 434	case FETCH_OP_TP_ARG:
 435		val = get_event_field(code, rec);
 436		break;
 437	case FETCH_OP_IMM:
 438		val = code->immediate;
 439		break;
 440	case FETCH_OP_COMM:
 441		val = (unsigned long)current->comm;
 442		break;
 443	case FETCH_OP_DATA:
 444		val = (unsigned long)code->data;
 445		break;
 446	case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
 447		code++;
 448		goto retry;
 449	default:
 450		return -EILSEQ;
 
 
 451	}
 452	code++;
 453	return process_fetch_insn_bottom(code, val, dest, base);
 454}
 455NOKPROBE_SYMBOL(process_fetch_insn)
 456
 457/* Return the length of string -- including null terminal byte */
 458static nokprobe_inline int
 459fetch_store_strlen_user(unsigned long addr)
 460{
 461	return kern_fetch_store_strlen_user(addr);
 462}
 463
 464/* Return the length of string -- including null terminal byte */
 465static nokprobe_inline int
 466fetch_store_strlen(unsigned long addr)
 467{
 468	return kern_fetch_store_strlen(addr);
 469}
 470
 471/*
 472 * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
 473 * with max length and relative data location.
 474 */
 475static nokprobe_inline int
 476fetch_store_string_user(unsigned long addr, void *dest, void *base)
 477{
 478	return kern_fetch_store_string_user(addr, dest, base);
 479}
 480
 481/*
 482 * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
 483 * length and relative data location.
 484 */
 485static nokprobe_inline int
 486fetch_store_string(unsigned long addr, void *dest, void *base)
 487{
 488	return kern_fetch_store_string(addr, dest, base);
 489}
 490
 491static nokprobe_inline int
 492probe_mem_read_user(void *dest, void *src, size_t size)
 493{
 494	const void __user *uaddr =  (__force const void __user *)src;
 495
 496	return copy_from_user_nofault(dest, uaddr, size);
 497}
 498
 499static nokprobe_inline int
 500probe_mem_read(void *dest, void *src, size_t size)
 501{
 502#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 503	if ((unsigned long)src < TASK_SIZE)
 504		return probe_mem_read_user(dest, src, size);
 505#endif
 506	return copy_from_kernel_nofault(dest, src, size);
 507}
 508
 509/* eprobe handler */
 510static inline void
 511__eprobe_trace_func(struct eprobe_data *edata, void *rec)
 512{
 513	struct eprobe_trace_entry_head *entry;
 514	struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
 515	struct trace_event_buffer fbuffer;
 516	int dsize;
 517
 518	if (WARN_ON_ONCE(call != edata->file->event_call))
 519		return;
 520
 521	if (trace_trigger_soft_disabled(edata->file))
 522		return;
 523
 524	dsize = get_eprobe_size(&edata->ep->tp, rec);
 525
 526	entry = trace_event_buffer_reserve(&fbuffer, edata->file,
 527					   sizeof(*entry) + edata->ep->tp.size + dsize);
 528
 529	if (!entry)
 530		return;
 531
 532	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 533	store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
 534
 535	trace_event_buffer_commit(&fbuffer);
 536}
 537
 538/*
 539 * The event probe implementation uses event triggers to get access to
 540 * the event it is attached to, but is not an actual trigger. The below
 541 * functions are just stubs to fulfill what is needed to use the trigger
 542 * infrastructure.
 543 */
 544static int eprobe_trigger_init(struct event_trigger_data *data)
 545{
 546	return 0;
 547}
 548
 549static void eprobe_trigger_free(struct event_trigger_data *data)
 550{
 551
 552}
 553
 554static int eprobe_trigger_print(struct seq_file *m,
 555				struct event_trigger_data *data)
 556{
 557	/* Do not print eprobe event triggers */
 558	return 0;
 559}
 560
 561static void eprobe_trigger_func(struct event_trigger_data *data,
 562				struct trace_buffer *buffer, void *rec,
 563				struct ring_buffer_event *rbe)
 564{
 565	struct eprobe_data *edata = data->private_data;
 566
 567	if (unlikely(!rec))
 568		return;
 569
 570	__eprobe_trace_func(edata, rec);
 571}
 572
 573static struct event_trigger_ops eprobe_trigger_ops = {
 574	.trigger		= eprobe_trigger_func,
 575	.print			= eprobe_trigger_print,
 576	.init			= eprobe_trigger_init,
 577	.free			= eprobe_trigger_free,
 578};
 579
 580static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
 581				    struct trace_event_file *file,
 582				    char *glob, char *cmd,
 583				    char *param_and_filter)
 584{
 585	return -1;
 586}
 587
 588static int eprobe_trigger_reg_func(char *glob,
 589				   struct event_trigger_data *data,
 590				   struct trace_event_file *file)
 591{
 592	return -1;
 593}
 594
 595static void eprobe_trigger_unreg_func(char *glob,
 596				      struct event_trigger_data *data,
 597				      struct trace_event_file *file)
 598{
 599
 600}
 601
 602static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
 603							char *param)
 604{
 605	return &eprobe_trigger_ops;
 606}
 607
 608static struct event_command event_trigger_cmd = {
 609	.name			= "eprobe",
 610	.trigger_type		= ETT_EVENT_EPROBE,
 611	.flags			= EVENT_CMD_FL_NEEDS_REC,
 612	.parse			= eprobe_trigger_cmd_parse,
 613	.reg			= eprobe_trigger_reg_func,
 614	.unreg			= eprobe_trigger_unreg_func,
 615	.unreg_all		= NULL,
 616	.get_trigger_ops	= eprobe_trigger_get_ops,
 617	.set_filter		= NULL,
 618};
 619
 620static struct event_trigger_data *
 621new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
 622{
 623	struct event_trigger_data *trigger;
 624	struct event_filter *filter = NULL;
 625	struct eprobe_data *edata;
 626	int ret;
 627
 628	edata = kzalloc(sizeof(*edata), GFP_KERNEL);
 629	trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
 630	if (!trigger || !edata) {
 631		ret = -ENOMEM;
 632		goto error;
 633	}
 634
 635	trigger->flags = EVENT_TRIGGER_FL_PROBE;
 636	trigger->count = -1;
 637	trigger->ops = &eprobe_trigger_ops;
 638
 639	/*
 640	 * EVENT PROBE triggers are not registered as commands with
 641	 * register_event_command(), as they are not controlled by the user
 642	 * from the trigger file
 643	 */
 644	trigger->cmd_ops = &event_trigger_cmd;
 645
 646	INIT_LIST_HEAD(&trigger->list);
 647
 648	if (ep->filter_str) {
 649		ret = create_event_filter(file->tr, ep->event,
 650					ep->filter_str, false, &filter);
 651		if (ret)
 652			goto error;
 653	}
 654	RCU_INIT_POINTER(trigger->filter, filter);
 655
 656	edata->file = file;
 657	edata->ep = ep;
 658	trigger->private_data = edata;
 659
 660	return trigger;
 661error:
 662	free_event_filter(filter);
 663	kfree(edata);
 664	kfree(trigger);
 665	return ERR_PTR(ret);
 666}
 667
 668static int enable_eprobe(struct trace_eprobe *ep,
 669			 struct trace_event_file *eprobe_file)
 670{
 671	struct event_trigger_data *trigger;
 672	struct trace_event_file *file;
 673	struct trace_array *tr = eprobe_file->tr;
 674
 675	file = find_event_file(tr, ep->event_system, ep->event_name);
 676	if (!file)
 677		return -ENOENT;
 678	trigger = new_eprobe_trigger(ep, eprobe_file);
 679	if (IS_ERR(trigger))
 680		return PTR_ERR(trigger);
 681
 682	list_add_tail_rcu(&trigger->list, &file->triggers);
 683
 684	trace_event_trigger_enable_disable(file, 1);
 685	update_cond_flag(file);
 686
 687	return 0;
 688}
 689
 690static struct trace_event_functions eprobe_funcs = {
 691	.trace		= print_eprobe_event
 692};
 693
 694static int disable_eprobe(struct trace_eprobe *ep,
 695			  struct trace_array *tr)
 696{
 697	struct event_trigger_data *trigger = NULL, *iter;
 698	struct trace_event_file *file;
 699	struct event_filter *filter;
 700	struct eprobe_data *edata;
 701
 702	file = find_event_file(tr, ep->event_system, ep->event_name);
 703	if (!file)
 704		return -ENOENT;
 705
 706	list_for_each_entry(iter, &file->triggers, list) {
 707		if (!(iter->flags & EVENT_TRIGGER_FL_PROBE))
 708			continue;
 709		edata = iter->private_data;
 710		if (edata->ep == ep) {
 711			trigger = iter;
 712			break;
 713		}
 714	}
 715	if (!trigger)
 716		return -ENODEV;
 717
 718	list_del_rcu(&trigger->list);
 719
 720	trace_event_trigger_enable_disable(file, 0);
 721	update_cond_flag(file);
 722
 723	/* Make sure nothing is using the edata or trigger */
 724	tracepoint_synchronize_unregister();
 725
 726	filter = rcu_access_pointer(trigger->filter);
 727
 728	if (filter)
 729		free_event_filter(filter);
 730	kfree(edata);
 731	kfree(trigger);
 732
 733	return 0;
 734}
 735
 736static int enable_trace_eprobe(struct trace_event_call *call,
 737			       struct trace_event_file *file)
 738{
 739	struct trace_probe *pos, *tp;
 740	struct trace_eprobe *ep;
 741	bool enabled;
 742	int ret = 0;
 
 743
 744	tp = trace_probe_primary_from_call(call);
 745	if (WARN_ON_ONCE(!tp))
 746		return -ENODEV;
 747	enabled = trace_probe_is_enabled(tp);
 748
 749	/* This also changes "enabled" state */
 750	if (file) {
 751		ret = trace_probe_add_file(tp, file);
 752		if (ret)
 753			return ret;
 754	} else
 755		trace_probe_set_flag(tp, TP_FLAG_PROFILE);
 756
 757	if (enabled)
 758		return 0;
 759
 760	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
 761		ep = container_of(pos, struct trace_eprobe, tp);
 762		ret = enable_eprobe(ep, file);
 763		if (ret)
 764			break;
 765		enabled = true;
 
 766	}
 767
 768	if (ret) {
 769		/* Failed to enable one of them. Roll back all */
 770		if (enabled)
 771			disable_eprobe(ep, file->tr);
 
 
 
 
 
 
 
 
 
 
 
 772		if (file)
 773			trace_probe_remove_file(tp, file);
 774		else
 775			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
 776	}
 777
 778	return ret;
 779}
 780
 781static int disable_trace_eprobe(struct trace_event_call *call,
 782				struct trace_event_file *file)
 783{
 784	struct trace_probe *pos, *tp;
 785	struct trace_eprobe *ep;
 786
 787	tp = trace_probe_primary_from_call(call);
 788	if (WARN_ON_ONCE(!tp))
 789		return -ENODEV;
 790
 791	if (file) {
 792		if (!trace_probe_get_file_link(tp, file))
 793			return -ENOENT;
 794		if (!trace_probe_has_single_file(tp))
 795			goto out;
 796		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
 797	} else
 798		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
 799
 800	if (!trace_probe_is_enabled(tp)) {
 801		list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
 802			ep = container_of(pos, struct trace_eprobe, tp);
 803			disable_eprobe(ep, file->tr);
 804		}
 805	}
 806
 807 out:
 808	if (file)
 809		/*
 810		 * Synchronization is done in below function. For perf event,
 811		 * file == NULL and perf_trace_event_unreg() calls
 812		 * tracepoint_synchronize_unregister() to ensure synchronize
 813		 * event. We don't need to care about it.
 814		 */
 815		trace_probe_remove_file(tp, file);
 816
 817	return 0;
 818}
 819
 820static int eprobe_register(struct trace_event_call *event,
 821			   enum trace_reg type, void *data)
 822{
 823	struct trace_event_file *file = data;
 824
 825	switch (type) {
 826	case TRACE_REG_REGISTER:
 827		return enable_trace_eprobe(event, file);
 828	case TRACE_REG_UNREGISTER:
 829		return disable_trace_eprobe(event, file);
 830#ifdef CONFIG_PERF_EVENTS
 831	case TRACE_REG_PERF_REGISTER:
 832	case TRACE_REG_PERF_UNREGISTER:
 833	case TRACE_REG_PERF_OPEN:
 834	case TRACE_REG_PERF_CLOSE:
 835	case TRACE_REG_PERF_ADD:
 836	case TRACE_REG_PERF_DEL:
 837		return 0;
 838#endif
 839	}
 840	return 0;
 841}
 842
 843static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
 844{
 845	struct trace_event_call *call = trace_probe_event_call(&ep->tp);
 846
 847	call->flags = TRACE_EVENT_FL_EPROBE;
 848	call->event.funcs = &eprobe_funcs;
 849	call->class->fields_array = eprobe_fields_array;
 850	call->class->reg = eprobe_register;
 851}
 852
 853static struct trace_event_call *
 854find_and_get_event(const char *system, const char *event_name)
 855{
 856	struct trace_event_call *tp_event;
 857	const char *name;
 858
 859	list_for_each_entry(tp_event, &ftrace_events, list) {
 860		/* Skip other probes and ftrace events */
 861		if (tp_event->flags &
 862		    (TRACE_EVENT_FL_IGNORE_ENABLE |
 863		     TRACE_EVENT_FL_KPROBE |
 864		     TRACE_EVENT_FL_UPROBE |
 865		     TRACE_EVENT_FL_EPROBE))
 866			continue;
 867		if (!tp_event->class->system ||
 868		    strcmp(system, tp_event->class->system))
 869			continue;
 870		name = trace_event_name(tp_event);
 871		if (!name || strcmp(event_name, name))
 872			continue;
 873		if (!trace_event_try_get_ref(tp_event)) {
 874			return NULL;
 875			break;
 876		}
 877		return tp_event;
 878		break;
 879	}
 880	return NULL;
 881}
 882
 883static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
 884{
 885	unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TPOINT;
 
 
 
 886	int ret;
 887
 888	ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], flags);
 889	if (ret)
 890		return ret;
 891
 892	if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) {
 893		ret = trace_eprobe_tp_arg_update(ep, i);
 894		if (ret)
 895			trace_probe_log_err(0, BAD_ATTACH_ARG);
 896	}
 897
 898	/* Handle symbols "@" */
 899	if (!ret)
 900		ret = traceprobe_update_arg(&ep->tp.args[i]);
 901
 
 902	return ret;
 903}
 904
 905static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[])
 906{
 907	struct event_filter *dummy = NULL;
 908	int i, ret, len = 0;
 909	char *p;
 910
 911	if (argc == 0) {
 912		trace_probe_log_err(0, NO_EP_FILTER);
 913		return -EINVAL;
 914	}
 915
 916	/* Recover the filter string */
 917	for (i = 0; i < argc; i++)
 918		len += strlen(argv[i]) + 1;
 919
 920	ep->filter_str = kzalloc(len, GFP_KERNEL);
 921	if (!ep->filter_str)
 922		return -ENOMEM;
 923
 924	p = ep->filter_str;
 925	for (i = 0; i < argc; i++) {
 926		ret = snprintf(p, len, "%s ", argv[i]);
 927		if (ret < 0)
 928			goto error;
 929		if (ret > len) {
 930			ret = -E2BIG;
 931			goto error;
 932		}
 933		p += ret;
 934		len -= ret;
 935	}
 936	p[-1] = '\0';
 937
 938	/*
 939	 * Ensure the filter string can be parsed correctly. Note, this
 940	 * filter string is for the original event, not for the eprobe.
 941	 */
 942	ret = create_event_filter(top_trace_array(), ep->event, ep->filter_str,
 943				  true, &dummy);
 944	free_event_filter(dummy);
 945	if (ret)
 946		goto error;
 947
 948	return 0;
 949error:
 950	kfree(ep->filter_str);
 951	ep->filter_str = NULL;
 952	return ret;
 953}
 954
 955static int __trace_eprobe_create(int argc, const char *argv[])
 956{
 957	/*
 958	 * Argument syntax:
 959	 *      e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER]
 960	 * Fetch args (no space):
 961	 *  <name>=$<field>[:TYPE]
 962	 */
 963	const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
 964	const char *sys_event = NULL, *sys_name = NULL;
 965	struct trace_event_call *event_call;
 966	struct trace_eprobe *ep = NULL;
 967	char buf1[MAX_EVENT_NAME_LEN];
 968	char buf2[MAX_EVENT_NAME_LEN];
 969	char gbuf[MAX_EVENT_NAME_LEN];
 970	int ret = 0, filter_idx = 0;
 971	int i, filter_cnt;
 972
 973	if (argc < 2 || argv[0][0] != 'e')
 974		return -ECANCELED;
 975
 976	trace_probe_log_init("event_probe", argc, argv);
 977
 978	event = strchr(&argv[0][1], ':');
 979	if (event) {
 980		event++;
 981		ret = traceprobe_parse_event_name(&event, &group, gbuf,
 982						  event - argv[0]);
 983		if (ret)
 984			goto parse_error;
 985	}
 986
 987	trace_probe_log_set_index(1);
 988	sys_event = argv[1];
 989	ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0);
 990	if (ret || !sys_event || !sys_name) {
 991		trace_probe_log_err(0, NO_EVENT_INFO);
 992		goto parse_error;
 993	}
 994
 995	if (!event) {
 996		strscpy(buf1, sys_event, MAX_EVENT_NAME_LEN);
 997		event = buf1;
 998	}
 999
1000	for (i = 2; i < argc; i++) {
1001		if (!strcmp(argv[i], "if")) {
1002			filter_idx = i + 1;
1003			filter_cnt = argc - filter_idx;
1004			argc = i;
1005			break;
1006		}
1007	}
1008
1009	mutex_lock(&event_mutex);
1010	event_call = find_and_get_event(sys_name, sys_event);
1011	ep = alloc_event_probe(group, event, event_call, argc - 2);
1012	mutex_unlock(&event_mutex);
1013
1014	if (IS_ERR(ep)) {
1015		ret = PTR_ERR(ep);
1016		if (ret == -ENODEV)
1017			trace_probe_log_err(0, BAD_ATTACH_EVENT);
1018		/* This must return -ENOMEM or missing event, else there is a bug */
1019		WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV);
1020		ep = NULL;
1021		goto error;
1022	}
1023
1024	if (filter_idx) {
1025		trace_probe_log_set_index(filter_idx);
1026		ret = trace_eprobe_parse_filter(ep, filter_cnt, argv + filter_idx);
1027		if (ret)
1028			goto parse_error;
1029	} else
1030		ep->filter_str = NULL;
1031
1032	argc -= 2; argv += 2;
1033	/* parse arguments */
1034	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
1035		trace_probe_log_set_index(i + 2);
1036		ret = trace_eprobe_tp_update_arg(ep, argv, i);
1037		if (ret)
1038			goto error;
1039	}
1040	ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
1041	if (ret < 0)
1042		goto error;
1043	init_trace_eprobe_call(ep);
1044	mutex_lock(&event_mutex);
1045	ret = trace_probe_register_event_call(&ep->tp);
1046	if (ret) {
1047		if (ret == -EEXIST) {
1048			trace_probe_log_set_index(0);
1049			trace_probe_log_err(0, EVENT_EXIST);
1050		}
1051		mutex_unlock(&event_mutex);
1052		goto error;
1053	}
1054	ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
1055	mutex_unlock(&event_mutex);
1056	return ret;
1057parse_error:
1058	ret = -EINVAL;
1059error:
1060	trace_event_probe_cleanup(ep);
1061	return ret;
1062}
1063
1064/*
1065 * Register dynevent at core_initcall. This allows kernel to setup eprobe
1066 * events in postcore_initcall without tracefs.
1067 */
1068static __init int trace_events_eprobe_init_early(void)
1069{
1070	int err = 0;
1071
1072	err = dyn_event_register(&eprobe_dyn_event_ops);
1073	if (err)
1074		pr_warn("Could not register eprobe_dyn_event_ops\n");
1075
1076	return err;
1077}
1078core_initcall(trace_events_eprobe_init_early);
v6.9.4
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * event probes
  4 *
  5 * Part of this code was copied from kernel/trace/trace_kprobe.c written by
  6 * Masami Hiramatsu <mhiramat@kernel.org>
  7 *
  8 * Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
  9 * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
 10 *
 11 */
 12#include <linux/module.h>
 13#include <linux/mutex.h>
 14#include <linux/ftrace.h>
 15
 16#include "trace_dynevent.h"
 17#include "trace_probe.h"
 18#include "trace_probe_tmpl.h"
 19#include "trace_probe_kernel.h"
 20
 21#define EPROBE_EVENT_SYSTEM "eprobes"
 22
 23struct trace_eprobe {
 24	/* tracepoint system */
 25	const char *event_system;
 26
 27	/* tracepoint event */
 28	const char *event_name;
 29
 30	/* filter string for the tracepoint */
 31	char *filter_str;
 32
 33	struct trace_event_call *event;
 34
 35	struct dyn_event	devent;
 36	struct trace_probe	tp;
 37};
 38
 39struct eprobe_data {
 40	struct trace_event_file	*file;
 41	struct trace_eprobe	*ep;
 42};
 43
 44
 45#define for_each_trace_eprobe_tp(ep, _tp) \
 46	list_for_each_entry(ep, trace_probe_probe_list(_tp), tp.list)
 47
 48static int __trace_eprobe_create(int argc, const char *argv[]);
 49
 50static void trace_event_probe_cleanup(struct trace_eprobe *ep)
 51{
 52	if (!ep)
 53		return;
 54	trace_probe_cleanup(&ep->tp);
 55	kfree(ep->event_name);
 56	kfree(ep->event_system);
 57	if (ep->event)
 58		trace_event_put_ref(ep->event);
 59	kfree(ep->filter_str);
 60	kfree(ep);
 61}
 62
 63static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
 64{
 65	return container_of(ev, struct trace_eprobe, devent);
 66}
 67
 68static int eprobe_dyn_event_create(const char *raw_command)
 69{
 70	return trace_probe_create(raw_command, __trace_eprobe_create);
 71}
 72
 73static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
 74{
 75	struct trace_eprobe *ep = to_trace_eprobe(ev);
 76	int i;
 77
 78	seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
 79				trace_probe_name(&ep->tp));
 80	seq_printf(m, " %s.%s", ep->event_system, ep->event_name);
 81
 82	for (i = 0; i < ep->tp.nr_args; i++)
 83		seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
 84	seq_putc(m, '\n');
 85
 86	return 0;
 87}
 88
 89static int unregister_trace_eprobe(struct trace_eprobe *ep)
 90{
 91	/* If other probes are on the event, just unregister eprobe */
 92	if (trace_probe_has_sibling(&ep->tp))
 93		goto unreg;
 94
 95	/* Enabled event can not be unregistered */
 96	if (trace_probe_is_enabled(&ep->tp))
 97		return -EBUSY;
 98
 99	/* Will fail if probe is being used by ftrace or perf */
100	if (trace_probe_unregister_event_call(&ep->tp))
101		return -EBUSY;
102
103unreg:
104	dyn_event_remove(&ep->devent);
105	trace_probe_unlink(&ep->tp);
106
107	return 0;
108}
109
/*
 * dyn_event ->free callback: unregister the eprobe and, only when that
 * succeeded, free it. Returns the result of unregister_trace_eprobe().
 */
static int eprobe_dyn_event_release(struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);
	int ret;

	ret = unregister_trace_eprobe(ep);
	if (ret)
		return ret;

	trace_event_probe_cleanup(ep);
	return 0;
}
119
120static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
121{
122	struct trace_eprobe *ep = to_trace_eprobe(ev);
123
124	return trace_probe_is_enabled(&ep->tp);
125}
126
127static bool eprobe_dyn_event_match(const char *system, const char *event,
128			int argc, const char **argv, struct dyn_event *ev)
129{
130	struct trace_eprobe *ep = to_trace_eprobe(ev);
131	const char *slash;
132
133	/*
134	 * We match the following:
135	 *  event only			- match all eprobes with event name
136	 *  system and event only	- match all system/event probes
137	 *  system only			- match all system probes
138	 *
139	 * The below has the above satisfied with more arguments:
140	 *
141	 *  attached system/event	- If the arg has the system and event
142	 *				  the probe is attached to, match
143	 *				  probes with the attachment.
144	 *
145	 *  If any more args are given, then it requires a full match.
146	 */
147
148	/*
149	 * If system exists, but this probe is not part of that system
150	 * do not match.
151	 */
152	if (system && strcmp(trace_probe_group_name(&ep->tp), system) != 0)
153		return false;
154
155	/* Must match the event name */
156	if (event[0] != '\0' && strcmp(trace_probe_name(&ep->tp), event) != 0)
157		return false;
158
159	/* No arguments match all */
160	if (argc < 1)
161		return true;
162
163	/* First argument is the system/event the probe is attached to */
164
165	slash = strchr(argv[0], '/');
166	if (!slash)
167		slash = strchr(argv[0], '.');
168	if (!slash)
169		return false;
170
171	if (strncmp(ep->event_system, argv[0], slash - argv[0]))
172		return false;
173	if (strcmp(ep->event_name, slash + 1))
174		return false;
175
176	argc--;
177	argv++;
178
179	/* If there are no other args, then match */
180	if (argc < 1)
181		return true;
182
183	return trace_probe_match_command_args(&ep->tp, argc, argv);
184}
185
186static struct dyn_event_operations eprobe_dyn_event_ops = {
187	.create = eprobe_dyn_event_create,
188	.show = eprobe_dyn_event_show,
189	.is_busy = eprobe_dyn_event_is_busy,
190	.free = eprobe_dyn_event_release,
191	.match = eprobe_dyn_event_match,
192};
193
194static struct trace_eprobe *alloc_event_probe(const char *group,
195					      const char *this_event,
196					      struct trace_event_call *event,
197					      int nargs)
198{
199	struct trace_eprobe *ep;
200	const char *event_name;
201	const char *sys_name;
202	int ret = -ENOMEM;
203
204	if (!event)
205		return ERR_PTR(-ENODEV);
206
207	sys_name = event->class->system;
208	event_name = trace_event_name(event);
209
210	ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
211	if (!ep) {
212		trace_event_put_ref(event);
213		goto error;
214	}
215	ep->event = event;
216	ep->event_name = kstrdup(event_name, GFP_KERNEL);
217	if (!ep->event_name)
218		goto error;
219	ep->event_system = kstrdup(sys_name, GFP_KERNEL);
220	if (!ep->event_system)
221		goto error;
222
223	ret = trace_probe_init(&ep->tp, this_event, group, false, nargs);
224	if (ret < 0)
225		goto error;
226
227	dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
228	return ep;
229error:
230	trace_event_probe_cleanup(ep);
231	return ERR_PTR(ret);
232}
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234static int eprobe_event_define_fields(struct trace_event_call *event_call)
235{
236	struct eprobe_trace_entry_head field;
237	struct trace_probe *tp;
238
239	tp = trace_probe_primary_from_call(event_call);
240	if (WARN_ON_ONCE(!tp))
241		return -ENOENT;
242
243	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
244}
245
246static struct trace_event_fields eprobe_fields_array[] = {
247	{ .type = TRACE_FUNCTION_TYPE,
248	  .define_fields = eprobe_event_define_fields },
249	{}
250};
251
252/* Event entry printers */
253static enum print_line_t
254print_eprobe_event(struct trace_iterator *iter, int flags,
255		   struct trace_event *event)
256{
257	struct eprobe_trace_entry_head *field;
258	struct trace_event_call *pevent;
259	struct trace_event *probed_event;
260	struct trace_seq *s = &iter->seq;
261	struct trace_eprobe *ep;
262	struct trace_probe *tp;
263	unsigned int type;
264
265	field = (struct eprobe_trace_entry_head *)iter->ent;
266	tp = trace_probe_primary_from_call(
267		container_of(event, struct trace_event_call, event));
268	if (WARN_ON_ONCE(!tp))
269		goto out;
270
271	ep = container_of(tp, struct trace_eprobe, tp);
272	type = ep->event->event.type;
273
274	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
275
276	probed_event = ftrace_find_event(type);
277	if (probed_event) {
278		pevent = container_of(probed_event, struct trace_event_call, event);
279		trace_seq_printf(s, "%s.%s", pevent->class->system,
280				 trace_event_name(pevent));
281	} else {
282		trace_seq_printf(s, "%u", type);
283	}
284
285	trace_seq_putc(s, ')');
286
287	if (trace_probe_print_args(s, tp->args, tp->nr_args,
288			     (u8 *)&field[1], field) < 0)
289		goto out;
290
291	trace_seq_putc(s, '\n');
292 out:
293	return trace_handle_return(s);
294}
295
296static nokprobe_inline unsigned long
297get_event_field(struct fetch_insn *code, void *rec)
298{
299	struct ftrace_event_field *field = code->data;
300	unsigned long val;
301	void *addr;
302
303	addr = rec + field->offset;
304
305	if (is_string_field(field)) {
306		switch (field->filter_type) {
307		case FILTER_DYN_STRING:
308			val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
309			break;
310		case FILTER_RDYN_STRING:
311			val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
312			break;
313		case FILTER_STATIC_STRING:
314			val = (unsigned long)addr;
315			break;
316		case FILTER_PTR_STRING:
317			val = (unsigned long)(*(char *)addr);
318			break;
319		default:
320			WARN_ON_ONCE(1);
321			return 0;
322		}
323		return val;
324	}
325
326	switch (field->size) {
327	case 1:
328		if (field->is_signed)
329			val = *(char *)addr;
330		else
331			val = *(unsigned char *)addr;
332		break;
333	case 2:
334		if (field->is_signed)
335			val = *(short *)addr;
336		else
337			val = *(unsigned short *)addr;
338		break;
339	case 4:
340		if (field->is_signed)
341			val = *(int *)addr;
342		else
343			val = *(unsigned int *)addr;
344		break;
345	default:
346		if (field->is_signed)
347			val = *(long *)addr;
348		else
349			val = *(unsigned long *)addr;
350		break;
351	}
352	return val;
353}
354
355static int get_eprobe_size(struct trace_probe *tp, void *rec)
356{
357	struct fetch_insn *code;
358	struct probe_arg *arg;
359	int i, len, ret = 0;
360
361	for (i = 0; i < tp->nr_args; i++) {
362		arg = tp->args + i;
363		if (arg->dynamic) {
364			unsigned long val;
365
366			code = arg->code;
367 retry:
368			switch (code->op) {
369			case FETCH_OP_TP_ARG:
370				val = get_event_field(code, rec);
371				break;
 
 
 
 
 
 
 
 
 
372			case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
373				code++;
374				goto retry;
375			default:
376				if (process_common_fetch_insn(code, &val) < 0)
377					continue;
378			}
379			code++;
380			len = process_fetch_insn_bottom(code, val, NULL, NULL);
381			if (len > 0)
382				ret += len;
383		}
384	}
385
386	return ret;
387}
388
389/* Kprobe specific fetch functions */
390
391/* Note that we don't verify it, since the code does not come from user space */
392static int
393process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
394		   void *dest, void *base)
395{
396	unsigned long val;
397	int ret;
398
399 retry:
400	switch (code->op) {
401	case FETCH_OP_TP_ARG:
402		val = get_event_field(code, rec);
403		break;
 
 
 
 
 
 
 
 
 
404	case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
405		code++;
406		goto retry;
407	default:
408		ret = process_common_fetch_insn(code, &val);
409		if (ret < 0)
410			return ret;
411	}
412	code++;
413	return process_fetch_insn_bottom(code, val, dest, base);
414}
415NOKPROBE_SYMBOL(process_fetch_insn)
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417/* eprobe handler */
418static inline void
419__eprobe_trace_func(struct eprobe_data *edata, void *rec)
420{
421	struct eprobe_trace_entry_head *entry;
422	struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
423	struct trace_event_buffer fbuffer;
424	int dsize;
425
426	if (WARN_ON_ONCE(call != edata->file->event_call))
427		return;
428
429	if (trace_trigger_soft_disabled(edata->file))
430		return;
431
432	dsize = get_eprobe_size(&edata->ep->tp, rec);
433
434	entry = trace_event_buffer_reserve(&fbuffer, edata->file,
435					   sizeof(*entry) + edata->ep->tp.size + dsize);
436
437	if (!entry)
438		return;
439
440	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
441	store_trace_args(&entry[1], &edata->ep->tp, rec, NULL, sizeof(*entry), dsize);
442
443	trace_event_buffer_commit(&fbuffer);
444}
445
/*
 * Event probes borrow the event trigger infrastructure to get at the
 * records of the event they are attached to, without being real triggers.
 * The callbacks below exist only because that infrastructure requires
 * them; most of them do nothing.
 */

/* Trigger ->init stub: nothing to set up */
static int eprobe_trigger_init(struct event_trigger_data *data)
{
	return 0;
}
456
/* Trigger ->free stub: does nothing */
static void eprobe_trigger_free(struct event_trigger_data *data)
{
}
461
/* Trigger ->print stub: eprobe event triggers are deliberately not printed */
static int eprobe_trigger_print(struct seq_file *m,
				struct event_trigger_data *data)
{
	return 0;
}
468
469static void eprobe_trigger_func(struct event_trigger_data *data,
470				struct trace_buffer *buffer, void *rec,
471				struct ring_buffer_event *rbe)
472{
473	struct eprobe_data *edata = data->private_data;
474
475	if (unlikely(!rec))
476		return;
477
478	__eprobe_trace_func(edata, rec);
479}
480
481static struct event_trigger_ops eprobe_trigger_ops = {
482	.trigger		= eprobe_trigger_func,
483	.print			= eprobe_trigger_print,
484	.init			= eprobe_trigger_init,
485	.free			= eprobe_trigger_free,
486};
487
/* Command ->parse stub: always fails with -1 */
static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
				    struct trace_event_file *file,
				    char *glob, char *cmd,
				    char *param_and_filter)
{
	return -1;
}
495
/* Command ->reg stub: always fails with -1 */
static int eprobe_trigger_reg_func(char *glob,
				   struct event_trigger_data *data,
				   struct trace_event_file *file)
{
	return -1;
}
502
/* Command ->unreg stub: does nothing */
static void eprobe_trigger_unreg_func(char *glob,
				      struct event_trigger_data *data,
				      struct trace_event_file *file)
{
}
509
510static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
511							char *param)
512{
513	return &eprobe_trigger_ops;
514}
515
516static struct event_command event_trigger_cmd = {
517	.name			= "eprobe",
518	.trigger_type		= ETT_EVENT_EPROBE,
519	.flags			= EVENT_CMD_FL_NEEDS_REC,
520	.parse			= eprobe_trigger_cmd_parse,
521	.reg			= eprobe_trigger_reg_func,
522	.unreg			= eprobe_trigger_unreg_func,
523	.unreg_all		= NULL,
524	.get_trigger_ops	= eprobe_trigger_get_ops,
525	.set_filter		= NULL,
526};
527
528static struct event_trigger_data *
529new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
530{
531	struct event_trigger_data *trigger;
532	struct event_filter *filter = NULL;
533	struct eprobe_data *edata;
534	int ret;
535
536	edata = kzalloc(sizeof(*edata), GFP_KERNEL);
537	trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
538	if (!trigger || !edata) {
539		ret = -ENOMEM;
540		goto error;
541	}
542
543	trigger->flags = EVENT_TRIGGER_FL_PROBE;
544	trigger->count = -1;
545	trigger->ops = &eprobe_trigger_ops;
546
547	/*
548	 * EVENT PROBE triggers are not registered as commands with
549	 * register_event_command(), as they are not controlled by the user
550	 * from the trigger file
551	 */
552	trigger->cmd_ops = &event_trigger_cmd;
553
554	INIT_LIST_HEAD(&trigger->list);
555
556	if (ep->filter_str) {
557		ret = create_event_filter(file->tr, ep->event,
558					ep->filter_str, false, &filter);
559		if (ret)
560			goto error;
561	}
562	RCU_INIT_POINTER(trigger->filter, filter);
563
564	edata->file = file;
565	edata->ep = ep;
566	trigger->private_data = edata;
567
568	return trigger;
569error:
570	free_event_filter(filter);
571	kfree(edata);
572	kfree(trigger);
573	return ERR_PTR(ret);
574}
575
576static int enable_eprobe(struct trace_eprobe *ep,
577			 struct trace_event_file *eprobe_file)
578{
579	struct event_trigger_data *trigger;
580	struct trace_event_file *file;
581	struct trace_array *tr = eprobe_file->tr;
582
583	file = find_event_file(tr, ep->event_system, ep->event_name);
584	if (!file)
585		return -ENOENT;
586	trigger = new_eprobe_trigger(ep, eprobe_file);
587	if (IS_ERR(trigger))
588		return PTR_ERR(trigger);
589
590	list_add_tail_rcu(&trigger->list, &file->triggers);
591
592	trace_event_trigger_enable_disable(file, 1);
593	update_cond_flag(file);
594
595	return 0;
596}
597
598static struct trace_event_functions eprobe_funcs = {
599	.trace		= print_eprobe_event
600};
601
602static int disable_eprobe(struct trace_eprobe *ep,
603			  struct trace_array *tr)
604{
605	struct event_trigger_data *trigger = NULL, *iter;
606	struct trace_event_file *file;
607	struct event_filter *filter;
608	struct eprobe_data *edata;
609
610	file = find_event_file(tr, ep->event_system, ep->event_name);
611	if (!file)
612		return -ENOENT;
613
614	list_for_each_entry(iter, &file->triggers, list) {
615		if (!(iter->flags & EVENT_TRIGGER_FL_PROBE))
616			continue;
617		edata = iter->private_data;
618		if (edata->ep == ep) {
619			trigger = iter;
620			break;
621		}
622	}
623	if (!trigger)
624		return -ENODEV;
625
626	list_del_rcu(&trigger->list);
627
628	trace_event_trigger_enable_disable(file, 0);
629	update_cond_flag(file);
630
631	/* Make sure nothing is using the edata or trigger */
632	tracepoint_synchronize_unregister();
633
634	filter = rcu_access_pointer(trigger->filter);
635
636	if (filter)
637		free_event_filter(filter);
638	kfree(edata);
639	kfree(trigger);
640
641	return 0;
642}
643
644static int enable_trace_eprobe(struct trace_event_call *call,
645			       struct trace_event_file *file)
646{
647	struct trace_probe *tp;
648	struct trace_eprobe *ep;
649	bool enabled;
650	int ret = 0;
651	int cnt = 0;
652
653	tp = trace_probe_primary_from_call(call);
654	if (WARN_ON_ONCE(!tp))
655		return -ENODEV;
656	enabled = trace_probe_is_enabled(tp);
657
658	/* This also changes "enabled" state */
659	if (file) {
660		ret = trace_probe_add_file(tp, file);
661		if (ret)
662			return ret;
663	} else
664		trace_probe_set_flag(tp, TP_FLAG_PROFILE);
665
666	if (enabled)
667		return 0;
668
669	for_each_trace_eprobe_tp(ep, tp) {
 
670		ret = enable_eprobe(ep, file);
671		if (ret)
672			break;
673		enabled = true;
674		cnt++;
675	}
676
677	if (ret) {
678		/* Failed to enable one of them. Roll back all */
679		if (enabled) {
680			/*
681			 * It's a bug if one failed for something other than memory
682			 * not being available but another eprobe succeeded.
683			 */
684			WARN_ON_ONCE(ret != -ENOMEM);
685
686			for_each_trace_eprobe_tp(ep, tp) {
687				disable_eprobe(ep, file->tr);
688				if (!--cnt)
689					break;
690			}
691		}
692		if (file)
693			trace_probe_remove_file(tp, file);
694		else
695			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
696	}
697
698	return ret;
699}
700
701static int disable_trace_eprobe(struct trace_event_call *call,
702				struct trace_event_file *file)
703{
704	struct trace_probe *tp;
705	struct trace_eprobe *ep;
706
707	tp = trace_probe_primary_from_call(call);
708	if (WARN_ON_ONCE(!tp))
709		return -ENODEV;
710
711	if (file) {
712		if (!trace_probe_get_file_link(tp, file))
713			return -ENOENT;
714		if (!trace_probe_has_single_file(tp))
715			goto out;
716		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
717	} else
718		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
719
720	if (!trace_probe_is_enabled(tp)) {
721		for_each_trace_eprobe_tp(ep, tp)
 
722			disable_eprobe(ep, file->tr);
 
723	}
724
725 out:
726	if (file)
727		/*
728		 * Synchronization is done in below function. For perf event,
729		 * file == NULL and perf_trace_event_unreg() calls
730		 * tracepoint_synchronize_unregister() to ensure synchronize
731		 * event. We don't need to care about it.
732		 */
733		trace_probe_remove_file(tp, file);
734
735	return 0;
736}
737
738static int eprobe_register(struct trace_event_call *event,
739			   enum trace_reg type, void *data)
740{
741	struct trace_event_file *file = data;
742
743	switch (type) {
744	case TRACE_REG_REGISTER:
745		return enable_trace_eprobe(event, file);
746	case TRACE_REG_UNREGISTER:
747		return disable_trace_eprobe(event, file);
748#ifdef CONFIG_PERF_EVENTS
749	case TRACE_REG_PERF_REGISTER:
750	case TRACE_REG_PERF_UNREGISTER:
751	case TRACE_REG_PERF_OPEN:
752	case TRACE_REG_PERF_CLOSE:
753	case TRACE_REG_PERF_ADD:
754	case TRACE_REG_PERF_DEL:
755		return 0;
756#endif
757	}
758	return 0;
759}
760
761static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
762{
763	struct trace_event_call *call = trace_probe_event_call(&ep->tp);
764
765	call->flags = TRACE_EVENT_FL_EPROBE;
766	call->event.funcs = &eprobe_funcs;
767	call->class->fields_array = eprobe_fields_array;
768	call->class->reg = eprobe_register;
769}
770
771static struct trace_event_call *
772find_and_get_event(const char *system, const char *event_name)
773{
774	struct trace_event_call *tp_event;
775	const char *name;
776
777	list_for_each_entry(tp_event, &ftrace_events, list) {
778		/* Skip other probes and ftrace events */
779		if (tp_event->flags &
780		    (TRACE_EVENT_FL_IGNORE_ENABLE |
781		     TRACE_EVENT_FL_KPROBE |
782		     TRACE_EVENT_FL_UPROBE |
783		     TRACE_EVENT_FL_EPROBE))
784			continue;
785		if (!tp_event->class->system ||
786		    strcmp(system, tp_event->class->system))
787			continue;
788		name = trace_event_name(tp_event);
789		if (!name || strcmp(event_name, name))
790			continue;
791		if (!trace_event_try_get_ref(tp_event))
792			return NULL;
 
 
793		return tp_event;
 
794	}
795	return NULL;
796}
797
798static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
799{
800	struct traceprobe_parse_context ctx = {
801		.event = ep->event,
802		.flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT,
803	};
804	int ret;
805
806	ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], &ctx);
 
 
 
 
 
 
 
 
 
807	/* Handle symbols "@" */
808	if (!ret)
809		ret = traceprobe_update_arg(&ep->tp.args[i]);
810
811	traceprobe_finish_parse(&ctx);
812	return ret;
813}
814
815static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[])
816{
817	struct event_filter *dummy = NULL;
818	int i, ret, len = 0;
819	char *p;
820
821	if (argc == 0) {
822		trace_probe_log_err(0, NO_EP_FILTER);
823		return -EINVAL;
824	}
825
826	/* Recover the filter string */
827	for (i = 0; i < argc; i++)
828		len += strlen(argv[i]) + 1;
829
830	ep->filter_str = kzalloc(len, GFP_KERNEL);
831	if (!ep->filter_str)
832		return -ENOMEM;
833
834	p = ep->filter_str;
835	for (i = 0; i < argc; i++) {
836		if (i)
837			ret = snprintf(p, len, " %s", argv[i]);
838		else
839			ret = snprintf(p, len, "%s", argv[i]);
 
 
 
840		p += ret;
841		len -= ret;
842	}
 
843
844	/*
845	 * Ensure the filter string can be parsed correctly. Note, this
846	 * filter string is for the original event, not for the eprobe.
847	 */
848	ret = create_event_filter(top_trace_array(), ep->event, ep->filter_str,
849				  true, &dummy);
850	free_event_filter(dummy);
851	if (ret)
852		goto error;
853
854	return 0;
855error:
856	kfree(ep->filter_str);
857	ep->filter_str = NULL;
858	return ret;
859}
860
861static int __trace_eprobe_create(int argc, const char *argv[])
862{
863	/*
864	 * Argument syntax:
865	 *      e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER]
866	 * Fetch args (no space):
867	 *  <name>=$<field>[:TYPE]
868	 */
869	const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
870	const char *sys_event = NULL, *sys_name = NULL;
871	struct trace_event_call *event_call;
872	struct trace_eprobe *ep = NULL;
873	char buf1[MAX_EVENT_NAME_LEN];
874	char buf2[MAX_EVENT_NAME_LEN];
875	char gbuf[MAX_EVENT_NAME_LEN];
876	int ret = 0, filter_idx = 0;
877	int i, filter_cnt;
878
879	if (argc < 2 || argv[0][0] != 'e')
880		return -ECANCELED;
881
882	trace_probe_log_init("event_probe", argc, argv);
883
884	event = strchr(&argv[0][1], ':');
885	if (event) {
886		event++;
887		ret = traceprobe_parse_event_name(&event, &group, gbuf,
888						  event - argv[0]);
889		if (ret)
890			goto parse_error;
891	}
892
893	trace_probe_log_set_index(1);
894	sys_event = argv[1];
895	ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0);
896	if (ret || !sys_event || !sys_name) {
897		trace_probe_log_err(0, NO_EVENT_INFO);
898		goto parse_error;
899	}
900
901	if (!event) {
902		strscpy(buf1, sys_event, MAX_EVENT_NAME_LEN);
903		event = buf1;
904	}
905
906	for (i = 2; i < argc; i++) {
907		if (!strcmp(argv[i], "if")) {
908			filter_idx = i + 1;
909			filter_cnt = argc - filter_idx;
910			argc = i;
911			break;
912		}
913	}
914
915	mutex_lock(&event_mutex);
916	event_call = find_and_get_event(sys_name, sys_event);
917	ep = alloc_event_probe(group, event, event_call, argc - 2);
918	mutex_unlock(&event_mutex);
919
920	if (IS_ERR(ep)) {
921		ret = PTR_ERR(ep);
922		if (ret == -ENODEV)
923			trace_probe_log_err(0, BAD_ATTACH_EVENT);
924		/* This must return -ENOMEM or missing event, else there is a bug */
925		WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV);
926		ep = NULL;
927		goto error;
928	}
929
930	if (filter_idx) {
931		trace_probe_log_set_index(filter_idx);
932		ret = trace_eprobe_parse_filter(ep, filter_cnt, argv + filter_idx);
933		if (ret)
934			goto parse_error;
935	} else
936		ep->filter_str = NULL;
937
938	argc -= 2; argv += 2;
939	/* parse arguments */
940	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
941		trace_probe_log_set_index(i + 2);
942		ret = trace_eprobe_tp_update_arg(ep, argv, i);
943		if (ret)
944			goto error;
945	}
946	ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
947	if (ret < 0)
948		goto error;
949	init_trace_eprobe_call(ep);
950	mutex_lock(&event_mutex);
951	ret = trace_probe_register_event_call(&ep->tp);
952	if (ret) {
953		if (ret == -EEXIST) {
954			trace_probe_log_set_index(0);
955			trace_probe_log_err(0, EVENT_EXIST);
956		}
957		mutex_unlock(&event_mutex);
958		goto error;
959	}
960	ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
961	mutex_unlock(&event_mutex);
962	return ret;
963parse_error:
964	ret = -EINVAL;
965error:
966	trace_event_probe_cleanup(ep);
967	return ret;
968}
969
970/*
971 * Register dynevent at core_initcall. This allows kernel to setup eprobe
972 * events in postcore_initcall without tracefs.
973 */
974static __init int trace_events_eprobe_init_early(void)
975{
976	int err = 0;
977
978	err = dyn_event_register(&eprobe_dyn_event_ops);
979	if (err)
980		pr_warn("Could not register eprobe_dyn_event_ops\n");
981
982	return err;
983}
984core_initcall(trace_events_eprobe_init_early);