rdtgroup.c - arch/x86/kernel/cpu/resctrl/rdtgroup.c - Linux diff v5.4

   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * User interface for Resource Allocation in Resource Director Technology(RDT)
   4 *
   5 * Copyright (C) 2016 Intel Corporation
   6 *
   7 * Author: Fenghua Yu <fenghua.yu@intel.com>
   8 *
   9 * More information about RDT can be found in the Intel (R) x86 Architecture
  10 * Software Developer Manual.
  11 */
  12
  13#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
  14
  15#include <linux/cacheinfo.h>
  16#include <linux/cpu.h>
  17#include <linux/debugfs.h>
  18#include <linux/fs.h>
  19#include <linux/fs_parser.h>
  20#include <linux/sysfs.h>
  21#include <linux/kernfs.h>
  22#include <linux/seq_buf.h>
  23#include <linux/seq_file.h>
  24#include <linux/sched/signal.h>
  25#include <linux/sched/task.h>
  26#include <linux/slab.h>
  27#include <linux/task_work.h>
  28#include <linux/user_namespace.h>
  29
  30#include <uapi/linux/magic.h>
  31
  32#include <asm/resctrl_sched.h>
  33#include "internal.h"
  34
  35DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
  36DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  37DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
  38static struct kernfs_root *rdt_root;
  39struct rdtgroup rdtgroup_default;
  40LIST_HEAD(rdt_all_groups);
  41
 
 
 
  42/* Kernel fs node for "info" directory under root */
  43static struct kernfs_node *kn_info;
  44
  45/* Kernel fs node for "mon_groups" directory under root */
  46static struct kernfs_node *kn_mongrp;
  47
  48/* Kernel fs node for "mon_data" directory under root */
  49static struct kernfs_node *kn_mondata;
  50
  51static struct seq_buf last_cmd_status;
  52static char last_cmd_status_buf[512];
  53
  54struct dentry *debugfs_resctrl;
  55
  56void rdt_last_cmd_clear(void)
  57{
  58	lockdep_assert_held(&rdtgroup_mutex);
  59	seq_buf_clear(&last_cmd_status);
  60}
  61
  62void rdt_last_cmd_puts(const char *s)
  63{
  64	lockdep_assert_held(&rdtgroup_mutex);
  65	seq_buf_puts(&last_cmd_status, s);
  66}
  67
  68void rdt_last_cmd_printf(const char *fmt, ...)
  69{
  70	va_list ap;
  71
  72	va_start(ap, fmt);
  73	lockdep_assert_held(&rdtgroup_mutex);
  74	seq_buf_vprintf(&last_cmd_status, fmt, ap);
  75	va_end(ap);
  76}
  77
  78/*
  79 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  80 * we can keep a bitmap of free CLOSIDs in a single integer.
  81 *
  82 * Using a global CLOSID across all resources has some advantages and
  83 * some drawbacks:
  84 * + We can simply set "current->closid" to assign a task to a resource
  85 *   group.
  86 * + Context switch code can avoid extra memory references deciding which
  87 *   CLOSID to load into the PQR_ASSOC MSR
  88 * - We give up some options in configuring resource groups across multi-socket
  89 *   systems.
  90 * - Our choices on how to configure each resource become progressively more
  91 *   limited as the number of resources grows.
  92 */
  93static int closid_free_map;
  94static int closid_free_map_len;
  95
  96int closids_supported(void)
  97{
  98	return closid_free_map_len;
  99}
 100
 101static void closid_init(void)
 102{
 103	struct rdt_resource *r;
 104	int rdt_min_closid = 32;
 105
 106	/* Compute rdt_min_closid across all resources */
 107	for_each_alloc_enabled_rdt_resource(r)
 108		rdt_min_closid = min(rdt_min_closid, r->num_closid);
 109
 110	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
 111
 112	/* CLOSID 0 is always reserved for the default group */
 113	closid_free_map &= ~1;
 114	closid_free_map_len = rdt_min_closid;
 115}
 116
 117static int closid_alloc(void)
 118{
 119	u32 closid = ffs(closid_free_map);
 120
 121	if (closid == 0)
 122		return -ENOSPC;
 123	closid--;
 124	closid_free_map &= ~(1 << closid);
 125
 126	return closid;
 127}
 128
 129void closid_free(int closid)
 130{
 131	closid_free_map |= 1 << closid;
 132}
 133
 134/**
 135 * closid_allocated - test if provided closid is in use
 136 * @closid: closid to be tested
 137 *
 138 * Return: true if @closid is currently associated with a resource group,
 139 * false if @closid is free
 140 */
 141static bool closid_allocated(unsigned int closid)
 142{
 143	return (closid_free_map & (1 << closid)) == 0;
 144}
 145
 146/**
 147 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 148 * @closid: closid if the resource group
 149 *
 150 * Each resource group is associated with a @closid. Here the mode
 151 * of a resource group can be queried by searching for it using its closid.
 152 *
 153 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 154 */
 155enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
 156{
 157	struct rdtgroup *rdtgrp;
 158
 159	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
 160		if (rdtgrp->closid == closid)
 161			return rdtgrp->mode;
 162	}
 163
 164	return RDT_NUM_MODES;
 165}
 166
 167static const char * const rdt_mode_str[] = {
 168	[RDT_MODE_SHAREABLE]		= "shareable",
 169	[RDT_MODE_EXCLUSIVE]		= "exclusive",
 170	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
 171	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
 172};
 173
 174/**
 175 * rdtgroup_mode_str - Return the string representation of mode
 176 * @mode: the resource group mode as &enum rdtgroup_mode
 177 *
 178 * Return: string representation of valid mode, "unknown" otherwise
 179 */
 180static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
 181{
 182	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
 183		return "unknown";
 184
 185	return rdt_mode_str[mode];
 186}
 187
 188/* set uid and gid of rdtgroup dirs and files to that of the creator */
 189static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 190{
 191	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
 192				.ia_uid = current_fsuid(),
 193				.ia_gid = current_fsgid(), };
 194
 195	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
 196	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
 197		return 0;
 198
 199	return kernfs_setattr(kn, &iattr);
 200}
 201
 202static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 203{
 204	struct kernfs_node *kn;
 205	int ret;
 206
 207	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
 208				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 209				  0, rft->kf_ops, rft, NULL, NULL);
 210	if (IS_ERR(kn))
 211		return PTR_ERR(kn);
 212
 213	ret = rdtgroup_kn_set_ugid(kn);
 214	if (ret) {
 215		kernfs_remove(kn);
 216		return ret;
 217	}
 218
 219	return 0;
 220}
 221
 222static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 223{
 224	struct kernfs_open_file *of = m->private;
 225	struct rftype *rft = of->kn->priv;
 226
 227	if (rft->seq_show)
 228		return rft->seq_show(of, m, arg);
 229	return 0;
 230}
 231
 232static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
 233				   size_t nbytes, loff_t off)
 234{
 235	struct rftype *rft = of->kn->priv;
 236
 237	if (rft->write)
 238		return rft->write(of, buf, nbytes, off);
 239
 240	return -EINVAL;
 241}
 242
 243static struct kernfs_ops rdtgroup_kf_single_ops = {
 244	.atomic_write_len	= PAGE_SIZE,
 245	.write			= rdtgroup_file_write,
 246	.seq_show		= rdtgroup_seqfile_show,
 247};
 248
 249static struct kernfs_ops kf_mondata_ops = {
 250	.atomic_write_len	= PAGE_SIZE,
 251	.seq_show		= rdtgroup_mondata_show,
 252};
 253
 254static bool is_cpu_list(struct kernfs_open_file *of)
 255{
 256	struct rftype *rft = of->kn->priv;
 257
 258	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
 259}
 260
 261static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 262			      struct seq_file *s, void *v)
 263{
 264	struct rdtgroup *rdtgrp;
 265	struct cpumask *mask;
 266	int ret = 0;
 267
 268	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 269
 270	if (rdtgrp) {
 271		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
 272			if (!rdtgrp->plr->d) {
 273				rdt_last_cmd_clear();
 274				rdt_last_cmd_puts("Cache domain offline\n");
 275				ret = -ENODEV;
 276			} else {
 277				mask = &rdtgrp->plr->d->cpu_mask;
 278				seq_printf(s, is_cpu_list(of) ?
 279					   "%*pbl\n" : "%*pb\n",
 280					   cpumask_pr_args(mask));
 281			}
 282		} else {
 283			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
 284				   cpumask_pr_args(&rdtgrp->cpu_mask));
 285		}
 286	} else {
 287		ret = -ENOENT;
 288	}
 289	rdtgroup_kn_unlock(of->kn);
 290
 291	return ret;
 292}
 293
 294/*
 295 * This is safe against resctrl_sched_in() called from __switch_to()
 296 * because __switch_to() is executed with interrupts disabled. A local call
 297 * from update_closid_rmid() is proteced against __switch_to() because
 298 * preemption is disabled.
 299 */
 300static void update_cpu_closid_rmid(void *info)
 301{
 302	struct rdtgroup *r = info;
 303
 304	if (r) {
 305		this_cpu_write(pqr_state.default_closid, r->closid);
 306		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 307	}
 308
 309	/*
 310	 * We cannot unconditionally write the MSR because the current
 311	 * executing task might have its own closid selected. Just reuse
 312	 * the context switch code.
 313	 */
 314	resctrl_sched_in();
 315}
 316
 317/*
 318 * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
 319 *
 320 * Per task closids/rmids must have been set up before calling this function.
 321 */
 322static void
 323update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
 324{
 325	int cpu = get_cpu();
 326
 327	if (cpumask_test_cpu(cpu, cpu_mask))
 328		update_cpu_closid_rmid(r);
 329	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
 330	put_cpu();
 331}
 332
 333static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 334			  cpumask_var_t tmpmask)
 335{
 336	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
 337	struct list_head *head;
 338
 339	/* Check whether cpus belong to parent ctrl group */
 340	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
 341	if (cpumask_weight(tmpmask)) {
 342		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
 343		return -EINVAL;
 344	}
 345
 346	/* Check whether cpus are dropped from this group */
 347	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 348	if (cpumask_weight(tmpmask)) {
 349		/* Give any dropped cpus to parent rdtgroup */
 350		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
 351		update_closid_rmid(tmpmask, prgrp);
 352	}
 353
 354	/*
 355	 * If we added cpus, remove them from previous group that owned them
 356	 * and update per-cpu rmid
 357	 */
 358	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 359	if (cpumask_weight(tmpmask)) {
 360		head = &prgrp->mon.crdtgrp_list;
 361		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 362			if (crgrp == rdtgrp)
 363				continue;
 364			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
 365				       tmpmask);
 366		}
 367		update_closid_rmid(tmpmask, rdtgrp);
 368	}
 369
 370	/* Done pushing/pulling - update this group with new mask */
 371	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 372
 373	return 0;
 374}
 375
 376static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
 377{
 378	struct rdtgroup *crgrp;
 379
 380	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
 381	/* update the child mon group masks as well*/
 382	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
 383		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
 384}
 385
 386static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 387			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
 388{
 389	struct rdtgroup *r, *crgrp;
 390	struct list_head *head;
 391
 392	/* Check whether cpus are dropped from this group */
 393	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 394	if (cpumask_weight(tmpmask)) {
 395		/* Can't drop from default group */
 396		if (rdtgrp == &rdtgroup_default) {
 397			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
 398			return -EINVAL;
 399		}
 400
 401		/* Give any dropped cpus to rdtgroup_default */
 402		cpumask_or(&rdtgroup_default.cpu_mask,
 403			   &rdtgroup_default.cpu_mask, tmpmask);
 404		update_closid_rmid(tmpmask, &rdtgroup_default);
 405	}
 406
 407	/*
 408	 * If we added cpus, remove them from previous group and
 409	 * the prev group's child groups that owned them
 410	 * and update per-cpu closid/rmid.
 411	 */
 412	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 413	if (cpumask_weight(tmpmask)) {
 414		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
 415			if (r == rdtgrp)
 416				continue;
 417			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
 418			if (cpumask_weight(tmpmask1))
 419				cpumask_rdtgrp_clear(r, tmpmask1);
 420		}
 421		update_closid_rmid(tmpmask, rdtgrp);
 422	}
 423
 424	/* Done pushing/pulling - update this group with new mask */
 425	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 426
 427	/*
 428	 * Clear child mon group masks since there is a new parent mask
 429	 * now and update the rmid for the cpus the child lost.
 430	 */
 431	head = &rdtgrp->mon.crdtgrp_list;
 432	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 433		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
 434		update_closid_rmid(tmpmask, rdtgrp);
 435		cpumask_clear(&crgrp->cpu_mask);
 436	}
 437
 438	return 0;
 439}
 440
 441static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 442				   char *buf, size_t nbytes, loff_t off)
 443{
 444	cpumask_var_t tmpmask, newmask, tmpmask1;
 445	struct rdtgroup *rdtgrp;
 446	int ret;
 447
 448	if (!buf)
 449		return -EINVAL;
 450
 451	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
 452		return -ENOMEM;
 453	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
 454		free_cpumask_var(tmpmask);
 455		return -ENOMEM;
 456	}
 457	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
 458		free_cpumask_var(tmpmask);
 459		free_cpumask_var(newmask);
 460		return -ENOMEM;
 461	}
 462
 463	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 464	if (!rdtgrp) {
 465		ret = -ENOENT;
 466		goto unlock;
 467	}
 468
 469	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 470	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 471		ret = -EINVAL;
 472		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 473		goto unlock;
 474	}
 475
 476	if (is_cpu_list(of))
 477		ret = cpulist_parse(buf, newmask);
 478	else
 479		ret = cpumask_parse(buf, newmask);
 480
 481	if (ret) {
 482		rdt_last_cmd_puts("Bad CPU list/mask\n");
 483		goto unlock;
 484	}
 485
 486	/* check that user didn't specify any offline cpus */
 487	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
 488	if (cpumask_weight(tmpmask)) {
 489		ret = -EINVAL;
 490		rdt_last_cmd_puts("Can only assign online CPUs\n");
 491		goto unlock;
 492	}
 493
 494	if (rdtgrp->type == RDTCTRL_GROUP)
 495		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
 496	else if (rdtgrp->type == RDTMON_GROUP)
 497		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
 498	else
 499		ret = -EINVAL;
 500
 501unlock:
 502	rdtgroup_kn_unlock(of->kn);
 503	free_cpumask_var(tmpmask);
 504	free_cpumask_var(newmask);
 505	free_cpumask_var(tmpmask1);
 506
 507	return ret ?: nbytes;
 508}
 509
 510struct task_move_callback {
 511	struct callback_head	work;
 512	struct rdtgroup		*rdtgrp;
 513};
 514
 515static void move_myself(struct callback_head *head)
 
 
 
 
 
 
 
 516{
 517	struct task_move_callback *callback;
 518	struct rdtgroup *rdtgrp;
 519
 520	callback = container_of(head, struct task_move_callback, work);
 521	rdtgrp = callback->rdtgrp;
 522
 
 
 523	/*
 524	 * If resource group was deleted before this task work callback
 525	 * was invoked, then assign the task to root group and free the
 526	 * resource group.
 527	 */
 528	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 529	    (rdtgrp->flags & RDT_DELETED)) {
 530		current->closid = 0;
 531		current->rmid = 0;
 532		kfree(rdtgrp);
 533	}
 534
 535	preempt_disable();
 536	/* update PQR_ASSOC MSR to make resource group go into effect */
 537	resctrl_sched_in();
 538	preempt_enable();
 539
 540	kfree(callback);
 
 
 
 
 
 541}
 542
 543static int __rdtgroup_move_task(struct task_struct *tsk,
 544				struct rdtgroup *rdtgrp)
 545{
 546	struct task_move_callback *callback;
 547	int ret;
 548
 549	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
 550	if (!callback)
 551		return -ENOMEM;
 552	callback->work.func = move_myself;
 553	callback->rdtgrp = rdtgrp;
 554
 555	/*
 556	 * Take a refcount, so rdtgrp cannot be freed before the
 557	 * callback has been invoked.
 
 
 
 
 558	 */
 559	atomic_inc(&rdtgrp->waitcount);
 560	ret = task_work_add(tsk, &callback->work, true);
 561	if (ret) {
 562		/*
 563		 * Task is exiting. Drop the refcount and free the callback.
 564		 * No need to check the refcount as the group cannot be
 565		 * deleted before the write function unlocks rdtgroup_mutex.
 566		 */
 567		atomic_dec(&rdtgrp->waitcount);
 568		kfree(callback);
 569		rdt_last_cmd_puts("Task exited\n");
 570	} else {
 571		/*
 572		 * For ctrl_mon groups move both closid and rmid.
 573		 * For monitor groups, can move the tasks only from
 574		 * their parent CTRL group.
 575		 */
 576		if (rdtgrp->type == RDTCTRL_GROUP) {
 577			tsk->closid = rdtgrp->closid;
 578			tsk->rmid = rdtgrp->mon.rmid;
 579		} else if (rdtgrp->type == RDTMON_GROUP) {
 580			if (rdtgrp->mon.parent->closid == tsk->closid) {
 581				tsk->rmid = rdtgrp->mon.rmid;
 582			} else {
 583				rdt_last_cmd_puts("Can't move task to different control group\n");
 584				ret = -EINVAL;
 585			}
 586		}
 587	}
 588	return ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 589}
 590
 591/**
 592 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
 593 * @r: Resource group
 594 *
 595 * Return: 1 if tasks have been assigned to @r, 0 otherwise
 596 */
 597int rdtgroup_tasks_assigned(struct rdtgroup *r)
 598{
 599	struct task_struct *p, *t;
 600	int ret = 0;
 601
 602	lockdep_assert_held(&rdtgroup_mutex);
 603
 604	rcu_read_lock();
 605	for_each_process_thread(p, t) {
 606		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
 607		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
 608			ret = 1;
 609			break;
 610		}
 611	}
 612	rcu_read_unlock();
 613
 614	return ret;
 615}
 616
 617static int rdtgroup_task_write_permission(struct task_struct *task,
 618					  struct kernfs_open_file *of)
 619{
 620	const struct cred *tcred = get_task_cred(task);
 621	const struct cred *cred = current_cred();
 622	int ret = 0;
 623
 624	/*
 625	 * Even if we're attaching all tasks in the thread group, we only
 626	 * need to check permissions on one of them.
 627	 */
 628	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 629	    !uid_eq(cred->euid, tcred->uid) &&
 630	    !uid_eq(cred->euid, tcred->suid)) {
 631		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
 632		ret = -EPERM;
 633	}
 634
 635	put_cred(tcred);
 636	return ret;
 637}
 638
 639static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 640			      struct kernfs_open_file *of)
 641{
 642	struct task_struct *tsk;
 643	int ret;
 644
 645	rcu_read_lock();
 646	if (pid) {
 647		tsk = find_task_by_vpid(pid);
 648		if (!tsk) {
 649			rcu_read_unlock();
 650			rdt_last_cmd_printf("No task %d\n", pid);
 651			return -ESRCH;
 652		}
 653	} else {
 654		tsk = current;
 655	}
 656
 657	get_task_struct(tsk);
 658	rcu_read_unlock();
 659
 660	ret = rdtgroup_task_write_permission(tsk, of);
 661	if (!ret)
 662		ret = __rdtgroup_move_task(tsk, rdtgrp);
 663
 664	put_task_struct(tsk);
 665	return ret;
 666}
 667
 668static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 669				    char *buf, size_t nbytes, loff_t off)
 670{
 671	struct rdtgroup *rdtgrp;
 672	int ret = 0;
 673	pid_t pid;
 674
 675	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 676		return -EINVAL;
 677	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 678	if (!rdtgrp) {
 679		rdtgroup_kn_unlock(of->kn);
 680		return -ENOENT;
 681	}
 682	rdt_last_cmd_clear();
 683
 684	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 685	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 686		ret = -EINVAL;
 687		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 688		goto unlock;
 689	}
 690
 691	ret = rdtgroup_move_task(pid, rdtgrp, of);
 692
 693unlock:
 694	rdtgroup_kn_unlock(of->kn);
 695
 696	return ret ?: nbytes;
 697}
 698
 699static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 700{
 701	struct task_struct *p, *t;
 702
 703	rcu_read_lock();
 704	for_each_process_thread(p, t) {
 705		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
 706		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
 707			seq_printf(s, "%d\n", t->pid);
 708	}
 709	rcu_read_unlock();
 710}
 711
 712static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 713			       struct seq_file *s, void *v)
 714{
 715	struct rdtgroup *rdtgrp;
 716	int ret = 0;
 717
 718	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 719	if (rdtgrp)
 720		show_rdt_tasks(rdtgrp, s);
 721	else
 722		ret = -ENOENT;
 723	rdtgroup_kn_unlock(of->kn);
 724
 725	return ret;
 726}
 727
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 728static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
 729				    struct seq_file *seq, void *v)
 730{
 731	int len;
 732
 733	mutex_lock(&rdtgroup_mutex);
 734	len = seq_buf_used(&last_cmd_status);
 735	if (len)
 736		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
 737	else
 738		seq_puts(seq, "ok\n");
 739	mutex_unlock(&rdtgroup_mutex);
 740	return 0;
 741}
 742
 743static int rdt_num_closids_show(struct kernfs_open_file *of,
 744				struct seq_file *seq, void *v)
 745{
 746	struct rdt_resource *r = of->kn->parent->priv;
 747
 748	seq_printf(seq, "%d\n", r->num_closid);
 749	return 0;
 750}
 751
 752static int rdt_default_ctrl_show(struct kernfs_open_file *of,
 753			     struct seq_file *seq, void *v)
 754{
 755	struct rdt_resource *r = of->kn->parent->priv;
 
 756
 757	seq_printf(seq, "%x\n", r->default_ctrl);
 758	return 0;
 759}
 760
 761static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 762			     struct seq_file *seq, void *v)
 763{
 764	struct rdt_resource *r = of->kn->parent->priv;
 
 765
 766	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
 767	return 0;
 768}
 769
 770static int rdt_shareable_bits_show(struct kernfs_open_file *of,
 771				   struct seq_file *seq, void *v)
 772{
 773	struct rdt_resource *r = of->kn->parent->priv;
 
 774
 775	seq_printf(seq, "%x\n", r->cache.shareable_bits);
 776	return 0;
 777}
 778
 779/**
 780 * rdt_bit_usage_show - Display current usage of resources
 781 *
 782 * A domain is a shared resource that can now be allocated differently. Here
 783 * we display the current regions of the domain as an annotated bitmask.
 784 * For each domain of this resource its allocation bitmask
 785 * is annotated as below to indicate the current usage of the corresponding bit:
 786 *   0 - currently unused
 787 *   X - currently available for sharing and used by software and hardware
 788 *   H - currently used by hardware only but available for software use
 789 *   S - currently used and shareable by software only
 790 *   E - currently used exclusively by one resource group
 791 *   P - currently pseudo-locked by one resource group
 792 */
 793static int rdt_bit_usage_show(struct kernfs_open_file *of,
 794			      struct seq_file *seq, void *v)
 795{
 796	struct rdt_resource *r = of->kn->parent->priv;
 797	/*
 798	 * Use unsigned long even though only 32 bits are used to ensure
 799	 * test_bit() is used safely.
 800	 */
 801	unsigned long sw_shareable = 0, hw_shareable = 0;
 802	unsigned long exclusive = 0, pseudo_locked = 0;
 
 803	struct rdt_domain *dom;
 804	int i, hwb, swb, excl, psl;
 805	enum rdtgrp_mode mode;
 806	bool sep = false;
 807	u32 *ctrl;
 808
 809	mutex_lock(&rdtgroup_mutex);
 810	hw_shareable = r->cache.shareable_bits;
 811	list_for_each_entry(dom, &r->domains, list) {
 812		if (sep)
 813			seq_putc(seq, ';');
 814		ctrl = dom->ctrl_val;
 815		sw_shareable = 0;
 816		exclusive = 0;
 817		seq_printf(seq, "%d=", dom->id);
 818		for (i = 0; i < closids_supported(); i++, ctrl++) {
 819			if (!closid_allocated(i))
 820				continue;
 
 
 821			mode = rdtgroup_mode_by_closid(i);
 822			switch (mode) {
 823			case RDT_MODE_SHAREABLE:
 824				sw_shareable |= *ctrl;
 825				break;
 826			case RDT_MODE_EXCLUSIVE:
 827				exclusive |= *ctrl;
 828				break;
 829			case RDT_MODE_PSEUDO_LOCKSETUP:
 830			/*
 831			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
 832			 * here but not included since the CBM
 833			 * associated with this CLOSID in this mode
 834			 * is not initialized and no task or cpu can be
 835			 * assigned this CLOSID.
 836			 */
 837				break;
 838			case RDT_MODE_PSEUDO_LOCKED:
 839			case RDT_NUM_MODES:
 840				WARN(1,
 841				     "invalid mode for closid %d\n", i);
 842				break;
 843			}
 844		}
 845		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 846			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
 847			hwb = test_bit(i, &hw_shareable);
 848			swb = test_bit(i, &sw_shareable);
 849			excl = test_bit(i, &exclusive);
 850			psl = test_bit(i, &pseudo_locked);
 851			if (hwb && swb)
 852				seq_putc(seq, 'X');
 853			else if (hwb && !swb)
 854				seq_putc(seq, 'H');
 855			else if (!hwb && swb)
 856				seq_putc(seq, 'S');
 857			else if (excl)
 858				seq_putc(seq, 'E');
 859			else if (psl)
 860				seq_putc(seq, 'P');
 861			else /* Unused bits remain */
 862				seq_putc(seq, '0');
 863		}
 864		sep = true;
 865	}
 866	seq_putc(seq, '\n');
 867	mutex_unlock(&rdtgroup_mutex);
 868	return 0;
 869}
 870
 871static int rdt_min_bw_show(struct kernfs_open_file *of,
 872			     struct seq_file *seq, void *v)
 873{
 874	struct rdt_resource *r = of->kn->parent->priv;
 
 875
 876	seq_printf(seq, "%u\n", r->membw.min_bw);
 877	return 0;
 878}
 879
 880static int rdt_num_rmids_show(struct kernfs_open_file *of,
 881			      struct seq_file *seq, void *v)
 882{
 883	struct rdt_resource *r = of->kn->parent->priv;
 884
 885	seq_printf(seq, "%d\n", r->num_rmid);
 886
 887	return 0;
 888}
 889
 890static int rdt_mon_features_show(struct kernfs_open_file *of,
 891				 struct seq_file *seq, void *v)
 892{
 893	struct rdt_resource *r = of->kn->parent->priv;
 894	struct mon_evt *mevt;
 895
 896	list_for_each_entry(mevt, &r->evt_list, list)
 897		seq_printf(seq, "%s\n", mevt->name);
 898
 899	return 0;
 900}
 901
 902static int rdt_bw_gran_show(struct kernfs_open_file *of,
 903			     struct seq_file *seq, void *v)
 904{
 905	struct rdt_resource *r = of->kn->parent->priv;
 
 906
 907	seq_printf(seq, "%u\n", r->membw.bw_gran);
 908	return 0;
 909}
 910
 911static int rdt_delay_linear_show(struct kernfs_open_file *of,
 912			     struct seq_file *seq, void *v)
 913{
 914	struct rdt_resource *r = of->kn->parent->priv;
 
 915
 916	seq_printf(seq, "%u\n", r->membw.delay_linear);
 917	return 0;
 918}
 919
 920static int max_threshold_occ_show(struct kernfs_open_file *of,
 921				  struct seq_file *seq, void *v)
 922{
 923	struct rdt_resource *r = of->kn->parent->priv;
 924
 925	seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale);
 
 
 
 
 
 
 
 
 
 
 
 
 926
 927	return 0;
 928}
 929
 930static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
 931				       char *buf, size_t nbytes, loff_t off)
 932{
 933	struct rdt_resource *r = of->kn->parent->priv;
 934	unsigned int bytes;
 935	int ret;
 936
 937	ret = kstrtouint(buf, 0, &bytes);
 938	if (ret)
 939		return ret;
 940
 941	if (bytes > (boot_cpu_data.x86_cache_size * 1024))
 942		return -EINVAL;
 943
 944	resctrl_cqm_threshold = bytes / r->mon_scale;
 945
 946	return nbytes;
 947}
 948
 949/*
 950 * rdtgroup_mode_show - Display mode of this resource group
 951 */
 952static int rdtgroup_mode_show(struct kernfs_open_file *of,
 953			      struct seq_file *s, void *v)
 954{
 955	struct rdtgroup *rdtgrp;
 956
 957	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 958	if (!rdtgrp) {
 959		rdtgroup_kn_unlock(of->kn);
 960		return -ENOENT;
 961	}
 962
 963	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
 964
 965	rdtgroup_kn_unlock(of->kn);
 966	return 0;
 967}
 968
 969/**
 970 * rdt_cdp_peer_get - Retrieve CDP peer if it exists
 971 * @r: RDT resource to which RDT domain @d belongs
 972 * @d: Cache instance for which a CDP peer is requested
 973 * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
 974 *         Used to return the result.
 975 * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
 976 *         Used to return the result.
 977 *
 978 * RDT resources are managed independently and by extension the RDT domains
 979 * (RDT resource instances) are managed independently also. The Code and
 980 * Data Prioritization (CDP) RDT resources, while managed independently,
 981 * could refer to the same underlying hardware. For example,
 982 * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
 983 *
 984 * When provided with an RDT resource @r and an instance of that RDT
 985 * resource @d rdt_cdp_peer_get() will return if there is a peer RDT
 986 * resource and the exact instance that shares the same hardware.
 987 *
 988 * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
 989 *         If a CDP peer was found, @r_cdp will point to the peer RDT resource
 990 *         and @d_cdp will point to the peer RDT domain.
 991 */
 992static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
 993			    struct rdt_resource **r_cdp,
 994			    struct rdt_domain **d_cdp)
 995{
 996	struct rdt_resource *_r_cdp = NULL;
 997	struct rdt_domain *_d_cdp = NULL;
 998	int ret = 0;
 999
1000	switch (r->rid) {
1001	case RDT_RESOURCE_L3DATA:
1002		_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
1003		break;
1004	case RDT_RESOURCE_L3CODE:
1005		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L3DATA];
1006		break;
1007	case RDT_RESOURCE_L2DATA:
1008		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2CODE];
1009		break;
1010	case RDT_RESOURCE_L2CODE:
1011		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2DATA];
1012		break;
1013	default:
1014		ret = -ENOENT;
1015		goto out;
1016	}
1017
1018	/*
1019	 * When a new CPU comes online and CDP is enabled then the new
1020	 * RDT domains (if any) associated with both CDP RDT resources
1021	 * are added in the same CPU online routine while the
1022	 * rdtgroup_mutex is held. It should thus not happen for one
1023	 * RDT domain to exist and be associated with its RDT CDP
1024	 * resource but there is no RDT domain associated with the
1025	 * peer RDT CDP resource. Hence the WARN.
1026	 */
1027	_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
1028	if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
1029		_r_cdp = NULL;
1030		ret = -EINVAL;
1031	}
1032
1033out:
1034	*r_cdp = _r_cdp;
1035	*d_cdp = _d_cdp;
1036
1037	return ret;
1038}
1039
1040/**
1041 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1042 * @r: Resource to which domain instance @d belongs.
1043 * @d: The domain instance for which @closid is being tested.
1044 * @cbm: Capacity bitmask being tested.
1045 * @closid: Intended closid for @cbm.
1046 * @exclusive: Only check if overlaps with exclusive resource groups
1047 *
1048 * Checks if provided @cbm intended to be used for @closid on domain
1049 * @d overlaps with any other closids or other hardware usage associated
1050 * with this domain. If @exclusive is true then only overlaps with
1051 * resource groups in exclusive mode will be considered. If @exclusive
1052 * is false then overlaps with any resource group or hardware entities
1053 * will be considered.
1054 *
1055 * @cbm is unsigned long, even if only 32 bits are used, to make the
1056 * bitmap functions work correctly.
1057 *
1058 * Return: false if CBM does not overlap, true if it does.
1059 */
1060static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1061				    unsigned long cbm, int closid, bool exclusive)
 
1062{
1063	enum rdtgrp_mode mode;
1064	unsigned long ctrl_b;
1065	u32 *ctrl;
1066	int i;
1067
1068	/* Check for any overlap with regions used by hardware directly */
1069	if (!exclusive) {
1070		ctrl_b = r->cache.shareable_bits;
1071		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1072			return true;
1073	}
1074
1075	/* Check for overlap with other resource groups */
1076	ctrl = d->ctrl_val;
1077	for (i = 0; i < closids_supported(); i++, ctrl++) {
1078		ctrl_b = *ctrl;
1079		mode = rdtgroup_mode_by_closid(i);
1080		if (closid_allocated(i) && i != closid &&
1081		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1082			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1083				if (exclusive) {
1084					if (mode == RDT_MODE_EXCLUSIVE)
1085						return true;
1086					continue;
1087				}
1088				return true;
1089			}
1090		}
1091	}
1092
1093	return false;
1094}
1095
1096/**
1097 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1098 * @r: Resource to which domain instance @d belongs.
1099 * @d: The domain instance for which @closid is being tested.
1100 * @cbm: Capacity bitmask being tested.
1101 * @closid: Intended closid for @cbm.
1102 * @exclusive: Only check if overlaps with exclusive resource groups
1103 *
1104 * Resources that can be allocated using a CBM can use the CBM to control
1105 * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
1106 * for overlap. Overlap test is not limited to the specific resource for
1107 * which the CBM is intended though - when dealing with CDP resources that
1108 * share the underlying hardware the overlap check should be performed on
1109 * the CDP resource sharing the hardware also.
1110 *
1111 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1112 * overlap test.
1113 *
1114 * Return: true if CBM overlap detected, false if there is no overlap
1115 */
1116bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1117			   unsigned long cbm, int closid, bool exclusive)
1118{
1119	struct rdt_resource *r_cdp;
1120	struct rdt_domain *d_cdp;
1121
1122	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
 
1123		return true;
1124
1125	if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
1126		return false;
1127
1128	return  __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
1129}
1130
1131/**
1132 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1133 *
1134 * An exclusive resource group implies that there should be no sharing of
1135 * its allocated resources. At the time this group is considered to be
1136 * exclusive this test can determine if its current schemata supports this
1137 * setting by testing for overlap with all other resource groups.
1138 *
1139 * Return: true if resource group can be exclusive, false if there is overlap
1140 * with allocations of other resource groups and thus this resource group
1141 * cannot be exclusive.
1142 */
1143static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1144{
1145	int closid = rdtgrp->closid;
 
1146	struct rdt_resource *r;
1147	bool has_cache = false;
1148	struct rdt_domain *d;
 
1149
1150	for_each_alloc_enabled_rdt_resource(r) {
 
1151		if (r->rid == RDT_RESOURCE_MBA)
1152			continue;
1153		has_cache = true;
1154		list_for_each_entry(d, &r->domains, list) {
1155			if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1156						  rdtgrp->closid, false)) {
 
1157				rdt_last_cmd_puts("Schemata overlaps\n");
1158				return false;
1159			}
1160		}
1161	}
1162
1163	if (!has_cache) {
1164		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1165		return false;
1166	}
1167
1168	return true;
1169}
1170
1171/**
1172 * rdtgroup_mode_write - Modify the resource group's mode
1173 *
1174 */
1175static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1176				   char *buf, size_t nbytes, loff_t off)
1177{
1178	struct rdtgroup *rdtgrp;
1179	enum rdtgrp_mode mode;
1180	int ret = 0;
1181
1182	/* Valid input requires a trailing newline */
1183	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1184		return -EINVAL;
1185	buf[nbytes - 1] = '\0';
1186
1187	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1188	if (!rdtgrp) {
1189		rdtgroup_kn_unlock(of->kn);
1190		return -ENOENT;
1191	}
1192
1193	rdt_last_cmd_clear();
1194
1195	mode = rdtgrp->mode;
1196
1197	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1198	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1199	    (!strcmp(buf, "pseudo-locksetup") &&
1200	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1201	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1202		goto out;
1203
1204	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1205		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1206		ret = -EINVAL;
1207		goto out;
1208	}
1209
1210	if (!strcmp(buf, "shareable")) {
1211		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1212			ret = rdtgroup_locksetup_exit(rdtgrp);
1213			if (ret)
1214				goto out;
1215		}
1216		rdtgrp->mode = RDT_MODE_SHAREABLE;
1217	} else if (!strcmp(buf, "exclusive")) {
1218		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1219			ret = -EINVAL;
1220			goto out;
1221		}
1222		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1223			ret = rdtgroup_locksetup_exit(rdtgrp);
1224			if (ret)
1225				goto out;
1226		}
1227		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1228	} else if (!strcmp(buf, "pseudo-locksetup")) {
1229		ret = rdtgroup_locksetup_enter(rdtgrp);
1230		if (ret)
1231			goto out;
1232		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1233	} else {
1234		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1235		ret = -EINVAL;
1236	}
1237
1238out:
1239	rdtgroup_kn_unlock(of->kn);
1240	return ret ?: nbytes;
1241}
1242
1243/**
1244 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1245 * @r: RDT resource to which @d belongs.
1246 * @d: RDT domain instance.
1247 * @cbm: bitmask for which the size should be computed.
1248 *
1249 * The bitmask provided associated with the RDT domain instance @d will be
1250 * translated into how many bytes it represents. The size in bytes is
1251 * computed by first dividing the total cache size by the CBM length to
1252 * determine how many bytes each bit in the bitmask represents. The result
1253 * is multiplied with the number of bits set in the bitmask.
1254 *
1255 * @cbm is unsigned long, even if only 32 bits are used to make the
1256 * bitmap functions work correctly.
1257 */
1258unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1259				  struct rdt_domain *d, unsigned long cbm)
1260{
1261	struct cpu_cacheinfo *ci;
1262	unsigned int size = 0;
1263	int num_b, i;
1264
1265	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1266	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1267	for (i = 0; i < ci->num_leaves; i++) {
1268		if (ci->info_list[i].level == r->cache_level) {
1269			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1270			break;
1271		}
1272	}
1273
1274	return size;
1275}
1276
1277/**
1278 * rdtgroup_size_show - Display size in bytes of allocated regions
1279 *
1280 * The "size" file mirrors the layout of the "schemata" file, printing the
1281 * size in bytes of each region instead of the capacity bitmask.
1282 *
1283 */
1284static int rdtgroup_size_show(struct kernfs_open_file *of,
1285			      struct seq_file *s, void *v)
1286{
 
 
1287	struct rdtgroup *rdtgrp;
1288	struct rdt_resource *r;
1289	struct rdt_domain *d;
1290	unsigned int size;
1291	int ret = 0;
 
1292	bool sep;
1293	u32 ctrl;
1294
1295	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1296	if (!rdtgrp) {
1297		rdtgroup_kn_unlock(of->kn);
1298		return -ENOENT;
1299	}
1300
1301	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1302		if (!rdtgrp->plr->d) {
1303			rdt_last_cmd_clear();
1304			rdt_last_cmd_puts("Cache domain offline\n");
1305			ret = -ENODEV;
1306		} else {
1307			seq_printf(s, "%*s:", max_name_width,
1308				   rdtgrp->plr->r->name);
1309			size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
1310						    rdtgrp->plr->d,
1311						    rdtgrp->plr->cbm);
1312			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1313		}
1314		goto out;
1315	}
1316
1317	for_each_alloc_enabled_rdt_resource(r) {
 
 
 
 
1318		sep = false;
1319		seq_printf(s, "%*s:", max_name_width, r->name);
1320		list_for_each_entry(d, &r->domains, list) {
1321			if (sep)
1322				seq_putc(s, ';');
1323			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1324				size = 0;
1325			} else {
1326				ctrl = (!is_mba_sc(r) ?
1327						d->ctrl_val[rdtgrp->closid] :
1328						d->mbps_val[rdtgrp->closid]);
 
 
 
1329				if (r->rid == RDT_RESOURCE_MBA)
1330					size = ctrl;
1331				else
1332					size = rdtgroup_cbm_to_size(r, d, ctrl);
1333			}
1334			seq_printf(s, "%d=%u", d->id, size);
1335			sep = true;
1336		}
1337		seq_putc(s, '\n');
1338	}
1339
1340out:
1341	rdtgroup_kn_unlock(of->kn);
1342
1343	return ret;
1344}
1345
1346/* rdtgroup information files for one cache resource. */
1347static struct rftype res_common_files[] = {
1348	{
1349		.name		= "last_cmd_status",
1350		.mode		= 0444,
1351		.kf_ops		= &rdtgroup_kf_single_ops,
1352		.seq_show	= rdt_last_cmd_status_show,
1353		.fflags		= RF_TOP_INFO,
1354	},
1355	{
1356		.name		= "num_closids",
1357		.mode		= 0444,
1358		.kf_ops		= &rdtgroup_kf_single_ops,
1359		.seq_show	= rdt_num_closids_show,
1360		.fflags		= RF_CTRL_INFO,
1361	},
1362	{
1363		.name		= "mon_features",
1364		.mode		= 0444,
1365		.kf_ops		= &rdtgroup_kf_single_ops,
1366		.seq_show	= rdt_mon_features_show,
1367		.fflags		= RF_MON_INFO,
1368	},
1369	{
1370		.name		= "num_rmids",
1371		.mode		= 0444,
1372		.kf_ops		= &rdtgroup_kf_single_ops,
1373		.seq_show	= rdt_num_rmids_show,
1374		.fflags		= RF_MON_INFO,
1375	},
1376	{
1377		.name		= "cbm_mask",
1378		.mode		= 0444,
1379		.kf_ops		= &rdtgroup_kf_single_ops,
1380		.seq_show	= rdt_default_ctrl_show,
1381		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1382	},
1383	{
1384		.name		= "min_cbm_bits",
1385		.mode		= 0444,
1386		.kf_ops		= &rdtgroup_kf_single_ops,
1387		.seq_show	= rdt_min_cbm_bits_show,
1388		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1389	},
1390	{
1391		.name		= "shareable_bits",
1392		.mode		= 0444,
1393		.kf_ops		= &rdtgroup_kf_single_ops,
1394		.seq_show	= rdt_shareable_bits_show,
1395		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1396	},
1397	{
1398		.name		= "bit_usage",
1399		.mode		= 0444,
1400		.kf_ops		= &rdtgroup_kf_single_ops,
1401		.seq_show	= rdt_bit_usage_show,
1402		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1403	},
1404	{
1405		.name		= "min_bandwidth",
1406		.mode		= 0444,
1407		.kf_ops		= &rdtgroup_kf_single_ops,
1408		.seq_show	= rdt_min_bw_show,
1409		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1410	},
1411	{
1412		.name		= "bandwidth_gran",
1413		.mode		= 0444,
1414		.kf_ops		= &rdtgroup_kf_single_ops,
1415		.seq_show	= rdt_bw_gran_show,
1416		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1417	},
1418	{
1419		.name		= "delay_linear",
1420		.mode		= 0444,
1421		.kf_ops		= &rdtgroup_kf_single_ops,
1422		.seq_show	= rdt_delay_linear_show,
1423		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1424	},
 
 
 
 
 
 
 
 
 
 
 
1425	{
1426		.name		= "max_threshold_occupancy",
1427		.mode		= 0644,
1428		.kf_ops		= &rdtgroup_kf_single_ops,
1429		.write		= max_threshold_occ_write,
1430		.seq_show	= max_threshold_occ_show,
1431		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
1432	},
1433	{
1434		.name		= "cpus",
1435		.mode		= 0644,
1436		.kf_ops		= &rdtgroup_kf_single_ops,
1437		.write		= rdtgroup_cpus_write,
1438		.seq_show	= rdtgroup_cpus_show,
1439		.fflags		= RFTYPE_BASE,
1440	},
1441	{
1442		.name		= "cpus_list",
1443		.mode		= 0644,
1444		.kf_ops		= &rdtgroup_kf_single_ops,
1445		.write		= rdtgroup_cpus_write,
1446		.seq_show	= rdtgroup_cpus_show,
1447		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1448		.fflags		= RFTYPE_BASE,
1449	},
1450	{
1451		.name		= "tasks",
1452		.mode		= 0644,
1453		.kf_ops		= &rdtgroup_kf_single_ops,
1454		.write		= rdtgroup_tasks_write,
1455		.seq_show	= rdtgroup_tasks_show,
1456		.fflags		= RFTYPE_BASE,
1457	},
1458	{
1459		.name		= "schemata",
1460		.mode		= 0644,
1461		.kf_ops		= &rdtgroup_kf_single_ops,
1462		.write		= rdtgroup_schemata_write,
1463		.seq_show	= rdtgroup_schemata_show,
1464		.fflags		= RF_CTRL_BASE,
1465	},
1466	{
1467		.name		= "mode",
1468		.mode		= 0644,
1469		.kf_ops		= &rdtgroup_kf_single_ops,
1470		.write		= rdtgroup_mode_write,
1471		.seq_show	= rdtgroup_mode_show,
1472		.fflags		= RF_CTRL_BASE,
1473	},
1474	{
1475		.name		= "size",
1476		.mode		= 0444,
1477		.kf_ops		= &rdtgroup_kf_single_ops,
1478		.seq_show	= rdtgroup_size_show,
1479		.fflags		= RF_CTRL_BASE,
1480	},
1481
1482};
1483
1484static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1485{
1486	struct rftype *rfts, *rft;
1487	int ret, len;
1488
1489	rfts = res_common_files;
1490	len = ARRAY_SIZE(res_common_files);
1491
1492	lockdep_assert_held(&rdtgroup_mutex);
1493
1494	for (rft = rfts; rft < rfts + len; rft++) {
1495		if ((fflags & rft->fflags) == rft->fflags) {
1496			ret = rdtgroup_add_file(kn, rft);
1497			if (ret)
1498				goto error;
1499		}
1500	}
1501
1502	return 0;
1503error:
1504	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
1505	while (--rft >= rfts) {
1506		if ((fflags & rft->fflags) == rft->fflags)
1507			kernfs_remove_by_name(kn, rft->name);
1508	}
1509	return ret;
1510}
1511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1512/**
1513 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1514 * @r: The resource group with which the file is associated.
1515 * @name: Name of the file
1516 *
1517 * The permissions of named resctrl file, directory, or link are modified
1518 * to not allow read, write, or execute by any user.
1519 *
1520 * WARNING: This function is intended to communicate to the user that the
1521 * resctrl file has been locked down - that it is not relevant to the
1522 * particular state the system finds itself in. It should not be relied
1523 * on to protect from user access because after the file's permissions
1524 * are restricted the user can still change the permissions using chmod
1525 * from the command line.
1526 *
1527 * Return: 0 on success, <0 on failure.
1528 */
1529int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1530{
1531	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1532	struct kernfs_node *kn;
1533	int ret = 0;
1534
1535	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1536	if (!kn)
1537		return -ENOENT;
1538
1539	switch (kernfs_type(kn)) {
1540	case KERNFS_DIR:
1541		iattr.ia_mode = S_IFDIR;
1542		break;
1543	case KERNFS_FILE:
1544		iattr.ia_mode = S_IFREG;
1545		break;
1546	case KERNFS_LINK:
1547		iattr.ia_mode = S_IFLNK;
1548		break;
1549	}
1550
1551	ret = kernfs_setattr(kn, &iattr);
1552	kernfs_put(kn);
1553	return ret;
1554}
1555
1556/**
1557 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1558 * @r: The resource group with which the file is associated.
1559 * @name: Name of the file
1560 * @mask: Mask of permissions that should be restored
1561 *
1562 * Restore the permissions of the named file. If @name is a directory the
1563 * permissions of its parent will be used.
1564 *
1565 * Return: 0 on success, <0 on failure.
1566 */
1567int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1568			     umode_t mask)
1569{
1570	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1571	struct kernfs_node *kn, *parent;
1572	struct rftype *rfts, *rft;
1573	int ret, len;
1574
1575	rfts = res_common_files;
1576	len = ARRAY_SIZE(res_common_files);
1577
1578	for (rft = rfts; rft < rfts + len; rft++) {
1579		if (!strcmp(rft->name, name))
1580			iattr.ia_mode = rft->mode & mask;
1581	}
1582
1583	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1584	if (!kn)
1585		return -ENOENT;
1586
1587	switch (kernfs_type(kn)) {
1588	case KERNFS_DIR:
1589		parent = kernfs_get_parent(kn);
1590		if (parent) {
1591			iattr.ia_mode |= parent->mode;
1592			kernfs_put(parent);
1593		}
1594		iattr.ia_mode |= S_IFDIR;
1595		break;
1596	case KERNFS_FILE:
1597		iattr.ia_mode |= S_IFREG;
1598		break;
1599	case KERNFS_LINK:
1600		iattr.ia_mode |= S_IFLNK;
1601		break;
1602	}
1603
1604	ret = kernfs_setattr(kn, &iattr);
1605	kernfs_put(kn);
1606	return ret;
1607}
1608
1609static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
1610				      unsigned long fflags)
1611{
1612	struct kernfs_node *kn_subdir;
1613	int ret;
1614
1615	kn_subdir = kernfs_create_dir(kn_info, name,
1616				      kn_info->mode, r);
1617	if (IS_ERR(kn_subdir))
1618		return PTR_ERR(kn_subdir);
1619
1620	kernfs_get(kn_subdir);
1621	ret = rdtgroup_kn_set_ugid(kn_subdir);
1622	if (ret)
1623		return ret;
1624
1625	ret = rdtgroup_add_files(kn_subdir, fflags);
1626	if (!ret)
1627		kernfs_activate(kn_subdir);
1628
1629	return ret;
1630}
1631
1632static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
1633{
 
1634	struct rdt_resource *r;
1635	unsigned long fflags;
1636	char name[32];
1637	int ret;
1638
1639	/* create the directory */
1640	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
1641	if (IS_ERR(kn_info))
1642		return PTR_ERR(kn_info);
1643	kernfs_get(kn_info);
1644
1645	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
1646	if (ret)
1647		goto out_destroy;
1648
1649	for_each_alloc_enabled_rdt_resource(r) {
 
 
1650		fflags =  r->fflags | RF_CTRL_INFO;
1651		ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
1652		if (ret)
1653			goto out_destroy;
1654	}
1655
1656	for_each_mon_enabled_rdt_resource(r) {
1657		fflags =  r->fflags | RF_MON_INFO;
1658		sprintf(name, "%s_MON", r->name);
1659		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
1660		if (ret)
1661			goto out_destroy;
1662	}
1663
1664	/*
1665	 * This extra ref will be put in kernfs_remove() and guarantees
1666	 * that @rdtgrp->kn is always accessible.
1667	 */
1668	kernfs_get(kn_info);
1669
1670	ret = rdtgroup_kn_set_ugid(kn_info);
1671	if (ret)
1672		goto out_destroy;
1673
1674	kernfs_activate(kn_info);
1675
1676	return 0;
1677
1678out_destroy:
1679	kernfs_remove(kn_info);
1680	return ret;
1681}
1682
1683static int
1684mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
1685		    char *name, struct kernfs_node **dest_kn)
1686{
1687	struct kernfs_node *kn;
1688	int ret;
1689
1690	/* create the directory */
1691	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
1692	if (IS_ERR(kn))
1693		return PTR_ERR(kn);
1694
1695	if (dest_kn)
1696		*dest_kn = kn;
1697
1698	/*
1699	 * This extra ref will be put in kernfs_remove() and guarantees
1700	 * that @rdtgrp->kn is always accessible.
1701	 */
1702	kernfs_get(kn);
1703
1704	ret = rdtgroup_kn_set_ugid(kn);
1705	if (ret)
1706		goto out_destroy;
1707
1708	kernfs_activate(kn);
1709
1710	return 0;
1711
1712out_destroy:
1713	kernfs_remove(kn);
1714	return ret;
1715}
1716
1717static void l3_qos_cfg_update(void *arg)
1718{
1719	bool *enable = arg;
1720
1721	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
1722}
1723
1724static void l2_qos_cfg_update(void *arg)
1725{
1726	bool *enable = arg;
1727
1728	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
1729}
1730
1731static inline bool is_mba_linear(void)
1732{
1733	return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
1734}
1735
1736static int set_cache_qos_cfg(int level, bool enable)
1737{
1738	void (*update)(void *arg);
1739	struct rdt_resource *r_l;
1740	cpumask_var_t cpu_mask;
1741	struct rdt_domain *d;
1742	int cpu;
1743
1744	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1745		return -ENOMEM;
1746
1747	if (level == RDT_RESOURCE_L3)
1748		update = l3_qos_cfg_update;
1749	else if (level == RDT_RESOURCE_L2)
1750		update = l2_qos_cfg_update;
1751	else
1752		return -EINVAL;
1753
1754	r_l = &rdt_resources_all[level];
 
 
 
1755	list_for_each_entry(d, &r_l->domains, list) {
1756		/* Pick one CPU from each domain instance to update MSR */
1757		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
 
 
 
 
 
1758	}
1759	cpu = get_cpu();
1760	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
1761	if (cpumask_test_cpu(cpu, cpu_mask))
1762		update(&enable);
1763	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
1764	smp_call_function_many(cpu_mask, update, &enable, 1);
1765	put_cpu();
1766
1767	free_cpumask_var(cpu_mask);
1768
1769	return 0;
1770}
1771
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1772/*
1773 * Enable or disable the MBA software controller
1774 * which helps user specify bandwidth in MBps.
1775 * MBA software controller is supported only if
1776 * MBM is supported and MBA is in linear scale.
1777 */
 
 
 
 
 
 
 
 
 
 
 
 
1778static int set_mba_sc(bool mba_sc)
1779{
1780	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
 
1781	struct rdt_domain *d;
 
1782
1783	if (!is_mbm_enabled() || !is_mba_linear() ||
1784	    mba_sc == is_mba_sc(r))
1785		return -EINVAL;
1786
1787	r->membw.mba_sc = mba_sc;
1788	list_for_each_entry(d, &r->domains, list)
1789		setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
 
 
 
1790
1791	return 0;
1792}
1793
1794static int cdp_enable(int level, int data_type, int code_type)
1795{
1796	struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
1797	struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
1798	struct rdt_resource *r_l = &rdt_resources_all[level];
1799	int ret;
1800
1801	if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
1802	    !r_lcode->alloc_capable)
1803		return -EINVAL;
1804
1805	ret = set_cache_qos_cfg(level, true);
1806	if (!ret) {
1807		r_l->alloc_enabled = false;
1808		r_ldata->alloc_enabled = true;
1809		r_lcode->alloc_enabled = true;
1810	}
1811	return ret;
1812}
1813
1814static int cdpl3_enable(void)
1815{
1816	return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
1817			  RDT_RESOURCE_L3CODE);
1818}
1819
1820static int cdpl2_enable(void)
1821{
1822	return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
1823			  RDT_RESOURCE_L2CODE);
1824}
1825
1826static void cdp_disable(int level, int data_type, int code_type)
1827{
1828	struct rdt_resource *r = &rdt_resources_all[level];
1829
1830	r->alloc_enabled = r->alloc_capable;
 
1831
1832	if (rdt_resources_all[data_type].alloc_enabled) {
1833		rdt_resources_all[data_type].alloc_enabled = false;
1834		rdt_resources_all[code_type].alloc_enabled = false;
1835		set_cache_qos_cfg(level, false);
1836	}
1837}
1838
1839static void cdpl3_disable(void)
1840{
1841	cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
1842}
1843
1844static void cdpl2_disable(void)
1845{
1846	cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
1847}
1848
1849static void cdp_disable_all(void)
1850{
1851	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
1852		cdpl3_disable();
1853	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
1854		cdpl2_disable();
1855}
1856
1857/*
1858 * We don't allow rdtgroup directories to be created anywhere
1859 * except the root directory. Thus when looking for the rdtgroup
1860 * structure for a kernfs node we are either looking at a directory,
1861 * in which case the rdtgroup structure is pointed at by the "priv"
1862 * field, otherwise we have a file, and need only look to the parent
1863 * to find the rdtgroup.
1864 */
1865static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
1866{
1867	if (kernfs_type(kn) == KERNFS_DIR) {
1868		/*
1869		 * All the resource directories use "kn->priv"
1870		 * to point to the "struct rdtgroup" for the
1871		 * resource. "info" and its subdirectories don't
1872		 * have rdtgroup structures, so return NULL here.
1873		 */
1874		if (kn == kn_info || kn->parent == kn_info)
1875			return NULL;
1876		else
1877			return kn->priv;
1878	} else {
1879		return kn->parent->priv;
1880	}
1881}
1882
1883struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
1884{
1885	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1886
1887	if (!rdtgrp)
1888		return NULL;
1889
1890	atomic_inc(&rdtgrp->waitcount);
1891	kernfs_break_active_protection(kn);
1892
1893	mutex_lock(&rdtgroup_mutex);
1894
1895	/* Was this group deleted while we waited? */
1896	if (rdtgrp->flags & RDT_DELETED)
1897		return NULL;
1898
1899	return rdtgrp;
1900}
1901
1902void rdtgroup_kn_unlock(struct kernfs_node *kn)
1903{
1904	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1905
1906	if (!rdtgrp)
1907		return;
1908
1909	mutex_unlock(&rdtgroup_mutex);
1910
1911	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
1912	    (rdtgrp->flags & RDT_DELETED)) {
1913		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
1914		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
1915			rdtgroup_pseudo_lock_remove(rdtgrp);
1916		kernfs_unbreak_active_protection(kn);
1917		kernfs_put(rdtgrp->kn);
1918		kfree(rdtgrp);
1919	} else {
1920		kernfs_unbreak_active_protection(kn);
1921	}
1922}
1923
1924static int mkdir_mondata_all(struct kernfs_node *parent_kn,
1925			     struct rdtgroup *prgrp,
1926			     struct kernfs_node **mon_data_kn);
1927
1928static int rdt_enable_ctx(struct rdt_fs_context *ctx)
1929{
1930	int ret = 0;
1931
1932	if (ctx->enable_cdpl2)
1933		ret = cdpl2_enable();
1934
1935	if (!ret && ctx->enable_cdpl3)
1936		ret = cdpl3_enable();
1937
1938	if (!ret && ctx->enable_mba_mbps)
1939		ret = set_mba_sc(true);
1940
1941	return ret;
1942}
1943
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1944static int rdt_get_tree(struct fs_context *fc)
1945{
1946	struct rdt_fs_context *ctx = rdt_fc2context(fc);
1947	struct rdt_domain *dom;
1948	struct rdt_resource *r;
1949	int ret;
1950
1951	cpus_read_lock();
1952	mutex_lock(&rdtgroup_mutex);
1953	/*
1954	 * resctrl file system can only be mounted once.
1955	 */
1956	if (static_branch_unlikely(&rdt_enable_key)) {
1957		ret = -EBUSY;
1958		goto out;
1959	}
1960
1961	ret = rdt_enable_ctx(ctx);
1962	if (ret < 0)
1963		goto out_cdp;
1964
 
 
 
 
 
 
1965	closid_init();
1966
1967	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
1968	if (ret < 0)
1969		goto out_mba;
1970
1971	if (rdt_mon_capable) {
1972		ret = mongroup_create_dir(rdtgroup_default.kn,
1973					  NULL, "mon_groups",
1974					  &kn_mongrp);
1975		if (ret < 0)
1976			goto out_info;
1977		kernfs_get(kn_mongrp);
1978
1979		ret = mkdir_mondata_all(rdtgroup_default.kn,
1980					&rdtgroup_default, &kn_mondata);
1981		if (ret < 0)
1982			goto out_mongrp;
1983		kernfs_get(kn_mondata);
1984		rdtgroup_default.mon.mon_data_kn = kn_mondata;
1985	}
1986
1987	ret = rdt_pseudo_lock_init();
1988	if (ret)
1989		goto out_mondata;
1990
1991	ret = kernfs_get_tree(fc);
1992	if (ret < 0)
1993		goto out_psl;
1994
1995	if (rdt_alloc_capable)
1996		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
1997	if (rdt_mon_capable)
1998		static_branch_enable_cpuslocked(&rdt_mon_enable_key);
1999
2000	if (rdt_alloc_capable || rdt_mon_capable)
2001		static_branch_enable_cpuslocked(&rdt_enable_key);
2002
2003	if (is_mbm_enabled()) {
2004		r = &rdt_resources_all[RDT_RESOURCE_L3];
2005		list_for_each_entry(dom, &r->domains, list)
2006			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
2007	}
2008
2009	goto out;
2010
2011out_psl:
2012	rdt_pseudo_lock_release();
2013out_mondata:
2014	if (rdt_mon_capable)
2015		kernfs_remove(kn_mondata);
2016out_mongrp:
2017	if (rdt_mon_capable)
2018		kernfs_remove(kn_mongrp);
2019out_info:
2020	kernfs_remove(kn_info);
 
 
2021out_mba:
2022	if (ctx->enable_mba_mbps)
2023		set_mba_sc(false);
2024out_cdp:
2025	cdp_disable_all();
2026out:
2027	rdt_last_cmd_clear();
2028	mutex_unlock(&rdtgroup_mutex);
2029	cpus_read_unlock();
2030	return ret;
2031}
2032
/* Mount option identifiers; the values index rdt_param_specs. */
enum rdt_param {
	Opt_cdp,	/* "cdp": sets enable_cdpl3 in the mount context */
	Opt_cdpl2,	/* "cdpl2": sets enable_cdpl2 in the mount context */
	Opt_mba_mbps,	/* "mba_MBps": sets enable_mba_mbps in the mount context */
	nr__rdt_params	/* number of options, not an option itself */
};
2039
2040static const struct fs_parameter_spec rdt_param_specs[] = {
2041	fsparam_flag("cdp",		Opt_cdp),
2042	fsparam_flag("cdpl2",		Opt_cdpl2),
2043	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2044	{}
2045};
2046
2047static const struct fs_parameter_description rdt_fs_parameters = {
2048	.name		= "rdt",
2049	.specs		= rdt_param_specs,
2050};
2051
2052static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2053{
2054	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2055	struct fs_parse_result result;
2056	int opt;
2057
2058	opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
2059	if (opt < 0)
2060		return opt;
2061
2062	switch (opt) {
2063	case Opt_cdp:
2064		ctx->enable_cdpl3 = true;
2065		return 0;
2066	case Opt_cdpl2:
2067		ctx->enable_cdpl2 = true;
2068		return 0;
2069	case Opt_mba_mbps:
2070		if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
2071			return -EINVAL;
2072		ctx->enable_mba_mbps = true;
2073		return 0;
2074	}
2075
2076	return -EINVAL;
2077}
2078
/*
 * Free the resctrl mount context. The context pointer is derived from @fc
 * before the kernfs part is released, then the context itself is freed.
 */
static void rdt_fs_context_free(struct fs_context *fc)
{
	struct rdt_fs_context *rdt_ctx;

	rdt_ctx = rdt_fc2context(fc);
	kernfs_free_fs_context(fc);
	kfree(rdt_ctx);
}
2086
2087static const struct fs_context_operations rdt_fs_context_ops = {
2088	.free		= rdt_fs_context_free,
2089	.parse_param	= rdt_parse_param,
2090	.get_tree	= rdt_get_tree,
2091};
2092
2093static int rdt_init_fs_context(struct fs_context *fc)
2094{
2095	struct rdt_fs_context *ctx;
2096
2097	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2098	if (!ctx)
2099		return -ENOMEM;
2100
2101	ctx->kfc.root = rdt_root;
2102	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2103	fc->fs_private = &ctx->kfc;
2104	fc->ops = &rdt_fs_context_ops;
2105	put_user_ns(fc->user_ns);
2106	fc->user_ns = get_user_ns(&init_user_ns);
2107	fc->global = true;
2108	return 0;
2109}
2110
2111static int reset_all_ctrls(struct rdt_resource *r)
2112{
 
 
2113	struct msr_param msr_param;
2114	cpumask_var_t cpu_mask;
2115	struct rdt_domain *d;
2116	int i, cpu;
2117
2118	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2119		return -ENOMEM;
2120
2121	msr_param.res = r;
2122	msr_param.low = 0;
2123	msr_param.high = r->num_closid;
2124
2125	/*
2126	 * Disable resource control for this resource by setting all
2127	 * CBMs in all domains to the maximum mask value. Pick one CPU
2128	 * from each domain to update the MSRs below.
2129	 */
2130	list_for_each_entry(d, &r->domains, list) {
 
2131		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
2132
2133		for (i = 0; i < r->num_closid; i++)
2134			d->ctrl_val[i] = r->default_ctrl;
2135	}
2136	cpu = get_cpu();
2137	/* Update CBM on this cpu if it's in cpu_mask. */
2138	if (cpumask_test_cpu(cpu, cpu_mask))
2139		rdt_ctrl_update(&msr_param);
2140	/* Update CBM on all other cpus in cpu_mask. */
2141	smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
2142	put_cpu();
2143
2144	free_cpumask_var(cpu_mask);
2145
2146	return 0;
2147}
2148
2149static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
2150{
2151	return (rdt_alloc_capable &&
2152		(r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
2153}
2154
2155static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
2156{
2157	return (rdt_mon_capable &&
2158		(r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
2159}
2160
2161/*
2162 * Move tasks from one to the other group. If @from is NULL, then all tasks
2163 * in the systems are moved unconditionally (used for teardown).
2164 *
2165 * If @mask is not NULL the cpus on which moved tasks are running are set
2166 * in that mask so the update smp function call is restricted to affected
2167 * cpus.
2168 */
2169static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2170				 struct cpumask *mask)
2171{
2172	struct task_struct *p, *t;
2173
2174	read_lock(&tasklist_lock);
2175	for_each_process_thread(p, t) {
2176		if (!from || is_closid_match(t, from) ||
2177		    is_rmid_match(t, from)) {
2178			t->closid = to->closid;
2179			t->rmid = to->mon.rmid;
2180
2181#ifdef CONFIG_SMP
2182			/*
2183			 * This is safe on x86 w/o barriers as the ordering
2184			 * of writing to task_cpu() and t->on_cpu is
2185			 * reverse to the reading here. The detection is
2186			 * inaccurate as tasks might move or schedule
2187			 * before the smp function call takes place. In
2188			 * such a case the function call is pointless, but
 
 
 
 
 
 
2189			 * there is no other side effect.
2190			 */
2191			if (mask && t->on_cpu)
2192				cpumask_set_cpu(task_cpu(t), mask);
2193#endif
2194		}
2195	}
2196	read_unlock(&tasklist_lock);
2197}
2198
2199static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2200{
2201	struct rdtgroup *sentry, *stmp;
2202	struct list_head *head;
2203
2204	head = &rdtgrp->mon.crdtgrp_list;
2205	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2206		free_rmid(sentry->mon.rmid);
2207		list_del(&sentry->mon.crdtgrp_list);
2208		kfree(sentry);
 
 
 
 
2209	}
2210}
2211
2212/*
2213 * Forcibly remove all of subdirectories under root.
2214 */
2215static void rmdir_all_sub(void)
2216{
2217	struct rdtgroup *rdtgrp, *tmp;
2218
2219	/* Move all tasks to the default resource group */
2220	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2221
2222	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2223		/* Free any child rmids */
2224		free_all_child_rdtgrp(rdtgrp);
2225
2226		/* Remove each rdtgroup other than root */
2227		if (rdtgrp == &rdtgroup_default)
2228			continue;
2229
2230		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2231		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2232			rdtgroup_pseudo_lock_remove(rdtgrp);
2233
2234		/*
2235		 * Give any CPUs back to the default group. We cannot copy
2236		 * cpu_online_mask because a CPU might have executed the
2237		 * offline callback already, but is still marked online.
2238		 */
2239		cpumask_or(&rdtgroup_default.cpu_mask,
2240			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2241
2242		free_rmid(rdtgrp->mon.rmid);
2243
2244		kernfs_remove(rdtgrp->kn);
2245		list_del(&rdtgrp->rdtgroup_list);
2246		kfree(rdtgrp);
 
 
 
 
2247	}
2248	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2249	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2250
2251	kernfs_remove(kn_info);
2252	kernfs_remove(kn_mongrp);
2253	kernfs_remove(kn_mondata);
2254}
2255
2256static void rdt_kill_sb(struct super_block *sb)
2257{
2258	struct rdt_resource *r;
2259
2260	cpus_read_lock();
2261	mutex_lock(&rdtgroup_mutex);
2262
2263	set_mba_sc(false);
2264
2265	/*Put everything back to default values. */
2266	for_each_alloc_enabled_rdt_resource(r)
2267		reset_all_ctrls(r);
2268	cdp_disable_all();
2269	rmdir_all_sub();
2270	rdt_pseudo_lock_release();
2271	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
 
2272	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
2273	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
2274	static_branch_disable_cpuslocked(&rdt_enable_key);
2275	kernfs_kill_sb(sb);
2276	mutex_unlock(&rdtgroup_mutex);
2277	cpus_read_unlock();
2278}
2279
2280static struct file_system_type rdt_fs_type = {
2281	.name			= "resctrl",
2282	.init_fs_context	= rdt_init_fs_context,
2283	.parameters		= &rdt_fs_parameters,
2284	.kill_sb		= rdt_kill_sb,
2285};
2286
2287static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2288		       void *priv)
2289{
2290	struct kernfs_node *kn;
2291	int ret = 0;
2292
2293	kn = __kernfs_create_file(parent_kn, name, 0444,
2294				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2295				  &kf_mondata_ops, priv, NULL, NULL);
2296	if (IS_ERR(kn))
2297		return PTR_ERR(kn);
2298
2299	ret = rdtgroup_kn_set_ugid(kn);
2300	if (ret) {
2301		kernfs_remove(kn);
2302		return ret;
2303	}
2304
2305	return ret;
2306}
2307
2308/*
2309 * Remove all subdirectories of mon_data of ctrl_mon groups
2310 * and monitor groups with given domain id.
2311 */
2312void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
 
2313{
2314	struct rdtgroup *prgrp, *crgrp;
2315	char name[32];
2316
2317	if (!r->mon_enabled)
2318		return;
2319
2320	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2321		sprintf(name, "mon_%s_%02d", r->name, dom_id);
2322		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
2323
2324		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
2325			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
2326	}
2327}
2328
2329static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
2330				struct rdt_domain *d,
2331				struct rdt_resource *r, struct rdtgroup *prgrp)
2332{
2333	union mon_data_bits priv;
2334	struct kernfs_node *kn;
2335	struct mon_evt *mevt;
2336	struct rmid_read rr;
2337	char name[32];
2338	int ret;
2339
2340	sprintf(name, "mon_%s_%02d", r->name, d->id);
2341	/* create the directory */
2342	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2343	if (IS_ERR(kn))
2344		return PTR_ERR(kn);
2345
2346	/*
2347	 * This extra ref will be put in kernfs_remove() and guarantees
2348	 * that kn is always accessible.
2349	 */
2350	kernfs_get(kn);
2351	ret = rdtgroup_kn_set_ugid(kn);
2352	if (ret)
2353		goto out_destroy;
2354
2355	if (WARN_ON(list_empty(&r->evt_list))) {
2356		ret = -EPERM;
2357		goto out_destroy;
2358	}
2359
2360	priv.u.rid = r->rid;
2361	priv.u.domid = d->id;
2362	list_for_each_entry(mevt, &r->evt_list, list) {
2363		priv.u.evtid = mevt->evtid;
2364		ret = mon_addfile(kn, mevt->name, priv.priv);
2365		if (ret)
2366			goto out_destroy;
2367
2368		if (is_mbm_event(mevt->evtid))
2369			mon_event_read(&rr, d, prgrp, mevt->evtid, true);
2370	}
2371	kernfs_activate(kn);
2372	return 0;
2373
2374out_destroy:
2375	kernfs_remove(kn);
2376	return ret;
2377}
2378
2379/*
2380 * Add all subdirectories of mon_data for "ctrl_mon" groups
2381 * and "monitor" groups with given domain id.
2382 */
2383void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2384				    struct rdt_domain *d)
2385{
2386	struct kernfs_node *parent_kn;
2387	struct rdtgroup *prgrp, *crgrp;
2388	struct list_head *head;
2389
2390	if (!r->mon_enabled)
2391		return;
2392
2393	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2394		parent_kn = prgrp->mon.mon_data_kn;
2395		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
2396
2397		head = &prgrp->mon.crdtgrp_list;
2398		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
2399			parent_kn = crgrp->mon.mon_data_kn;
2400			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
2401		}
2402	}
2403}
2404
2405static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
2406				       struct rdt_resource *r,
2407				       struct rdtgroup *prgrp)
2408{
2409	struct rdt_domain *dom;
2410	int ret;
2411
2412	list_for_each_entry(dom, &r->domains, list) {
2413		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
2414		if (ret)
2415			return ret;
2416	}
2417
2418	return 0;
2419}
2420
2421/*
2422 * This creates a directory mon_data which contains the monitored data.
2423 *
2424 * mon_data has one directory for each domain whic are named
2425 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
2426 * with L3 domain looks as below:
2427 * ./mon_data:
2428 * mon_L3_00
2429 * mon_L3_01
2430 * mon_L3_02
2431 * ...
2432 *
2433 * Each domain directory has one file per event:
2434 * ./mon_L3_00/:
2435 * llc_occupancy
2436 *
2437 */
2438static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2439			     struct rdtgroup *prgrp,
2440			     struct kernfs_node **dest_kn)
2441{
2442	struct rdt_resource *r;
2443	struct kernfs_node *kn;
2444	int ret;
2445
2446	/*
2447	 * Create the mon_data directory first.
2448	 */
2449	ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
2450	if (ret)
2451		return ret;
2452
2453	if (dest_kn)
2454		*dest_kn = kn;
2455
2456	/*
2457	 * Create the subdirectories for each domain. Note that all events
2458	 * in a domain like L3 are grouped into a resource whose domain is L3
2459	 */
2460	for_each_mon_enabled_rdt_resource(r) {
2461		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
2462		if (ret)
2463			goto out_destroy;
2464	}
2465
2466	return 0;
2467
2468out_destroy:
2469	kernfs_remove(kn);
2470	return ret;
2471}
2472
2473/**
2474 * cbm_ensure_valid - Enforce validity on provided CBM
2475 * @_val:	Candidate CBM
2476 * @r:		RDT resource to which the CBM belongs
2477 *
2478 * The provided CBM represents all cache portions available for use. This
2479 * may be represented by a bitmap that does not consist of contiguous ones
2480 * and thus be an invalid CBM.
2481 * Here the provided CBM is forced to be a valid CBM by only considering
2482 * the first set of contiguous bits as valid and clearing all bits.
2483 * The intention here is to provide a valid default CBM with which a new
2484 * resource group is initialized. The user can follow this with a
2485 * modification to the CBM if the default does not satisfy the
2486 * requirements.
2487 */
2488static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
2489{
2490	unsigned int cbm_len = r->cache.cbm_len;
2491	unsigned long first_bit, zero_bit;
2492	unsigned long val = _val;
2493
2494	if (!val)
2495		return 0;
2496
2497	first_bit = find_first_bit(&val, cbm_len);
2498	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
2499
2500	/* Clear any remaining bits to ensure contiguous region */
2501	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
2502	return (u32)val;
2503}
2504
2505/*
2506 * Initialize cache resources per RDT domain
2507 *
2508 * Set the RDT domain up to start off with all usable allocations. That is,
2509 * all shareable and unused bits. All-zero CBM is invalid.
2510 */
2511static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
2512				 u32 closid)
2513{
2514	struct rdt_resource *r_cdp = NULL;
2515	struct rdt_domain *d_cdp = NULL;
 
 
2516	u32 used_b = 0, unused_b = 0;
2517	unsigned long tmp_cbm;
2518	enum rdtgrp_mode mode;
2519	u32 peer_ctl, *ctrl;
2520	int i;
2521
2522	rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
2523	d->have_new_ctrl = false;
2524	d->new_ctrl = r->cache.shareable_bits;
2525	used_b = r->cache.shareable_bits;
2526	ctrl = d->ctrl_val;
2527	for (i = 0; i < closids_supported(); i++, ctrl++) {
2528		if (closid_allocated(i) && i != closid) {
2529			mode = rdtgroup_mode_by_closid(i);
2530			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
2531				/*
2532				 * ctrl values for locksetup aren't relevant
2533				 * until the schemata is written, and the mode
2534				 * becomes RDT_MODE_PSEUDO_LOCKED.
2535				 */
2536				continue;
2537			/*
2538			 * If CDP is active include peer domain's
2539			 * usage to ensure there is no overlap
2540			 * with an exclusive group.
2541			 */
2542			if (d_cdp)
2543				peer_ctl = d_cdp->ctrl_val[i];
 
2544			else
2545				peer_ctl = 0;
2546			used_b |= *ctrl | peer_ctl;
 
 
2547			if (mode == RDT_MODE_SHAREABLE)
2548				d->new_ctrl |= *ctrl | peer_ctl;
2549		}
2550	}
2551	if (d->plr && d->plr->cbm > 0)
2552		used_b |= d->plr->cbm;
2553	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
2554	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
2555	d->new_ctrl |= unused_b;
2556	/*
2557	 * Force the initial CBM to be valid, user can
2558	 * modify the CBM based on system availability.
2559	 */
2560	d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
2561	/*
2562	 * Assign the u32 CBM to an unsigned long to ensure that
2563	 * bitmap_weight() does not access out-of-bound memory.
2564	 */
2565	tmp_cbm = d->new_ctrl;
2566	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
2567		rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
2568		return -ENOSPC;
2569	}
2570	d->have_new_ctrl = true;
2571
2572	return 0;
2573}
2574
2575/*
2576 * Initialize cache resources with default values.
2577 *
2578 * A new RDT group is being created on an allocation capable (CAT)
2579 * supporting system. Set this group up to start off with all usable
2580 * allocations.
2581 *
2582 * If there are no more shareable bits available on any domain then
2583 * the entire allocation will fail.
2584 */
2585static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
2586{
2587	struct rdt_domain *d;
2588	int ret;
2589
2590	list_for_each_entry(d, &r->domains, list) {
2591		ret = __init_one_rdt_domain(d, r, closid);
2592		if (ret < 0)
2593			return ret;
2594	}
2595
2596	return 0;
2597}
2598
2599/* Initialize MBA resource with default values. */
2600static void rdtgroup_init_mba(struct rdt_resource *r)
2601{
 
2602	struct rdt_domain *d;
2603
2604	list_for_each_entry(d, &r->domains, list) {
2605		d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
2606		d->have_new_ctrl = true;
 
 
 
 
 
 
2607	}
2608}
2609
2610/* Initialize the RDT group's allocations. */
2611static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2612{
 
2613	struct rdt_resource *r;
2614	int ret;
2615
2616	for_each_alloc_enabled_rdt_resource(r) {
 
2617		if (r->rid == RDT_RESOURCE_MBA) {
2618			rdtgroup_init_mba(r);
 
 
2619		} else {
2620			ret = rdtgroup_init_cat(r, rdtgrp->closid);
2621			if (ret < 0)
2622				return ret;
2623		}
2624
2625		ret = update_domains(r, rdtgrp->closid);
2626		if (ret < 0) {
2627			rdt_last_cmd_puts("Failed to initialize allocations\n");
2628			return ret;
2629		}
2630
2631	}
2632
2633	rdtgrp->mode = RDT_MODE_SHAREABLE;
2634
2635	return 0;
2636}
2637
2638static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
2639			     struct kernfs_node *prgrp_kn,
2640			     const char *name, umode_t mode,
2641			     enum rdt_group_type rtype, struct rdtgroup **r)
2642{
2643	struct rdtgroup *prdtgrp, *rdtgrp;
2644	struct kernfs_node *kn;
2645	uint files = 0;
2646	int ret;
2647
2648	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
2649	if (!prdtgrp) {
2650		ret = -ENODEV;
2651		goto out_unlock;
2652	}
2653
2654	if (rtype == RDTMON_GROUP &&
2655	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2656	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
2657		ret = -EINVAL;
2658		rdt_last_cmd_puts("Pseudo-locking in progress\n");
2659		goto out_unlock;
2660	}
2661
2662	/* allocate the rdtgroup. */
2663	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
2664	if (!rdtgrp) {
2665		ret = -ENOSPC;
2666		rdt_last_cmd_puts("Kernel out of memory\n");
2667		goto out_unlock;
2668	}
2669	*r = rdtgrp;
2670	rdtgrp->mon.parent = prdtgrp;
2671	rdtgrp->type = rtype;
2672	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
2673
2674	/* kernfs creates the directory for rdtgrp */
2675	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
2676	if (IS_ERR(kn)) {
2677		ret = PTR_ERR(kn);
2678		rdt_last_cmd_puts("kernfs create error\n");
2679		goto out_free_rgrp;
2680	}
2681	rdtgrp->kn = kn;
2682
2683	/*
2684	 * kernfs_remove() will drop the reference count on "kn" which
2685	 * will free it. But we still need it to stick around for the
2686	 * rdtgroup_kn_unlock(kn} call below. Take one extra reference
2687	 * here, which will be dropped inside rdtgroup_kn_unlock().
2688	 */
2689	kernfs_get(kn);
2690
2691	ret = rdtgroup_kn_set_ugid(kn);
2692	if (ret) {
2693		rdt_last_cmd_puts("kernfs perm error\n");
2694		goto out_destroy;
2695	}
2696
2697	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
2698	ret = rdtgroup_add_files(kn, files);
2699	if (ret) {
2700		rdt_last_cmd_puts("kernfs fill error\n");
2701		goto out_destroy;
2702	}
2703
2704	if (rdt_mon_capable) {
2705		ret = alloc_rmid();
2706		if (ret < 0) {
2707			rdt_last_cmd_puts("Out of RMIDs\n");
2708			goto out_destroy;
2709		}
2710		rdtgrp->mon.rmid = ret;
2711
2712		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
2713		if (ret) {
2714			rdt_last_cmd_puts("kernfs subdir error\n");
2715			goto out_idfree;
2716		}
2717	}
2718	kernfs_activate(kn);
2719
2720	/*
2721	 * The caller unlocks the prgrp_kn upon success.
2722	 */
2723	return 0;
2724
2725out_idfree:
2726	free_rmid(rdtgrp->mon.rmid);
2727out_destroy:
 
2728	kernfs_remove(rdtgrp->kn);
2729out_free_rgrp:
2730	kfree(rdtgrp);
2731out_unlock:
2732	rdtgroup_kn_unlock(prgrp_kn);
2733	return ret;
2734}
2735
2736static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
2737{
2738	kernfs_remove(rgrp->kn);
2739	free_rmid(rgrp->mon.rmid);
2740	kfree(rgrp);
2741}
2742
2743/*
2744 * Create a monitor group under "mon_groups" directory of a control
2745 * and monitor group(ctrl_mon). This is a resource group
2746 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
2747 */
2748static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
2749			      struct kernfs_node *prgrp_kn,
2750			      const char *name,
2751			      umode_t mode)
2752{
2753	struct rdtgroup *rdtgrp, *prgrp;
2754	int ret;
2755
2756	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
2757				&rdtgrp);
2758	if (ret)
2759		return ret;
2760
2761	prgrp = rdtgrp->mon.parent;
2762	rdtgrp->closid = prgrp->closid;
2763
2764	/*
2765	 * Add the rdtgrp to the list of rdtgrps the parent
2766	 * ctrl_mon group has to track.
2767	 */
2768	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
2769
2770	rdtgroup_kn_unlock(prgrp_kn);
2771	return ret;
2772}
2773
2774/*
2775 * These are rdtgroups created under the root directory. Can be used
2776 * to allocate and monitor resources.
2777 */
2778static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
2779				   struct kernfs_node *prgrp_kn,
2780				   const char *name, umode_t mode)
2781{
2782	struct rdtgroup *rdtgrp;
2783	struct kernfs_node *kn;
2784	u32 closid;
2785	int ret;
2786
2787	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
2788				&rdtgrp);
2789	if (ret)
2790		return ret;
2791
2792	kn = rdtgrp->kn;
2793	ret = closid_alloc();
2794	if (ret < 0) {
2795		rdt_last_cmd_puts("Out of CLOSIDs\n");
2796		goto out_common_fail;
2797	}
2798	closid = ret;
2799	ret = 0;
2800
2801	rdtgrp->closid = closid;
2802	ret = rdtgroup_init_alloc(rdtgrp);
2803	if (ret < 0)
2804		goto out_id_free;
2805
2806	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
2807
2808	if (rdt_mon_capable) {
2809		/*
2810		 * Create an empty mon_groups directory to hold the subset
2811		 * of tasks and cpus to monitor.
2812		 */
2813		ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
2814		if (ret) {
2815			rdt_last_cmd_puts("kernfs subdir error\n");
2816			goto out_del_list;
2817		}
2818	}
2819
2820	goto out_unlock;
2821
2822out_del_list:
2823	list_del(&rdtgrp->rdtgroup_list);
2824out_id_free:
2825	closid_free(closid);
2826out_common_fail:
2827	mkdir_rdt_prepare_clean(rdtgrp);
2828out_unlock:
2829	rdtgroup_kn_unlock(prgrp_kn);
2830	return ret;
2831}
2832
2833/*
2834 * We allow creating mon groups only with in a directory called "mon_groups"
2835 * which is present in every ctrl_mon group. Check if this is a valid
2836 * "mon_groups" directory.
2837 *
2838 * 1. The directory should be named "mon_groups".
2839 * 2. The mon group itself should "not" be named "mon_groups".
2840 *   This makes sure "mon_groups" directory always has a ctrl_mon group
2841 *   as parent.
2842 */
2843static bool is_mon_groups(struct kernfs_node *kn, const char *name)
2844{
2845	return (!strcmp(kn->name, "mon_groups") &&
2846		strcmp(name, "mon_groups"));
2847}
2848
2849static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
2850			  umode_t mode)
2851{
2852	/* Do not accept '\n' to avoid unparsable situation. */
2853	if (strchr(name, '\n'))
2854		return -EINVAL;
2855
2856	/*
2857	 * If the parent directory is the root directory and RDT
2858	 * allocation is supported, add a control and monitoring
2859	 * subdirectory
2860	 */
2861	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
2862		return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
2863
2864	/*
2865	 * If RDT monitoring is supported and the parent directory is a valid
2866	 * "mon_groups" directory, add a monitoring subdirectory.
2867	 */
2868	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
2869		return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
2870
2871	return -EPERM;
2872}
2873
2874static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
2875			      cpumask_var_t tmpmask)
2876{
2877	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
2878	int cpu;
2879
2880	/* Give any tasks back to the parent group */
2881	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
2882
2883	/* Update per cpu rmid of the moved CPUs first */
2884	for_each_cpu(cpu, &rdtgrp->cpu_mask)
2885		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
2886	/*
2887	 * Update the MSR on moved CPUs and CPUs which have moved
2888	 * task running on them.
2889	 */
2890	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
2891	update_closid_rmid(tmpmask, NULL);
2892
2893	rdtgrp->flags = RDT_DELETED;
2894	free_rmid(rdtgrp->mon.rmid);
2895
2896	/*
2897	 * Remove the rdtgrp from the parent ctrl_mon group's list
2898	 */
2899	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
2900	list_del(&rdtgrp->mon.crdtgrp_list);
2901
2902	/*
2903	 * one extra hold on this, will drop when we kfree(rdtgrp)
2904	 * in rdtgroup_kn_unlock()
2905	 */
2906	kernfs_get(kn);
2907	kernfs_remove(rdtgrp->kn);
2908
2909	return 0;
2910}
2911
2912static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
2913				struct rdtgroup *rdtgrp)
2914{
2915	rdtgrp->flags = RDT_DELETED;
2916	list_del(&rdtgrp->rdtgroup_list);
2917
2918	/*
2919	 * one extra hold on this, will drop when we kfree(rdtgrp)
2920	 * in rdtgroup_kn_unlock()
2921	 */
2922	kernfs_get(kn);
2923	kernfs_remove(rdtgrp->kn);
2924	return 0;
2925}
2926
2927static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
2928			       cpumask_var_t tmpmask)
2929{
2930	int cpu;
2931
2932	/* Give any tasks back to the default group */
2933	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
2934
2935	/* Give any CPUs back to the default group */
2936	cpumask_or(&rdtgroup_default.cpu_mask,
2937		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2938
2939	/* Update per cpu closid and rmid of the moved CPUs first */
2940	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
2941		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
2942		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
2943	}
2944
2945	/*
2946	 * Update the MSR on moved CPUs and CPUs which have moved
2947	 * task running on them.
2948	 */
2949	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
2950	update_closid_rmid(tmpmask, NULL);
2951
2952	closid_free(rdtgrp->closid);
2953	free_rmid(rdtgrp->mon.rmid);
2954
 
 
2955	/*
2956	 * Free all the child monitor group rmids.
2957	 */
2958	free_all_child_rdtgrp(rdtgrp);
2959
2960	rdtgroup_ctrl_remove(kn, rdtgrp);
2961
2962	return 0;
2963}
2964
2965static int rdtgroup_rmdir(struct kernfs_node *kn)
2966{
2967	struct kernfs_node *parent_kn = kn->parent;
2968	struct rdtgroup *rdtgrp;
2969	cpumask_var_t tmpmask;
2970	int ret = 0;
2971
2972	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
2973		return -ENOMEM;
2974
2975	rdtgrp = rdtgroup_kn_lock_live(kn);
2976	if (!rdtgrp) {
2977		ret = -EPERM;
2978		goto out;
2979	}
2980
2981	/*
2982	 * If the rdtgroup is a ctrl_mon group and parent directory
2983	 * is the root directory, remove the ctrl_mon group.
2984	 *
2985	 * If the rdtgroup is a mon group and parent directory
2986	 * is a valid "mon_groups" directory, remove the mon group.
2987	 */
2988	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
 
2989		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2990		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
2991			ret = rdtgroup_ctrl_remove(kn, rdtgrp);
2992		} else {
2993			ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
2994		}
2995	} else if (rdtgrp->type == RDTMON_GROUP &&
2996		 is_mon_groups(parent_kn, kn->name)) {
2997		ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
2998	} else {
2999		ret = -EPERM;
3000	}
3001
3002out:
3003	rdtgroup_kn_unlock(kn);
3004	free_cpumask_var(tmpmask);
3005	return ret;
3006}
3007
3008static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3009{
3010	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
3011		seq_puts(seq, ",cdp");
3012
3013	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
3014		seq_puts(seq, ",cdpl2");
3015
3016	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
3017		seq_puts(seq, ",mba_MBps");
3018
3019	return 0;
3020}
3021
3022static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
3023	.mkdir		= rdtgroup_mkdir,
3024	.rmdir		= rdtgroup_rmdir,
3025	.show_options	= rdtgroup_show_options,
3026};
3027
3028static int __init rdtgroup_setup_root(void)
3029{
3030	int ret;
3031
3032	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3033				      KERNFS_ROOT_CREATE_DEACTIVATED |
3034				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3035				      &rdtgroup_default);
3036	if (IS_ERR(rdt_root))
3037		return PTR_ERR(rdt_root);
3038
3039	mutex_lock(&rdtgroup_mutex);
3040
3041	rdtgroup_default.closid = 0;
3042	rdtgroup_default.mon.rmid = 0;
3043	rdtgroup_default.type = RDTCTRL_GROUP;
3044	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3045
3046	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
3047
3048	ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
3049	if (ret) {
3050		kernfs_destroy_root(rdt_root);
3051		goto out;
3052	}
3053
3054	rdtgroup_default.kn = rdt_root->kn;
3055	kernfs_activate(rdtgroup_default.kn);
3056
3057out:
3058	mutex_unlock(&rdtgroup_mutex);
3059
3060	return ret;
3061}
3062
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3063/*
3064 * rdtgroup_init - rdtgroup initialization
3065 *
3066 * Setup resctrl file system including set up root, create mount point,
3067 * register rdtgroup filesystem, and initialize files under root directory.
3068 *
3069 * Return: 0 on success or -errno
3070 */
3071int __init rdtgroup_init(void)
3072{
3073	int ret = 0;
3074
3075	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
3076		     sizeof(last_cmd_status_buf));
3077
3078	ret = rdtgroup_setup_root();
3079	if (ret)
3080		return ret;
3081
3082	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
3083	if (ret)
3084		goto cleanup_root;
3085
3086	ret = register_filesystem(&rdt_fs_type);
3087	if (ret)
3088		goto cleanup_mountpoint;
3089
3090	/*
3091	 * Adding the resctrl debugfs directory here may not be ideal since
3092	 * it would let the resctrl debugfs directory appear on the debugfs
3093	 * filesystem before the resctrl filesystem is mounted.
3094	 * It may also be ok since that would enable debugging of RDT before
3095	 * resctrl is mounted.
3096	 * The reason why the debugfs directory is created here and not in
3097	 * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and
3098	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
3099	 * (the lockdep class of inode->i_rwsem). Other filesystem
3100	 * interactions (eg. SyS_getdents) have the lock ordering:
3101	 * &sb->s_type->i_mutex_key --> &mm->mmap_sem
3102	 * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
3103	 * is taken, thus creating dependency:
3104	 * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause
3105	 * issues considering the other two lock dependencies.
3106	 * By creating the debugfs directory here we avoid a dependency
3107	 * that may cause deadlock (even though file operations cannot
3108	 * occur until the filesystem is mounted, but I do not know how to
3109	 * tell lockdep that).
3110	 */
3111	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
3112
3113	return 0;
3114
3115cleanup_mountpoint:
3116	sysfs_remove_mount_point(fs_kobj, "resctrl");
3117cleanup_root:
3118	kernfs_destroy_root(rdt_root);
3119
3120	return ret;
3121}
3122
3123void __exit rdtgroup_exit(void)
3124{
3125	debugfs_remove_recursive(debugfs_resctrl);
3126	unregister_filesystem(&rdt_fs_type);
3127	sysfs_remove_mount_point(fs_kobj, "resctrl");
3128	kernfs_destroy_root(rdt_root);
3129}

   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * User interface for Resource Allocation in Resource Director Technology(RDT)
   4 *
   5 * Copyright (C) 2016 Intel Corporation
   6 *
   7 * Author: Fenghua Yu <fenghua.yu@intel.com>
   8 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual.
  11 */
  12
  13#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
  14
  15#include <linux/cacheinfo.h>
  16#include <linux/cpu.h>
  17#include <linux/debugfs.h>
  18#include <linux/fs.h>
  19#include <linux/fs_parser.h>
  20#include <linux/sysfs.h>
  21#include <linux/kernfs.h>
  22#include <linux/seq_buf.h>
  23#include <linux/seq_file.h>
  24#include <linux/sched/signal.h>
  25#include <linux/sched/task.h>
  26#include <linux/slab.h>
  27#include <linux/task_work.h>
  28#include <linux/user_namespace.h>
  29
  30#include <uapi/linux/magic.h>
  31
  32#include <asm/resctrl.h>
  33#include "internal.h"
  34
  35DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
  36DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  37DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
  38static struct kernfs_root *rdt_root;
  39struct rdtgroup rdtgroup_default;
  40LIST_HEAD(rdt_all_groups);
  41
  42/* list of entries for the schemata file */
  43LIST_HEAD(resctrl_schema_all);
  44
  45/* Kernel fs node for "info" directory under root */
  46static struct kernfs_node *kn_info;
  47
  48/* Kernel fs node for "mon_groups" directory under root */
  49static struct kernfs_node *kn_mongrp;
  50
  51/* Kernel fs node for "mon_data" directory under root */
  52static struct kernfs_node *kn_mondata;
  53
  54static struct seq_buf last_cmd_status;
  55static char last_cmd_status_buf[512];
  56
  57struct dentry *debugfs_resctrl;
  58
  59void rdt_last_cmd_clear(void)
  60{
  61	lockdep_assert_held(&rdtgroup_mutex);
  62	seq_buf_clear(&last_cmd_status);
  63}
  64
  65void rdt_last_cmd_puts(const char *s)
  66{
  67	lockdep_assert_held(&rdtgroup_mutex);
  68	seq_buf_puts(&last_cmd_status, s);
  69}
  70
  71void rdt_last_cmd_printf(const char *fmt, ...)
  72{
  73	va_list ap;
  74
  75	va_start(ap, fmt);
  76	lockdep_assert_held(&rdtgroup_mutex);
  77	seq_buf_vprintf(&last_cmd_status, fmt, ap);
  78	va_end(ap);
  79}
  80
  81/*
  82 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  83 * we can keep a bitmap of free CLOSIDs in a single integer.
  84 *
  85 * Using a global CLOSID across all resources has some advantages and
  86 * some drawbacks:
  87 * + We can simply set "current->closid" to assign a task to a resource
  88 *   group.
  89 * + Context switch code can avoid extra memory references deciding which
  90 *   CLOSID to load into the PQR_ASSOC MSR
  91 * - We give up some options in configuring resource groups across multi-socket
  92 *   systems.
  93 * - Our choices on how to configure each resource become progressively more
  94 *   limited as the number of resources grows.
  95 */
  96static int closid_free_map;
  97static int closid_free_map_len;
  98
  99int closids_supported(void)
 100{
 101	return closid_free_map_len;
 102}
 103
 104static void closid_init(void)
 105{
 106	struct resctrl_schema *s;
 107	u32 rdt_min_closid = 32;
 108
 109	/* Compute rdt_min_closid across all resources */
 110	list_for_each_entry(s, &resctrl_schema_all, list)
 111		rdt_min_closid = min(rdt_min_closid, s->num_closid);
 112
 113	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
 114
 115	/* CLOSID 0 is always reserved for the default group */
 116	closid_free_map &= ~1;
 117	closid_free_map_len = rdt_min_closid;
 118}
 119
 120static int closid_alloc(void)
 121{
 122	u32 closid = ffs(closid_free_map);
 123
 124	if (closid == 0)
 125		return -ENOSPC;
 126	closid--;
 127	closid_free_map &= ~(1 << closid);
 128
 129	return closid;
 130}
 131
 132void closid_free(int closid)
 133{
 134	closid_free_map |= 1 << closid;
 135}
 136
 137/**
 138 * closid_allocated - test if provided closid is in use
 139 * @closid: closid to be tested
 140 *
 141 * Return: true if @closid is currently associated with a resource group,
 142 * false if @closid is free
 143 */
 144static bool closid_allocated(unsigned int closid)
 145{
 146	return (closid_free_map & (1 << closid)) == 0;
 147}
 148
 149/**
 150 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 151 * @closid: closid if the resource group
 152 *
 153 * Each resource group is associated with a @closid. Here the mode
 154 * of a resource group can be queried by searching for it using its closid.
 155 *
 156 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 157 */
 158enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
 159{
 160	struct rdtgroup *rdtgrp;
 161
 162	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
 163		if (rdtgrp->closid == closid)
 164			return rdtgrp->mode;
 165	}
 166
 167	return RDT_NUM_MODES;
 168}
 169
/* Human-readable name of each resource group mode, indexed by the mode value */
static const char * const rdt_mode_str[] = {
	[RDT_MODE_SHAREABLE]		= "shareable",
	[RDT_MODE_EXCLUSIVE]		= "exclusive",
	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
};
 176
 177/**
 178 * rdtgroup_mode_str - Return the string representation of mode
 179 * @mode: the resource group mode as &enum rdtgroup_mode
 180 *
 181 * Return: string representation of valid mode, "unknown" otherwise
 182 */
 183static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
 184{
 185	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
 186		return "unknown";
 187
 188	return rdt_mode_str[mode];
 189}
 190
 191/* set uid and gid of rdtgroup dirs and files to that of the creator */
 192static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 193{
 194	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
 195				.ia_uid = current_fsuid(),
 196				.ia_gid = current_fsgid(), };
 197
 198	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
 199	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
 200		return 0;
 201
 202	return kernfs_setattr(kn, &iattr);
 203}
 204
 205static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 206{
 207	struct kernfs_node *kn;
 208	int ret;
 209
 210	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
 211				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 212				  0, rft->kf_ops, rft, NULL, NULL);
 213	if (IS_ERR(kn))
 214		return PTR_ERR(kn);
 215
 216	ret = rdtgroup_kn_set_ugid(kn);
 217	if (ret) {
 218		kernfs_remove(kn);
 219		return ret;
 220	}
 221
 222	return 0;
 223}
 224
 225static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 226{
 227	struct kernfs_open_file *of = m->private;
 228	struct rftype *rft = of->kn->priv;
 229
 230	if (rft->seq_show)
 231		return rft->seq_show(of, m, arg);
 232	return 0;
 233}
 234
 235static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
 236				   size_t nbytes, loff_t off)
 237{
 238	struct rftype *rft = of->kn->priv;
 239
 240	if (rft->write)
 241		return rft->write(of, buf, nbytes, off);
 242
 243	return -EINVAL;
 244}
 245
/*
 * kernfs operations that dispatch reads and writes to the handlers of the
 * rftype stored in the kernfs node's private data.
 */
static const struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= rdtgroup_file_write,
	.seq_show		= rdtgroup_seqfile_show,
};
 251
/* kernfs operations with no write handler; shown via rdtgroup_mondata_show() */
static const struct kernfs_ops kf_mondata_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.seq_show		= rdtgroup_mondata_show,
};
 256
 257static bool is_cpu_list(struct kernfs_open_file *of)
 258{
 259	struct rftype *rft = of->kn->priv;
 260
 261	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
 262}
 263
 264static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 265			      struct seq_file *s, void *v)
 266{
 267	struct rdtgroup *rdtgrp;
 268	struct cpumask *mask;
 269	int ret = 0;
 270
 271	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 272
 273	if (rdtgrp) {
 274		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
 275			if (!rdtgrp->plr->d) {
 276				rdt_last_cmd_clear();
 277				rdt_last_cmd_puts("Cache domain offline\n");
 278				ret = -ENODEV;
 279			} else {
 280				mask = &rdtgrp->plr->d->cpu_mask;
 281				seq_printf(s, is_cpu_list(of) ?
 282					   "%*pbl\n" : "%*pb\n",
 283					   cpumask_pr_args(mask));
 284			}
 285		} else {
 286			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
 287				   cpumask_pr_args(&rdtgrp->cpu_mask));
 288		}
 289	} else {
 290		ret = -ENOENT;
 291	}
 292	rdtgroup_kn_unlock(of->kn);
 293
 294	return ret;
 295}
 296
 297/*
 298 * This is safe against resctrl_sched_in() called from __switch_to()
 299 * because __switch_to() is executed with interrupts disabled. A local call
 300 * from update_closid_rmid() is protected against __switch_to() because
 301 * preemption is disabled.
 302 */
 303static void update_cpu_closid_rmid(void *info)
 304{
 305	struct rdtgroup *r = info;
 306
 307	if (r) {
 308		this_cpu_write(pqr_state.default_closid, r->closid);
 309		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 310	}
 311
 312	/*
 313	 * We cannot unconditionally write the MSR because the current
 314	 * executing task might have its own closid selected. Just reuse
 315	 * the context switch code.
 316	 */
 317	resctrl_sched_in();
 318}
 319
/*
 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask,
 *
 * Per task closids/rmids must have been set up before calling this function.
 *
 * @r is handed to update_cpu_closid_rmid() on every targeted CPU; a NULL @r
 * leaves the per-CPU defaults untouched and only re-runs resctrl_sched_in().
 */
static void
update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
	/* get_cpu() disables preemption so the local call stays on this CPU */
	int cpu = get_cpu();

	/* Handle the local CPU directly, then call the function on @cpu_mask */
	if (cpumask_test_cpu(cpu, cpu_mask))
		update_cpu_closid_rmid(r);
	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
	put_cpu();
}
 335
/*
 * cpus_mon_write - Apply a new CPU mask to a monitor group
 * @rdtgrp:  monitor group being updated
 * @newmask: CPU mask requested for @rdtgrp
 * @tmpmask: scratch mask, overwritten by this function
 *
 * CPUs dropped from @rdtgrp are given back to the parent control group. CPUs
 * added to @rdtgrp are removed from the sibling monitor groups of the same
 * parent. The affected CPUs are updated through update_closid_rmid().
 *
 * Return: 0 on success, -EINVAL if @newmask contains CPUs that are not in the
 * parent control group's mask.
 */
static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			  cpumask_var_t tmpmask)
{
	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
	struct list_head *head;

	/* Check whether cpus belong to parent ctrl group */
	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
		return -EINVAL;
	}

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		/* Give any dropped cpus to parent rdtgroup */
		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, prgrp);
	}

	/*
	 * If we added cpus, remove them from previous group that owned them
	 * and update per-cpu rmid
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
			if (crgrp == rdtgrp)
				continue;
			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
				       tmpmask);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	return 0;
}
 378
 379static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
 380{
 381	struct rdtgroup *crgrp;
 382
 383	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
 384	/* update the child mon group masks as well*/
 385	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
 386		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
 387}
 388
/*
 * cpus_ctrl_write - Apply a new CPU mask to a control group
 * @rdtgrp:   control group being updated
 * @newmask:  CPU mask requested for @rdtgrp
 * @tmpmask:  scratch mask, overwritten by this function
 * @tmpmask1: second scratch mask, overwritten by this function
 *
 * CPUs dropped from @rdtgrp are given to the default group. CPUs added to
 * @rdtgrp are removed from every other group on rdt_all_groups and from the
 * child monitor groups of those groups. Finally the CPU masks of @rdtgrp's
 * own child monitor groups are cleared.
 *
 * Return: 0 on success, -EINVAL when asked to drop CPUs from the default
 * group.
 */
static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
{
	struct rdtgroup *r, *crgrp;
	struct list_head *head;

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		/* Can't drop from default group */
		if (rdtgrp == &rdtgroup_default) {
			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
			return -EINVAL;
		}

		/* Give any dropped cpus to rdtgroup_default */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, &rdtgroup_default);
	}

	/*
	 * If we added cpus, remove them from previous group and
	 * the prev group's child groups that owned them
	 * and update per-cpu closid/rmid.
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
			if (r == rdtgrp)
				continue;
			/* tmpmask1: the added cpus that group r currently owns */
			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
			if (!cpumask_empty(tmpmask1))
				cpumask_rdtgrp_clear(r, tmpmask1);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	/*
	 * Clear child mon group masks since there is a new parent mask
	 * now and update the rmid for the cpus the child lost.
	 */
	head = &rdtgrp->mon.crdtgrp_list;
	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
		update_closid_rmid(tmpmask, rdtgrp);
		cpumask_clear(&crgrp->cpu_mask);
	}

	return 0;
}
 443
 444static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 445				   char *buf, size_t nbytes, loff_t off)
 446{
 447	cpumask_var_t tmpmask, newmask, tmpmask1;
 448	struct rdtgroup *rdtgrp;
 449	int ret;
 450
 451	if (!buf)
 452		return -EINVAL;
 453
 454	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
 455		return -ENOMEM;
 456	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
 457		free_cpumask_var(tmpmask);
 458		return -ENOMEM;
 459	}
 460	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
 461		free_cpumask_var(tmpmask);
 462		free_cpumask_var(newmask);
 463		return -ENOMEM;
 464	}
 465
 466	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 467	if (!rdtgrp) {
 468		ret = -ENOENT;
 469		goto unlock;
 470	}
 471
 472	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 473	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 474		ret = -EINVAL;
 475		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 476		goto unlock;
 477	}
 478
 479	if (is_cpu_list(of))
 480		ret = cpulist_parse(buf, newmask);
 481	else
 482		ret = cpumask_parse(buf, newmask);
 483
 484	if (ret) {
 485		rdt_last_cmd_puts("Bad CPU list/mask\n");
 486		goto unlock;
 487	}
 488
 489	/* check that user didn't specify any offline cpus */
 490	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
 491	if (!cpumask_empty(tmpmask)) {
 492		ret = -EINVAL;
 493		rdt_last_cmd_puts("Can only assign online CPUs\n");
 494		goto unlock;
 495	}
 496
 497	if (rdtgrp->type == RDTCTRL_GROUP)
 498		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
 499	else if (rdtgrp->type == RDTMON_GROUP)
 500		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
 501	else
 502		ret = -EINVAL;
 503
 504unlock:
 505	rdtgroup_kn_unlock(of->kn);
 506	free_cpumask_var(tmpmask);
 507	free_cpumask_var(newmask);
 508	free_cpumask_var(tmpmask1);
 509
 510	return ret ?: nbytes;
 511}
 512
 513/**
 514 * rdtgroup_remove - the helper to remove resource group safely
 515 * @rdtgrp: resource group to remove
 516 *
 517 * On resource group creation via a mkdir, an extra kernfs_node reference is
 518 * taken to ensure that the rdtgroup structure remains accessible for the
 519 * rdtgroup_kn_unlock() calls where it is removed.
 520 *
 521 * Drop the extra reference here, then free the rdtgroup structure.
 522 *
 523 * Return: void
 524 */
 525static void rdtgroup_remove(struct rdtgroup *rdtgrp)
 526{
 527	kernfs_put(rdtgrp->kn);
 528	kfree(rdtgrp);
 529}
 
 
 530
 531static void _update_task_closid_rmid(void *task)
 532{
 533	/*
 534	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
 535	 * Otherwise, the MSR is updated when the task is scheduled in.
 
 536	 */
 537	if (task == current)
 538		resctrl_sched_in();
 539}
 
 
 
 
 
 
 
 
 540
 541static void update_task_closid_rmid(struct task_struct *t)
 542{
 543	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
 544		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
 545	else
 546		_update_task_closid_rmid(t);
 547}
 548
/*
 * __rdtgroup_move_task - Assign @tsk to resource group @rdtgrp
 * @tsk:    task to move
 * @rdtgrp: destination control or monitor group
 *
 * Writes the group's closid and/or rmid into @tsk and then makes sure a
 * currently running @tsk picks them up.
 *
 * Return: 0 on success or when @tsk already belongs to @rdtgrp, -EINVAL when
 * @rdtgrp is a monitor group whose parent closid differs from @tsk's closid.
 */
static int __rdtgroup_move_task(struct task_struct *tsk,
				struct rdtgroup *rdtgrp)
{
	/* If the task is already in rdtgrp, no need to move the task. */
	if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
	     tsk->rmid == rdtgrp->mon.rmid) ||
	    (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
	     tsk->closid == rdtgrp->mon.parent->closid))
		return 0;

	/*
	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
	 * updated by them.
	 *
	 * For ctrl_mon groups, move both closid and rmid.
	 * For monitor groups, can move the tasks only from
	 * their parent CTRL group.
	 */

	if (rdtgrp->type == RDTCTRL_GROUP) {
		WRITE_ONCE(tsk->closid, rdtgrp->closid);
		WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
	} else if (rdtgrp->type == RDTMON_GROUP) {
		if (rdtgrp->mon.parent->closid == tsk->closid) {
			WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
		} else {
			rdt_last_cmd_puts("Can't move task to different control group\n");
			return -EINVAL;
		}
	}

	/*
	 * Ensure the task's closid and rmid are written before determining if
	 * the task is current that will decide if it will be interrupted.
	 * This pairs with the full barrier between the rq->curr update and
	 * resctrl_sched_in() during context switch.
	 */
	smp_mb();

	/*
	 * By now, the task's closid and rmid are set. If the task is current
	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
	 * group go into effect. If the task is not current, the MSR will be
	 * updated when the task is scheduled in.
	 */
	update_task_closid_rmid(tsk);

	return 0;
}
 598
 599static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
 600{
 601	return (rdt_alloc_capable &&
 602	       (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
 603}
 604
 605static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
 606{
 607	return (rdt_mon_capable &&
 608	       (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
 609}
 610
 611/**
 612 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
 613 * @r: Resource group
 614 *
 615 * Return: 1 if tasks have been assigned to @r, 0 otherwise
 616 */
 617int rdtgroup_tasks_assigned(struct rdtgroup *r)
 618{
 619	struct task_struct *p, *t;
 620	int ret = 0;
 621
 622	lockdep_assert_held(&rdtgroup_mutex);
 623
 624	rcu_read_lock();
 625	for_each_process_thread(p, t) {
 626		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
 
 627			ret = 1;
 628			break;
 629		}
 630	}
 631	rcu_read_unlock();
 632
 633	return ret;
 634}
 635
 636static int rdtgroup_task_write_permission(struct task_struct *task,
 637					  struct kernfs_open_file *of)
 638{
 639	const struct cred *tcred = get_task_cred(task);
 640	const struct cred *cred = current_cred();
 641	int ret = 0;
 642
 643	/*
 644	 * Even if we're attaching all tasks in the thread group, we only
 645	 * need to check permissions on one of them.
 646	 */
 647	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 648	    !uid_eq(cred->euid, tcred->uid) &&
 649	    !uid_eq(cred->euid, tcred->suid)) {
 650		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
 651		ret = -EPERM;
 652	}
 653
 654	put_cred(tcred);
 655	return ret;
 656}
 657
 658static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 659			      struct kernfs_open_file *of)
 660{
 661	struct task_struct *tsk;
 662	int ret;
 663
 664	rcu_read_lock();
 665	if (pid) {
 666		tsk = find_task_by_vpid(pid);
 667		if (!tsk) {
 668			rcu_read_unlock();
 669			rdt_last_cmd_printf("No task %d\n", pid);
 670			return -ESRCH;
 671		}
 672	} else {
 673		tsk = current;
 674	}
 675
 676	get_task_struct(tsk);
 677	rcu_read_unlock();
 678
 679	ret = rdtgroup_task_write_permission(tsk, of);
 680	if (!ret)
 681		ret = __rdtgroup_move_task(tsk, rdtgrp);
 682
 683	put_task_struct(tsk);
 684	return ret;
 685}
 686
 687static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 688				    char *buf, size_t nbytes, loff_t off)
 689{
 690	struct rdtgroup *rdtgrp;
 691	int ret = 0;
 692	pid_t pid;
 693
 694	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 695		return -EINVAL;
 696	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 697	if (!rdtgrp) {
 698		rdtgroup_kn_unlock(of->kn);
 699		return -ENOENT;
 700	}
 701	rdt_last_cmd_clear();
 702
 703	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 704	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 705		ret = -EINVAL;
 706		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 707		goto unlock;
 708	}
 709
 710	ret = rdtgroup_move_task(pid, rdtgrp, of);
 711
 712unlock:
 713	rdtgroup_kn_unlock(of->kn);
 714
 715	return ret ?: nbytes;
 716}
 717
 718static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 719{
 720	struct task_struct *p, *t;
 721
 722	rcu_read_lock();
 723	for_each_process_thread(p, t) {
 724		if (is_closid_match(t, r) || is_rmid_match(t, r))
 
 725			seq_printf(s, "%d\n", t->pid);
 726	}
 727	rcu_read_unlock();
 728}
 729
 730static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 731			       struct seq_file *s, void *v)
 732{
 733	struct rdtgroup *rdtgrp;
 734	int ret = 0;
 735
 736	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 737	if (rdtgrp)
 738		show_rdt_tasks(rdtgrp, s);
 739	else
 740		ret = -ENOENT;
 741	rdtgroup_kn_unlock(of->kn);
 742
 743	return ret;
 744}
 745
 746#ifdef CONFIG_PROC_CPU_RESCTRL
 747
 748/*
 749 * A task can only be part of one resctrl control group and of one monitor
 750 * group which is associated to that control group.
 751 *
 752 * 1)   res:
 753 *      mon:
 754 *
 755 *    resctrl is not available.
 756 *
 757 * 2)   res:/
 758 *      mon:
 759 *
 760 *    Task is part of the root resctrl control group, and it is not associated
 761 *    to any monitor group.
 762 *
 763 * 3)  res:/
 764 *     mon:mon0
 765 *
 766 *    Task is part of the root resctrl control group and monitor group mon0.
 767 *
 768 * 4)  res:group0
 769 *     mon:
 770 *
 771 *    Task is part of resctrl control group group0, and it is not associated
 772 *    to any monitor group.
 773 *
 774 * 5) res:group0
 775 *    mon:mon1
 776 *
 777 *    Task is part of resctrl control group group0 and monitor group mon1.
 778 */
 779int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
 780		      struct pid *pid, struct task_struct *tsk)
 781{
 782	struct rdtgroup *rdtg;
 783	int ret = 0;
 784
 785	mutex_lock(&rdtgroup_mutex);
 786
 787	/* Return empty if resctrl has not been mounted. */
 788	if (!static_branch_unlikely(&rdt_enable_key)) {
 789		seq_puts(s, "res:\nmon:\n");
 790		goto unlock;
 791	}
 792
 793	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
 794		struct rdtgroup *crg;
 795
 796		/*
 797		 * Task information is only relevant for shareable
 798		 * and exclusive groups.
 799		 */
 800		if (rdtg->mode != RDT_MODE_SHAREABLE &&
 801		    rdtg->mode != RDT_MODE_EXCLUSIVE)
 802			continue;
 803
 804		if (rdtg->closid != tsk->closid)
 805			continue;
 806
 807		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
 808			   rdtg->kn->name);
 809		seq_puts(s, "mon:");
 810		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
 811				    mon.crdtgrp_list) {
 812			if (tsk->rmid != crg->mon.rmid)
 813				continue;
 814			seq_printf(s, "%s", crg->kn->name);
 815			break;
 816		}
 817		seq_putc(s, '\n');
 818		goto unlock;
 819	}
 820	/*
 821	 * The above search should succeed. Otherwise return
 822	 * with an error.
 823	 */
 824	ret = -ENOENT;
 825unlock:
 826	mutex_unlock(&rdtgroup_mutex);
 827
 828	return ret;
 829}
 830#endif
 831
 832static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
 833				    struct seq_file *seq, void *v)
 834{
 835	int len;
 836
 837	mutex_lock(&rdtgroup_mutex);
 838	len = seq_buf_used(&last_cmd_status);
 839	if (len)
 840		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
 841	else
 842		seq_puts(seq, "ok\n");
 843	mutex_unlock(&rdtgroup_mutex);
 844	return 0;
 845}
 846
 847static int rdt_num_closids_show(struct kernfs_open_file *of,
 848				struct seq_file *seq, void *v)
 849{
 850	struct resctrl_schema *s = of->kn->parent->priv;
 851
 852	seq_printf(seq, "%u\n", s->num_closid);
 853	return 0;
 854}
 855
 856static int rdt_default_ctrl_show(struct kernfs_open_file *of,
 857			     struct seq_file *seq, void *v)
 858{
 859	struct resctrl_schema *s = of->kn->parent->priv;
 860	struct rdt_resource *r = s->res;
 861
 862	seq_printf(seq, "%x\n", r->default_ctrl);
 863	return 0;
 864}
 865
 866static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 867			     struct seq_file *seq, void *v)
 868{
 869	struct resctrl_schema *s = of->kn->parent->priv;
 870	struct rdt_resource *r = s->res;
 871
 872	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
 873	return 0;
 874}
 875
 876static int rdt_shareable_bits_show(struct kernfs_open_file *of,
 877				   struct seq_file *seq, void *v)
 878{
 879	struct resctrl_schema *s = of->kn->parent->priv;
 880	struct rdt_resource *r = s->res;
 881
 882	seq_printf(seq, "%x\n", r->cache.shareable_bits);
 883	return 0;
 884}
 885
 886/**
 887 * rdt_bit_usage_show - Display current usage of resources
 888 *
 889 * A domain is a shared resource that can now be allocated differently. Here
 890 * we display the current regions of the domain as an annotated bitmask.
 891 * For each domain of this resource its allocation bitmask
 892 * is annotated as below to indicate the current usage of the corresponding bit:
 893 *   0 - currently unused
 894 *   X - currently available for sharing and used by software and hardware
 895 *   H - currently used by hardware only but available for software use
 896 *   S - currently used and shareable by software only
 897 *   E - currently used exclusively by one resource group
 898 *   P - currently pseudo-locked by one resource group
 899 */
 900static int rdt_bit_usage_show(struct kernfs_open_file *of,
 901			      struct seq_file *seq, void *v)
 902{
 903	struct resctrl_schema *s = of->kn->parent->priv;
 904	/*
 905	 * Use unsigned long even though only 32 bits are used to ensure
 906	 * test_bit() is used safely.
 907	 */
 908	unsigned long sw_shareable = 0, hw_shareable = 0;
 909	unsigned long exclusive = 0, pseudo_locked = 0;
 910	struct rdt_resource *r = s->res;
 911	struct rdt_domain *dom;
 912	int i, hwb, swb, excl, psl;
 913	enum rdtgrp_mode mode;
 914	bool sep = false;
 915	u32 ctrl_val;
 916
 917	mutex_lock(&rdtgroup_mutex);
 918	hw_shareable = r->cache.shareable_bits;
 919	list_for_each_entry(dom, &r->domains, list) {
 920		if (sep)
 921			seq_putc(seq, ';');
 
 922		sw_shareable = 0;
 923		exclusive = 0;
 924		seq_printf(seq, "%d=", dom->id);
 925		for (i = 0; i < closids_supported(); i++) {
 926			if (!closid_allocated(i))
 927				continue;
 928			ctrl_val = resctrl_arch_get_config(r, dom, i,
 929							   s->conf_type);
 930			mode = rdtgroup_mode_by_closid(i);
 931			switch (mode) {
 932			case RDT_MODE_SHAREABLE:
 933				sw_shareable |= ctrl_val;
 934				break;
 935			case RDT_MODE_EXCLUSIVE:
 936				exclusive |= ctrl_val;
 937				break;
 938			case RDT_MODE_PSEUDO_LOCKSETUP:
 939			/*
 940			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
 941			 * here but not included since the CBM
 942			 * associated with this CLOSID in this mode
 943			 * is not initialized and no task or cpu can be
 944			 * assigned this CLOSID.
 945			 */
 946				break;
 947			case RDT_MODE_PSEUDO_LOCKED:
 948			case RDT_NUM_MODES:
 949				WARN(1,
 950				     "invalid mode for closid %d\n", i);
 951				break;
 952			}
 953		}
 954		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 955			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
 956			hwb = test_bit(i, &hw_shareable);
 957			swb = test_bit(i, &sw_shareable);
 958			excl = test_bit(i, &exclusive);
 959			psl = test_bit(i, &pseudo_locked);
 960			if (hwb && swb)
 961				seq_putc(seq, 'X');
 962			else if (hwb && !swb)
 963				seq_putc(seq, 'H');
 964			else if (!hwb && swb)
 965				seq_putc(seq, 'S');
 966			else if (excl)
 967				seq_putc(seq, 'E');
 968			else if (psl)
 969				seq_putc(seq, 'P');
 970			else /* Unused bits remain */
 971				seq_putc(seq, '0');
 972		}
 973		sep = true;
 974	}
 975	seq_putc(seq, '\n');
 976	mutex_unlock(&rdtgroup_mutex);
 977	return 0;
 978}
 979
 980static int rdt_min_bw_show(struct kernfs_open_file *of,
 981			     struct seq_file *seq, void *v)
 982{
 983	struct resctrl_schema *s = of->kn->parent->priv;
 984	struct rdt_resource *r = s->res;
 985
 986	seq_printf(seq, "%u\n", r->membw.min_bw);
 987	return 0;
 988}
 989
 990static int rdt_num_rmids_show(struct kernfs_open_file *of,
 991			      struct seq_file *seq, void *v)
 992{
 993	struct rdt_resource *r = of->kn->parent->priv;
 994
 995	seq_printf(seq, "%d\n", r->num_rmid);
 996
 997	return 0;
 998}
 999
1000static int rdt_mon_features_show(struct kernfs_open_file *of,
1001				 struct seq_file *seq, void *v)
1002{
1003	struct rdt_resource *r = of->kn->parent->priv;
1004	struct mon_evt *mevt;
1005
1006	list_for_each_entry(mevt, &r->evt_list, list)
1007		seq_printf(seq, "%s\n", mevt->name);
1008
1009	return 0;
1010}
1011
1012static int rdt_bw_gran_show(struct kernfs_open_file *of,
1013			     struct seq_file *seq, void *v)
1014{
1015	struct resctrl_schema *s = of->kn->parent->priv;
1016	struct rdt_resource *r = s->res;
1017
1018	seq_printf(seq, "%u\n", r->membw.bw_gran);
1019	return 0;
1020}
1021
1022static int rdt_delay_linear_show(struct kernfs_open_file *of,
1023			     struct seq_file *seq, void *v)
1024{
1025	struct resctrl_schema *s = of->kn->parent->priv;
1026	struct rdt_resource *r = s->res;
1027
1028	seq_printf(seq, "%u\n", r->membw.delay_linear);
1029	return 0;
1030}
1031
1032static int max_threshold_occ_show(struct kernfs_open_file *of,
1033				  struct seq_file *seq, void *v)
1034{
1035	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
1036
1037	return 0;
1038}
1039
1040static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1041					 struct seq_file *seq, void *v)
1042{
1043	struct resctrl_schema *s = of->kn->parent->priv;
1044	struct rdt_resource *r = s->res;
1045
1046	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
1047		seq_puts(seq, "per-thread\n");
1048	else
1049		seq_puts(seq, "max\n");
1050
1051	return 0;
1052}
1053
1054static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1055				       char *buf, size_t nbytes, loff_t off)
1056{
 
1057	unsigned int bytes;
1058	int ret;
1059
1060	ret = kstrtouint(buf, 0, &bytes);
1061	if (ret)
1062		return ret;
1063
1064	if (bytes > resctrl_rmid_realloc_limit)
1065		return -EINVAL;
1066
1067	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
1068
1069	return nbytes;
1070}
1071
1072/*
1073 * rdtgroup_mode_show - Display mode of this resource group
1074 */
1075static int rdtgroup_mode_show(struct kernfs_open_file *of,
1076			      struct seq_file *s, void *v)
1077{
1078	struct rdtgroup *rdtgrp;
1079
1080	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1081	if (!rdtgrp) {
1082		rdtgroup_kn_unlock(of->kn);
1083		return -ENOENT;
1084	}
1085
1086	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1087
1088	rdtgroup_kn_unlock(of->kn);
1089	return 0;
1090}
1091
1092static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1093{
1094	switch (my_type) {
1095	case CDP_CODE:
1096		return CDP_DATA;
1097	case CDP_DATA:
1098		return CDP_CODE;
 
 
 
 
 
 
 
 
 
 
 
 
1099	default:
1100	case CDP_NONE:
1101		return CDP_NONE;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1102	}
 
 
 
 
 
 
1103}
1104
1105/**
1106 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1107 * @r: Resource to which domain instance @d belongs.
1108 * @d: The domain instance for which @closid is being tested.
1109 * @cbm: Capacity bitmask being tested.
1110 * @closid: Intended closid for @cbm.
1111 * @exclusive: Only check if overlaps with exclusive resource groups
1112 *
1113 * Checks if provided @cbm intended to be used for @closid on domain
1114 * @d overlaps with any other closids or other hardware usage associated
1115 * with this domain. If @exclusive is true then only overlaps with
1116 * resource groups in exclusive mode will be considered. If @exclusive
1117 * is false then overlaps with any resource group or hardware entities
1118 * will be considered.
1119 *
1120 * @cbm is unsigned long, even if only 32 bits are used, to make the
1121 * bitmap functions work correctly.
1122 *
1123 * Return: false if CBM does not overlap, true if it does.
1124 */
1125static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1126				    unsigned long cbm, int closid,
1127				    enum resctrl_conf_type type, bool exclusive)
1128{
1129	enum rdtgrp_mode mode;
1130	unsigned long ctrl_b;
 
1131	int i;
1132
1133	/* Check for any overlap with regions used by hardware directly */
1134	if (!exclusive) {
1135		ctrl_b = r->cache.shareable_bits;
1136		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1137			return true;
1138	}
1139
1140	/* Check for overlap with other resource groups */
1141	for (i = 0; i < closids_supported(); i++) {
1142		ctrl_b = resctrl_arch_get_config(r, d, i, type);
 
1143		mode = rdtgroup_mode_by_closid(i);
1144		if (closid_allocated(i) && i != closid &&
1145		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1146			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1147				if (exclusive) {
1148					if (mode == RDT_MODE_EXCLUSIVE)
1149						return true;
1150					continue;
1151				}
1152				return true;
1153			}
1154		}
1155	}
1156
1157	return false;
1158}
1159
1160/**
1161 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1162 * @s: Schema for the resource to which domain instance @d belongs.
1163 * @d: The domain instance for which @closid is being tested.
1164 * @cbm: Capacity bitmask being tested.
1165 * @closid: Intended closid for @cbm.
1166 * @exclusive: Only check if overlaps with exclusive resource groups
1167 *
1168 * Resources that can be allocated using a CBM can use the CBM to control
1169 * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
1170 * for overlap. Overlap test is not limited to the specific resource for
1171 * which the CBM is intended though - when dealing with CDP resources that
1172 * share the underlying hardware the overlap check should be performed on
1173 * the CDP resource sharing the hardware also.
1174 *
1175 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1176 * overlap test.
1177 *
1178 * Return: true if CBM overlap detected, false if there is no overlap
1179 */
1180bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
1181			   unsigned long cbm, int closid, bool exclusive)
1182{
1183	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
1184	struct rdt_resource *r = s->res;
1185
1186	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
1187				    exclusive))
1188		return true;
1189
1190	if (!resctrl_arch_get_cdp_enabled(r->rid))
1191		return false;
1192	return  __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
 
1193}
1194
1195/**
1196 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1197 *
1198 * An exclusive resource group implies that there should be no sharing of
1199 * its allocated resources. At the time this group is considered to be
1200 * exclusive this test can determine if its current schemata supports this
1201 * setting by testing for overlap with all other resource groups.
1202 *
1203 * Return: true if resource group can be exclusive, false if there is overlap
1204 * with allocations of other resource groups and thus this resource group
1205 * cannot be exclusive.
1206 */
1207static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1208{
1209	int closid = rdtgrp->closid;
1210	struct resctrl_schema *s;
1211	struct rdt_resource *r;
1212	bool has_cache = false;
1213	struct rdt_domain *d;
1214	u32 ctrl;
1215
1216	list_for_each_entry(s, &resctrl_schema_all, list) {
1217		r = s->res;
1218		if (r->rid == RDT_RESOURCE_MBA)
1219			continue;
1220		has_cache = true;
1221		list_for_each_entry(d, &r->domains, list) {
1222			ctrl = resctrl_arch_get_config(r, d, closid,
1223						       s->conf_type);
1224			if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
1225				rdt_last_cmd_puts("Schemata overlaps\n");
1226				return false;
1227			}
1228		}
1229	}
1230
1231	if (!has_cache) {
1232		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1233		return false;
1234	}
1235
1236	return true;
1237}
1238
1239/**
1240 * rdtgroup_mode_write - Modify the resource group's mode
1241 *
1242 */
1243static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1244				   char *buf, size_t nbytes, loff_t off)
1245{
1246	struct rdtgroup *rdtgrp;
1247	enum rdtgrp_mode mode;
1248	int ret = 0;
1249
1250	/* Valid input requires a trailing newline */
1251	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1252		return -EINVAL;
1253	buf[nbytes - 1] = '\0';
1254
1255	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1256	if (!rdtgrp) {
1257		rdtgroup_kn_unlock(of->kn);
1258		return -ENOENT;
1259	}
1260
1261	rdt_last_cmd_clear();
1262
1263	mode = rdtgrp->mode;
1264
1265	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1266	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1267	    (!strcmp(buf, "pseudo-locksetup") &&
1268	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1269	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1270		goto out;
1271
1272	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1273		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1274		ret = -EINVAL;
1275		goto out;
1276	}
1277
1278	if (!strcmp(buf, "shareable")) {
1279		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1280			ret = rdtgroup_locksetup_exit(rdtgrp);
1281			if (ret)
1282				goto out;
1283		}
1284		rdtgrp->mode = RDT_MODE_SHAREABLE;
1285	} else if (!strcmp(buf, "exclusive")) {
1286		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1287			ret = -EINVAL;
1288			goto out;
1289		}
1290		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1291			ret = rdtgroup_locksetup_exit(rdtgrp);
1292			if (ret)
1293				goto out;
1294		}
1295		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1296	} else if (!strcmp(buf, "pseudo-locksetup")) {
1297		ret = rdtgroup_locksetup_enter(rdtgrp);
1298		if (ret)
1299			goto out;
1300		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1301	} else {
1302		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1303		ret = -EINVAL;
1304	}
1305
1306out:
1307	rdtgroup_kn_unlock(of->kn);
1308	return ret ?: nbytes;
1309}
1310
1311/**
1312 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1313 * @r: RDT resource to which @d belongs.
1314 * @d: RDT domain instance.
1315 * @cbm: bitmask for which the size should be computed.
1316 *
1317 * The bitmask provided associated with the RDT domain instance @d will be
1318 * translated into how many bytes it represents. The size in bytes is
1319 * computed by first dividing the total cache size by the CBM length to
1320 * determine how many bytes each bit in the bitmask represents. The result
1321 * is multiplied with the number of bits set in the bitmask.
1322 *
1323 * @cbm is unsigned long, even if only 32 bits are used to make the
1324 * bitmap functions work correctly.
1325 */
1326unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1327				  struct rdt_domain *d, unsigned long cbm)
1328{
1329	struct cpu_cacheinfo *ci;
1330	unsigned int size = 0;
1331	int num_b, i;
1332
1333	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1334	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1335	for (i = 0; i < ci->num_leaves; i++) {
1336		if (ci->info_list[i].level == r->cache_level) {
1337			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1338			break;
1339		}
1340	}
1341
1342	return size;
1343}
1344
1345/**
1346 * rdtgroup_size_show - Display size in bytes of allocated regions
1347 *
1348 * The "size" file mirrors the layout of the "schemata" file, printing the
1349 * size in bytes of each region instead of the capacity bitmask.
1350 *
1351 */
1352static int rdtgroup_size_show(struct kernfs_open_file *of,
1353			      struct seq_file *s, void *v)
1354{
1355	struct resctrl_schema *schema;
1356	enum resctrl_conf_type type;
1357	struct rdtgroup *rdtgrp;
1358	struct rdt_resource *r;
1359	struct rdt_domain *d;
1360	unsigned int size;
1361	int ret = 0;
1362	u32 closid;
1363	bool sep;
1364	u32 ctrl;
1365
1366	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1367	if (!rdtgrp) {
1368		rdtgroup_kn_unlock(of->kn);
1369		return -ENOENT;
1370	}
1371
1372	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1373		if (!rdtgrp->plr->d) {
1374			rdt_last_cmd_clear();
1375			rdt_last_cmd_puts("Cache domain offline\n");
1376			ret = -ENODEV;
1377		} else {
1378			seq_printf(s, "%*s:", max_name_width,
1379				   rdtgrp->plr->s->name);
1380			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
1381						    rdtgrp->plr->d,
1382						    rdtgrp->plr->cbm);
1383			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1384		}
1385		goto out;
1386	}
1387
1388	closid = rdtgrp->closid;
1389
1390	list_for_each_entry(schema, &resctrl_schema_all, list) {
1391		r = schema->res;
1392		type = schema->conf_type;
1393		sep = false;
1394		seq_printf(s, "%*s:", max_name_width, schema->name);
1395		list_for_each_entry(d, &r->domains, list) {
1396			if (sep)
1397				seq_putc(s, ';');
1398			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1399				size = 0;
1400			} else {
1401				if (is_mba_sc(r))
1402					ctrl = d->mbps_val[closid];
1403				else
1404					ctrl = resctrl_arch_get_config(r, d,
1405								       closid,
1406								       type);
1407				if (r->rid == RDT_RESOURCE_MBA)
1408					size = ctrl;
1409				else
1410					size = rdtgroup_cbm_to_size(r, d, ctrl);
1411			}
1412			seq_printf(s, "%d=%u", d->id, size);
1413			sep = true;
1414		}
1415		seq_putc(s, '\n');
1416	}
1417
1418out:
1419	rdtgroup_kn_unlock(of->kn);
1420
1421	return ret;
1422}
1423
1424/* rdtgroup information files for one cache resource. */
1425static struct rftype res_common_files[] = {
1426	{
1427		.name		= "last_cmd_status",
1428		.mode		= 0444,
1429		.kf_ops		= &rdtgroup_kf_single_ops,
1430		.seq_show	= rdt_last_cmd_status_show,
1431		.fflags		= RF_TOP_INFO,
1432	},
1433	{
1434		.name		= "num_closids",
1435		.mode		= 0444,
1436		.kf_ops		= &rdtgroup_kf_single_ops,
1437		.seq_show	= rdt_num_closids_show,
1438		.fflags		= RF_CTRL_INFO,
1439	},
1440	{
1441		.name		= "mon_features",
1442		.mode		= 0444,
1443		.kf_ops		= &rdtgroup_kf_single_ops,
1444		.seq_show	= rdt_mon_features_show,
1445		.fflags		= RF_MON_INFO,
1446	},
1447	{
1448		.name		= "num_rmids",
1449		.mode		= 0444,
1450		.kf_ops		= &rdtgroup_kf_single_ops,
1451		.seq_show	= rdt_num_rmids_show,
1452		.fflags		= RF_MON_INFO,
1453	},
1454	{
1455		.name		= "cbm_mask",
1456		.mode		= 0444,
1457		.kf_ops		= &rdtgroup_kf_single_ops,
1458		.seq_show	= rdt_default_ctrl_show,
1459		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1460	},
1461	{
1462		.name		= "min_cbm_bits",
1463		.mode		= 0444,
1464		.kf_ops		= &rdtgroup_kf_single_ops,
1465		.seq_show	= rdt_min_cbm_bits_show,
1466		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1467	},
1468	{
1469		.name		= "shareable_bits",
1470		.mode		= 0444,
1471		.kf_ops		= &rdtgroup_kf_single_ops,
1472		.seq_show	= rdt_shareable_bits_show,
1473		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1474	},
1475	{
1476		.name		= "bit_usage",
1477		.mode		= 0444,
1478		.kf_ops		= &rdtgroup_kf_single_ops,
1479		.seq_show	= rdt_bit_usage_show,
1480		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1481	},
1482	{
1483		.name		= "min_bandwidth",
1484		.mode		= 0444,
1485		.kf_ops		= &rdtgroup_kf_single_ops,
1486		.seq_show	= rdt_min_bw_show,
1487		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1488	},
1489	{
1490		.name		= "bandwidth_gran",
1491		.mode		= 0444,
1492		.kf_ops		= &rdtgroup_kf_single_ops,
1493		.seq_show	= rdt_bw_gran_show,
1494		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1495	},
1496	{
1497		.name		= "delay_linear",
1498		.mode		= 0444,
1499		.kf_ops		= &rdtgroup_kf_single_ops,
1500		.seq_show	= rdt_delay_linear_show,
1501		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1502	},
1503	/*
1504	 * Platform specific which (if any) capabilities are provided by
1505	 * thread_throttle_mode. Defer "fflags" initialization to platform
1506	 * discovery.
1507	 */
1508	{
1509		.name		= "thread_throttle_mode",
1510		.mode		= 0444,
1511		.kf_ops		= &rdtgroup_kf_single_ops,
1512		.seq_show	= rdt_thread_throttle_mode_show,
1513	},
1514	{
1515		.name		= "max_threshold_occupancy",
1516		.mode		= 0644,
1517		.kf_ops		= &rdtgroup_kf_single_ops,
1518		.write		= max_threshold_occ_write,
1519		.seq_show	= max_threshold_occ_show,
1520		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
1521	},
1522	{
1523		.name		= "cpus",
1524		.mode		= 0644,
1525		.kf_ops		= &rdtgroup_kf_single_ops,
1526		.write		= rdtgroup_cpus_write,
1527		.seq_show	= rdtgroup_cpus_show,
1528		.fflags		= RFTYPE_BASE,
1529	},
1530	{
1531		.name		= "cpus_list",
1532		.mode		= 0644,
1533		.kf_ops		= &rdtgroup_kf_single_ops,
1534		.write		= rdtgroup_cpus_write,
1535		.seq_show	= rdtgroup_cpus_show,
1536		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1537		.fflags		= RFTYPE_BASE,
1538	},
1539	{
1540		.name		= "tasks",
1541		.mode		= 0644,
1542		.kf_ops		= &rdtgroup_kf_single_ops,
1543		.write		= rdtgroup_tasks_write,
1544		.seq_show	= rdtgroup_tasks_show,
1545		.fflags		= RFTYPE_BASE,
1546	},
1547	{
1548		.name		= "schemata",
1549		.mode		= 0644,
1550		.kf_ops		= &rdtgroup_kf_single_ops,
1551		.write		= rdtgroup_schemata_write,
1552		.seq_show	= rdtgroup_schemata_show,
1553		.fflags		= RF_CTRL_BASE,
1554	},
1555	{
1556		.name		= "mode",
1557		.mode		= 0644,
1558		.kf_ops		= &rdtgroup_kf_single_ops,
1559		.write		= rdtgroup_mode_write,
1560		.seq_show	= rdtgroup_mode_show,
1561		.fflags		= RF_CTRL_BASE,
1562	},
1563	{
1564		.name		= "size",
1565		.mode		= 0444,
1566		.kf_ops		= &rdtgroup_kf_single_ops,
1567		.seq_show	= rdtgroup_size_show,
1568		.fflags		= RF_CTRL_BASE,
1569	},
1570
1571};
1572
1573static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1574{
1575	struct rftype *rfts, *rft;
1576	int ret, len;
1577
1578	rfts = res_common_files;
1579	len = ARRAY_SIZE(res_common_files);
1580
1581	lockdep_assert_held(&rdtgroup_mutex);
1582
1583	for (rft = rfts; rft < rfts + len; rft++) {
1584		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
1585			ret = rdtgroup_add_file(kn, rft);
1586			if (ret)
1587				goto error;
1588		}
1589	}
1590
1591	return 0;
1592error:
1593	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
1594	while (--rft >= rfts) {
1595		if ((fflags & rft->fflags) == rft->fflags)
1596			kernfs_remove_by_name(kn, rft->name);
1597	}
1598	return ret;
1599}
1600
1601static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
1602{
1603	struct rftype *rfts, *rft;
1604	int len;
1605
1606	rfts = res_common_files;
1607	len = ARRAY_SIZE(res_common_files);
1608
1609	for (rft = rfts; rft < rfts + len; rft++) {
1610		if (!strcmp(rft->name, name))
1611			return rft;
1612	}
1613
1614	return NULL;
1615}
1616
1617void __init thread_throttle_mode_init(void)
1618{
1619	struct rftype *rft;
1620
1621	rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
1622	if (!rft)
1623		return;
1624
1625	rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
1626}
1627
1628/**
1629 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1630 * @r: The resource group with which the file is associated.
1631 * @name: Name of the file
1632 *
1633 * The permissions of named resctrl file, directory, or link are modified
1634 * to not allow read, write, or execute by any user.
1635 *
1636 * WARNING: This function is intended to communicate to the user that the
1637 * resctrl file has been locked down - that it is not relevant to the
1638 * particular state the system finds itself in. It should not be relied
1639 * on to protect from user access because after the file's permissions
1640 * are restricted the user can still change the permissions using chmod
1641 * from the command line.
1642 *
1643 * Return: 0 on success, <0 on failure.
1644 */
1645int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1646{
1647	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1648	struct kernfs_node *kn;
1649	int ret = 0;
1650
1651	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1652	if (!kn)
1653		return -ENOENT;
1654
1655	switch (kernfs_type(kn)) {
1656	case KERNFS_DIR:
1657		iattr.ia_mode = S_IFDIR;
1658		break;
1659	case KERNFS_FILE:
1660		iattr.ia_mode = S_IFREG;
1661		break;
1662	case KERNFS_LINK:
1663		iattr.ia_mode = S_IFLNK;
1664		break;
1665	}
1666
1667	ret = kernfs_setattr(kn, &iattr);
1668	kernfs_put(kn);
1669	return ret;
1670}
1671
1672/**
1673 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1674 * @r: The resource group with which the file is associated.
1675 * @name: Name of the file
1676 * @mask: Mask of permissions that should be restored
1677 *
1678 * Restore the permissions of the named file. If @name is a directory the
1679 * permissions of its parent will be used.
1680 *
1681 * Return: 0 on success, <0 on failure.
1682 */
1683int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1684			     umode_t mask)
1685{
1686	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1687	struct kernfs_node *kn, *parent;
1688	struct rftype *rfts, *rft;
1689	int ret, len;
1690
1691	rfts = res_common_files;
1692	len = ARRAY_SIZE(res_common_files);
1693
1694	for (rft = rfts; rft < rfts + len; rft++) {
1695		if (!strcmp(rft->name, name))
1696			iattr.ia_mode = rft->mode & mask;
1697	}
1698
1699	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1700	if (!kn)
1701		return -ENOENT;
1702
1703	switch (kernfs_type(kn)) {
1704	case KERNFS_DIR:
1705		parent = kernfs_get_parent(kn);
1706		if (parent) {
1707			iattr.ia_mode |= parent->mode;
1708			kernfs_put(parent);
1709		}
1710		iattr.ia_mode |= S_IFDIR;
1711		break;
1712	case KERNFS_FILE:
1713		iattr.ia_mode |= S_IFREG;
1714		break;
1715	case KERNFS_LINK:
1716		iattr.ia_mode |= S_IFLNK;
1717		break;
1718	}
1719
1720	ret = kernfs_setattr(kn, &iattr);
1721	kernfs_put(kn);
1722	return ret;
1723}
1724
1725static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
1726				      unsigned long fflags)
1727{
1728	struct kernfs_node *kn_subdir;
1729	int ret;
1730
1731	kn_subdir = kernfs_create_dir(kn_info, name,
1732				      kn_info->mode, priv);
1733	if (IS_ERR(kn_subdir))
1734		return PTR_ERR(kn_subdir);
1735
 
1736	ret = rdtgroup_kn_set_ugid(kn_subdir);
1737	if (ret)
1738		return ret;
1739
1740	ret = rdtgroup_add_files(kn_subdir, fflags);
1741	if (!ret)
1742		kernfs_activate(kn_subdir);
1743
1744	return ret;
1745}
1746
1747static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
1748{
1749	struct resctrl_schema *s;
1750	struct rdt_resource *r;
1751	unsigned long fflags;
1752	char name[32];
1753	int ret;
1754
1755	/* create the directory */
1756	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
1757	if (IS_ERR(kn_info))
1758		return PTR_ERR(kn_info);
 
1759
1760	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
1761	if (ret)
1762		goto out_destroy;
1763
1764	/* loop over enabled controls, these are all alloc_capable */
1765	list_for_each_entry(s, &resctrl_schema_all, list) {
1766		r = s->res;
1767		fflags =  r->fflags | RF_CTRL_INFO;
1768		ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
1769		if (ret)
1770			goto out_destroy;
1771	}
1772
1773	for_each_mon_capable_rdt_resource(r) {
1774		fflags =  r->fflags | RF_MON_INFO;
1775		sprintf(name, "%s_MON", r->name);
1776		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
1777		if (ret)
1778			goto out_destroy;
1779	}
1780
 
 
 
 
 
 
1781	ret = rdtgroup_kn_set_ugid(kn_info);
1782	if (ret)
1783		goto out_destroy;
1784
1785	kernfs_activate(kn_info);
1786
1787	return 0;
1788
1789out_destroy:
1790	kernfs_remove(kn_info);
1791	return ret;
1792}
1793
1794static int
1795mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
1796		    char *name, struct kernfs_node **dest_kn)
1797{
1798	struct kernfs_node *kn;
1799	int ret;
1800
1801	/* create the directory */
1802	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
1803	if (IS_ERR(kn))
1804		return PTR_ERR(kn);
1805
1806	if (dest_kn)
1807		*dest_kn = kn;
1808
 
 
 
 
 
 
1809	ret = rdtgroup_kn_set_ugid(kn);
1810	if (ret)
1811		goto out_destroy;
1812
1813	kernfs_activate(kn);
1814
1815	return 0;
1816
1817out_destroy:
1818	kernfs_remove(kn);
1819	return ret;
1820}
1821
1822static void l3_qos_cfg_update(void *arg)
1823{
1824	bool *enable = arg;
1825
1826	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
1827}
1828
1829static void l2_qos_cfg_update(void *arg)
1830{
1831	bool *enable = arg;
1832
1833	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
1834}
1835
1836static inline bool is_mba_linear(void)
1837{
1838	return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
1839}
1840
1841static int set_cache_qos_cfg(int level, bool enable)
1842{
1843	void (*update)(void *arg);
1844	struct rdt_resource *r_l;
1845	cpumask_var_t cpu_mask;
1846	struct rdt_domain *d;
1847	int cpu;
1848
 
 
 
1849	if (level == RDT_RESOURCE_L3)
1850		update = l3_qos_cfg_update;
1851	else if (level == RDT_RESOURCE_L2)
1852		update = l2_qos_cfg_update;
1853	else
1854		return -EINVAL;
1855
1856	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1857		return -ENOMEM;
1858
1859	r_l = &rdt_resources_all[level].r_resctrl;
1860	list_for_each_entry(d, &r_l->domains, list) {
1861		if (r_l->cache.arch_has_per_cpu_cfg)
1862			/* Pick all the CPUs in the domain instance */
1863			for_each_cpu(cpu, &d->cpu_mask)
1864				cpumask_set_cpu(cpu, cpu_mask);
1865		else
1866			/* Pick one CPU from each domain instance to update MSR */
1867			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
1868	}
1869	cpu = get_cpu();
1870	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
1871	if (cpumask_test_cpu(cpu, cpu_mask))
1872		update(&enable);
1873	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
1874	smp_call_function_many(cpu_mask, update, &enable, 1);
1875	put_cpu();
1876
1877	free_cpumask_var(cpu_mask);
1878
1879	return 0;
1880}
1881
1882/* Restore the qos cfg state when a domain comes online */
1883void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
1884{
1885	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
1886
1887	if (!r->cdp_capable)
1888		return;
1889
1890	if (r->rid == RDT_RESOURCE_L2)
1891		l2_qos_cfg_update(&hw_res->cdp_enabled);
1892
1893	if (r->rid == RDT_RESOURCE_L3)
1894		l3_qos_cfg_update(&hw_res->cdp_enabled);
1895}
1896
1897static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
1898{
1899	u32 num_closid = resctrl_arch_get_num_closid(r);
1900	int cpu = cpumask_any(&d->cpu_mask);
1901	int i;
1902
1903	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
1904				   GFP_KERNEL, cpu_to_node(cpu));
1905	if (!d->mbps_val)
1906		return -ENOMEM;
1907
1908	for (i = 0; i < num_closid; i++)
1909		d->mbps_val[i] = MBA_MAX_MBPS;
1910
1911	return 0;
1912}
1913
1914static void mba_sc_domain_destroy(struct rdt_resource *r,
1915				  struct rdt_domain *d)
1916{
1917	kfree(d->mbps_val);
1918	d->mbps_val = NULL;
1919}
1920
1921/*
 
 
1922 * MBA software controller is supported only if
1923 * MBM is supported and MBA is in linear scale.
1924 */
1925static bool supports_mba_mbps(void)
1926{
1927	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
1928
1929	return (is_mbm_local_enabled() &&
1930		r->alloc_capable && is_mba_linear());
1931}
1932
1933/*
1934 * Enable or disable the MBA software controller
1935 * which helps user specify bandwidth in MBps.
1936 */
1937static int set_mba_sc(bool mba_sc)
1938{
1939	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
1940	u32 num_closid = resctrl_arch_get_num_closid(r);
1941	struct rdt_domain *d;
1942	int i;
1943
1944	if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
 
1945		return -EINVAL;
1946
1947	r->membw.mba_sc = mba_sc;
1948
1949	list_for_each_entry(d, &r->domains, list) {
1950		for (i = 0; i < num_closid; i++)
1951			d->mbps_val[i] = MBA_MAX_MBPS;
1952	}
1953
1954	return 0;
1955}
1956
1957static int cdp_enable(int level)
1958{
1959	struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
 
 
1960	int ret;
1961
1962	if (!r_l->alloc_capable)
 
1963		return -EINVAL;
1964
1965	ret = set_cache_qos_cfg(level, true);
1966	if (!ret)
1967		rdt_resources_all[level].cdp_enabled = true;
1968
 
 
1969	return ret;
1970}
1971
1972static void cdp_disable(int level)
1973{
1974	struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
 
 
1975
1976	if (r_hw->cdp_enabled) {
1977		set_cache_qos_cfg(level, false);
1978		r_hw->cdp_enabled = false;
1979	}
1980}
1981
1982int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
1983{
1984	struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
1985
1986	if (!hw_res->r_resctrl.cdp_capable)
1987		return -EINVAL;
1988
1989	if (enable)
1990		return cdp_enable(l);
 
 
 
 
1991
1992	cdp_disable(l);
 
 
 
1993
1994	return 0;
 
 
1995}
1996
1997static void cdp_disable_all(void)
1998{
1999	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
2000		resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2001	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
2002		resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2003}
2004
2005/*
2006 * We don't allow rdtgroup directories to be created anywhere
2007 * except the root directory. Thus when looking for the rdtgroup
2008 * structure for a kernfs node we are either looking at a directory,
2009 * in which case the rdtgroup structure is pointed at by the "priv"
2010 * field, otherwise we have a file, and need only look to the parent
2011 * to find the rdtgroup.
2012 */
2013static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2014{
2015	if (kernfs_type(kn) == KERNFS_DIR) {
2016		/*
2017		 * All the resource directories use "kn->priv"
2018		 * to point to the "struct rdtgroup" for the
2019		 * resource. "info" and its subdirectories don't
2020		 * have rdtgroup structures, so return NULL here.
2021		 */
2022		if (kn == kn_info || kn->parent == kn_info)
2023			return NULL;
2024		else
2025			return kn->priv;
2026	} else {
2027		return kn->parent->priv;
2028	}
2029}
2030
2031struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2032{
2033	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2034
2035	if (!rdtgrp)
2036		return NULL;
2037
2038	atomic_inc(&rdtgrp->waitcount);
2039	kernfs_break_active_protection(kn);
2040
2041	mutex_lock(&rdtgroup_mutex);
2042
2043	/* Was this group deleted while we waited? */
2044	if (rdtgrp->flags & RDT_DELETED)
2045		return NULL;
2046
2047	return rdtgrp;
2048}
2049
2050void rdtgroup_kn_unlock(struct kernfs_node *kn)
2051{
2052	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2053
2054	if (!rdtgrp)
2055		return;
2056
2057	mutex_unlock(&rdtgroup_mutex);
2058
2059	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2060	    (rdtgrp->flags & RDT_DELETED)) {
2061		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2062		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2063			rdtgroup_pseudo_lock_remove(rdtgrp);
2064		kernfs_unbreak_active_protection(kn);
2065		rdtgroup_remove(rdtgrp);
 
2066	} else {
2067		kernfs_unbreak_active_protection(kn);
2068	}
2069}
2070
/*
 * Forward declaration so that rdt_get_tree() can call mkdir_mondata_all()
 * ahead of its definition.
 */
static int mkdir_mondata_all(struct kernfs_node *parent_kn,
			     struct rdtgroup *prgrp,
			     struct kernfs_node **mon_data_kn);
2074
2075static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2076{
2077	int ret = 0;
2078
2079	if (ctx->enable_cdpl2)
2080		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
2081
2082	if (!ret && ctx->enable_cdpl3)
2083		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
2084
2085	if (!ret && ctx->enable_mba_mbps)
2086		ret = set_mba_sc(true);
2087
2088	return ret;
2089}
2090
2091static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
2092{
2093	struct resctrl_schema *s;
2094	const char *suffix = "";
2095	int ret, cl;
2096
2097	s = kzalloc(sizeof(*s), GFP_KERNEL);
2098	if (!s)
2099		return -ENOMEM;
2100
2101	s->res = r;
2102	s->num_closid = resctrl_arch_get_num_closid(r);
2103	if (resctrl_arch_get_cdp_enabled(r->rid))
2104		s->num_closid /= 2;
2105
2106	s->conf_type = type;
2107	switch (type) {
2108	case CDP_CODE:
2109		suffix = "CODE";
2110		break;
2111	case CDP_DATA:
2112		suffix = "DATA";
2113		break;
2114	case CDP_NONE:
2115		suffix = "";
2116		break;
2117	}
2118
2119	ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
2120	if (ret >= sizeof(s->name)) {
2121		kfree(s);
2122		return -EINVAL;
2123	}
2124
2125	cl = strlen(s->name);
2126
2127	/*
2128	 * If CDP is supported by this resource, but not enabled,
2129	 * include the suffix. This ensures the tabular format of the
2130	 * schemata file does not change between mounts of the filesystem.
2131	 */
2132	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
2133		cl += 4;
2134
2135	if (cl > max_name_width)
2136		max_name_width = cl;
2137
2138	INIT_LIST_HEAD(&s->list);
2139	list_add(&s->list, &resctrl_schema_all);
2140
2141	return 0;
2142}
2143
2144static int schemata_list_create(void)
2145{
2146	struct rdt_resource *r;
2147	int ret = 0;
2148
2149	for_each_alloc_capable_rdt_resource(r) {
2150		if (resctrl_arch_get_cdp_enabled(r->rid)) {
2151			ret = schemata_list_add(r, CDP_CODE);
2152			if (ret)
2153				break;
2154
2155			ret = schemata_list_add(r, CDP_DATA);
2156		} else {
2157			ret = schemata_list_add(r, CDP_NONE);
2158		}
2159
2160		if (ret)
2161			break;
2162	}
2163
2164	return ret;
2165}
2166
2167static void schemata_list_destroy(void)
2168{
2169	struct resctrl_schema *s, *tmp;
2170
2171	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2172		list_del(&s->list);
2173		kfree(s);
2174	}
2175}
2176
2177static int rdt_get_tree(struct fs_context *fc)
2178{
2179	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2180	struct rdt_domain *dom;
2181	struct rdt_resource *r;
2182	int ret;
2183
2184	cpus_read_lock();
2185	mutex_lock(&rdtgroup_mutex);
2186	/*
2187	 * resctrl file system can only be mounted once.
2188	 */
2189	if (static_branch_unlikely(&rdt_enable_key)) {
2190		ret = -EBUSY;
2191		goto out;
2192	}
2193
2194	ret = rdt_enable_ctx(ctx);
2195	if (ret < 0)
2196		goto out_cdp;
2197
2198	ret = schemata_list_create();
2199	if (ret) {
2200		schemata_list_destroy();
2201		goto out_mba;
2202	}
2203
2204	closid_init();
2205
2206	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2207	if (ret < 0)
2208		goto out_schemata_free;
2209
2210	if (rdt_mon_capable) {
2211		ret = mongroup_create_dir(rdtgroup_default.kn,
2212					  &rdtgroup_default, "mon_groups",
2213					  &kn_mongrp);
2214		if (ret < 0)
2215			goto out_info;
 
2216
2217		ret = mkdir_mondata_all(rdtgroup_default.kn,
2218					&rdtgroup_default, &kn_mondata);
2219		if (ret < 0)
2220			goto out_mongrp;
 
2221		rdtgroup_default.mon.mon_data_kn = kn_mondata;
2222	}
2223
2224	ret = rdt_pseudo_lock_init();
2225	if (ret)
2226		goto out_mondata;
2227
2228	ret = kernfs_get_tree(fc);
2229	if (ret < 0)
2230		goto out_psl;
2231
2232	if (rdt_alloc_capable)
2233		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
2234	if (rdt_mon_capable)
2235		static_branch_enable_cpuslocked(&rdt_mon_enable_key);
2236
2237	if (rdt_alloc_capable || rdt_mon_capable)
2238		static_branch_enable_cpuslocked(&rdt_enable_key);
2239
2240	if (is_mbm_enabled()) {
2241		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2242		list_for_each_entry(dom, &r->domains, list)
2243			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
2244	}
2245
2246	goto out;
2247
2248out_psl:
2249	rdt_pseudo_lock_release();
2250out_mondata:
2251	if (rdt_mon_capable)
2252		kernfs_remove(kn_mondata);
2253out_mongrp:
2254	if (rdt_mon_capable)
2255		kernfs_remove(kn_mongrp);
2256out_info:
2257	kernfs_remove(kn_info);
2258out_schemata_free:
2259	schemata_list_destroy();
2260out_mba:
2261	if (ctx->enable_mba_mbps)
2262		set_mba_sc(false);
2263out_cdp:
2264	cdp_disable_all();
2265out:
2266	rdt_last_cmd_clear();
2267	mutex_unlock(&rdtgroup_mutex);
2268	cpus_read_unlock();
2269	return ret;
2270}
2271
/* Mount options understood by the resctrl file system. */
enum rdt_param {
	Opt_cdp,	/* "cdp" */
	Opt_cdpl2,	/* "cdpl2" */
	Opt_mba_mbps,	/* "mba_MBps" */
	nr__rdt_params	/* number of options, has to stay last */
};
2278
2279static const struct fs_parameter_spec rdt_fs_parameters[] = {
2280	fsparam_flag("cdp",		Opt_cdp),
2281	fsparam_flag("cdpl2",		Opt_cdpl2),
2282	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2283	{}
2284};
2285
 
 
 
 
 
2286static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2287{
2288	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2289	struct fs_parse_result result;
2290	int opt;
2291
2292	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2293	if (opt < 0)
2294		return opt;
2295
2296	switch (opt) {
2297	case Opt_cdp:
2298		ctx->enable_cdpl3 = true;
2299		return 0;
2300	case Opt_cdpl2:
2301		ctx->enable_cdpl2 = true;
2302		return 0;
2303	case Opt_mba_mbps:
2304		if (!supports_mba_mbps())
2305			return -EINVAL;
2306		ctx->enable_mba_mbps = true;
2307		return 0;
2308	}
2309
2310	return -EINVAL;
2311}
2312
/* Release the kernfs part of @fc and then the resctrl context itself. */
static void rdt_fs_context_free(struct fs_context *fc)
{
	struct rdt_fs_context *rdt_ctx = rdt_fc2context(fc);

	kernfs_free_fs_context(fc);
	kfree(rdt_ctx);
}
2320
2321static const struct fs_context_operations rdt_fs_context_ops = {
2322	.free		= rdt_fs_context_free,
2323	.parse_param	= rdt_parse_param,
2324	.get_tree	= rdt_get_tree,
2325};
2326
2327static int rdt_init_fs_context(struct fs_context *fc)
2328{
2329	struct rdt_fs_context *ctx;
2330
2331	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2332	if (!ctx)
2333		return -ENOMEM;
2334
2335	ctx->kfc.root = rdt_root;
2336	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2337	fc->fs_private = &ctx->kfc;
2338	fc->ops = &rdt_fs_context_ops;
2339	put_user_ns(fc->user_ns);
2340	fc->user_ns = get_user_ns(&init_user_ns);
2341	fc->global = true;
2342	return 0;
2343}
2344
2345static int reset_all_ctrls(struct rdt_resource *r)
2346{
2347	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2348	struct rdt_hw_domain *hw_dom;
2349	struct msr_param msr_param;
2350	cpumask_var_t cpu_mask;
2351	struct rdt_domain *d;
2352	int i, cpu;
2353
2354	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2355		return -ENOMEM;
2356
2357	msr_param.res = r;
2358	msr_param.low = 0;
2359	msr_param.high = hw_res->num_closid;
2360
2361	/*
2362	 * Disable resource control for this resource by setting all
2363	 * CBMs in all domains to the maximum mask value. Pick one CPU
2364	 * from each domain to update the MSRs below.
2365	 */
2366	list_for_each_entry(d, &r->domains, list) {
2367		hw_dom = resctrl_to_arch_dom(d);
2368		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
2369
2370		for (i = 0; i < hw_res->num_closid; i++)
2371			hw_dom->ctrl_val[i] = r->default_ctrl;
2372	}
2373	cpu = get_cpu();
2374	/* Update CBM on this cpu if it's in cpu_mask. */
2375	if (cpumask_test_cpu(cpu, cpu_mask))
2376		rdt_ctrl_update(&msr_param);
2377	/* Update CBM on all other cpus in cpu_mask. */
2378	smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
2379	put_cpu();
2380
2381	free_cpumask_var(cpu_mask);
2382
2383	return 0;
2384}
2385
 
 
 
 
 
 
 
 
 
 
 
 
2386/*
2387 * Move tasks from one to the other group. If @from is NULL, then all tasks
2388 * in the systems are moved unconditionally (used for teardown).
2389 *
2390 * If @mask is not NULL the cpus on which moved tasks are running are set
2391 * in that mask so the update smp function call is restricted to affected
2392 * cpus.
2393 */
2394static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2395				 struct cpumask *mask)
2396{
2397	struct task_struct *p, *t;
2398
2399	read_lock(&tasklist_lock);
2400	for_each_process_thread(p, t) {
2401		if (!from || is_closid_match(t, from) ||
2402		    is_rmid_match(t, from)) {
2403			WRITE_ONCE(t->closid, to->closid);
2404			WRITE_ONCE(t->rmid, to->mon.rmid);
2405
 
2406			/*
2407			 * Order the closid/rmid stores above before the loads
2408			 * in task_curr(). This pairs with the full barrier
2409			 * between the rq->curr update and resctrl_sched_in()
2410			 * during context switch.
2411			 */
2412			smp_mb();
2413
2414			/*
2415			 * If the task is on a CPU, set the CPU in the mask.
2416			 * The detection is inaccurate as tasks might move or
2417			 * schedule before the smp function call takes place.
2418			 * In such a case the function call is pointless, but
2419			 * there is no other side effect.
2420			 */
2421			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2422				cpumask_set_cpu(task_cpu(t), mask);
 
2423		}
2424	}
2425	read_unlock(&tasklist_lock);
2426}
2427
2428static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2429{
2430	struct rdtgroup *sentry, *stmp;
2431	struct list_head *head;
2432
2433	head = &rdtgrp->mon.crdtgrp_list;
2434	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2435		free_rmid(sentry->mon.rmid);
2436		list_del(&sentry->mon.crdtgrp_list);
2437
2438		if (atomic_read(&sentry->waitcount) != 0)
2439			sentry->flags = RDT_DELETED;
2440		else
2441			rdtgroup_remove(sentry);
2442	}
2443}
2444
2445/*
2446 * Forcibly remove all of subdirectories under root.
2447 */
2448static void rmdir_all_sub(void)
2449{
2450	struct rdtgroup *rdtgrp, *tmp;
2451
2452	/* Move all tasks to the default resource group */
2453	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2454
2455	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2456		/* Free any child rmids */
2457		free_all_child_rdtgrp(rdtgrp);
2458
2459		/* Remove each rdtgroup other than root */
2460		if (rdtgrp == &rdtgroup_default)
2461			continue;
2462
2463		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2464		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2465			rdtgroup_pseudo_lock_remove(rdtgrp);
2466
2467		/*
2468		 * Give any CPUs back to the default group. We cannot copy
2469		 * cpu_online_mask because a CPU might have executed the
2470		 * offline callback already, but is still marked online.
2471		 */
2472		cpumask_or(&rdtgroup_default.cpu_mask,
2473			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2474
2475		free_rmid(rdtgrp->mon.rmid);
2476
2477		kernfs_remove(rdtgrp->kn);
2478		list_del(&rdtgrp->rdtgroup_list);
2479
2480		if (atomic_read(&rdtgrp->waitcount) != 0)
2481			rdtgrp->flags = RDT_DELETED;
2482		else
2483			rdtgroup_remove(rdtgrp);
2484	}
2485	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2486	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2487
2488	kernfs_remove(kn_info);
2489	kernfs_remove(kn_mongrp);
2490	kernfs_remove(kn_mondata);
2491}
2492
2493static void rdt_kill_sb(struct super_block *sb)
2494{
2495	struct rdt_resource *r;
2496
2497	cpus_read_lock();
2498	mutex_lock(&rdtgroup_mutex);
2499
2500	set_mba_sc(false);
2501
2502	/*Put everything back to default values. */
2503	for_each_alloc_capable_rdt_resource(r)
2504		reset_all_ctrls(r);
2505	cdp_disable_all();
2506	rmdir_all_sub();
2507	rdt_pseudo_lock_release();
2508	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
2509	schemata_list_destroy();
2510	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
2511	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
2512	static_branch_disable_cpuslocked(&rdt_enable_key);
2513	kernfs_kill_sb(sb);
2514	mutex_unlock(&rdtgroup_mutex);
2515	cpus_read_unlock();
2516}
2517
2518static struct file_system_type rdt_fs_type = {
2519	.name			= "resctrl",
2520	.init_fs_context	= rdt_init_fs_context,
2521	.parameters		= rdt_fs_parameters,
2522	.kill_sb		= rdt_kill_sb,
2523};
2524
2525static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2526		       void *priv)
2527{
2528	struct kernfs_node *kn;
2529	int ret = 0;
2530
2531	kn = __kernfs_create_file(parent_kn, name, 0444,
2532				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2533				  &kf_mondata_ops, priv, NULL, NULL);
2534	if (IS_ERR(kn))
2535		return PTR_ERR(kn);
2536
2537	ret = rdtgroup_kn_set_ugid(kn);
2538	if (ret) {
2539		kernfs_remove(kn);
2540		return ret;
2541	}
2542
2543	return ret;
2544}
2545
2546/*
2547 * Remove all subdirectories of mon_data of ctrl_mon groups
2548 * and monitor groups with given domain id.
2549 */
2550static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2551					   unsigned int dom_id)
2552{
2553	struct rdtgroup *prgrp, *crgrp;
2554	char name[32];
2555
 
 
 
2556	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2557		sprintf(name, "mon_%s_%02d", r->name, dom_id);
2558		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
2559
2560		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
2561			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
2562	}
2563}
2564
2565static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
2566				struct rdt_domain *d,
2567				struct rdt_resource *r, struct rdtgroup *prgrp)
2568{
2569	union mon_data_bits priv;
2570	struct kernfs_node *kn;
2571	struct mon_evt *mevt;
2572	struct rmid_read rr;
2573	char name[32];
2574	int ret;
2575
2576	sprintf(name, "mon_%s_%02d", r->name, d->id);
2577	/* create the directory */
2578	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2579	if (IS_ERR(kn))
2580		return PTR_ERR(kn);
2581
 
 
 
 
 
2582	ret = rdtgroup_kn_set_ugid(kn);
2583	if (ret)
2584		goto out_destroy;
2585
2586	if (WARN_ON(list_empty(&r->evt_list))) {
2587		ret = -EPERM;
2588		goto out_destroy;
2589	}
2590
2591	priv.u.rid = r->rid;
2592	priv.u.domid = d->id;
2593	list_for_each_entry(mevt, &r->evt_list, list) {
2594		priv.u.evtid = mevt->evtid;
2595		ret = mon_addfile(kn, mevt->name, priv.priv);
2596		if (ret)
2597			goto out_destroy;
2598
2599		if (is_mbm_event(mevt->evtid))
2600			mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
2601	}
2602	kernfs_activate(kn);
2603	return 0;
2604
2605out_destroy:
2606	kernfs_remove(kn);
2607	return ret;
2608}
2609
2610/*
2611 * Add all subdirectories of mon_data for "ctrl_mon" groups
2612 * and "monitor" groups with given domain id.
2613 */
2614static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2615					   struct rdt_domain *d)
2616{
2617	struct kernfs_node *parent_kn;
2618	struct rdtgroup *prgrp, *crgrp;
2619	struct list_head *head;
2620
 
 
 
2621	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2622		parent_kn = prgrp->mon.mon_data_kn;
2623		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
2624
2625		head = &prgrp->mon.crdtgrp_list;
2626		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
2627			parent_kn = crgrp->mon.mon_data_kn;
2628			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
2629		}
2630	}
2631}
2632
2633static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
2634				       struct rdt_resource *r,
2635				       struct rdtgroup *prgrp)
2636{
2637	struct rdt_domain *dom;
2638	int ret;
2639
2640	list_for_each_entry(dom, &r->domains, list) {
2641		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
2642		if (ret)
2643			return ret;
2644	}
2645
2646	return 0;
2647}
2648
2649/*
2650 * This creates a directory mon_data which contains the monitored data.
2651 *
2652 * mon_data has one directory for each domain which are named
2653 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
2654 * with L3 domain looks as below:
2655 * ./mon_data:
2656 * mon_L3_00
2657 * mon_L3_01
2658 * mon_L3_02
2659 * ...
2660 *
2661 * Each domain directory has one file per event:
2662 * ./mon_L3_00/:
2663 * llc_occupancy
2664 *
2665 */
2666static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2667			     struct rdtgroup *prgrp,
2668			     struct kernfs_node **dest_kn)
2669{
2670	struct rdt_resource *r;
2671	struct kernfs_node *kn;
2672	int ret;
2673
2674	/*
2675	 * Create the mon_data directory first.
2676	 */
2677	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
2678	if (ret)
2679		return ret;
2680
2681	if (dest_kn)
2682		*dest_kn = kn;
2683
2684	/*
2685	 * Create the subdirectories for each domain. Note that all events
2686	 * in a domain like L3 are grouped into a resource whose domain is L3
2687	 */
2688	for_each_mon_capable_rdt_resource(r) {
2689		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
2690		if (ret)
2691			goto out_destroy;
2692	}
2693
2694	return 0;
2695
2696out_destroy:
2697	kernfs_remove(kn);
2698	return ret;
2699}
2700
2701/**
2702 * cbm_ensure_valid - Enforce validity on provided CBM
2703 * @_val:	Candidate CBM
2704 * @r:		RDT resource to which the CBM belongs
2705 *
2706 * The provided CBM represents all cache portions available for use. This
2707 * may be represented by a bitmap that does not consist of contiguous ones
2708 * and thus be an invalid CBM.
2709 * Here the provided CBM is forced to be a valid CBM by only considering
2710 * the first set of contiguous bits as valid and clearing all bits.
2711 * The intention here is to provide a valid default CBM with which a new
2712 * resource group is initialized. The user can follow this with a
2713 * modification to the CBM if the default does not satisfy the
2714 * requirements.
2715 */
2716static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
2717{
2718	unsigned int cbm_len = r->cache.cbm_len;
2719	unsigned long first_bit, zero_bit;
2720	unsigned long val = _val;
2721
2722	if (!val)
2723		return 0;
2724
2725	first_bit = find_first_bit(&val, cbm_len);
2726	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
2727
2728	/* Clear any remaining bits to ensure contiguous region */
2729	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
2730	return (u32)val;
2731}
2732
2733/*
2734 * Initialize cache resources per RDT domain
2735 *
2736 * Set the RDT domain up to start off with all usable allocations. That is,
2737 * all shareable and unused bits. All-zero CBM is invalid.
2738 */
2739static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
2740				 u32 closid)
2741{
2742	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
2743	enum resctrl_conf_type t = s->conf_type;
2744	struct resctrl_staged_config *cfg;
2745	struct rdt_resource *r = s->res;
2746	u32 used_b = 0, unused_b = 0;
2747	unsigned long tmp_cbm;
2748	enum rdtgrp_mode mode;
2749	u32 peer_ctl, ctrl_val;
2750	int i;
2751
2752	cfg = &d->staged_config[t];
2753	cfg->have_new_ctrl = false;
2754	cfg->new_ctrl = r->cache.shareable_bits;
2755	used_b = r->cache.shareable_bits;
2756	for (i = 0; i < closids_supported(); i++) {
 
2757		if (closid_allocated(i) && i != closid) {
2758			mode = rdtgroup_mode_by_closid(i);
2759			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
2760				/*
2761				 * ctrl values for locksetup aren't relevant
2762				 * until the schemata is written, and the mode
2763				 * becomes RDT_MODE_PSEUDO_LOCKED.
2764				 */
2765				continue;
2766			/*
2767			 * If CDP is active include peer domain's
2768			 * usage to ensure there is no overlap
2769			 * with an exclusive group.
2770			 */
2771			if (resctrl_arch_get_cdp_enabled(r->rid))
2772				peer_ctl = resctrl_arch_get_config(r, d, i,
2773								   peer_type);
2774			else
2775				peer_ctl = 0;
2776			ctrl_val = resctrl_arch_get_config(r, d, i,
2777							   s->conf_type);
2778			used_b |= ctrl_val | peer_ctl;
2779			if (mode == RDT_MODE_SHAREABLE)
2780				cfg->new_ctrl |= ctrl_val | peer_ctl;
2781		}
2782	}
2783	if (d->plr && d->plr->cbm > 0)
2784		used_b |= d->plr->cbm;
2785	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
2786	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
2787	cfg->new_ctrl |= unused_b;
2788	/*
2789	 * Force the initial CBM to be valid, user can
2790	 * modify the CBM based on system availability.
2791	 */
2792	cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
2793	/*
2794	 * Assign the u32 CBM to an unsigned long to ensure that
2795	 * bitmap_weight() does not access out-of-bound memory.
2796	 */
2797	tmp_cbm = cfg->new_ctrl;
2798	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
2799		rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
2800		return -ENOSPC;
2801	}
2802	cfg->have_new_ctrl = true;
2803
2804	return 0;
2805}
2806
2807/*
2808 * Initialize cache resources with default values.
2809 *
2810 * A new RDT group is being created on an allocation capable (CAT)
2811 * supporting system. Set this group up to start off with all usable
2812 * allocations.
2813 *
2814 * If there are no more shareable bits available on any domain then
2815 * the entire allocation will fail.
2816 */
2817static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
2818{
2819	struct rdt_domain *d;
2820	int ret;
2821
2822	list_for_each_entry(d, &s->res->domains, list) {
2823		ret = __init_one_rdt_domain(d, s, closid);
2824		if (ret < 0)
2825			return ret;
2826	}
2827
2828	return 0;
2829}
2830
2831/* Initialize MBA resource with default values. */
2832static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
2833{
2834	struct resctrl_staged_config *cfg;
2835	struct rdt_domain *d;
2836
2837	list_for_each_entry(d, &r->domains, list) {
2838		if (is_mba_sc(r)) {
2839			d->mbps_val[closid] = MBA_MAX_MBPS;
2840			continue;
2841		}
2842
2843		cfg = &d->staged_config[CDP_NONE];
2844		cfg->new_ctrl = r->default_ctrl;
2845		cfg->have_new_ctrl = true;
2846	}
2847}
2848
2849/* Initialize the RDT group's allocations. */
2850static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2851{
2852	struct resctrl_schema *s;
2853	struct rdt_resource *r;
2854	int ret;
2855
2856	list_for_each_entry(s, &resctrl_schema_all, list) {
2857		r = s->res;
2858		if (r->rid == RDT_RESOURCE_MBA) {
2859			rdtgroup_init_mba(r, rdtgrp->closid);
2860			if (is_mba_sc(r))
2861				continue;
2862		} else {
2863			ret = rdtgroup_init_cat(s, rdtgrp->closid);
2864			if (ret < 0)
2865				return ret;
2866		}
2867
2868		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
2869		if (ret < 0) {
2870			rdt_last_cmd_puts("Failed to initialize allocations\n");
2871			return ret;
2872		}
2873
2874	}
2875
2876	rdtgrp->mode = RDT_MODE_SHAREABLE;
2877
2878	return 0;
2879}
2880
2881static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 
2882			     const char *name, umode_t mode,
2883			     enum rdt_group_type rtype, struct rdtgroup **r)
2884{
2885	struct rdtgroup *prdtgrp, *rdtgrp;
2886	struct kernfs_node *kn;
2887	uint files = 0;
2888	int ret;
2889
2890	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
2891	if (!prdtgrp) {
2892		ret = -ENODEV;
2893		goto out_unlock;
2894	}
2895
2896	if (rtype == RDTMON_GROUP &&
2897	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2898	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
2899		ret = -EINVAL;
2900		rdt_last_cmd_puts("Pseudo-locking in progress\n");
2901		goto out_unlock;
2902	}
2903
2904	/* allocate the rdtgroup. */
2905	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
2906	if (!rdtgrp) {
2907		ret = -ENOSPC;
2908		rdt_last_cmd_puts("Kernel out of memory\n");
2909		goto out_unlock;
2910	}
2911	*r = rdtgrp;
2912	rdtgrp->mon.parent = prdtgrp;
2913	rdtgrp->type = rtype;
2914	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
2915
2916	/* kernfs creates the directory for rdtgrp */
2917	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
2918	if (IS_ERR(kn)) {
2919		ret = PTR_ERR(kn);
2920		rdt_last_cmd_puts("kernfs create error\n");
2921		goto out_free_rgrp;
2922	}
2923	rdtgrp->kn = kn;
2924
2925	/*
2926	 * kernfs_remove() will drop the reference count on "kn" which
2927	 * will free it. But we still need it to stick around for the
2928	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
2929	 * which will be dropped by kernfs_put() in rdtgroup_remove().
2930	 */
2931	kernfs_get(kn);
2932
2933	ret = rdtgroup_kn_set_ugid(kn);
2934	if (ret) {
2935		rdt_last_cmd_puts("kernfs perm error\n");
2936		goto out_destroy;
2937	}
2938
2939	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
2940	ret = rdtgroup_add_files(kn, files);
2941	if (ret) {
2942		rdt_last_cmd_puts("kernfs fill error\n");
2943		goto out_destroy;
2944	}
2945
2946	if (rdt_mon_capable) {
2947		ret = alloc_rmid();
2948		if (ret < 0) {
2949			rdt_last_cmd_puts("Out of RMIDs\n");
2950			goto out_destroy;
2951		}
2952		rdtgrp->mon.rmid = ret;
2953
2954		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
2955		if (ret) {
2956			rdt_last_cmd_puts("kernfs subdir error\n");
2957			goto out_idfree;
2958		}
2959	}
2960	kernfs_activate(kn);
2961
2962	/*
2963	 * The caller unlocks the parent_kn upon success.
2964	 */
2965	return 0;
2966
2967out_idfree:
2968	free_rmid(rdtgrp->mon.rmid);
2969out_destroy:
2970	kernfs_put(rdtgrp->kn);
2971	kernfs_remove(rdtgrp->kn);
2972out_free_rgrp:
2973	kfree(rdtgrp);
2974out_unlock:
2975	rdtgroup_kn_unlock(parent_kn);
2976	return ret;
2977}
2978
2979static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
2980{
2981	kernfs_remove(rgrp->kn);
2982	free_rmid(rgrp->mon.rmid);
2983	rdtgroup_remove(rgrp);
2984}
2985
2986/*
2987 * Create a monitor group under "mon_groups" directory of a control
2988 * and monitor group(ctrl_mon). This is a resource group
2989 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
2990 */
2991static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
2992			      const char *name, umode_t mode)
 
 
2993{
2994	struct rdtgroup *rdtgrp, *prgrp;
2995	int ret;
2996
2997	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
 
2998	if (ret)
2999		return ret;
3000
3001	prgrp = rdtgrp->mon.parent;
3002	rdtgrp->closid = prgrp->closid;
3003
3004	/*
3005	 * Add the rdtgrp to the list of rdtgrps the parent
3006	 * ctrl_mon group has to track.
3007	 */
3008	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
3009
3010	rdtgroup_kn_unlock(parent_kn);
3011	return ret;
3012}
3013
3014/*
3015 * These are rdtgroups created under the root directory. Can be used
3016 * to allocate and monitor resources.
3017 */
3018static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 
3019				   const char *name, umode_t mode)
3020{
3021	struct rdtgroup *rdtgrp;
3022	struct kernfs_node *kn;
3023	u32 closid;
3024	int ret;
3025
3026	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
 
3027	if (ret)
3028		return ret;
3029
3030	kn = rdtgrp->kn;
3031	ret = closid_alloc();
3032	if (ret < 0) {
3033		rdt_last_cmd_puts("Out of CLOSIDs\n");
3034		goto out_common_fail;
3035	}
3036	closid = ret;
3037	ret = 0;
3038
3039	rdtgrp->closid = closid;
3040	ret = rdtgroup_init_alloc(rdtgrp);
3041	if (ret < 0)
3042		goto out_id_free;
3043
3044	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
3045
3046	if (rdt_mon_capable) {
3047		/*
3048		 * Create an empty mon_groups directory to hold the subset
3049		 * of tasks and cpus to monitor.
3050		 */
3051		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
3052		if (ret) {
3053			rdt_last_cmd_puts("kernfs subdir error\n");
3054			goto out_del_list;
3055		}
3056	}
3057
3058	goto out_unlock;
3059
3060out_del_list:
3061	list_del(&rdtgrp->rdtgroup_list);
3062out_id_free:
3063	closid_free(closid);
3064out_common_fail:
3065	mkdir_rdt_prepare_clean(rdtgrp);
3066out_unlock:
3067	rdtgroup_kn_unlock(parent_kn);
3068	return ret;
3069}
3070
3071/*
3072 * We allow creating mon groups only with in a directory called "mon_groups"
3073 * which is present in every ctrl_mon group. Check if this is a valid
3074 * "mon_groups" directory.
3075 *
3076 * 1. The directory should be named "mon_groups".
3077 * 2. The mon group itself should "not" be named "mon_groups".
3078 *   This makes sure "mon_groups" directory always has a ctrl_mon group
3079 *   as parent.
3080 */
3081static bool is_mon_groups(struct kernfs_node *kn, const char *name)
3082{
3083	return (!strcmp(kn->name, "mon_groups") &&
3084		strcmp(name, "mon_groups"));
3085}
3086
3087static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
3088			  umode_t mode)
3089{
3090	/* Do not accept '\n' to avoid unparsable situation. */
3091	if (strchr(name, '\n'))
3092		return -EINVAL;
3093
3094	/*
3095	 * If the parent directory is the root directory and RDT
3096	 * allocation is supported, add a control and monitoring
3097	 * subdirectory
3098	 */
3099	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
3100		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3101
3102	/*
3103	 * If RDT monitoring is supported and the parent directory is a valid
3104	 * "mon_groups" directory, add a monitoring subdirectory.
3105	 */
3106	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
3107		return rdtgroup_mkdir_mon(parent_kn, name, mode);
3108
3109	return -EPERM;
3110}
3111
3112static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
 
3113{
3114	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3115	int cpu;
3116
3117	/* Give any tasks back to the parent group */
3118	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3119
3120	/* Update per cpu rmid of the moved CPUs first */
3121	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3122		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
3123	/*
3124	 * Update the MSR on moved CPUs and CPUs which have moved
3125	 * task running on them.
3126	 */
3127	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3128	update_closid_rmid(tmpmask, NULL);
3129
3130	rdtgrp->flags = RDT_DELETED;
3131	free_rmid(rdtgrp->mon.rmid);
3132
3133	/*
3134	 * Remove the rdtgrp from the parent ctrl_mon group's list
3135	 */
3136	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3137	list_del(&rdtgrp->mon.crdtgrp_list);
3138
 
 
 
 
 
3139	kernfs_remove(rdtgrp->kn);
3140
3141	return 0;
3142}
3143
3144static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
 
3145{
3146	rdtgrp->flags = RDT_DELETED;
3147	list_del(&rdtgrp->rdtgroup_list);
3148
 
 
 
 
 
3149	kernfs_remove(rdtgrp->kn);
3150	return 0;
3151}
3152
3153static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
 
3154{
3155	int cpu;
3156
3157	/* Give any tasks back to the default group */
3158	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3159
3160	/* Give any CPUs back to the default group */
3161	cpumask_or(&rdtgroup_default.cpu_mask,
3162		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3163
3164	/* Update per cpu closid and rmid of the moved CPUs first */
3165	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
3166		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
3167		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
3168	}
3169
3170	/*
3171	 * Update the MSR on moved CPUs and CPUs which have moved
3172	 * task running on them.
3173	 */
3174	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3175	update_closid_rmid(tmpmask, NULL);
3176
3177	closid_free(rdtgrp->closid);
3178	free_rmid(rdtgrp->mon.rmid);
3179
3180	rdtgroup_ctrl_remove(rdtgrp);
3181
3182	/*
3183	 * Free all the child monitor group rmids.
3184	 */
3185	free_all_child_rdtgrp(rdtgrp);
3186
 
 
3187	return 0;
3188}
3189
3190static int rdtgroup_rmdir(struct kernfs_node *kn)
3191{
3192	struct kernfs_node *parent_kn = kn->parent;
3193	struct rdtgroup *rdtgrp;
3194	cpumask_var_t tmpmask;
3195	int ret = 0;
3196
3197	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3198		return -ENOMEM;
3199
3200	rdtgrp = rdtgroup_kn_lock_live(kn);
3201	if (!rdtgrp) {
3202		ret = -EPERM;
3203		goto out;
3204	}
3205
3206	/*
3207	 * If the rdtgroup is a ctrl_mon group and parent directory
3208	 * is the root directory, remove the ctrl_mon group.
3209	 *
3210	 * If the rdtgroup is a mon group and parent directory
3211	 * is a valid "mon_groups" directory, remove the mon group.
3212	 */
3213	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3214	    rdtgrp != &rdtgroup_default) {
3215		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3216		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3217			ret = rdtgroup_ctrl_remove(rdtgrp);
3218		} else {
3219			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3220		}
3221	} else if (rdtgrp->type == RDTMON_GROUP &&
3222		 is_mon_groups(parent_kn, kn->name)) {
3223		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3224	} else {
3225		ret = -EPERM;
3226	}
3227
3228out:
3229	rdtgroup_kn_unlock(kn);
3230	free_cpumask_var(tmpmask);
3231	return ret;
3232}
3233
3234static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3235{
3236	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
3237		seq_puts(seq, ",cdp");
3238
3239	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
3240		seq_puts(seq, ",cdpl2");
3241
3242	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
3243		seq_puts(seq, ",mba_MBps");
3244
3245	return 0;
3246}
3247
3248static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
3249	.mkdir		= rdtgroup_mkdir,
3250	.rmdir		= rdtgroup_rmdir,
3251	.show_options	= rdtgroup_show_options,
3252};
3253
3254static int __init rdtgroup_setup_root(void)
3255{
3256	int ret;
3257
3258	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3259				      KERNFS_ROOT_CREATE_DEACTIVATED |
3260				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3261				      &rdtgroup_default);
3262	if (IS_ERR(rdt_root))
3263		return PTR_ERR(rdt_root);
3264
3265	mutex_lock(&rdtgroup_mutex);
3266
3267	rdtgroup_default.closid = 0;
3268	rdtgroup_default.mon.rmid = 0;
3269	rdtgroup_default.type = RDTCTRL_GROUP;
3270	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3271
3272	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
3273
3274	ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RF_CTRL_BASE);
3275	if (ret) {
3276		kernfs_destroy_root(rdt_root);
3277		goto out;
3278	}
3279
3280	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
3281	kernfs_activate(rdtgroup_default.kn);
3282
3283out:
3284	mutex_unlock(&rdtgroup_mutex);
3285
3286	return ret;
3287}
3288
3289static void domain_destroy_mon_state(struct rdt_domain *d)
3290{
3291	bitmap_free(d->rmid_busy_llc);
3292	kfree(d->mbm_total);
3293	kfree(d->mbm_local);
3294}
3295
3296void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
3297{
3298	lockdep_assert_held(&rdtgroup_mutex);
3299
3300	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
3301		mba_sc_domain_destroy(r, d);
3302
3303	if (!r->mon_capable)
3304		return;
3305
3306	/*
3307	 * If resctrl is mounted, remove all the
3308	 * per domain monitor data directories.
3309	 */
3310	if (static_branch_unlikely(&rdt_mon_enable_key))
3311		rmdir_mondata_subdir_allrdtgrp(r, d->id);
3312
3313	if (is_mbm_enabled())
3314		cancel_delayed_work(&d->mbm_over);
3315	if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
3316		/*
3317		 * When a package is going down, forcefully
3318		 * decrement rmid->ebusy. There is no way to know
3319		 * that the L3 was flushed and hence may lead to
3320		 * incorrect counts in rare scenarios, but leaving
3321		 * the RMID as busy creates RMID leaks if the
3322		 * package never comes back.
3323		 */
3324		__check_limbo(d, true);
3325		cancel_delayed_work(&d->cqm_limbo);
3326	}
3327
3328	domain_destroy_mon_state(d);
3329}
3330
3331static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
3332{
3333	size_t tsize;
3334
3335	if (is_llc_occupancy_enabled()) {
3336		d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
3337		if (!d->rmid_busy_llc)
3338			return -ENOMEM;
3339	}
3340	if (is_mbm_total_enabled()) {
3341		tsize = sizeof(*d->mbm_total);
3342		d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
3343		if (!d->mbm_total) {
3344			bitmap_free(d->rmid_busy_llc);
3345			return -ENOMEM;
3346		}
3347	}
3348	if (is_mbm_local_enabled()) {
3349		tsize = sizeof(*d->mbm_local);
3350		d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
3351		if (!d->mbm_local) {
3352			bitmap_free(d->rmid_busy_llc);
3353			kfree(d->mbm_total);
3354			return -ENOMEM;
3355		}
3356	}
3357
3358	return 0;
3359}
3360
3361int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
3362{
3363	int err;
3364
3365	lockdep_assert_held(&rdtgroup_mutex);
3366
3367	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
3368		/* RDT_RESOURCE_MBA is never mon_capable */
3369		return mba_sc_domain_allocate(r, d);
3370
3371	if (!r->mon_capable)
3372		return 0;
3373
3374	err = domain_setup_mon_state(r, d);
3375	if (err)
3376		return err;
3377
3378	if (is_mbm_enabled()) {
3379		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
3380		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
3381	}
3382
3383	if (is_llc_occupancy_enabled())
3384		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
3385
3386	/* If resctrl is mounted, add per domain monitor data directories. */
3387	if (static_branch_unlikely(&rdt_mon_enable_key))
3388		mkdir_mondata_subdir_allrdtgrp(r, d);
3389
3390	return 0;
3391}
3392
3393/*
3394 * rdtgroup_init - rdtgroup initialization
3395 *
3396 * Setup resctrl file system including set up root, create mount point,
3397 * register rdtgroup filesystem, and initialize files under root directory.
3398 *
3399 * Return: 0 on success or -errno
3400 */
3401int __init rdtgroup_init(void)
3402{
3403	int ret = 0;
3404
3405	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
3406		     sizeof(last_cmd_status_buf));
3407
3408	ret = rdtgroup_setup_root();
3409	if (ret)
3410		return ret;
3411
3412	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
3413	if (ret)
3414		goto cleanup_root;
3415
3416	ret = register_filesystem(&rdt_fs_type);
3417	if (ret)
3418		goto cleanup_mountpoint;
3419
3420	/*
3421	 * Adding the resctrl debugfs directory here may not be ideal since
3422	 * it would let the resctrl debugfs directory appear on the debugfs
3423	 * filesystem before the resctrl filesystem is mounted.
3424	 * It may also be ok since that would enable debugging of RDT before
3425	 * resctrl is mounted.
3426	 * The reason why the debugfs directory is created here and not in
3427	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
3428	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
3429	 * (the lockdep class of inode->i_rwsem). Other filesystem
3430	 * interactions (eg. SyS_getdents) have the lock ordering:
3431	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
3432	 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
3433	 * is taken, thus creating dependency:
3434	 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
3435	 * issues considering the other two lock dependencies.
3436	 * By creating the debugfs directory here we avoid a dependency
3437	 * that may cause deadlock (even though file operations cannot
3438	 * occur until the filesystem is mounted, but I do not know how to
3439	 * tell lockdep that).
3440	 */
3441	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
3442
3443	return 0;
3444
3445cleanup_mountpoint:
3446	sysfs_remove_mount_point(fs_kobj, "resctrl");
3447cleanup_root:
3448	kernfs_destroy_root(rdt_root);
3449
3450	return ret;
3451}
3452
3453void __exit rdtgroup_exit(void)
3454{
3455	debugfs_remove_recursive(debugfs_resctrl);
3456	unregister_filesystem(&rdt_fs_type);
3457	sysfs_remove_mount_point(fs_kobj, "resctrl");
3458	kernfs_destroy_root(rdt_root);
3459}