rdtgroup.c - arch/x86/kernel/cpu/resctrl/rdtgroup.c - Linux diff v5.4

   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * User interface for Resource Allocation in Resource Director Technology (RDT)
   4 *
   5 * Copyright (C) 2016 Intel Corporation
   6 *
   7 * Author: Fenghua Yu <fenghua.yu@intel.com>
   8 *
   9 * More information about RDT can be found in the Intel (R) x86 Architecture
  10 * Software Developer Manual.
  11 */
  12
  13#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
  14
  15#include <linux/cacheinfo.h>
  16#include <linux/cpu.h>
  17#include <linux/debugfs.h>
  18#include <linux/fs.h>
  19#include <linux/fs_parser.h>
  20#include <linux/sysfs.h>
  21#include <linux/kernfs.h>
  22#include <linux/seq_buf.h>
  23#include <linux/seq_file.h>
  24#include <linux/sched/signal.h>
  25#include <linux/sched/task.h>
  26#include <linux/slab.h>
  27#include <linux/task_work.h>
  28#include <linux/user_namespace.h>
  29
  30#include <uapi/linux/magic.h>
  31
  32#include <asm/resctrl_sched.h>
  33#include "internal.h"
  34
  35DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
  36DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  37DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 
 
 
 
  38static struct kernfs_root *rdt_root;
  39struct rdtgroup rdtgroup_default;
  40LIST_HEAD(rdt_all_groups);
  41
 
 
 
 
 
 
  42/* Kernel fs node for "info" directory under root */
  43static struct kernfs_node *kn_info;
  44
  45/* Kernel fs node for "mon_groups" directory under root */
  46static struct kernfs_node *kn_mongrp;
  47
  48/* Kernel fs node for "mon_data" directory under root */
  49static struct kernfs_node *kn_mondata;
  50
  51static struct seq_buf last_cmd_status;
  52static char last_cmd_status_buf[512];
  53
 
 
 
  54struct dentry *debugfs_resctrl;
  55
 
 
  56void rdt_last_cmd_clear(void)
  57{
  58	lockdep_assert_held(&rdtgroup_mutex);
  59	seq_buf_clear(&last_cmd_status);
  60}
  61
  62void rdt_last_cmd_puts(const char *s)
  63{
  64	lockdep_assert_held(&rdtgroup_mutex);
  65	seq_buf_puts(&last_cmd_status, s);
  66}
  67
  68void rdt_last_cmd_printf(const char *fmt, ...)
  69{
  70	va_list ap;
  71
  72	va_start(ap, fmt);
  73	lockdep_assert_held(&rdtgroup_mutex);
  74	seq_buf_vprintf(&last_cmd_status, fmt, ap);
  75	va_end(ap);
  76}
  77
 
 
 
 
 
 
 
 
 
 
 
 
 
  78/*
  79 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  80 * we can keep a bitmap of free CLOSIDs in a single integer.
  81 *
  82 * Using a global CLOSID across all resources has some advantages and
  83 * some drawbacks:
  84 * + We can simply set "current->closid" to assign a task to a resource
  85 *   group.
  86 * + Context switch code can avoid extra memory references deciding which
  87 *   CLOSID to load into the PQR_ASSOC MSR
  88 * - We give up some options in configuring resource groups across multi-socket
  89 *   systems.
  90 * - Our choices on how to configure each resource become progressively more
  91 *   limited as the number of resources grows.
  92 */
  93static int closid_free_map;
  94static int closid_free_map_len;
  95
  96int closids_supported(void)
  97{
  98	return closid_free_map_len;
  99}
 100
 101static void closid_init(void)
 102{
 103	struct rdt_resource *r;
 104	int rdt_min_closid = 32;
 105
 106	/* Compute rdt_min_closid across all resources */
 107	for_each_alloc_enabled_rdt_resource(r)
 108		rdt_min_closid = min(rdt_min_closid, r->num_closid);
 109
 110	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
 111
 112	/* CLOSID 0 is always reserved for the default group */
 113	closid_free_map &= ~1;
 114	closid_free_map_len = rdt_min_closid;
 115}
 116
 117static int closid_alloc(void)
 118{
 119	u32 closid = ffs(closid_free_map);
 
 120
 121	if (closid == 0)
 122		return -ENOSPC;
 123	closid--;
 124	closid_free_map &= ~(1 << closid);
 
 
 
 
 
 
 
 
 
 
 125
 126	return closid;
 127}
 128
 129void closid_free(int closid)
 130{
 131	closid_free_map |= 1 << closid;
 
 
 132}
 133
 134/**
 135 * closid_allocated - test if provided closid is in use
 136 * @closid: closid to be tested
 137 *
 138 * Return: true if @closid is currently associated with a resource group,
 139 * false if @closid is free
 140 */
 141static bool closid_allocated(unsigned int closid)
 142{
 143	return (closid_free_map & (1 << closid)) == 0;
 
 
 144}
 145
 146/**
 147 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 148 * @closid: closid if the resource group
 149 *
 150 * Each resource group is associated with a @closid. Here the mode
 151 * of a resource group can be queried by searching for it using its closid.
 152 *
 153 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 154 */
 155enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
 156{
 157	struct rdtgroup *rdtgrp;
 158
 159	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
 160		if (rdtgrp->closid == closid)
 161			return rdtgrp->mode;
 162	}
 163
 164	return RDT_NUM_MODES;
 165}
 166
 167static const char * const rdt_mode_str[] = {
 168	[RDT_MODE_SHAREABLE]		= "shareable",
 169	[RDT_MODE_EXCLUSIVE]		= "exclusive",
 170	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
 171	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
 172};
 173
 174/**
 175 * rdtgroup_mode_str - Return the string representation of mode
 176 * @mode: the resource group mode as &enum rdtgroup_mode
 177 *
 178 * Return: string representation of valid mode, "unknown" otherwise
 179 */
 180static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
 181{
 182	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
 183		return "unknown";
 184
 185	return rdt_mode_str[mode];
 186}
 187
 188/* set uid and gid of rdtgroup dirs and files to that of the creator */
 189static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 190{
 191	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
 192				.ia_uid = current_fsuid(),
 193				.ia_gid = current_fsgid(), };
 194
 195	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
 196	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
 197		return 0;
 198
 199	return kernfs_setattr(kn, &iattr);
 200}
 201
 202static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 203{
 204	struct kernfs_node *kn;
 205	int ret;
 206
 207	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
 208				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 209				  0, rft->kf_ops, rft, NULL, NULL);
 210	if (IS_ERR(kn))
 211		return PTR_ERR(kn);
 212
 213	ret = rdtgroup_kn_set_ugid(kn);
 214	if (ret) {
 215		kernfs_remove(kn);
 216		return ret;
 217	}
 218
 219	return 0;
 220}
 221
 222static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 223{
 224	struct kernfs_open_file *of = m->private;
 225	struct rftype *rft = of->kn->priv;
 226
 227	if (rft->seq_show)
 228		return rft->seq_show(of, m, arg);
 229	return 0;
 230}
 231
 232static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
 233				   size_t nbytes, loff_t off)
 234{
 235	struct rftype *rft = of->kn->priv;
 236
 237	if (rft->write)
 238		return rft->write(of, buf, nbytes, off);
 239
 240	return -EINVAL;
 241}
 242
 243static struct kernfs_ops rdtgroup_kf_single_ops = {
 244	.atomic_write_len	= PAGE_SIZE,
 245	.write			= rdtgroup_file_write,
 246	.seq_show		= rdtgroup_seqfile_show,
 247};
 248
 249static struct kernfs_ops kf_mondata_ops = {
 250	.atomic_write_len	= PAGE_SIZE,
 251	.seq_show		= rdtgroup_mondata_show,
 252};
 253
 254static bool is_cpu_list(struct kernfs_open_file *of)
 255{
 256	struct rftype *rft = of->kn->priv;
 257
 258	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
 259}
 260
 261static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 262			      struct seq_file *s, void *v)
 263{
 264	struct rdtgroup *rdtgrp;
 265	struct cpumask *mask;
 266	int ret = 0;
 267
 268	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 269
 270	if (rdtgrp) {
 271		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
 272			if (!rdtgrp->plr->d) {
 273				rdt_last_cmd_clear();
 274				rdt_last_cmd_puts("Cache domain offline\n");
 275				ret = -ENODEV;
 276			} else {
 277				mask = &rdtgrp->plr->d->cpu_mask;
 278				seq_printf(s, is_cpu_list(of) ?
 279					   "%*pbl\n" : "%*pb\n",
 280					   cpumask_pr_args(mask));
 281			}
 282		} else {
 283			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
 284				   cpumask_pr_args(&rdtgrp->cpu_mask));
 285		}
 286	} else {
 287		ret = -ENOENT;
 288	}
 289	rdtgroup_kn_unlock(of->kn);
 290
 291	return ret;
 292}
 293
 294/*
 295 * This is safe against resctrl_sched_in() called from __switch_to()
 296 * because __switch_to() is executed with interrupts disabled. A local call
 297 * from update_closid_rmid() is proteced against __switch_to() because
 298 * preemption is disabled.
 299 */
 300static void update_cpu_closid_rmid(void *info)
 301{
 302	struct rdtgroup *r = info;
 303
 304	if (r) {
 305		this_cpu_write(pqr_state.default_closid, r->closid);
 306		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 307	}
 308
 309	/*
 310	 * We cannot unconditionally write the MSR because the current
 311	 * executing task might have its own closid selected. Just reuse
 312	 * the context switch code.
 313	 */
 314	resctrl_sched_in();
 315}
 316
 317/*
 318 * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
 319 *
 320 * Per task closids/rmids must have been set up before calling this function.
 321 */
 322static void
 323update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
 324{
 325	int cpu = get_cpu();
 326
 327	if (cpumask_test_cpu(cpu, cpu_mask))
 328		update_cpu_closid_rmid(r);
 329	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
 330	put_cpu();
 331}
 332
 333static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 334			  cpumask_var_t tmpmask)
 335{
 336	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
 337	struct list_head *head;
 338
 339	/* Check whether cpus belong to parent ctrl group */
 340	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
 341	if (cpumask_weight(tmpmask)) {
 342		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
 343		return -EINVAL;
 344	}
 345
 346	/* Check whether cpus are dropped from this group */
 347	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 348	if (cpumask_weight(tmpmask)) {
 349		/* Give any dropped cpus to parent rdtgroup */
 350		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
 351		update_closid_rmid(tmpmask, prgrp);
 352	}
 353
 354	/*
 355	 * If we added cpus, remove them from previous group that owned them
 356	 * and update per-cpu rmid
 357	 */
 358	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 359	if (cpumask_weight(tmpmask)) {
 360		head = &prgrp->mon.crdtgrp_list;
 361		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 362			if (crgrp == rdtgrp)
 363				continue;
 364			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
 365				       tmpmask);
 366		}
 367		update_closid_rmid(tmpmask, rdtgrp);
 368	}
 369
 370	/* Done pushing/pulling - update this group with new mask */
 371	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 372
 373	return 0;
 374}
 375
 376static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
 377{
 378	struct rdtgroup *crgrp;
 379
 380	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
 381	/* update the child mon group masks as well*/
 382	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
 383		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
 384}
 385
 386static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 387			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
 388{
 389	struct rdtgroup *r, *crgrp;
 390	struct list_head *head;
 391
 392	/* Check whether cpus are dropped from this group */
 393	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 394	if (cpumask_weight(tmpmask)) {
 395		/* Can't drop from default group */
 396		if (rdtgrp == &rdtgroup_default) {
 397			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
 398			return -EINVAL;
 399		}
 400
 401		/* Give any dropped cpus to rdtgroup_default */
 402		cpumask_or(&rdtgroup_default.cpu_mask,
 403			   &rdtgroup_default.cpu_mask, tmpmask);
 404		update_closid_rmid(tmpmask, &rdtgroup_default);
 405	}
 406
 407	/*
 408	 * If we added cpus, remove them from previous group and
 409	 * the prev group's child groups that owned them
 410	 * and update per-cpu closid/rmid.
 411	 */
 412	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 413	if (cpumask_weight(tmpmask)) {
 414		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
 415			if (r == rdtgrp)
 416				continue;
 417			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
 418			if (cpumask_weight(tmpmask1))
 419				cpumask_rdtgrp_clear(r, tmpmask1);
 420		}
 421		update_closid_rmid(tmpmask, rdtgrp);
 422	}
 423
 424	/* Done pushing/pulling - update this group with new mask */
 425	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 426
 427	/*
 428	 * Clear child mon group masks since there is a new parent mask
 429	 * now and update the rmid for the cpus the child lost.
 430	 */
 431	head = &rdtgrp->mon.crdtgrp_list;
 432	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 433		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
 434		update_closid_rmid(tmpmask, rdtgrp);
 435		cpumask_clear(&crgrp->cpu_mask);
 436	}
 437
 438	return 0;
 439}
 440
 441static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 442				   char *buf, size_t nbytes, loff_t off)
 443{
 444	cpumask_var_t tmpmask, newmask, tmpmask1;
 445	struct rdtgroup *rdtgrp;
 446	int ret;
 447
 448	if (!buf)
 449		return -EINVAL;
 450
 451	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
 452		return -ENOMEM;
 453	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
 454		free_cpumask_var(tmpmask);
 455		return -ENOMEM;
 456	}
 457	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
 458		free_cpumask_var(tmpmask);
 459		free_cpumask_var(newmask);
 460		return -ENOMEM;
 461	}
 462
 463	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 464	if (!rdtgrp) {
 465		ret = -ENOENT;
 466		goto unlock;
 467	}
 468
 469	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 470	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 471		ret = -EINVAL;
 472		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 473		goto unlock;
 474	}
 475
 476	if (is_cpu_list(of))
 477		ret = cpulist_parse(buf, newmask);
 478	else
 479		ret = cpumask_parse(buf, newmask);
 480
 481	if (ret) {
 482		rdt_last_cmd_puts("Bad CPU list/mask\n");
 483		goto unlock;
 484	}
 485
 486	/* check that user didn't specify any offline cpus */
 487	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
 488	if (cpumask_weight(tmpmask)) {
 489		ret = -EINVAL;
 490		rdt_last_cmd_puts("Can only assign online CPUs\n");
 491		goto unlock;
 492	}
 493
 494	if (rdtgrp->type == RDTCTRL_GROUP)
 495		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
 496	else if (rdtgrp->type == RDTMON_GROUP)
 497		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
 498	else
 499		ret = -EINVAL;
 500
 501unlock:
 502	rdtgroup_kn_unlock(of->kn);
 503	free_cpumask_var(tmpmask);
 504	free_cpumask_var(newmask);
 505	free_cpumask_var(tmpmask1);
 506
 507	return ret ?: nbytes;
 508}
 509
 510struct task_move_callback {
 511	struct callback_head	work;
 512	struct rdtgroup		*rdtgrp;
 513};
 514
 515static void move_myself(struct callback_head *head)
 
 
 
 
 
 
 
 516{
 517	struct task_move_callback *callback;
 518	struct rdtgroup *rdtgrp;
 519
 520	callback = container_of(head, struct task_move_callback, work);
 521	rdtgrp = callback->rdtgrp;
 522
 
 
 523	/*
 524	 * If resource group was deleted before this task work callback
 525	 * was invoked, then assign the task to root group and free the
 526	 * resource group.
 527	 */
 528	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 529	    (rdtgrp->flags & RDT_DELETED)) {
 530		current->closid = 0;
 531		current->rmid = 0;
 532		kfree(rdtgrp);
 533	}
 534
 535	preempt_disable();
 536	/* update PQR_ASSOC MSR to make resource group go into effect */
 537	resctrl_sched_in();
 538	preempt_enable();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 539
 540	kfree(callback);
 
 541}
 542
 543static int __rdtgroup_move_task(struct task_struct *tsk,
 544				struct rdtgroup *rdtgrp)
 545{
 546	struct task_move_callback *callback;
 547	int ret;
 548
 549	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
 550	if (!callback)
 551		return -ENOMEM;
 552	callback->work.func = move_myself;
 553	callback->rdtgrp = rdtgrp;
 554
 555	/*
 556	 * Take a refcount, so rdtgrp cannot be freed before the
 557	 * callback has been invoked.
 
 
 
 
 558	 */
 559	atomic_inc(&rdtgrp->waitcount);
 560	ret = task_work_add(tsk, &callback->work, true);
 561	if (ret) {
 562		/*
 563		 * Task is exiting. Drop the refcount and free the callback.
 564		 * No need to check the refcount as the group cannot be
 565		 * deleted before the write function unlocks rdtgroup_mutex.
 566		 */
 567		atomic_dec(&rdtgrp->waitcount);
 568		kfree(callback);
 569		rdt_last_cmd_puts("Task exited\n");
 570	} else {
 571		/*
 572		 * For ctrl_mon groups move both closid and rmid.
 573		 * For monitor groups, can move the tasks only from
 574		 * their parent CTRL group.
 575		 */
 576		if (rdtgrp->type == RDTCTRL_GROUP) {
 577			tsk->closid = rdtgrp->closid;
 578			tsk->rmid = rdtgrp->mon.rmid;
 579		} else if (rdtgrp->type == RDTMON_GROUP) {
 580			if (rdtgrp->mon.parent->closid == tsk->closid) {
 581				tsk->rmid = rdtgrp->mon.rmid;
 582			} else {
 583				rdt_last_cmd_puts("Can't move task to different control group\n");
 584				ret = -EINVAL;
 585			}
 586		}
 587	}
 588	return ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 589}
 590
 591/**
 592 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
 593 * @r: Resource group
 594 *
 595 * Return: 1 if tasks have been assigned to @r, 0 otherwise
 596 */
 597int rdtgroup_tasks_assigned(struct rdtgroup *r)
 598{
 599	struct task_struct *p, *t;
 600	int ret = 0;
 601
 602	lockdep_assert_held(&rdtgroup_mutex);
 603
 604	rcu_read_lock();
 605	for_each_process_thread(p, t) {
 606		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
 607		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
 608			ret = 1;
 609			break;
 610		}
 611	}
 612	rcu_read_unlock();
 613
 614	return ret;
 615}
 616
 617static int rdtgroup_task_write_permission(struct task_struct *task,
 618					  struct kernfs_open_file *of)
 619{
 620	const struct cred *tcred = get_task_cred(task);
 621	const struct cred *cred = current_cred();
 622	int ret = 0;
 623
 624	/*
 625	 * Even if we're attaching all tasks in the thread group, we only
 626	 * need to check permissions on one of them.
 627	 */
 628	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 629	    !uid_eq(cred->euid, tcred->uid) &&
 630	    !uid_eq(cred->euid, tcred->suid)) {
 631		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
 632		ret = -EPERM;
 633	}
 634
 635	put_cred(tcred);
 636	return ret;
 637}
 638
 639static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 640			      struct kernfs_open_file *of)
 641{
 642	struct task_struct *tsk;
 643	int ret;
 644
 645	rcu_read_lock();
 646	if (pid) {
 647		tsk = find_task_by_vpid(pid);
 648		if (!tsk) {
 649			rcu_read_unlock();
 650			rdt_last_cmd_printf("No task %d\n", pid);
 651			return -ESRCH;
 652		}
 653	} else {
 654		tsk = current;
 655	}
 656
 657	get_task_struct(tsk);
 658	rcu_read_unlock();
 659
 660	ret = rdtgroup_task_write_permission(tsk, of);
 661	if (!ret)
 662		ret = __rdtgroup_move_task(tsk, rdtgrp);
 663
 664	put_task_struct(tsk);
 665	return ret;
 666}
 667
 668static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 669				    char *buf, size_t nbytes, loff_t off)
 670{
 671	struct rdtgroup *rdtgrp;
 
 672	int ret = 0;
 673	pid_t pid;
 674
 675	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 676		return -EINVAL;
 677	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 678	if (!rdtgrp) {
 679		rdtgroup_kn_unlock(of->kn);
 680		return -ENOENT;
 681	}
 682	rdt_last_cmd_clear();
 683
 684	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 685	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 686		ret = -EINVAL;
 687		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 688		goto unlock;
 689	}
 690
 691	ret = rdtgroup_move_task(pid, rdtgrp, of);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 692
 693unlock:
 694	rdtgroup_kn_unlock(of->kn);
 695
 696	return ret ?: nbytes;
 697}
 698
 699static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 700{
 701	struct task_struct *p, *t;
 
 702
 703	rcu_read_lock();
 704	for_each_process_thread(p, t) {
 705		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
 706		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
 707			seq_printf(s, "%d\n", t->pid);
 
 
 708	}
 709	rcu_read_unlock();
 710}
 711
 712static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 713			       struct seq_file *s, void *v)
 714{
 715	struct rdtgroup *rdtgrp;
 716	int ret = 0;
 717
 718	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 719	if (rdtgrp)
 720		show_rdt_tasks(rdtgrp, s);
 721	else
 722		ret = -ENOENT;
 723	rdtgroup_kn_unlock(of->kn);
 724
 725	return ret;
 726}
 727
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 728static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
 729				    struct seq_file *seq, void *v)
 730{
 731	int len;
 732
 733	mutex_lock(&rdtgroup_mutex);
 734	len = seq_buf_used(&last_cmd_status);
 735	if (len)
 736		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
 737	else
 738		seq_puts(seq, "ok\n");
 739	mutex_unlock(&rdtgroup_mutex);
 740	return 0;
 741}
 742
 743static int rdt_num_closids_show(struct kernfs_open_file *of,
 744				struct seq_file *seq, void *v)
 745{
 746	struct rdt_resource *r = of->kn->parent->priv;
 747
 748	seq_printf(seq, "%d\n", r->num_closid);
 749	return 0;
 750}
 751
 752static int rdt_default_ctrl_show(struct kernfs_open_file *of,
 753			     struct seq_file *seq, void *v)
 754{
 755	struct rdt_resource *r = of->kn->parent->priv;
 
 756
 757	seq_printf(seq, "%x\n", r->default_ctrl);
 758	return 0;
 759}
 760
 761static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 762			     struct seq_file *seq, void *v)
 763{
 764	struct rdt_resource *r = of->kn->parent->priv;
 
 765
 766	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
 767	return 0;
 768}
 769
 770static int rdt_shareable_bits_show(struct kernfs_open_file *of,
 771				   struct seq_file *seq, void *v)
 772{
 773	struct rdt_resource *r = of->kn->parent->priv;
 
 774
 775	seq_printf(seq, "%x\n", r->cache.shareable_bits);
 776	return 0;
 777}
 778
 779/**
 780 * rdt_bit_usage_show - Display current usage of resources
 781 *
 782 * A domain is a shared resource that can now be allocated differently. Here
 783 * we display the current regions of the domain as an annotated bitmask.
 784 * For each domain of this resource its allocation bitmask
 785 * is annotated as below to indicate the current usage of the corresponding bit:
 786 *   0 - currently unused
 787 *   X - currently available for sharing and used by software and hardware
 788 *   H - currently used by hardware only but available for software use
 789 *   S - currently used and shareable by software only
 790 *   E - currently used exclusively by one resource group
 791 *   P - currently pseudo-locked by one resource group
 792 */
 793static int rdt_bit_usage_show(struct kernfs_open_file *of,
 794			      struct seq_file *seq, void *v)
 795{
 796	struct rdt_resource *r = of->kn->parent->priv;
 797	/*
 798	 * Use unsigned long even though only 32 bits are used to ensure
 799	 * test_bit() is used safely.
 800	 */
 801	unsigned long sw_shareable = 0, hw_shareable = 0;
 802	unsigned long exclusive = 0, pseudo_locked = 0;
 803	struct rdt_domain *dom;
 
 804	int i, hwb, swb, excl, psl;
 805	enum rdtgrp_mode mode;
 806	bool sep = false;
 807	u32 *ctrl;
 808
 
 809	mutex_lock(&rdtgroup_mutex);
 810	hw_shareable = r->cache.shareable_bits;
 811	list_for_each_entry(dom, &r->domains, list) {
 812		if (sep)
 813			seq_putc(seq, ';');
 814		ctrl = dom->ctrl_val;
 815		sw_shareable = 0;
 816		exclusive = 0;
 817		seq_printf(seq, "%d=", dom->id);
 818		for (i = 0; i < closids_supported(); i++, ctrl++) {
 819			if (!closid_allocated(i))
 820				continue;
 
 
 821			mode = rdtgroup_mode_by_closid(i);
 822			switch (mode) {
 823			case RDT_MODE_SHAREABLE:
 824				sw_shareable |= *ctrl;
 825				break;
 826			case RDT_MODE_EXCLUSIVE:
 827				exclusive |= *ctrl;
 828				break;
 829			case RDT_MODE_PSEUDO_LOCKSETUP:
 830			/*
 831			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
 832			 * here but not included since the CBM
 833			 * associated with this CLOSID in this mode
 834			 * is not initialized and no task or cpu can be
 835			 * assigned this CLOSID.
 836			 */
 837				break;
 838			case RDT_MODE_PSEUDO_LOCKED:
 839			case RDT_NUM_MODES:
 840				WARN(1,
 841				     "invalid mode for closid %d\n", i);
 842				break;
 843			}
 844		}
 845		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 846			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
 847			hwb = test_bit(i, &hw_shareable);
 848			swb = test_bit(i, &sw_shareable);
 849			excl = test_bit(i, &exclusive);
 850			psl = test_bit(i, &pseudo_locked);
 851			if (hwb && swb)
 852				seq_putc(seq, 'X');
 853			else if (hwb && !swb)
 854				seq_putc(seq, 'H');
 855			else if (!hwb && swb)
 856				seq_putc(seq, 'S');
 857			else if (excl)
 858				seq_putc(seq, 'E');
 859			else if (psl)
 860				seq_putc(seq, 'P');
 861			else /* Unused bits remain */
 862				seq_putc(seq, '0');
 863		}
 864		sep = true;
 865	}
 866	seq_putc(seq, '\n');
 867	mutex_unlock(&rdtgroup_mutex);
 
 868	return 0;
 869}
 870
 871static int rdt_min_bw_show(struct kernfs_open_file *of,
 872			     struct seq_file *seq, void *v)
 873{
 874	struct rdt_resource *r = of->kn->parent->priv;
 
 875
 876	seq_printf(seq, "%u\n", r->membw.min_bw);
 877	return 0;
 878}
 879
 880static int rdt_num_rmids_show(struct kernfs_open_file *of,
 881			      struct seq_file *seq, void *v)
 882{
 883	struct rdt_resource *r = of->kn->parent->priv;
 884
 885	seq_printf(seq, "%d\n", r->num_rmid);
 886
 887	return 0;
 888}
 889
 890static int rdt_mon_features_show(struct kernfs_open_file *of,
 891				 struct seq_file *seq, void *v)
 892{
 893	struct rdt_resource *r = of->kn->parent->priv;
 894	struct mon_evt *mevt;
 895
 896	list_for_each_entry(mevt, &r->evt_list, list)
 897		seq_printf(seq, "%s\n", mevt->name);
 
 
 
 898
 899	return 0;
 900}
 901
 902static int rdt_bw_gran_show(struct kernfs_open_file *of,
 903			     struct seq_file *seq, void *v)
 904{
 905	struct rdt_resource *r = of->kn->parent->priv;
 
 906
 907	seq_printf(seq, "%u\n", r->membw.bw_gran);
 908	return 0;
 909}
 910
 911static int rdt_delay_linear_show(struct kernfs_open_file *of,
 912			     struct seq_file *seq, void *v)
 913{
 914	struct rdt_resource *r = of->kn->parent->priv;
 
 915
 916	seq_printf(seq, "%u\n", r->membw.delay_linear);
 917	return 0;
 918}
 919
 920static int max_threshold_occ_show(struct kernfs_open_file *of,
 921				  struct seq_file *seq, void *v)
 922{
 923	struct rdt_resource *r = of->kn->parent->priv;
 
 
 
 924
 925	seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale);
 
 
 
 
 
 
 
 
 
 926
 927	return 0;
 928}
 929
 930static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
 931				       char *buf, size_t nbytes, loff_t off)
 932{
 933	struct rdt_resource *r = of->kn->parent->priv;
 934	unsigned int bytes;
 935	int ret;
 936
 937	ret = kstrtouint(buf, 0, &bytes);
 938	if (ret)
 939		return ret;
 940
 941	if (bytes > (boot_cpu_data.x86_cache_size * 1024))
 942		return -EINVAL;
 943
 944	resctrl_cqm_threshold = bytes / r->mon_scale;
 945
 946	return nbytes;
 947}
 948
 949/*
 950 * rdtgroup_mode_show - Display mode of this resource group
 951 */
 952static int rdtgroup_mode_show(struct kernfs_open_file *of,
 953			      struct seq_file *s, void *v)
 954{
 955	struct rdtgroup *rdtgrp;
 956
 957	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 958	if (!rdtgrp) {
 959		rdtgroup_kn_unlock(of->kn);
 960		return -ENOENT;
 961	}
 962
 963	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
 964
 965	rdtgroup_kn_unlock(of->kn);
 966	return 0;
 967}
 968
 969/**
 970 * rdt_cdp_peer_get - Retrieve CDP peer if it exists
 971 * @r: RDT resource to which RDT domain @d belongs
 972 * @d: Cache instance for which a CDP peer is requested
 973 * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
 974 *         Used to return the result.
 975 * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
 976 *         Used to return the result.
 977 *
 978 * RDT resources are managed independently and by extension the RDT domains
 979 * (RDT resource instances) are managed independently also. The Code and
 980 * Data Prioritization (CDP) RDT resources, while managed independently,
 981 * could refer to the same underlying hardware. For example,
 982 * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
 983 *
 984 * When provided with an RDT resource @r and an instance of that RDT
 985 * resource @d rdt_cdp_peer_get() will return if there is a peer RDT
 986 * resource and the exact instance that shares the same hardware.
 987 *
 988 * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
 989 *         If a CDP peer was found, @r_cdp will point to the peer RDT resource
 990 *         and @d_cdp will point to the peer RDT domain.
 991 */
 992static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
 993			    struct rdt_resource **r_cdp,
 994			    struct rdt_domain **d_cdp)
 995{
 996	struct rdt_resource *_r_cdp = NULL;
 997	struct rdt_domain *_d_cdp = NULL;
 998	int ret = 0;
 999
1000	switch (r->rid) {
1001	case RDT_RESOURCE_L3DATA:
1002		_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
1003		break;
1004	case RDT_RESOURCE_L3CODE:
1005		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L3DATA];
1006		break;
1007	case RDT_RESOURCE_L2DATA:
1008		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2CODE];
1009		break;
1010	case RDT_RESOURCE_L2CODE:
1011		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2DATA];
1012		break;
1013	default:
1014		ret = -ENOENT;
1015		goto out;
1016	}
 
1017
1018	/*
1019	 * When a new CPU comes online and CDP is enabled then the new
1020	 * RDT domains (if any) associated with both CDP RDT resources
1021	 * are added in the same CPU online routine while the
1022	 * rdtgroup_mutex is held. It should thus not happen for one
1023	 * RDT domain to exist and be associated with its RDT CDP
1024	 * resource but there is no RDT domain associated with the
1025	 * peer RDT CDP resource. Hence the WARN.
1026	 */
1027	_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
1028	if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
1029		_r_cdp = NULL;
1030		ret = -EINVAL;
1031	}
1032
1033out:
1034	*r_cdp = _r_cdp;
1035	*d_cdp = _d_cdp;
1036
1037	return ret;
1038}
1039
1040/**
1041 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1042 * @r: Resource to which domain instance @d belongs.
1043 * @d: The domain instance for which @closid is being tested.
1044 * @cbm: Capacity bitmask being tested.
1045 * @closid: Intended closid for @cbm.
 
1046 * @exclusive: Only check if overlaps with exclusive resource groups
1047 *
1048 * Checks if provided @cbm intended to be used for @closid on domain
1049 * @d overlaps with any other closids or other hardware usage associated
1050 * with this domain. If @exclusive is true then only overlaps with
1051 * resource groups in exclusive mode will be considered. If @exclusive
1052 * is false then overlaps with any resource group or hardware entities
1053 * will be considered.
1054 *
1055 * @cbm is unsigned long, even if only 32 bits are used, to make the
1056 * bitmap functions work correctly.
1057 *
1058 * Return: false if CBM does not overlap, true if it does.
1059 */
1060static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1061				    unsigned long cbm, int closid, bool exclusive)
 
1062{
1063	enum rdtgrp_mode mode;
1064	unsigned long ctrl_b;
1065	u32 *ctrl;
1066	int i;
1067
1068	/* Check for any overlap with regions used by hardware directly */
1069	if (!exclusive) {
1070		ctrl_b = r->cache.shareable_bits;
1071		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1072			return true;
1073	}
1074
1075	/* Check for overlap with other resource groups */
1076	ctrl = d->ctrl_val;
1077	for (i = 0; i < closids_supported(); i++, ctrl++) {
1078		ctrl_b = *ctrl;
1079		mode = rdtgroup_mode_by_closid(i);
1080		if (closid_allocated(i) && i != closid &&
1081		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1082			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1083				if (exclusive) {
1084					if (mode == RDT_MODE_EXCLUSIVE)
1085						return true;
1086					continue;
1087				}
1088				return true;
1089			}
1090		}
1091	}
1092
1093	return false;
1094}
1095
1096/**
1097 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1098 * @r: Resource to which domain instance @d belongs.
1099 * @d: The domain instance for which @closid is being tested.
1100 * @cbm: Capacity bitmask being tested.
1101 * @closid: Intended closid for @cbm.
1102 * @exclusive: Only check if overlaps with exclusive resource groups
1103 *
1104 * Resources that can be allocated using a CBM can use the CBM to control
1105 * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
1106 * for overlap. Overlap test is not limited to the specific resource for
1107 * which the CBM is intended though - when dealing with CDP resources that
1108 * share the underlying hardware the overlap check should be performed on
1109 * the CDP resource sharing the hardware also.
1110 *
1111 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1112 * overlap test.
1113 *
1114 * Return: true if CBM overlap detected, false if there is no overlap
1115 */
1116bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1117			   unsigned long cbm, int closid, bool exclusive)
1118{
1119	struct rdt_resource *r_cdp;
1120	struct rdt_domain *d_cdp;
1121
1122	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
 
1123		return true;
1124
1125	if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
1126		return false;
1127
1128	return  __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
1129}
1130
1131/**
1132 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
 
1133 *
1134 * An exclusive resource group implies that there should be no sharing of
1135 * its allocated resources. At the time this group is considered to be
1136 * exclusive this test can determine if its current schemata supports this
1137 * setting by testing for overlap with all other resource groups.
1138 *
1139 * Return: true if resource group can be exclusive, false if there is overlap
1140 * with allocations of other resource groups and thus this resource group
1141 * cannot be exclusive.
1142 */
1143static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1144{
1145	int closid = rdtgrp->closid;
 
 
1146	struct rdt_resource *r;
1147	bool has_cache = false;
1148	struct rdt_domain *d;
1149
1150	for_each_alloc_enabled_rdt_resource(r) {
1151		if (r->rid == RDT_RESOURCE_MBA)
 
 
 
 
1152			continue;
1153		has_cache = true;
1154		list_for_each_entry(d, &r->domains, list) {
1155			if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1156						  rdtgrp->closid, false)) {
 
1157				rdt_last_cmd_puts("Schemata overlaps\n");
1158				return false;
1159			}
1160		}
1161	}
1162
1163	if (!has_cache) {
1164		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1165		return false;
1166	}
1167
1168	return true;
1169}
1170
1171/**
1172 * rdtgroup_mode_write - Modify the resource group's mode
1173 *
1174 */
1175static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1176				   char *buf, size_t nbytes, loff_t off)
1177{
1178	struct rdtgroup *rdtgrp;
1179	enum rdtgrp_mode mode;
1180	int ret = 0;
1181
1182	/* Valid input requires a trailing newline */
1183	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1184		return -EINVAL;
1185	buf[nbytes - 1] = '\0';
1186
1187	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1188	if (!rdtgrp) {
1189		rdtgroup_kn_unlock(of->kn);
1190		return -ENOENT;
1191	}
1192
1193	rdt_last_cmd_clear();
1194
1195	mode = rdtgrp->mode;
1196
1197	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1198	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1199	    (!strcmp(buf, "pseudo-locksetup") &&
1200	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1201	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1202		goto out;
1203
1204	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1205		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1206		ret = -EINVAL;
1207		goto out;
1208	}
1209
1210	if (!strcmp(buf, "shareable")) {
1211		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1212			ret = rdtgroup_locksetup_exit(rdtgrp);
1213			if (ret)
1214				goto out;
1215		}
1216		rdtgrp->mode = RDT_MODE_SHAREABLE;
1217	} else if (!strcmp(buf, "exclusive")) {
1218		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1219			ret = -EINVAL;
1220			goto out;
1221		}
1222		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1223			ret = rdtgroup_locksetup_exit(rdtgrp);
1224			if (ret)
1225				goto out;
1226		}
1227		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1228	} else if (!strcmp(buf, "pseudo-locksetup")) {
1229		ret = rdtgroup_locksetup_enter(rdtgrp);
1230		if (ret)
1231			goto out;
1232		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1233	} else {
1234		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1235		ret = -EINVAL;
1236	}
1237
1238out:
1239	rdtgroup_kn_unlock(of->kn);
1240	return ret ?: nbytes;
1241}
1242
1243/**
1244 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1245 * @r: RDT resource to which @d belongs.
1246 * @d: RDT domain instance.
1247 * @cbm: bitmask for which the size should be computed.
1248 *
1249 * The bitmask provided associated with the RDT domain instance @d will be
1250 * translated into how many bytes it represents. The size in bytes is
1251 * computed by first dividing the total cache size by the CBM length to
1252 * determine how many bytes each bit in the bitmask represents. The result
1253 * is multiplied with the number of bits set in the bitmask.
1254 *
1255 * @cbm is unsigned long, even if only 32 bits are used to make the
1256 * bitmap functions work correctly.
1257 */
1258unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1259				  struct rdt_domain *d, unsigned long cbm)
1260{
1261	struct cpu_cacheinfo *ci;
1262	unsigned int size = 0;
1263	int num_b, i;
 
 
 
 
1264
1265	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1266	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1267	for (i = 0; i < ci->num_leaves; i++) {
1268		if (ci->info_list[i].level == r->cache_level) {
1269			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1270			break;
1271		}
1272	}
1273
1274	return size;
1275}
1276
1277/**
1278 * rdtgroup_size_show - Display size in bytes of allocated regions
1279 *
1280 * The "size" file mirrors the layout of the "schemata" file, printing the
1281 * size in bytes of each region instead of the capacity bitmask.
1282 *
1283 */
1284static int rdtgroup_size_show(struct kernfs_open_file *of,
1285			      struct seq_file *s, void *v)
1286{
 
 
 
1287	struct rdtgroup *rdtgrp;
1288	struct rdt_resource *r;
1289	struct rdt_domain *d;
1290	unsigned int size;
1291	int ret = 0;
 
1292	bool sep;
1293	u32 ctrl;
1294
1295	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1296	if (!rdtgrp) {
1297		rdtgroup_kn_unlock(of->kn);
1298		return -ENOENT;
1299	}
1300
1301	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1302		if (!rdtgrp->plr->d) {
1303			rdt_last_cmd_clear();
1304			rdt_last_cmd_puts("Cache domain offline\n");
1305			ret = -ENODEV;
1306		} else {
1307			seq_printf(s, "%*s:", max_name_width,
1308				   rdtgrp->plr->r->name);
1309			size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
1310						    rdtgrp->plr->d,
1311						    rdtgrp->plr->cbm);
1312			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1313		}
1314		goto out;
1315	}
1316
1317	for_each_alloc_enabled_rdt_resource(r) {
 
 
 
 
1318		sep = false;
1319		seq_printf(s, "%*s:", max_name_width, r->name);
1320		list_for_each_entry(d, &r->domains, list) {
1321			if (sep)
1322				seq_putc(s, ';');
1323			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1324				size = 0;
1325			} else {
1326				ctrl = (!is_mba_sc(r) ?
1327						d->ctrl_val[rdtgrp->closid] :
1328						d->mbps_val[rdtgrp->closid]);
1329				if (r->rid == RDT_RESOURCE_MBA)
 
 
 
 
1330					size = ctrl;
1331				else
1332					size = rdtgroup_cbm_to_size(r, d, ctrl);
1333			}
1334			seq_printf(s, "%d=%u", d->id, size);
1335			sep = true;
1336		}
1337		seq_putc(s, '\n');
1338	}
1339
1340out:
1341	rdtgroup_kn_unlock(of->kn);
1342
1343	return ret;
1344}
1345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Table of all resctrl files this file knows how to create.
 *
 * rdtgroup_add_files() creates an entry in a directory when every bit of
 * the entry's .fflags is set in the flags requested for that directory.
 * rdtgroup_kn_mode_restore() looks an entry up by .name to recover its
 * original .mode.
 */
static struct rftype res_common_files[] = {
	/* Top level "info" directory */
	{
		.name		= "last_cmd_status",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_last_cmd_status_show,
		.fflags		= RF_TOP_INFO,
	},
	/* Per-resource control info, any resource type */
	{
		.name		= "num_closids",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_num_closids_show,
		.fflags		= RF_CTRL_INFO,
	},
	/* Per-resource monitoring info, any resource type */
	{
		.name		= "mon_features",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_mon_features_show,
		.fflags		= RF_MON_INFO,
	},
	{
		.name		= "num_rmids",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_num_rmids_show,
		.fflags		= RF_MON_INFO,
	},
	/* Control info, cache resources only */
	{
		.name		= "cbm_mask",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_default_ctrl_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "min_cbm_bits",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_min_cbm_bits_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "shareable_bits",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_shareable_bits_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "bit_usage",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_bit_usage_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	/* Control info, memory bandwidth resources only */
	{
		.name		= "min_bandwidth",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_min_bw_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	{
		.name		= "bandwidth_gran",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_bw_gran_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	{
		.name		= "delay_linear",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_delay_linear_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	/* Monitoring info, cache resources only; the only writable info file */
	{
		.name		= "max_threshold_occupancy",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= max_threshold_occ_write,
		.seq_show	= max_threshold_occ_show,
		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
	},
	/* Files requested with RFTYPE_BASE */
	{
		.name		= "cpus",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_cpus_write,
		.seq_show	= rdtgroup_cpus_show,
		.fflags		= RFTYPE_BASE,
	},
	{
		/* Same handlers as "cpus"; .flags selects the list format */
		.name		= "cpus_list",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_cpus_write,
		.seq_show	= rdtgroup_cpus_show,
		.flags		= RFTYPE_FLAGS_CPUS_LIST,
		.fflags		= RFTYPE_BASE,
	},
	{
		.name		= "tasks",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_tasks_write,
		.seq_show	= rdtgroup_tasks_show,
		.fflags		= RFTYPE_BASE,
	},
	/* Files requested with RF_CTRL_BASE */
	{
		.name		= "schemata",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_schemata_write,
		.seq_show	= rdtgroup_schemata_show,
		.fflags		= RF_CTRL_BASE,
	},
	{
		.name		= "mode",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_mode_write,
		.seq_show	= rdtgroup_mode_show,
		.fflags		= RF_CTRL_BASE,
	},
	{
		.name		= "size",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdtgroup_size_show,
		.fflags		= RF_CTRL_BASE,
	},

};
1483
1484static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1485{
1486	struct rftype *rfts, *rft;
1487	int ret, len;
1488
1489	rfts = res_common_files;
1490	len = ARRAY_SIZE(res_common_files);
1491
1492	lockdep_assert_held(&rdtgroup_mutex);
1493
 
 
 
1494	for (rft = rfts; rft < rfts + len; rft++) {
1495		if ((fflags & rft->fflags) == rft->fflags) {
1496			ret = rdtgroup_add_file(kn, rft);
1497			if (ret)
1498				goto error;
1499		}
1500	}
1501
1502	return 0;
1503error:
1504	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
1505	while (--rft >= rfts) {
1506		if ((fflags & rft->fflags) == rft->fflags)
1507			kernfs_remove_by_name(kn, rft->name);
1508	}
1509	return ret;
1510}
1511
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1512/**
1513 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1514 * @r: The resource group with which the file is associated.
1515 * @name: Name of the file
1516 *
1517 * The permissions of named resctrl file, directory, or link are modified
1518 * to not allow read, write, or execute by any user.
1519 *
1520 * WARNING: This function is intended to communicate to the user that the
1521 * resctrl file has been locked down - that it is not relevant to the
1522 * particular state the system finds itself in. It should not be relied
1523 * on to protect from user access because after the file's permissions
1524 * are restricted the user can still change the permissions using chmod
1525 * from the command line.
1526 *
1527 * Return: 0 on success, <0 on failure.
1528 */
1529int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1530{
1531	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1532	struct kernfs_node *kn;
1533	int ret = 0;
1534
1535	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1536	if (!kn)
1537		return -ENOENT;
1538
1539	switch (kernfs_type(kn)) {
1540	case KERNFS_DIR:
1541		iattr.ia_mode = S_IFDIR;
1542		break;
1543	case KERNFS_FILE:
1544		iattr.ia_mode = S_IFREG;
1545		break;
1546	case KERNFS_LINK:
1547		iattr.ia_mode = S_IFLNK;
1548		break;
1549	}
1550
1551	ret = kernfs_setattr(kn, &iattr);
1552	kernfs_put(kn);
1553	return ret;
1554}
1555
1556/**
1557 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1558 * @r: The resource group with which the file is associated.
1559 * @name: Name of the file
1560 * @mask: Mask of permissions that should be restored
1561 *
1562 * Restore the permissions of the named file. If @name is a directory the
1563 * permissions of its parent will be used.
1564 *
1565 * Return: 0 on success, <0 on failure.
1566 */
1567int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1568			     umode_t mask)
1569{
1570	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1571	struct kernfs_node *kn, *parent;
1572	struct rftype *rfts, *rft;
1573	int ret, len;
1574
1575	rfts = res_common_files;
1576	len = ARRAY_SIZE(res_common_files);
1577
1578	for (rft = rfts; rft < rfts + len; rft++) {
1579		if (!strcmp(rft->name, name))
1580			iattr.ia_mode = rft->mode & mask;
1581	}
1582
1583	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1584	if (!kn)
1585		return -ENOENT;
1586
1587	switch (kernfs_type(kn)) {
1588	case KERNFS_DIR:
1589		parent = kernfs_get_parent(kn);
1590		if (parent) {
1591			iattr.ia_mode |= parent->mode;
1592			kernfs_put(parent);
1593		}
1594		iattr.ia_mode |= S_IFDIR;
1595		break;
1596	case KERNFS_FILE:
1597		iattr.ia_mode |= S_IFREG;
1598		break;
1599	case KERNFS_LINK:
1600		iattr.ia_mode |= S_IFLNK;
1601		break;
1602	}
1603
1604	ret = kernfs_setattr(kn, &iattr);
1605	kernfs_put(kn);
1606	return ret;
1607}
1608
1609static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
1610				      unsigned long fflags)
1611{
1612	struct kernfs_node *kn_subdir;
1613	int ret;
1614
1615	kn_subdir = kernfs_create_dir(kn_info, name,
1616				      kn_info->mode, r);
1617	if (IS_ERR(kn_subdir))
1618		return PTR_ERR(kn_subdir);
1619
1620	kernfs_get(kn_subdir);
1621	ret = rdtgroup_kn_set_ugid(kn_subdir);
1622	if (ret)
1623		return ret;
1624
1625	ret = rdtgroup_add_files(kn_subdir, fflags);
1626	if (!ret)
1627		kernfs_activate(kn_subdir);
1628
1629	return ret;
1630}
1631
1632static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
1633{
 
1634	struct rdt_resource *r;
1635	unsigned long fflags;
1636	char name[32];
1637	int ret;
1638
1639	/* create the directory */
1640	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
1641	if (IS_ERR(kn_info))
1642		return PTR_ERR(kn_info);
1643	kernfs_get(kn_info);
1644
1645	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
1646	if (ret)
1647		goto out_destroy;
1648
1649	for_each_alloc_enabled_rdt_resource(r) {
1650		fflags =  r->fflags | RF_CTRL_INFO;
1651		ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
 
 
1652		if (ret)
1653			goto out_destroy;
1654	}
1655
1656	for_each_mon_enabled_rdt_resource(r) {
1657		fflags =  r->fflags | RF_MON_INFO;
1658		sprintf(name, "%s_MON", r->name);
1659		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
1660		if (ret)
1661			goto out_destroy;
1662	}
1663
1664	/*
1665	 * This extra ref will be put in kernfs_remove() and guarantees
1666	 * that @rdtgrp->kn is always accessible.
1667	 */
1668	kernfs_get(kn_info);
1669
1670	ret = rdtgroup_kn_set_ugid(kn_info);
1671	if (ret)
1672		goto out_destroy;
1673
1674	kernfs_activate(kn_info);
1675
1676	return 0;
1677
1678out_destroy:
1679	kernfs_remove(kn_info);
1680	return ret;
1681}
1682
1683static int
1684mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
1685		    char *name, struct kernfs_node **dest_kn)
1686{
1687	struct kernfs_node *kn;
1688	int ret;
1689
1690	/* create the directory */
1691	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
1692	if (IS_ERR(kn))
1693		return PTR_ERR(kn);
1694
1695	if (dest_kn)
1696		*dest_kn = kn;
1697
1698	/*
1699	 * This extra ref will be put in kernfs_remove() and guarantees
1700	 * that @rdtgrp->kn is always accessible.
1701	 */
1702	kernfs_get(kn);
1703
1704	ret = rdtgroup_kn_set_ugid(kn);
1705	if (ret)
1706		goto out_destroy;
1707
1708	kernfs_activate(kn);
1709
1710	return 0;
1711
1712out_destroy:
1713	kernfs_remove(kn);
1714	return ret;
1715}
1716
1717static void l3_qos_cfg_update(void *arg)
1718{
1719	bool *enable = arg;
1720
1721	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
1722}
1723
1724static void l2_qos_cfg_update(void *arg)
1725{
1726	bool *enable = arg;
1727
1728	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
1729}
1730
1731static inline bool is_mba_linear(void)
1732{
1733	return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
1734}
1735
1736static int set_cache_qos_cfg(int level, bool enable)
1737{
1738	void (*update)(void *arg);
 
1739	struct rdt_resource *r_l;
1740	cpumask_var_t cpu_mask;
1741	struct rdt_domain *d;
1742	int cpu;
1743
1744	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1745		return -ENOMEM;
1746
1747	if (level == RDT_RESOURCE_L3)
1748		update = l3_qos_cfg_update;
1749	else if (level == RDT_RESOURCE_L2)
1750		update = l2_qos_cfg_update;
1751	else
1752		return -EINVAL;
1753
1754	r_l = &rdt_resources_all[level];
1755	list_for_each_entry(d, &r_l->domains, list) {
1756		/* Pick one CPU from each domain instance to update MSR */
1757		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
1758	}
1759	cpu = get_cpu();
1760	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
1761	if (cpumask_test_cpu(cpu, cpu_mask))
1762		update(&enable);
1763	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
1764	smp_call_function_many(cpu_mask, update, &enable, 1);
1765	put_cpu();
 
 
 
 
1766
1767	free_cpumask_var(cpu_mask);
1768
1769	return 0;
1770}
1771
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1772/*
1773 * Enable or disable the MBA software controller
1774 * which helps user specify bandwidth in MBps.
1775 * MBA software controller is supported only if
1776 * MBM is supported and MBA is in linear scale.
1777 */
1778static int set_mba_sc(bool mba_sc)
1779{
1780	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
1781	struct rdt_domain *d;
 
 
1782
1783	if (!is_mbm_enabled() || !is_mba_linear() ||
1784	    mba_sc == is_mba_sc(r))
1785		return -EINVAL;
1786
1787	r->membw.mba_sc = mba_sc;
1788	list_for_each_entry(d, &r->domains, list)
1789		setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
 
 
 
1790
1791	return 0;
1792}
1793
1794static int cdp_enable(int level, int data_type, int code_type)
1795{
1796	struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
1797	struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
1798	struct rdt_resource *r_l = &rdt_resources_all[level];
1799	int ret;
1800
1801	if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
1802	    !r_lcode->alloc_capable)
1803		return -EINVAL;
1804
1805	ret = set_cache_qos_cfg(level, true);
1806	if (!ret) {
1807		r_l->alloc_enabled = false;
1808		r_ldata->alloc_enabled = true;
1809		r_lcode->alloc_enabled = true;
1810	}
1811	return ret;
1812}
1813
1814static int cdpl3_enable(void)
1815{
1816	return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
1817			  RDT_RESOURCE_L3CODE);
1818}
1819
1820static int cdpl2_enable(void)
1821{
1822	return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
1823			  RDT_RESOURCE_L2CODE);
1824}
1825
1826static void cdp_disable(int level, int data_type, int code_type)
1827{
1828	struct rdt_resource *r = &rdt_resources_all[level];
1829
1830	r->alloc_enabled = r->alloc_capable;
1831
1832	if (rdt_resources_all[data_type].alloc_enabled) {
1833		rdt_resources_all[data_type].alloc_enabled = false;
1834		rdt_resources_all[code_type].alloc_enabled = false;
1835		set_cache_qos_cfg(level, false);
 
1836	}
1837}
1838
1839static void cdpl3_disable(void)
1840{
1841	cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
1842}
1843
1844static void cdpl2_disable(void)
1845{
1846	cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
1847}
1848
1849static void cdp_disable_all(void)
1850{
1851	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
1852		cdpl3_disable();
1853	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
1854		cdpl2_disable();
1855}
1856
1857/*
1858 * We don't allow rdtgroup directories to be created anywhere
1859 * except the root directory. Thus when looking for the rdtgroup
1860 * structure for a kernfs node we are either looking at a directory,
1861 * in which case the rdtgroup structure is pointed at by the "priv"
1862 * field, otherwise we have a file, and need only look to the parent
1863 * to find the rdtgroup.
1864 */
1865static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
1866{
1867	if (kernfs_type(kn) == KERNFS_DIR) {
1868		/*
1869		 * All the resource directories use "kn->priv"
1870		 * to point to the "struct rdtgroup" for the
1871		 * resource. "info" and its subdirectories don't
1872		 * have rdtgroup structures, so return NULL here.
1873		 */
1874		if (kn == kn_info || kn->parent == kn_info)
1875			return NULL;
1876		else
1877			return kn->priv;
1878	} else {
1879		return kn->parent->priv;
1880	}
1881}
1882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1883struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
1884{
1885	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1886
1887	if (!rdtgrp)
1888		return NULL;
1889
1890	atomic_inc(&rdtgrp->waitcount);
1891	kernfs_break_active_protection(kn);
1892
 
1893	mutex_lock(&rdtgroup_mutex);
1894
1895	/* Was this group deleted while we waited? */
1896	if (rdtgrp->flags & RDT_DELETED)
1897		return NULL;
1898
1899	return rdtgrp;
1900}
1901
1902void rdtgroup_kn_unlock(struct kernfs_node *kn)
1903{
1904	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1905
1906	if (!rdtgrp)
1907		return;
1908
1909	mutex_unlock(&rdtgroup_mutex);
 
1910
1911	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
1912	    (rdtgrp->flags & RDT_DELETED)) {
1913		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
1914		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
1915			rdtgroup_pseudo_lock_remove(rdtgrp);
1916		kernfs_unbreak_active_protection(kn);
1917		kernfs_put(rdtgrp->kn);
1918		kfree(rdtgrp);
1919	} else {
1920		kernfs_unbreak_active_protection(kn);
1921	}
1922}
1923
/*
 * Forward declaration: rdt_get_tree() below calls this before its
 * definition (presumably further down in this file).
 */
static int mkdir_mondata_all(struct kernfs_node *parent_kn,
			     struct rdtgroup *prgrp,
			     struct kernfs_node **mon_data_kn);
1927
 
 
 
 
 
 
 
 
 
1928static int rdt_enable_ctx(struct rdt_fs_context *ctx)
1929{
1930	int ret = 0;
1931
1932	if (ctx->enable_cdpl2)
1933		ret = cdpl2_enable();
 
 
 
1934
1935	if (!ret && ctx->enable_cdpl3)
1936		ret = cdpl3_enable();
 
 
 
1937
1938	if (!ret && ctx->enable_mba_mbps)
1939		ret = set_mba_sc(true);
 
 
 
1940
 
 
 
 
 
 
 
 
 
 
1941	return ret;
1942}
1943
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * rdt_get_tree - Mount the resctrl file system
 * @fc: File system context carrying the parsed mount options.
 *
 * Runs with the CPU hotplug read lock and rdtgroup_mutex held. Applies the
 * mount options, builds the "info" directory and, when monitoring is
 * available, the "mon_groups" and "mon_data" directories of the default
 * group, initialises pseudo-locking and finally obtains the kernfs tree.
 * The static keys are enabled only after all of that succeeded.
 *
 * The error labels undo the steps in reverse order of their setup.
 *
 * Return: 0 on success, -EBUSY if resctrl is already mounted, otherwise
 * the error of the step that failed.
 */
static int rdt_get_tree(struct fs_context *fc)
{
	struct rdt_fs_context *ctx = rdt_fc2context(fc);
	struct rdt_domain *dom;
	struct rdt_resource *r;
	int ret;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);
	/*
	 * resctrl file system can only be mounted once.
	 */
	if (static_branch_unlikely(&rdt_enable_key)) {
		ret = -EBUSY;
		goto out;
	}

	/* CDP may be half enabled on failure, hence out_cdp and not out */
	ret = rdt_enable_ctx(ctx);
	if (ret < 0)
		goto out_cdp;

	closid_init();

	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
	if (ret < 0)
		goto out_mba;

	if (rdt_mon_capable) {
		ret = mongroup_create_dir(rdtgroup_default.kn,
					  NULL, "mon_groups",
					  &kn_mongrp);
		if (ret < 0)
			goto out_info;
		kernfs_get(kn_mongrp);

		ret = mkdir_mondata_all(rdtgroup_default.kn,
					&rdtgroup_default, &kn_mondata);
		if (ret < 0)
			goto out_mongrp;
		kernfs_get(kn_mondata);
		rdtgroup_default.mon.mon_data_kn = kn_mondata;
	}

	ret = rdt_pseudo_lock_init();
	if (ret)
		goto out_mondata;

	ret = kernfs_get_tree(fc);
	if (ret < 0)
		goto out_psl;

	/* Point of no return: from here on the mount is considered live */
	if (rdt_alloc_capable)
		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
	if (rdt_mon_capable)
		static_branch_enable_cpuslocked(&rdt_mon_enable_key);

	if (rdt_alloc_capable || rdt_mon_capable)
		static_branch_enable_cpuslocked(&rdt_enable_key);

	/* Arm the MBM overflow handler on every L3 domain */
	if (is_mbm_enabled()) {
		r = &rdt_resources_all[RDT_RESOURCE_L3];
		list_for_each_entry(dom, &r->domains, list)
			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
	}

	goto out;

out_psl:
	rdt_pseudo_lock_release();
out_mondata:
	if (rdt_mon_capable)
		kernfs_remove(kn_mondata);
out_mongrp:
	if (rdt_mon_capable)
		kernfs_remove(kn_mongrp);
out_info:
	kernfs_remove(kn_info);
out_mba:
	if (ctx->enable_mba_mbps)
		set_mba_sc(false);
out_cdp:
	cdp_disable_all();
out:
	/* Reached on success as well: clear the status and drop the locks */
	rdt_last_cmd_clear();
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return ret;
}
2032
/* Mount option identifiers, used as option values in rdt_param_specs[]. */
enum rdt_param {
	Opt_cdp,	/* "cdp": sets enable_cdpl3 in rdt_parse_param() */
	Opt_cdpl2,	/* "cdpl2": sets enable_cdpl2 */
	Opt_mba_mbps,	/* "mba_MBps": sets enable_mba_mbps */
	nr__rdt_params	/* number of options, keep last */
};
2039
/* Mount option names; all are plain flags. Terminated by an empty entry. */
static const struct fs_parameter_spec rdt_param_specs[] = {
	fsparam_flag("cdp",		Opt_cdp),
	fsparam_flag("cdpl2",		Opt_cdpl2),
	fsparam_flag("mba_MBps",	Opt_mba_mbps),
	{}
};
2046
/* Parameter description handed to fs_parse() by rdt_parse_param(). */
static const struct fs_parameter_description rdt_fs_parameters = {
	.name		= "rdt",
	.specs		= rdt_param_specs,
};
2051
2052static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2053{
2054	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2055	struct fs_parse_result result;
 
2056	int opt;
2057
2058	opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
2059	if (opt < 0)
2060		return opt;
2061
2062	switch (opt) {
2063	case Opt_cdp:
2064		ctx->enable_cdpl3 = true;
2065		return 0;
2066	case Opt_cdpl2:
2067		ctx->enable_cdpl2 = true;
2068		return 0;
2069	case Opt_mba_mbps:
2070		if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
2071			return -EINVAL;
 
2072		ctx->enable_mba_mbps = true;
2073		return 0;
 
 
 
2074	}
2075
2076	return -EINVAL;
2077}
2078
/*
 * Release the file system context: first the kernfs part, then the
 * rdt_fs_context allocated in rdt_init_fs_context().
 */
static void rdt_fs_context_free(struct fs_context *fc)
{
	struct rdt_fs_context *rdt_ctx;

	/* Look the context up before the kernfs part is released */
	rdt_ctx = rdt_fc2context(fc);

	kernfs_free_fs_context(fc);
	kfree(rdt_ctx);
}
2086
/* Context operations installed on each new context by rdt_init_fs_context(). */
static const struct fs_context_operations rdt_fs_context_ops = {
	.free		= rdt_fs_context_free,
	.parse_param	= rdt_parse_param,
	.get_tree	= rdt_get_tree,
};
2092
2093static int rdt_init_fs_context(struct fs_context *fc)
2094{
2095	struct rdt_fs_context *ctx;
2096
2097	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2098	if (!ctx)
2099		return -ENOMEM;
2100
2101	ctx->kfc.root = rdt_root;
2102	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2103	fc->fs_private = &ctx->kfc;
2104	fc->ops = &rdt_fs_context_ops;
2105	put_user_ns(fc->user_ns);
2106	fc->user_ns = get_user_ns(&init_user_ns);
2107	fc->global = true;
2108	return 0;
2109}
2110
2111static int reset_all_ctrls(struct rdt_resource *r)
2112{
 
 
2113	struct msr_param msr_param;
2114	cpumask_var_t cpu_mask;
2115	struct rdt_domain *d;
2116	int i, cpu;
2117
2118	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2119		return -ENOMEM;
2120
2121	msr_param.res = r;
2122	msr_param.low = 0;
2123	msr_param.high = r->num_closid;
2124
2125	/*
2126	 * Disable resource control for this resource by setting all
2127	 * CBMs in all domains to the maximum mask value. Pick one CPU
2128	 * from each domain to update the MSRs below.
2129	 */
2130	list_for_each_entry(d, &r->domains, list) {
2131		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
2132
2133		for (i = 0; i < r->num_closid; i++)
2134			d->ctrl_val[i] = r->default_ctrl;
 
 
2135	}
2136	cpu = get_cpu();
2137	/* Update CBM on this cpu if it's in cpu_mask. */
2138	if (cpumask_test_cpu(cpu, cpu_mask))
2139		rdt_ctrl_update(&msr_param);
2140	/* Update CBM on all other cpus in cpu_mask. */
2141	smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
2142	put_cpu();
2143
2144	free_cpumask_var(cpu_mask);
2145
2146	return 0;
2147}
2148
2149static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
2150{
2151	return (rdt_alloc_capable &&
2152		(r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
2153}
2154
2155static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
2156{
2157	return (rdt_mon_capable &&
2158		(r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
2159}
2160
2161/*
2162 * Move tasks from one to the other group. If @from is NULL, then all tasks
2163 * in the systems are moved unconditionally (used for teardown).
2164 *
2165 * If @mask is not NULL the cpus on which moved tasks are running are set
2166 * in that mask so the update smp function call is restricted to affected
2167 * cpus.
2168 */
2169static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2170				 struct cpumask *mask)
2171{
2172	struct task_struct *p, *t;
2173
2174	read_lock(&tasklist_lock);
2175	for_each_process_thread(p, t) {
2176		if (!from || is_closid_match(t, from) ||
2177		    is_rmid_match(t, from)) {
2178			t->closid = to->closid;
2179			t->rmid = to->mon.rmid;
2180
2181#ifdef CONFIG_SMP
2182			/*
2183			 * This is safe on x86 w/o barriers as the ordering
2184			 * of writing to task_cpu() and t->on_cpu is
2185			 * reverse to the reading here. The detection is
2186			 * inaccurate as tasks might move or schedule
2187			 * before the smp function call takes place. In
2188			 * such a case the function call is pointless, but
 
 
 
 
 
 
2189			 * there is no other side effect.
2190			 */
2191			if (mask && t->on_cpu)
2192				cpumask_set_cpu(task_cpu(t), mask);
2193#endif
2194		}
2195	}
2196	read_unlock(&tasklist_lock);
2197}
2198
2199static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2200{
2201	struct rdtgroup *sentry, *stmp;
2202	struct list_head *head;
2203
2204	head = &rdtgrp->mon.crdtgrp_list;
2205	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2206		free_rmid(sentry->mon.rmid);
2207		list_del(&sentry->mon.crdtgrp_list);
2208		kfree(sentry);
 
 
 
 
2209	}
2210}
2211
2212/*
2213 * Forcibly remove all of subdirectories under root.
2214 */
2215static void rmdir_all_sub(void)
2216{
2217	struct rdtgroup *rdtgrp, *tmp;
2218
2219	/* Move all tasks to the default resource group */
2220	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2221
2222	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2223		/* Free any child rmids */
2224		free_all_child_rdtgrp(rdtgrp);
2225
2226		/* Remove each rdtgroup other than root */
2227		if (rdtgrp == &rdtgroup_default)
2228			continue;
2229
2230		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2231		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2232			rdtgroup_pseudo_lock_remove(rdtgrp);
2233
2234		/*
2235		 * Give any CPUs back to the default group. We cannot copy
2236		 * cpu_online_mask because a CPU might have executed the
2237		 * offline callback already, but is still marked online.
2238		 */
2239		cpumask_or(&rdtgroup_default.cpu_mask,
2240			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2241
2242		free_rmid(rdtgrp->mon.rmid);
2243
2244		kernfs_remove(rdtgrp->kn);
2245		list_del(&rdtgrp->rdtgroup_list);
2246		kfree(rdtgrp);
 
 
 
 
2247	}
2248	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2249	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2250
2251	kernfs_remove(kn_info);
2252	kernfs_remove(kn_mongrp);
2253	kernfs_remove(kn_mondata);
2254}
2255
/*
 * .kill_sb callback of the resctrl filesystem type.
 *
 * Runs the whole teardown with the CPU hotplug read lock and
 * rdtgroup_mutex held: control values are reset to their defaults, all
 * groups below the root are removed and the three rdt static keys are
 * disabled before the superblock itself is killed.
 */
static void rdt_kill_sb(struct super_block *sb)
{
	struct rdt_resource *r;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	set_mba_sc(false);

	/*
	 * Put everything back to default values. The return value of
	 * reset_all_ctrls() is not checked here.
	 */
	for_each_alloc_enabled_rdt_resource(r)
		reset_all_ctrls(r);
	cdp_disable_all();
	rmdir_all_sub();
	rdt_pseudo_lock_release();
	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
	/* cpus_read_lock() is held, hence the _cpuslocked variants. */
	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
	static_branch_disable_cpuslocked(&rdt_enable_key);
	kernfs_kill_sb(sb);
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
}
2279
/*
 * Filesystem type descriptor for "resctrl": names the filesystem and wires
 * up context initialization, the mount parameter description and the
 * superblock teardown callback.
 */
static struct file_system_type rdt_fs_type = {
	.name			= "resctrl",
	.init_fs_context	= rdt_init_fs_context,
	.parameters		= &rdt_fs_parameters,
	.kill_sb		= rdt_kill_sb,
};
2286
2287static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2288		       void *priv)
2289{
2290	struct kernfs_node *kn;
2291	int ret = 0;
2292
2293	kn = __kernfs_create_file(parent_kn, name, 0444,
2294				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2295				  &kf_mondata_ops, priv, NULL, NULL);
2296	if (IS_ERR(kn))
2297		return PTR_ERR(kn);
2298
2299	ret = rdtgroup_kn_set_ugid(kn);
2300	if (ret) {
2301		kernfs_remove(kn);
2302		return ret;
2303	}
2304
2305	return ret;
2306}
2307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2308/*
2309 * Remove all subdirectories of mon_data of ctrl_mon groups
2310 * and monitor groups with given domain id.
 
 
2311 */
2312void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
 
2313{
2314	struct rdtgroup *prgrp, *crgrp;
 
 
2315	char name[32];
2316
2317	if (!r->mon_enabled)
2318		return;
 
 
2319
2320	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2321		sprintf(name, "mon_%s_%02d", r->name, dom_id);
2322		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
2323
2324		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
2325			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
2326	}
2327}
2328
/*
 * Create the "mon_<resource>_<domain id>" directory for domain @d of
 * resource @r below @parent_kn and populate it with one file per
 * monitoring event of @r.
 *
 * @prgrp is stored as the private data of the new directory and is the
 * group passed to mon_event_read() for MBM events.
 *
 * Return: 0 on success or a negative errno; on failure the directory is
 * removed again.
 */
static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
				struct rdt_domain *d,
				struct rdt_resource *r, struct rdtgroup *prgrp)
{
	union mon_data_bits priv;
	struct kernfs_node *kn;
	struct mon_evt *mevt;
	struct rmid_read rr;
	char name[32];
	int ret;

	sprintf(name, "mon_%s_%02d", r->name, d->id);
	/* create the directory */
	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	/*
	 * This extra ref will be put in kernfs_remove() and guarantees
	 * that kn is always accessible.
	 *
	 * NOTE(review): confirm that kernfs_remove() really drops this
	 * additional reference; if it does not, the reference is leaked.
	 */
	kernfs_get(kn);
	ret = rdtgroup_kn_set_ugid(kn);
	if (ret)
		goto out_destroy;

	/* A monitoring resource without any event is unexpected. */
	if (WARN_ON(list_empty(&r->evt_list))) {
		ret = -EPERM;
		goto out_destroy;
	}

	/* Resource and domain id are common to all event files. */
	priv.u.rid = r->rid;
	priv.u.domid = d->id;
	list_for_each_entry(mevt, &r->evt_list, list) {
		priv.u.evtid = mevt->evtid;
		ret = mon_addfile(kn, mevt->name, priv.priv);
		if (ret)
			goto out_destroy;

		/*
		 * MBM events get an initial read (last argument true).
		 * NOTE(review): presumably this primes the counter state
		 * for the new group - confirm against mon_event_read().
		 */
		if (is_mbm_event(mevt->evtid))
			mon_event_read(&rr, d, prgrp, mevt->evtid, true);
	}
	kernfs_activate(kn);
	return 0;

out_destroy:
	kernfs_remove(kn);
	return ret;
}
2378
2379/*
2380 * Add all subdirectories of mon_data for "ctrl_mon" groups
2381 * and "monitor" groups with given domain id.
2382 */
2383void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2384				    struct rdt_domain *d)
2385{
2386	struct kernfs_node *parent_kn;
2387	struct rdtgroup *prgrp, *crgrp;
2388	struct list_head *head;
2389
2390	if (!r->mon_enabled)
2391		return;
2392
2393	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2394		parent_kn = prgrp->mon.mon_data_kn;
2395		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
2396
2397		head = &prgrp->mon.crdtgrp_list;
2398		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
2399			parent_kn = crgrp->mon.mon_data_kn;
2400			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
2401		}
2402	}
2403}
2404
2405static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
2406				       struct rdt_resource *r,
2407				       struct rdtgroup *prgrp)
2408{
2409	struct rdt_domain *dom;
2410	int ret;
2411
2412	list_for_each_entry(dom, &r->domains, list) {
 
 
 
2413		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
2414		if (ret)
2415			return ret;
2416	}
2417
2418	return 0;
2419}
2420
2421/*
2422 * This creates a directory mon_data which contains the monitored data.
2423 *
2424 * mon_data has one directory for each domain whic are named
2425 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
2426 * with L3 domain looks as below:
2427 * ./mon_data:
2428 * mon_L3_00
2429 * mon_L3_01
2430 * mon_L3_02
2431 * ...
2432 *
2433 * Each domain directory has one file per event:
2434 * ./mon_L3_00/:
2435 * llc_occupancy
2436 *
2437 */
2438static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2439			     struct rdtgroup *prgrp,
2440			     struct kernfs_node **dest_kn)
2441{
2442	struct rdt_resource *r;
2443	struct kernfs_node *kn;
2444	int ret;
2445
2446	/*
2447	 * Create the mon_data directory first.
2448	 */
2449	ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
2450	if (ret)
2451		return ret;
2452
2453	if (dest_kn)
2454		*dest_kn = kn;
2455
2456	/*
2457	 * Create the subdirectories for each domain. Note that all events
2458	 * in a domain like L3 are grouped into a resource whose domain is L3
2459	 */
2460	for_each_mon_enabled_rdt_resource(r) {
2461		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
2462		if (ret)
2463			goto out_destroy;
2464	}
2465
2466	return 0;
2467
2468out_destroy:
2469	kernfs_remove(kn);
2470	return ret;
2471}
2472
2473/**
2474 * cbm_ensure_valid - Enforce validity on provided CBM
2475 * @_val:	Candidate CBM
2476 * @r:		RDT resource to which the CBM belongs
2477 *
2478 * The provided CBM represents all cache portions available for use. This
2479 * may be represented by a bitmap that does not consist of contiguous ones
2480 * and thus be an invalid CBM.
2481 * Here the provided CBM is forced to be a valid CBM by only considering
2482 * the first set of contiguous bits as valid and clearing all bits.
2483 * The intention here is to provide a valid default CBM with which a new
2484 * resource group is initialized. The user can follow this with a
2485 * modification to the CBM if the default does not satisfy the
2486 * requirements.
2487 */
2488static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
2489{
2490	unsigned int cbm_len = r->cache.cbm_len;
2491	unsigned long first_bit, zero_bit;
2492	unsigned long val = _val;
2493
2494	if (!val)
2495		return 0;
2496
2497	first_bit = find_first_bit(&val, cbm_len);
2498	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
2499
2500	/* Clear any remaining bits to ensure contiguous region */
2501	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
2502	return (u32)val;
2503}
2504
/*
 * Initialize cache resources per RDT domain
 *
 * Set the RDT domain up to start off with all usable allocations. That is,
 * all shareable and unused bits. All-zero CBM is invalid.
 *
 * @d:      domain whose staged control value (new_ctrl) is computed
 * @r:      cache resource @d belongs to
 * @closid: CLOSID of the group being set up; its own current control
 *          value is not counted as "used"
 *
 * Return: 0 with d->have_new_ctrl set, or -ENOSPC when fewer than
 * r->cache.min_cbm_bits bits are left for the new group.
 */
static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
				 u32 closid)
{
	struct rdt_resource *r_cdp = NULL;
	struct rdt_domain *d_cdp = NULL;
	u32 used_b = 0, unused_b = 0;
	unsigned long tmp_cbm;
	enum rdtgrp_mode mode;
	u32 peer_ctl, *ctrl;
	int i;

	rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
	d->have_new_ctrl = false;
	/* Start from the shareable bits; they also count as in use. */
	d->new_ctrl = r->cache.shareable_bits;
	used_b = r->cache.shareable_bits;
	ctrl = d->ctrl_val;
	/* Walk the control values of all other allocated CLOSIDs. */
	for (i = 0; i < closids_supported(); i++, ctrl++) {
		if (closid_allocated(i) && i != closid) {
			mode = rdtgroup_mode_by_closid(i);
			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
				/*
				 * ctrl values for locksetup aren't relevant
				 * until the schemata is written, and the mode
				 * becomes RDT_MODE_PSEUDO_LOCKED.
				 */
				continue;
			/*
			 * If CDP is active include peer domain's
			 * usage to ensure there is no overlap
			 * with an exclusive group.
			 */
			if (d_cdp)
				peer_ctl = d_cdp->ctrl_val[i];
			else
				peer_ctl = 0;
			used_b |= *ctrl | peer_ctl;
			/* Bits of shareable groups may be reused. */
			if (mode == RDT_MODE_SHAREABLE)
				d->new_ctrl |= *ctrl | peer_ctl;
		}
	}
	/* A pseudo-locked region in this domain is in use as well. */
	if (d->plr && d->plr->cbm > 0)
		used_b |= d->plr->cbm;
	/* Unused bits: complement of the used bits within cbm_len bits. */
	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
	d->new_ctrl |= unused_b;
	/*
	 * Force the initial CBM to be valid, user can
	 * modify the CBM based on system availability.
	 */
	d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
	/*
	 * Assign the u32 CBM to an unsigned long to ensure that
	 * bitmap_weight() does not access out-of-bound memory.
	 */
	tmp_cbm = d->new_ctrl;
	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
		rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
		return -ENOSPC;
	}
	d->have_new_ctrl = true;

	return 0;
}
2574
2575/*
2576 * Initialize cache resources with default values.
2577 *
2578 * A new RDT group is being created on an allocation capable (CAT)
2579 * supporting system. Set this group up to start off with all usable
2580 * allocations.
2581 *
2582 * If there are no more shareable bits available on any domain then
2583 * the entire allocation will fail.
2584 */
2585static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
2586{
2587	struct rdt_domain *d;
2588	int ret;
2589
2590	list_for_each_entry(d, &r->domains, list) {
2591		ret = __init_one_rdt_domain(d, r, closid);
2592		if (ret < 0)
2593			return ret;
2594	}
2595
2596	return 0;
2597}
2598
2599/* Initialize MBA resource with default values. */
2600static void rdtgroup_init_mba(struct rdt_resource *r)
2601{
2602	struct rdt_domain *d;
 
2603
2604	list_for_each_entry(d, &r->domains, list) {
2605		d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
2606		d->have_new_ctrl = true;
 
 
 
 
 
 
2607	}
2608}
2609
2610/* Initialize the RDT group's allocations. */
2611static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2612{
 
2613	struct rdt_resource *r;
2614	int ret;
 
 
2615
2616	for_each_alloc_enabled_rdt_resource(r) {
2617		if (r->rid == RDT_RESOURCE_MBA) {
2618			rdtgroup_init_mba(r);
 
 
 
 
2619		} else {
2620			ret = rdtgroup_init_cat(r, rdtgrp->closid);
2621			if (ret < 0)
2622				return ret;
2623		}
2624
2625		ret = update_domains(r, rdtgrp->closid);
2626		if (ret < 0) {
2627			rdt_last_cmd_puts("Failed to initialize allocations\n");
2628			return ret;
2629		}
2630
2631	}
2632
2633	rdtgrp->mode = RDT_MODE_SHAREABLE;
2634
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2635	return 0;
2636}
2637
 
 
 
 
 
 
2638static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
2639			     struct kernfs_node *prgrp_kn,
2640			     const char *name, umode_t mode,
2641			     enum rdt_group_type rtype, struct rdtgroup **r)
2642{
2643	struct rdtgroup *prdtgrp, *rdtgrp;
 
2644	struct kernfs_node *kn;
2645	uint files = 0;
2646	int ret;
2647
2648	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
2649	if (!prdtgrp) {
2650		ret = -ENODEV;
2651		goto out_unlock;
2652	}
2653
2654	if (rtype == RDTMON_GROUP &&
2655	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2656	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
2657		ret = -EINVAL;
2658		rdt_last_cmd_puts("Pseudo-locking in progress\n");
2659		goto out_unlock;
2660	}
2661
2662	/* allocate the rdtgroup. */
2663	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
2664	if (!rdtgrp) {
2665		ret = -ENOSPC;
2666		rdt_last_cmd_puts("Kernel out of memory\n");
2667		goto out_unlock;
2668	}
2669	*r = rdtgrp;
2670	rdtgrp->mon.parent = prdtgrp;
2671	rdtgrp->type = rtype;
2672	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
2673
2674	/* kernfs creates the directory for rdtgrp */
2675	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
2676	if (IS_ERR(kn)) {
2677		ret = PTR_ERR(kn);
2678		rdt_last_cmd_puts("kernfs create error\n");
2679		goto out_free_rgrp;
2680	}
2681	rdtgrp->kn = kn;
2682
2683	/*
2684	 * kernfs_remove() will drop the reference count on "kn" which
2685	 * will free it. But we still need it to stick around for the
2686	 * rdtgroup_kn_unlock(kn} call below. Take one extra reference
2687	 * here, which will be dropped inside rdtgroup_kn_unlock().
2688	 */
2689	kernfs_get(kn);
2690
2691	ret = rdtgroup_kn_set_ugid(kn);
2692	if (ret) {
2693		rdt_last_cmd_puts("kernfs perm error\n");
2694		goto out_destroy;
2695	}
2696
2697	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
 
 
 
 
 
 
 
2698	ret = rdtgroup_add_files(kn, files);
2699	if (ret) {
2700		rdt_last_cmd_puts("kernfs fill error\n");
2701		goto out_destroy;
2702	}
2703
2704	if (rdt_mon_capable) {
2705		ret = alloc_rmid();
2706		if (ret < 0) {
2707			rdt_last_cmd_puts("Out of RMIDs\n");
2708			goto out_destroy;
2709		}
2710		rdtgrp->mon.rmid = ret;
2711
2712		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
2713		if (ret) {
2714			rdt_last_cmd_puts("kernfs subdir error\n");
2715			goto out_idfree;
2716		}
2717	}
2718	kernfs_activate(kn);
2719
2720	/*
2721	 * The caller unlocks the prgrp_kn upon success.
2722	 */
2723	return 0;
2724
2725out_idfree:
2726	free_rmid(rdtgrp->mon.rmid);
2727out_destroy:
 
2728	kernfs_remove(rdtgrp->kn);
2729out_free_rgrp:
2730	kfree(rdtgrp);
2731out_unlock:
2732	rdtgroup_kn_unlock(prgrp_kn);
2733	return ret;
2734}
2735
2736static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
2737{
2738	kernfs_remove(rgrp->kn);
2739	free_rmid(rgrp->mon.rmid);
2740	kfree(rgrp);
2741}
2742
2743/*
2744 * Create a monitor group under "mon_groups" directory of a control
2745 * and monitor group(ctrl_mon). This is a resource group
2746 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
2747 */
2748static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
2749			      struct kernfs_node *prgrp_kn,
2750			      const char *name,
2751			      umode_t mode)
2752{
2753	struct rdtgroup *rdtgrp, *prgrp;
2754	int ret;
2755
2756	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
2757				&rdtgrp);
2758	if (ret)
2759		return ret;
2760
2761	prgrp = rdtgrp->mon.parent;
2762	rdtgrp->closid = prgrp->closid;
2763
 
 
 
 
 
 
 
 
2764	/*
2765	 * Add the rdtgrp to the list of rdtgrps the parent
2766	 * ctrl_mon group has to track.
2767	 */
2768	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
2769
2770	rdtgroup_kn_unlock(prgrp_kn);
 
2771	return ret;
2772}
2773
2774/*
2775 * These are rdtgroups created under the root directory. Can be used
2776 * to allocate and monitor resources.
2777 */
2778static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
2779				   struct kernfs_node *prgrp_kn,
2780				   const char *name, umode_t mode)
2781{
2782	struct rdtgroup *rdtgrp;
2783	struct kernfs_node *kn;
2784	u32 closid;
2785	int ret;
2786
2787	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
2788				&rdtgrp);
2789	if (ret)
2790		return ret;
2791
2792	kn = rdtgrp->kn;
2793	ret = closid_alloc();
2794	if (ret < 0) {
2795		rdt_last_cmd_puts("Out of CLOSIDs\n");
2796		goto out_common_fail;
2797	}
2798	closid = ret;
2799	ret = 0;
2800
2801	rdtgrp->closid = closid;
 
 
 
 
 
 
 
2802	ret = rdtgroup_init_alloc(rdtgrp);
2803	if (ret < 0)
2804		goto out_id_free;
2805
2806	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
2807
2808	if (rdt_mon_capable) {
2809		/*
2810		 * Create an empty mon_groups directory to hold the subset
2811		 * of tasks and cpus to monitor.
2812		 */
2813		ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
2814		if (ret) {
2815			rdt_last_cmd_puts("kernfs subdir error\n");
2816			goto out_del_list;
2817		}
2818	}
2819
2820	goto out_unlock;
2821
2822out_del_list:
2823	list_del(&rdtgrp->rdtgroup_list);
2824out_id_free:
 
 
2825	closid_free(closid);
2826out_common_fail:
2827	mkdir_rdt_prepare_clean(rdtgrp);
2828out_unlock:
2829	rdtgroup_kn_unlock(prgrp_kn);
2830	return ret;
2831}
2832
2833/*
2834 * We allow creating mon groups only with in a directory called "mon_groups"
2835 * which is present in every ctrl_mon group. Check if this is a valid
2836 * "mon_groups" directory.
2837 *
2838 * 1. The directory should be named "mon_groups".
2839 * 2. The mon group itself should "not" be named "mon_groups".
2840 *   This makes sure "mon_groups" directory always has a ctrl_mon group
2841 *   as parent.
2842 */
2843static bool is_mon_groups(struct kernfs_node *kn, const char *name)
2844{
2845	return (!strcmp(kn->name, "mon_groups") &&
2846		strcmp(name, "mon_groups"));
2847}
2848
2849static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
2850			  umode_t mode)
2851{
2852	/* Do not accept '\n' to avoid unparsable situation. */
2853	if (strchr(name, '\n'))
2854		return -EINVAL;
2855
2856	/*
2857	 * If the parent directory is the root directory and RDT
2858	 * allocation is supported, add a control and monitoring
2859	 * subdirectory
2860	 */
2861	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
2862		return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
2863
2864	/*
2865	 * If RDT monitoring is supported and the parent directory is a valid
2866	 * "mon_groups" directory, add a monitoring subdirectory.
2867	 */
2868	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
2869		return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
2870
2871	return -EPERM;
2872}
2873
/*
 * Remove monitor group @rdtgrp: hand its tasks and CPUs back to the parent
 * ctrl_mon group, release its RMID and remove its directory.
 *
 * @tmpmask is scratch space provided by the caller; it collects the CPUs
 * whose state has to be refreshed. Always returns 0.
 */
static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
			      cpumask_var_t tmpmask)
{
	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
	int cpu;

	/* Give any tasks back to the parent group */
	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);

	/* Update per cpu rmid of the moved CPUs first */
	for_each_cpu(cpu, &rdtgrp->cpu_mask)
		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
	/*
	 * Update the MSR on moved CPUs and CPUs which have moved
	 * task running on them.
	 */
	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
	update_closid_rmid(tmpmask, NULL);

	/* From here on the group counts as deleted. */
	rdtgrp->flags = RDT_DELETED;
	free_rmid(rdtgrp->mon.rmid);

	/*
	 * Remove the rdtgrp from the parent ctrl_mon group's list
	 */
	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
	list_del(&rdtgrp->mon.crdtgrp_list);

	/*
	 * one extra hold on this, will drop when we kfree(rdtgrp)
	 * in rdtgroup_kn_unlock()
	 */
	kernfs_get(kn);
	kernfs_remove(rdtgrp->kn);

	return 0;
}
2911
/*
 * Mark control group @rdtgrp as deleted, take it off the list of all
 * groups and remove its directory. The structure itself is not freed
 * here. Always returns 0.
 */
static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
				struct rdtgroup *rdtgrp)
{
	rdtgrp->flags = RDT_DELETED;
	list_del(&rdtgrp->rdtgroup_list);

	/*
	 * one extra hold on this, will drop when we kfree(rdtgrp)
	 * in rdtgroup_kn_unlock()
	 */
	kernfs_get(kn);
	kernfs_remove(rdtgrp->kn);
	return 0;
}
2926
/*
 * Remove control group @rdtgrp: hand its tasks and CPUs back to the
 * default group, release its CLOSID and RMID, free its child monitor
 * groups and finally remove the group itself.
 *
 * @tmpmask is scratch space provided by the caller; it collects the CPUs
 * whose state has to be refreshed. Always returns 0.
 */
static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
			       cpumask_var_t tmpmask)
{
	int cpu;

	/* Give any tasks back to the default group */
	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);

	/* Give any CPUs back to the default group */
	cpumask_or(&rdtgroup_default.cpu_mask,
		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

	/* Update per cpu closid and rmid of the moved CPUs first */
	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
	}

	/*
	 * Update the MSR on moved CPUs and CPUs which have moved
	 * task running on them.
	 */
	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
	update_closid_rmid(tmpmask, NULL);

	/* Nothing refers to the ids any more: give them back. */
	closid_free(rdtgrp->closid);
	free_rmid(rdtgrp->mon.rmid);

	/*
	 * Free all the child monitor group rmids.
	 */
	free_all_child_rdtgrp(rdtgrp);

	rdtgroup_ctrl_remove(kn, rdtgrp);

	return 0;
}
2964
2965static int rdtgroup_rmdir(struct kernfs_node *kn)
2966{
2967	struct kernfs_node *parent_kn = kn->parent;
2968	struct rdtgroup *rdtgrp;
2969	cpumask_var_t tmpmask;
2970	int ret = 0;
2971
2972	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
2973		return -ENOMEM;
2974
2975	rdtgrp = rdtgroup_kn_lock_live(kn);
2976	if (!rdtgrp) {
2977		ret = -EPERM;
2978		goto out;
2979	}
2980
2981	/*
2982	 * If the rdtgroup is a ctrl_mon group and parent directory
2983	 * is the root directory, remove the ctrl_mon group.
2984	 *
2985	 * If the rdtgroup is a mon group and parent directory
2986	 * is a valid "mon_groups" directory, remove the mon group.
2987	 */
2988	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
 
2989		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2990		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
2991			ret = rdtgroup_ctrl_remove(kn, rdtgrp);
2992		} else {
2993			ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
2994		}
2995	} else if (rdtgrp->type == RDTMON_GROUP &&
2996		 is_mon_groups(parent_kn, kn->name)) {
2997		ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
2998	} else {
2999		ret = -EPERM;
3000	}
3001
3002out:
3003	rdtgroup_kn_unlock(kn);
3004	free_cpumask_var(tmpmask);
3005	return ret;
3006}
3007
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3008static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3009{
3010	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
3011		seq_puts(seq, ",cdp");
3012
3013	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
3014		seq_puts(seq, ",cdpl2");
3015
3016	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
3017		seq_puts(seq, ",mba_MBps");
3018
 
 
 
3019	return 0;
3020}
3021
/*
 * kernfs syscall callbacks of the resctrl hierarchy: group creation,
 * group removal and listing of the active mount options.
 */
static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
	.mkdir		= rdtgroup_mkdir,
	.rmdir		= rdtgroup_rmdir,
	.show_options	= rdtgroup_show_options,
};
3027
3028static int __init rdtgroup_setup_root(void)
3029{
3030	int ret;
3031
3032	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3033				      KERNFS_ROOT_CREATE_DEACTIVATED |
3034				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3035				      &rdtgroup_default);
3036	if (IS_ERR(rdt_root))
3037		return PTR_ERR(rdt_root);
3038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3039	mutex_lock(&rdtgroup_mutex);
3040
3041	rdtgroup_default.closid = 0;
3042	rdtgroup_default.mon.rmid = 0;
3043	rdtgroup_default.type = RDTCTRL_GROUP;
3044	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3045
3046	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
3047
3048	ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
3049	if (ret) {
3050		kernfs_destroy_root(rdt_root);
3051		goto out;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3052	}
3053
3054	rdtgroup_default.kn = rdt_root->kn;
3055	kernfs_activate(rdtgroup_default.kn);
3056
3057out:
3058	mutex_unlock(&rdtgroup_mutex);
 
3059
3060	return ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3061}
3062
/*
 * rdtgroup_init - rdtgroup initialization
 *
 * Setup resctrl file system including set up root, create mount point,
 * register rdtgroup filesystem, and initialize files under root directory.
 *
 * Each step that fails undoes the steps completed before it. The result of
 * creating the debugfs directory is not checked.
 *
 * Return: 0 on success or -errno
 */
int __init rdtgroup_init(void)
{
	int ret = 0;

	/* Back the "last command status" text with its static buffer. */
	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
		     sizeof(last_cmd_status_buf));

	ret = rdtgroup_setup_root();
	if (ret)
		return ret;

	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
	if (ret)
		goto cleanup_root;

	ret = register_filesystem(&rdt_fs_type);
	if (ret)
		goto cleanup_mountpoint;

	/*
	 * Adding the resctrl debugfs directory here may not be ideal since
	 * it would let the resctrl debugfs directory appear on the debugfs
	 * filesystem before the resctrl filesystem is mounted.
	 * It may also be ok since that would enable debugging of RDT before
	 * resctrl is mounted.
	 * The reason why the debugfs directory is created here and not in
	 * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and
	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
	 * (the lockdep class of inode->i_rwsem). Other filesystem
	 * interactions (eg. SyS_getdents) have the lock ordering:
	 * &sb->s_type->i_mutex_key --> &mm->mmap_sem
	 * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
	 * is taken, thus creating dependency:
	 * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause
	 * issues considering the other two lock dependencies.
	 * By creating the debugfs directory here we avoid a dependency
	 * that may cause deadlock (even though file operations cannot
	 * occur until the filesystem is mounted, but I do not know how to
	 * tell lockdep that).
	 */
	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);

	return 0;

cleanup_mountpoint:
	sysfs_remove_mount_point(fs_kobj, "resctrl");
cleanup_root:
	kernfs_destroy_root(rdt_root);

	return ret;
}
3122
/*
 * Undo rdtgroup_init(), in reverse order: debugfs directory, filesystem
 * registration, sysfs mount point and kernfs root.
 */
void __exit rdtgroup_exit(void)
{
	debugfs_remove_recursive(debugfs_resctrl);
	unregister_filesystem(&rdt_fs_type);
	sysfs_remove_mount_point(fs_kobj, "resctrl");
	kernfs_destroy_root(rdt_root);
}

   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * User interface for Resource Allocation in Resource Director Technology(RDT)
   4 *
   5 * Copyright (C) 2016 Intel Corporation
   6 *
   7 * Author: Fenghua Yu <fenghua.yu@intel.com>
   8 *
   9 * More information about RDT can be found in the Intel (R) x86 Architecture
  10 * Software Developer Manual.
  11 */
  12
  13#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
  14
 
  15#include <linux/cpu.h>
  16#include <linux/debugfs.h>
  17#include <linux/fs.h>
  18#include <linux/fs_parser.h>
  19#include <linux/sysfs.h>
  20#include <linux/kernfs.h>
  21#include <linux/seq_buf.h>
  22#include <linux/seq_file.h>
  23#include <linux/sched/signal.h>
  24#include <linux/sched/task.h>
  25#include <linux/slab.h>
  26#include <linux/task_work.h>
  27#include <linux/user_namespace.h>
  28
  29#include <uapi/linux/magic.h>
  30
  31#include <asm/resctrl.h>
  32#include "internal.h"
  33
  34DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
  35DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  36DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
  37
  38/* Mutex to protect rdtgroup access. */
  39DEFINE_MUTEX(rdtgroup_mutex);
  40
  41static struct kernfs_root *rdt_root;
  42struct rdtgroup rdtgroup_default;
  43LIST_HEAD(rdt_all_groups);
  44
  45/* list of entries for the schemata file */
  46LIST_HEAD(resctrl_schema_all);
  47
  48/* The filesystem can only be mounted once. */
  49bool resctrl_mounted;
  50
  51/* Kernel fs node for "info" directory under root */
  52static struct kernfs_node *kn_info;
  53
  54/* Kernel fs node for "mon_groups" directory under root */
  55static struct kernfs_node *kn_mongrp;
  56
  57/* Kernel fs node for "mon_data" directory under root */
  58static struct kernfs_node *kn_mondata;
  59
  60static struct seq_buf last_cmd_status;
  61static char last_cmd_status_buf[512];
  62
  63static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
  64static void rdtgroup_destroy_root(void);
  65
  66struct dentry *debugfs_resctrl;
  67
  68static bool resctrl_debug;
  69
/*
 * Discard the recorded status text of the last command.
 * The caller must hold rdtgroup_mutex.
 */
void rdt_last_cmd_clear(void)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_clear(&last_cmd_status);
}
  75
/*
 * Append the string @s to the status text of the last command.
 * The caller must hold rdtgroup_mutex.
 */
void rdt_last_cmd_puts(const char *s)
{
	lockdep_assert_held(&rdtgroup_mutex);
	seq_buf_puts(&last_cmd_status, s);
}
  81
  82void rdt_last_cmd_printf(const char *fmt, ...)
  83{
  84	va_list ap;
  85
  86	va_start(ap, fmt);
  87	lockdep_assert_held(&rdtgroup_mutex);
  88	seq_buf_vprintf(&last_cmd_status, fmt, ap);
  89	va_end(ap);
  90}
  91
  92void rdt_staged_configs_clear(void)
  93{
  94	struct rdt_ctrl_domain *dom;
  95	struct rdt_resource *r;
  96
  97	lockdep_assert_held(&rdtgroup_mutex);
  98
  99	for_each_alloc_capable_rdt_resource(r) {
 100		list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
 101			memset(dom->staged_config, 0, sizeof(dom->staged_config));
 102	}
 103}
 104
 105/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set current's closid to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 *
 * closid_free_map: a set bit N means CLOSID N is free; closid_alloc() clears
 * the bit of the CLOSID it hands out and closid_free() sets it again.
 * closid_free_map_len: number of CLOSIDs managed by the map; written by
 * closid_init() and returned by closids_supported().
 */
 120static unsigned long closid_free_map;
 121static int closid_free_map_len;
 122
 123int closids_supported(void)
 124{
 125	return closid_free_map_len;
 126}
 127
 128static void closid_init(void)
 129{
 130	struct resctrl_schema *s;
 131	u32 rdt_min_closid = 32;
 132
 133	/* Compute rdt_min_closid across all resources */
 134	list_for_each_entry(s, &resctrl_schema_all, list)
 135		rdt_min_closid = min(rdt_min_closid, s->num_closid);
 136
 137	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
 138
 139	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
 140	__clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
 141	closid_free_map_len = rdt_min_closid;
 142}
 143
 144static int closid_alloc(void)
 145{
 146	int cleanest_closid;
 147	u32 closid;
 148
 149	lockdep_assert_held(&rdtgroup_mutex);
 150
 151	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
 152		cleanest_closid = resctrl_find_cleanest_closid();
 153		if (cleanest_closid < 0)
 154			return cleanest_closid;
 155		closid = cleanest_closid;
 156	} else {
 157		closid = ffs(closid_free_map);
 158		if (closid == 0)
 159			return -ENOSPC;
 160		closid--;
 161	}
 162	__clear_bit(closid, &closid_free_map);
 163
 164	return closid;
 165}
 166
 167void closid_free(int closid)
 168{
 169	lockdep_assert_held(&rdtgroup_mutex);
 170
 171	__set_bit(closid, &closid_free_map);
 172}
 173
 174/**
 175 * closid_allocated - test if provided closid is in use
 176 * @closid: closid to be tested
 177 *
 178 * Return: true if @closid is currently associated with a resource group,
 179 * false if @closid is free
 180 */
 181bool closid_allocated(unsigned int closid)
 182{
 183	lockdep_assert_held(&rdtgroup_mutex);
 184
 185	return !test_bit(closid, &closid_free_map);
 186}
 187
 188/**
 189 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 190 * @closid: closid if the resource group
 191 *
 192 * Each resource group is associated with a @closid. Here the mode
 193 * of a resource group can be queried by searching for it using its closid.
 194 *
 195 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 196 */
 197enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
 198{
 199	struct rdtgroup *rdtgrp;
 200
 201	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
 202		if (rdtgrp->closid == closid)
 203			return rdtgrp->mode;
 204	}
 205
 206	return RDT_NUM_MODES;
 207}
 208
 /* Printable name of each resource group mode, indexed by the enum rdtgrp_mode value. */
 209static const char * const rdt_mode_str[] = {
 210	[RDT_MODE_SHAREABLE]		= "shareable",
 211	[RDT_MODE_EXCLUSIVE]		= "exclusive",
 212	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
 213	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
 214};
 215
 216/**
 217 * rdtgroup_mode_str - Return the string representation of mode
 218 * @mode: the resource group mode as &enum rdtgroup_mode
 219 *
 220 * Return: string representation of valid mode, "unknown" otherwise
 221 */
 222static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
 223{
 224	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
 225		return "unknown";
 226
 227	return rdt_mode_str[mode];
 228}
 229
 230/* set uid and gid of rdtgroup dirs and files to that of the creator */
 231static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 232{
 233	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
 234				.ia_uid = current_fsuid(),
 235				.ia_gid = current_fsgid(), };
 236
 237	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
 238	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
 239		return 0;
 240
 241	return kernfs_setattr(kn, &iattr);
 242}
 243
 244static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 245{
 246	struct kernfs_node *kn;
 247	int ret;
 248
 249	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
 250				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 251				  0, rft->kf_ops, rft, NULL, NULL);
 252	if (IS_ERR(kn))
 253		return PTR_ERR(kn);
 254
 255	ret = rdtgroup_kn_set_ugid(kn);
 256	if (ret) {
 257		kernfs_remove(kn);
 258		return ret;
 259	}
 260
 261	return 0;
 262}
 263
 264static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 265{
 266	struct kernfs_open_file *of = m->private;
 267	struct rftype *rft = of->kn->priv;
 268
 269	if (rft->seq_show)
 270		return rft->seq_show(of, m, arg);
 271	return 0;
 272}
 273
 274static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
 275				   size_t nbytes, loff_t off)
 276{
 277	struct rftype *rft = of->kn->priv;
 278
 279	if (rft->write)
 280		return rft->write(of, buf, nbytes, off);
 281
 282	return -EINVAL;
 283}
 284
 /*
 * kernfs operations for files described by a struct rftype: show and write
 * requests are forwarded to the rftype's own handlers by
 * rdtgroup_seqfile_show() and rdtgroup_file_write().
 */
 285static const struct kernfs_ops rdtgroup_kf_single_ops = {
 286	.atomic_write_len	= PAGE_SIZE,
 287	.write			= rdtgroup_file_write,
 288	.seq_show		= rdtgroup_seqfile_show,
 289};
 290
 /* kernfs operations whose show handler is rdtgroup_mondata_show(); no write handler is set. */
 291static const struct kernfs_ops kf_mondata_ops = {
 292	.atomic_write_len	= PAGE_SIZE,
 293	.seq_show		= rdtgroup_mondata_show,
 294};
 295
 296static bool is_cpu_list(struct kernfs_open_file *of)
 297{
 298	struct rftype *rft = of->kn->priv;
 299
 300	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
 301}
 302
 303static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 304			      struct seq_file *s, void *v)
 305{
 306	struct rdtgroup *rdtgrp;
 307	struct cpumask *mask;
 308	int ret = 0;
 309
 310	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 311
 312	if (rdtgrp) {
 313		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
 314			if (!rdtgrp->plr->d) {
 315				rdt_last_cmd_clear();
 316				rdt_last_cmd_puts("Cache domain offline\n");
 317				ret = -ENODEV;
 318			} else {
 319				mask = &rdtgrp->plr->d->hdr.cpu_mask;
 320				seq_printf(s, is_cpu_list(of) ?
 321					   "%*pbl\n" : "%*pb\n",
 322					   cpumask_pr_args(mask));
 323			}
 324		} else {
 325			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
 326				   cpumask_pr_args(&rdtgrp->cpu_mask));
 327		}
 328	} else {
 329		ret = -ENOENT;
 330	}
 331	rdtgroup_kn_unlock(of->kn);
 332
 333	return ret;
 334}
 335
 336/*
 337 * This is safe against resctrl_sched_in() called from __switch_to()
 338 * because __switch_to() is executed with interrupts disabled. A local call
 339 * from update_closid_rmid() is protected against __switch_to() because
 340 * preemption is disabled.
 341 */
 342static void update_cpu_closid_rmid(void *info)
 343{
 344	struct rdtgroup *r = info;
 345
 346	if (r) {
 347		this_cpu_write(pqr_state.default_closid, r->closid);
 348		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 349	}
 350
 351	/*
 352	 * We cannot unconditionally write the MSR because the current
 353	 * executing task might have its own closid selected. Just reuse
 354	 * the context switch code.
 355	 */
 356	resctrl_sched_in(current);
 357}
 358
 359/*
 360 * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
 361 *
 362 * Per task closids/rmids must have been set up before calling this function.
 363 */
 364static void
 365update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
 366{
 367	on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
 
 
 
 
 
 368}
 369
 370static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 371			  cpumask_var_t tmpmask)
 372{
 373	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
 374	struct list_head *head;
 375
 376	/* Check whether cpus belong to parent ctrl group */
 377	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
 378	if (!cpumask_empty(tmpmask)) {
 379		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
 380		return -EINVAL;
 381	}
 382
 383	/* Check whether cpus are dropped from this group */
 384	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 385	if (!cpumask_empty(tmpmask)) {
 386		/* Give any dropped cpus to parent rdtgroup */
 387		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
 388		update_closid_rmid(tmpmask, prgrp);
 389	}
 390
 391	/*
 392	 * If we added cpus, remove them from previous group that owned them
 393	 * and update per-cpu rmid
 394	 */
 395	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 396	if (!cpumask_empty(tmpmask)) {
 397		head = &prgrp->mon.crdtgrp_list;
 398		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 399			if (crgrp == rdtgrp)
 400				continue;
 401			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
 402				       tmpmask);
 403		}
 404		update_closid_rmid(tmpmask, rdtgrp);
 405	}
 406
 407	/* Done pushing/pulling - update this group with new mask */
 408	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 409
 410	return 0;
 411}
 412
 413static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
 414{
 415	struct rdtgroup *crgrp;
 416
 417	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
 418	/* update the child mon group masks as well*/
 419	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
 420		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
 421}
 422
 423static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 424			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
 425{
 426	struct rdtgroup *r, *crgrp;
 427	struct list_head *head;
 428
 429	/* Check whether cpus are dropped from this group */
 430	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 431	if (!cpumask_empty(tmpmask)) {
 432		/* Can't drop from default group */
 433		if (rdtgrp == &rdtgroup_default) {
 434			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
 435			return -EINVAL;
 436		}
 437
 438		/* Give any dropped cpus to rdtgroup_default */
 439		cpumask_or(&rdtgroup_default.cpu_mask,
 440			   &rdtgroup_default.cpu_mask, tmpmask);
 441		update_closid_rmid(tmpmask, &rdtgroup_default);
 442	}
 443
 444	/*
 445	 * If we added cpus, remove them from previous group and
 446	 * the prev group's child groups that owned them
 447	 * and update per-cpu closid/rmid.
 448	 */
 449	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 450	if (!cpumask_empty(tmpmask)) {
 451		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
 452			if (r == rdtgrp)
 453				continue;
 454			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
 455			if (!cpumask_empty(tmpmask1))
 456				cpumask_rdtgrp_clear(r, tmpmask1);
 457		}
 458		update_closid_rmid(tmpmask, rdtgrp);
 459	}
 460
 461	/* Done pushing/pulling - update this group with new mask */
 462	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 463
 464	/*
 465	 * Clear child mon group masks since there is a new parent mask
 466	 * now and update the rmid for the cpus the child lost.
 467	 */
 468	head = &rdtgrp->mon.crdtgrp_list;
 469	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 470		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
 471		update_closid_rmid(tmpmask, rdtgrp);
 472		cpumask_clear(&crgrp->cpu_mask);
 473	}
 474
 475	return 0;
 476}
 477
 478static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 479				   char *buf, size_t nbytes, loff_t off)
 480{
 481	cpumask_var_t tmpmask, newmask, tmpmask1;
 482	struct rdtgroup *rdtgrp;
 483	int ret;
 484
 485	if (!buf)
 486		return -EINVAL;
 487
 488	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
 489		return -ENOMEM;
 490	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
 491		free_cpumask_var(tmpmask);
 492		return -ENOMEM;
 493	}
 494	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
 495		free_cpumask_var(tmpmask);
 496		free_cpumask_var(newmask);
 497		return -ENOMEM;
 498	}
 499
 500	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 501	if (!rdtgrp) {
 502		ret = -ENOENT;
 503		goto unlock;
 504	}
 505
 506	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 507	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 508		ret = -EINVAL;
 509		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 510		goto unlock;
 511	}
 512
 513	if (is_cpu_list(of))
 514		ret = cpulist_parse(buf, newmask);
 515	else
 516		ret = cpumask_parse(buf, newmask);
 517
 518	if (ret) {
 519		rdt_last_cmd_puts("Bad CPU list/mask\n");
 520		goto unlock;
 521	}
 522
 523	/* check that user didn't specify any offline cpus */
 524	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
 525	if (!cpumask_empty(tmpmask)) {
 526		ret = -EINVAL;
 527		rdt_last_cmd_puts("Can only assign online CPUs\n");
 528		goto unlock;
 529	}
 530
 531	if (rdtgrp->type == RDTCTRL_GROUP)
 532		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
 533	else if (rdtgrp->type == RDTMON_GROUP)
 534		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
 535	else
 536		ret = -EINVAL;
 537
 538unlock:
 539	rdtgroup_kn_unlock(of->kn);
 540	free_cpumask_var(tmpmask);
 541	free_cpumask_var(newmask);
 542	free_cpumask_var(tmpmask1);
 543
 544	return ret ?: nbytes;
 545}
 546
 547/**
 548 * rdtgroup_remove - the helper to remove resource group safely
 549 * @rdtgrp: resource group to remove
 550 *
 551 * On resource group creation via a mkdir, an extra kernfs_node reference is
 552 * taken to ensure that the rdtgroup structure remains accessible for the
 553 * rdtgroup_kn_unlock() calls where it is removed.
 554 *
 555 * Drop the extra reference here, then free the rdtgroup structure.
 556 *
 557 * Return: void
 558 */
 559static void rdtgroup_remove(struct rdtgroup *rdtgrp)
 560{
 561	kernfs_put(rdtgrp->kn);
 562	kfree(rdtgrp);
 563}
 
 
 564
 565static void _update_task_closid_rmid(void *task)
 566{
 567	/*
 568	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
 569	 * Otherwise, the MSR is updated when the task is scheduled in.
 
 570	 */
 571	if (task == current)
 572		resctrl_sched_in(task);
 573}
 
 
 
 574
 575static void update_task_closid_rmid(struct task_struct *t)
 576{
 577	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
 578		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
 579	else
 580		_update_task_closid_rmid(t);
 581}
 582
 583static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
 584{
 585	u32 closid, rmid = rdtgrp->mon.rmid;
 586
 587	if (rdtgrp->type == RDTCTRL_GROUP)
 588		closid = rdtgrp->closid;
 589	else if (rdtgrp->type == RDTMON_GROUP)
 590		closid = rdtgrp->mon.parent->closid;
 591	else
 592		return false;
 593
 594	return resctrl_arch_match_closid(tsk, closid) &&
 595	       resctrl_arch_match_rmid(tsk, closid, rmid);
 596}
 597
 598static int __rdtgroup_move_task(struct task_struct *tsk,
 599				struct rdtgroup *rdtgrp)
 600{
 601	/* If the task is already in rdtgrp, no need to move the task. */
 602	if (task_in_rdtgroup(tsk, rdtgrp))
 603		return 0;
 
 
 
 
 
 604
 605	/*
 606	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
 607	 * updated by them.
 608	 *
 609	 * For ctrl_mon groups, move both closid and rmid.
 610	 * For monitor groups, can move the tasks only from
 611	 * their parent CTRL group.
 612	 */
 613	if (rdtgrp->type == RDTMON_GROUP &&
 614	    !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
 615		rdt_last_cmd_puts("Can't move task to different control group\n");
 616		return -EINVAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 617	}
 618
 619	if (rdtgrp->type == RDTMON_GROUP)
 620		resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
 621					     rdtgrp->mon.rmid);
 622	else
 623		resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
 624					     rdtgrp->mon.rmid);
 625
 626	/*
 627	 * Ensure the task's closid and rmid are written before determining if
 628	 * the task is current that will decide if it will be interrupted.
 629	 * This pairs with the full barrier between the rq->curr update and
 630	 * resctrl_sched_in() during context switch.
 631	 */
 632	smp_mb();
 633
 634	/*
 635	 * By now, the task's closid and rmid are set. If the task is current
 636	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
 637	 * group go into effect. If the task is not current, the MSR will be
 638	 * updated when the task is scheduled in.
 639	 */
 640	update_task_closid_rmid(tsk);
 641
 642	return 0;
 643}
 644
 645static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
 646{
 647	return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
 648		resctrl_arch_match_closid(t, r->closid));
 649}
 650
 651static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
 652{
 653	return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
 654		resctrl_arch_match_rmid(t, r->mon.parent->closid,
 655					r->mon.rmid));
 656}
 657
 658/**
 659 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
 660 * @r: Resource group
 661 *
 662 * Return: 1 if tasks have been assigned to @r, 0 otherwise
 663 */
 664int rdtgroup_tasks_assigned(struct rdtgroup *r)
 665{
 666	struct task_struct *p, *t;
 667	int ret = 0;
 668
 669	lockdep_assert_held(&rdtgroup_mutex);
 670
 671	rcu_read_lock();
 672	for_each_process_thread(p, t) {
 673		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
 
 674			ret = 1;
 675			break;
 676		}
 677	}
 678	rcu_read_unlock();
 679
 680	return ret;
 681}
 682
 683static int rdtgroup_task_write_permission(struct task_struct *task,
 684					  struct kernfs_open_file *of)
 685{
 686	const struct cred *tcred = get_task_cred(task);
 687	const struct cred *cred = current_cred();
 688	int ret = 0;
 689
 690	/*
 691	 * Even if we're attaching all tasks in the thread group, we only
 692	 * need to check permissions on one of them.
 693	 */
 694	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 695	    !uid_eq(cred->euid, tcred->uid) &&
 696	    !uid_eq(cred->euid, tcred->suid)) {
 697		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
 698		ret = -EPERM;
 699	}
 700
 701	put_cred(tcred);
 702	return ret;
 703}
 704
 705static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 706			      struct kernfs_open_file *of)
 707{
 708	struct task_struct *tsk;
 709	int ret;
 710
 711	rcu_read_lock();
 712	if (pid) {
 713		tsk = find_task_by_vpid(pid);
 714		if (!tsk) {
 715			rcu_read_unlock();
 716			rdt_last_cmd_printf("No task %d\n", pid);
 717			return -ESRCH;
 718		}
 719	} else {
 720		tsk = current;
 721	}
 722
 723	get_task_struct(tsk);
 724	rcu_read_unlock();
 725
 726	ret = rdtgroup_task_write_permission(tsk, of);
 727	if (!ret)
 728		ret = __rdtgroup_move_task(tsk, rdtgrp);
 729
 730	put_task_struct(tsk);
 731	return ret;
 732}
 733
 734static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 735				    char *buf, size_t nbytes, loff_t off)
 736{
 737	struct rdtgroup *rdtgrp;
 738	char *pid_str;
 739	int ret = 0;
 740	pid_t pid;
 741
 
 
 742	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 743	if (!rdtgrp) {
 744		rdtgroup_kn_unlock(of->kn);
 745		return -ENOENT;
 746	}
 747	rdt_last_cmd_clear();
 748
 749	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 750	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 751		ret = -EINVAL;
 752		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 753		goto unlock;
 754	}
 755
 756	while (buf && buf[0] != '\0' && buf[0] != '\n') {
 757		pid_str = strim(strsep(&buf, ","));
 758
 759		if (kstrtoint(pid_str, 0, &pid)) {
 760			rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
 761			ret = -EINVAL;
 762			break;
 763		}
 764
 765		if (pid < 0) {
 766			rdt_last_cmd_printf("Invalid pid %d\n", pid);
 767			ret = -EINVAL;
 768			break;
 769		}
 770
 771		ret = rdtgroup_move_task(pid, rdtgrp, of);
 772		if (ret) {
 773			rdt_last_cmd_printf("Error while processing task %d\n", pid);
 774			break;
 775		}
 776	}
 777
 778unlock:
 779	rdtgroup_kn_unlock(of->kn);
 780
 781	return ret ?: nbytes;
 782}
 783
 784static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 785{
 786	struct task_struct *p, *t;
 787	pid_t pid;
 788
 789	rcu_read_lock();
 790	for_each_process_thread(p, t) {
 791		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
 792			pid = task_pid_vnr(t);
 793			if (pid)
 794				seq_printf(s, "%d\n", pid);
 795		}
 796	}
 797	rcu_read_unlock();
 798}
 799
 800static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 801			       struct seq_file *s, void *v)
 802{
 803	struct rdtgroup *rdtgrp;
 804	int ret = 0;
 805
 806	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 807	if (rdtgrp)
 808		show_rdt_tasks(rdtgrp, s);
 809	else
 810		ret = -ENOENT;
 811	rdtgroup_kn_unlock(of->kn);
 812
 813	return ret;
 814}
 815
 816static int rdtgroup_closid_show(struct kernfs_open_file *of,
 817				struct seq_file *s, void *v)
 818{
 819	struct rdtgroup *rdtgrp;
 820	int ret = 0;
 821
 822	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 823	if (rdtgrp)
 824		seq_printf(s, "%u\n", rdtgrp->closid);
 825	else
 826		ret = -ENOENT;
 827	rdtgroup_kn_unlock(of->kn);
 828
 829	return ret;
 830}
 831
 832static int rdtgroup_rmid_show(struct kernfs_open_file *of,
 833			      struct seq_file *s, void *v)
 834{
 835	struct rdtgroup *rdtgrp;
 836	int ret = 0;
 837
 838	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 839	if (rdtgrp)
 840		seq_printf(s, "%u\n", rdtgrp->mon.rmid);
 841	else
 842		ret = -ENOENT;
 843	rdtgroup_kn_unlock(of->kn);
 844
 845	return ret;
 846}
 847
 #ifdef CONFIG_PROC_CPU_RESCTRL

/*
 * Report the resctrl control group and monitor group of task @tsk.
 *
 * A task can only be part of one resctrl control group and of one monitor
 * group which is associated to that control group. The output is a "res:"
 * line followed by a "mon:" line:
 *
 * 1)   res:
 *      mon:
 *
 *    resctrl is not available (it has not been mounted).
 *
 * 2)   res:/
 *      mon:
 *
 *    Task is part of the root resctrl control group, and it is not associated
 *    to any monitor group.
 *
 * 3)  res:/
 *     mon:mon0
 *
 *    Task is part of the root resctrl control group and monitor group mon0.
 *
 * 4)  res:group0
 *     mon:
 *
 *    Task is part of resctrl control group group0, and it is not associated
 *    to any monitor group.
 *
 * 5) res:group0
 *    mon:mon1
 *
 *    Task is part of resctrl control group group0 and monitor group mon1.
 *
 * Return: 0 on success, -ENOENT when resctrl is mounted but no shareable or
 * exclusive group matches the task's closid.
 */
int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
		      struct pid *pid, struct task_struct *tsk)
{
	struct rdtgroup *ctrl_grp, *mon_grp;
	int ret = 0;

	mutex_lock(&rdtgroup_mutex);

	/* Return empty if resctrl has not been mounted. */
	if (!resctrl_mounted) {
		seq_puts(s, "res:\nmon:\n");
		goto unlock;
	}

	list_for_each_entry(ctrl_grp, &rdt_all_groups, rdtgroup_list) {
		bool is_default = ctrl_grp == &rdtgroup_default;

		/*
		 * Task information is only relevant for shareable
		 * and exclusive groups.
		 */
		if (ctrl_grp->mode != RDT_MODE_SHAREABLE &&
		    ctrl_grp->mode != RDT_MODE_EXCLUSIVE)
			continue;

		if (!resctrl_arch_match_closid(tsk, ctrl_grp->closid))
			continue;

		seq_printf(s, "res:%s%s\n", is_default ? "/" : "",
			   ctrl_grp->kn->name);
		seq_puts(s, "mon:");

		/* Name the first child monitor group the task matches */
		list_for_each_entry(mon_grp, &ctrl_grp->mon.crdtgrp_list,
				    mon.crdtgrp_list) {
			if (resctrl_arch_match_rmid(tsk,
						    mon_grp->mon.parent->closid,
						    mon_grp->mon.rmid)) {
				seq_printf(s, "%s", mon_grp->kn->name);
				break;
			}
		}
		seq_putc(s, '\n');
		goto unlock;
	}

	/*
	 * The above search should succeed. Otherwise return
	 * with an error.
	 */
	ret = -ENOENT;

unlock:
	mutex_unlock(&rdtgroup_mutex);

	return ret;
}
#endif
 934
 935static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
 936				    struct seq_file *seq, void *v)
 937{
 938	int len;
 939
 940	mutex_lock(&rdtgroup_mutex);
 941	len = seq_buf_used(&last_cmd_status);
 942	if (len)
 943		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
 944	else
 945		seq_puts(seq, "ok\n");
 946	mutex_unlock(&rdtgroup_mutex);
 947	return 0;
 948}
 949
 950static int rdt_num_closids_show(struct kernfs_open_file *of,
 951				struct seq_file *seq, void *v)
 952{
 953	struct resctrl_schema *s = of->kn->parent->priv;
 954
 955	seq_printf(seq, "%u\n", s->num_closid);
 956	return 0;
 957}
 958
 959static int rdt_default_ctrl_show(struct kernfs_open_file *of,
 960			     struct seq_file *seq, void *v)
 961{
 962	struct resctrl_schema *s = of->kn->parent->priv;
 963	struct rdt_resource *r = s->res;
 964
 965	seq_printf(seq, "%x\n", r->default_ctrl);
 966	return 0;
 967}
 968
 969static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 970			     struct seq_file *seq, void *v)
 971{
 972	struct resctrl_schema *s = of->kn->parent->priv;
 973	struct rdt_resource *r = s->res;
 974
 975	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
 976	return 0;
 977}
 978
 979static int rdt_shareable_bits_show(struct kernfs_open_file *of,
 980				   struct seq_file *seq, void *v)
 981{
 982	struct resctrl_schema *s = of->kn->parent->priv;
 983	struct rdt_resource *r = s->res;
 984
 985	seq_printf(seq, "%x\n", r->cache.shareable_bits);
 986	return 0;
 987}
 988
 989/*
 990 * rdt_bit_usage_show - Display current usage of resources
 991 *
 992 * A domain is a shared resource that can now be allocated differently. Here
 993 * we display the current regions of the domain as an annotated bitmask.
 994 * For each domain of this resource its allocation bitmask
 995 * is annotated as below to indicate the current usage of the corresponding bit:
 996 *   0 - currently unused
 997 *   X - currently available for sharing and used by software and hardware
 998 *   H - currently used by hardware only but available for software use
 999 *   S - currently used and shareable by software only
1000 *   E - currently used exclusively by one resource group
1001 *   P - currently pseudo-locked by one resource group
1002 */
1003static int rdt_bit_usage_show(struct kernfs_open_file *of,
1004			      struct seq_file *seq, void *v)
1005{
1006	struct resctrl_schema *s = of->kn->parent->priv;
1007	/*
1008	 * Use unsigned long even though only 32 bits are used to ensure
1009	 * test_bit() is used safely.
1010	 */
1011	unsigned long sw_shareable = 0, hw_shareable = 0;
1012	unsigned long exclusive = 0, pseudo_locked = 0;
1013	struct rdt_resource *r = s->res;
1014	struct rdt_ctrl_domain *dom;
1015	int i, hwb, swb, excl, psl;
1016	enum rdtgrp_mode mode;
1017	bool sep = false;
1018	u32 ctrl_val;
1019
1020	cpus_read_lock();
1021	mutex_lock(&rdtgroup_mutex);
1022	hw_shareable = r->cache.shareable_bits;
1023	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
1024		if (sep)
1025			seq_putc(seq, ';');
 
1026		sw_shareable = 0;
1027		exclusive = 0;
1028		seq_printf(seq, "%d=", dom->hdr.id);
1029		for (i = 0; i < closids_supported(); i++) {
1030			if (!closid_allocated(i))
1031				continue;
1032			ctrl_val = resctrl_arch_get_config(r, dom, i,
1033							   s->conf_type);
1034			mode = rdtgroup_mode_by_closid(i);
1035			switch (mode) {
1036			case RDT_MODE_SHAREABLE:
1037				sw_shareable |= ctrl_val;
1038				break;
1039			case RDT_MODE_EXCLUSIVE:
1040				exclusive |= ctrl_val;
1041				break;
1042			case RDT_MODE_PSEUDO_LOCKSETUP:
1043			/*
1044			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
1045			 * here but not included since the CBM
1046			 * associated with this CLOSID in this mode
1047			 * is not initialized and no task or cpu can be
1048			 * assigned this CLOSID.
1049			 */
1050				break;
1051			case RDT_MODE_PSEUDO_LOCKED:
1052			case RDT_NUM_MODES:
1053				WARN(1,
1054				     "invalid mode for closid %d\n", i);
1055				break;
1056			}
1057		}
1058		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
1059			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
1060			hwb = test_bit(i, &hw_shareable);
1061			swb = test_bit(i, &sw_shareable);
1062			excl = test_bit(i, &exclusive);
1063			psl = test_bit(i, &pseudo_locked);
1064			if (hwb && swb)
1065				seq_putc(seq, 'X');
1066			else if (hwb && !swb)
1067				seq_putc(seq, 'H');
1068			else if (!hwb && swb)
1069				seq_putc(seq, 'S');
1070			else if (excl)
1071				seq_putc(seq, 'E');
1072			else if (psl)
1073				seq_putc(seq, 'P');
1074			else /* Unused bits remain */
1075				seq_putc(seq, '0');
1076		}
1077		sep = true;
1078	}
1079	seq_putc(seq, '\n');
1080	mutex_unlock(&rdtgroup_mutex);
1081	cpus_read_unlock();
1082	return 0;
1083}
1084
1085static int rdt_min_bw_show(struct kernfs_open_file *of,
1086			     struct seq_file *seq, void *v)
1087{
1088	struct resctrl_schema *s = of->kn->parent->priv;
1089	struct rdt_resource *r = s->res;
1090
1091	seq_printf(seq, "%u\n", r->membw.min_bw);
1092	return 0;
1093}
1094
1095static int rdt_num_rmids_show(struct kernfs_open_file *of,
1096			      struct seq_file *seq, void *v)
1097{
1098	struct rdt_resource *r = of->kn->parent->priv;
1099
1100	seq_printf(seq, "%d\n", r->num_rmid);
1101
1102	return 0;
1103}
1104
1105static int rdt_mon_features_show(struct kernfs_open_file *of,
1106				 struct seq_file *seq, void *v)
1107{
1108	struct rdt_resource *r = of->kn->parent->priv;
1109	struct mon_evt *mevt;
1110
1111	list_for_each_entry(mevt, &r->evt_list, list) {
1112		seq_printf(seq, "%s\n", mevt->name);
1113		if (mevt->configurable)
1114			seq_printf(seq, "%s_config\n", mevt->name);
1115	}
1116
1117	return 0;
1118}
1119
1120static int rdt_bw_gran_show(struct kernfs_open_file *of,
1121			     struct seq_file *seq, void *v)
1122{
1123	struct resctrl_schema *s = of->kn->parent->priv;
1124	struct rdt_resource *r = s->res;
1125
1126	seq_printf(seq, "%u\n", r->membw.bw_gran);
1127	return 0;
1128}
1129
1130static int rdt_delay_linear_show(struct kernfs_open_file *of,
1131			     struct seq_file *seq, void *v)
1132{
1133	struct resctrl_schema *s = of->kn->parent->priv;
1134	struct rdt_resource *r = s->res;
1135
1136	seq_printf(seq, "%u\n", r->membw.delay_linear);
1137	return 0;
1138}
1139
1140static int max_threshold_occ_show(struct kernfs_open_file *of,
1141				  struct seq_file *seq, void *v)
1142{
1143	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
1144
1145	return 0;
1146}
1147
1148static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1149					 struct seq_file *seq, void *v)
1150{
1151	struct resctrl_schema *s = of->kn->parent->priv;
1152	struct rdt_resource *r = s->res;
1153
1154	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
1155		seq_puts(seq, "per-thread\n");
1156	else
1157		seq_puts(seq, "max\n");
1158
1159	return 0;
1160}
1161
1162static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1163				       char *buf, size_t nbytes, loff_t off)
1164{
 
1165	unsigned int bytes;
1166	int ret;
1167
1168	ret = kstrtouint(buf, 0, &bytes);
1169	if (ret)
1170		return ret;
1171
1172	if (bytes > resctrl_rmid_realloc_limit)
1173		return -EINVAL;
1174
1175	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
1176
1177	return nbytes;
1178}
1179
1180/*
1181 * rdtgroup_mode_show - Display mode of this resource group
1182 */
1183static int rdtgroup_mode_show(struct kernfs_open_file *of,
1184			      struct seq_file *s, void *v)
1185{
1186	struct rdtgroup *rdtgrp;
1187
1188	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1189	if (!rdtgrp) {
1190		rdtgroup_kn_unlock(of->kn);
1191		return -ENOENT;
1192	}
1193
1194	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1195
1196	rdtgroup_kn_unlock(of->kn);
1197	return 0;
1198}
1199
1200static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1201{
1202	switch (my_type) {
1203	case CDP_CODE:
1204		return CDP_DATA;
1205	case CDP_DATA:
1206		return CDP_CODE;
 
 
 
 
 
 
 
 
 
 
 
 
1207	default:
1208	case CDP_NONE:
1209		return CDP_NONE;
1210	}
1211}
1212
1213static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
1214					struct seq_file *seq, void *v)
1215{
1216	struct resctrl_schema *s = of->kn->parent->priv;
1217	struct rdt_resource *r = s->res;
 
 
 
 
 
 
 
 
 
1218
1219	seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
 
 
1220
1221	return 0;
1222}
1223
1224/**
1225 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1226 * @r: Resource to which domain instance @d belongs.
1227 * @d: The domain instance for which @closid is being tested.
1228 * @cbm: Capacity bitmask being tested.
1229 * @closid: Intended closid for @cbm.
1230 * @type: CDP type of @r.
1231 * @exclusive: Only check if overlaps with exclusive resource groups
1232 *
1233 * Checks if provided @cbm intended to be used for @closid on domain
1234 * @d overlaps with any other closids or other hardware usage associated
1235 * with this domain. If @exclusive is true then only overlaps with
1236 * resource groups in exclusive mode will be considered. If @exclusive
1237 * is false then overlaps with any resource group or hardware entities
1238 * will be considered.
1239 *
1240 * @cbm is unsigned long, even if only 32 bits are used, to make the
1241 * bitmap functions work correctly.
1242 *
1243 * Return: false if CBM does not overlap, true if it does.
1244 */
1245static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
1246				    unsigned long cbm, int closid,
1247				    enum resctrl_conf_type type, bool exclusive)
1248{
1249	enum rdtgrp_mode mode;
1250	unsigned long ctrl_b;
 
1251	int i;
1252
1253	/* Check for any overlap with regions used by hardware directly */
1254	if (!exclusive) {
1255		ctrl_b = r->cache.shareable_bits;
1256		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1257			return true;
1258	}
1259
1260	/* Check for overlap with other resource groups */
1261	for (i = 0; i < closids_supported(); i++) {
1262		ctrl_b = resctrl_arch_get_config(r, d, i, type);
 
1263		mode = rdtgroup_mode_by_closid(i);
1264		if (closid_allocated(i) && i != closid &&
1265		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1266			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1267				if (exclusive) {
1268					if (mode == RDT_MODE_EXCLUSIVE)
1269						return true;
1270					continue;
1271				}
1272				return true;
1273			}
1274		}
1275	}
1276
1277	return false;
1278}
1279
1280/**
1281 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1282 * @s: Schema for the resource to which domain instance @d belongs.
1283 * @d: The domain instance for which @closid is being tested.
1284 * @cbm: Capacity bitmask being tested.
1285 * @closid: Intended closid for @cbm.
1286 * @exclusive: Only check if overlaps with exclusive resource groups
1287 *
1288 * Resources that can be allocated using a CBM can use the CBM to control
1289 * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
1290 * for overlap. Overlap test is not limited to the specific resource for
1291 * which the CBM is intended though - when dealing with CDP resources that
1292 * share the underlying hardware the overlap check should be performed on
1293 * the CDP resource sharing the hardware also.
1294 *
1295 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1296 * overlap test.
1297 *
1298 * Return: true if CBM overlap detected, false if there is no overlap
1299 */
1300bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
1301			   unsigned long cbm, int closid, bool exclusive)
1302{
1303	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
1304	struct rdt_resource *r = s->res;
1305
1306	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
1307				    exclusive))
1308		return true;
1309
1310	if (!resctrl_arch_get_cdp_enabled(r->rid))
1311		return false;
1312	return  __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
 
1313}
1314
1315/**
1316 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1317 * @rdtgrp: Resource group identified through its closid.
1318 *
1319 * An exclusive resource group implies that there should be no sharing of
1320 * its allocated resources. At the time this group is considered to be
1321 * exclusive this test can determine if its current schemata supports this
1322 * setting by testing for overlap with all other resource groups.
1323 *
1324 * Return: true if resource group can be exclusive, false if there is overlap
1325 * with allocations of other resource groups and thus this resource group
1326 * cannot be exclusive.
1327 */
1328static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1329{
1330	int closid = rdtgrp->closid;
1331	struct rdt_ctrl_domain *d;
1332	struct resctrl_schema *s;
1333	struct rdt_resource *r;
1334	bool has_cache = false;
1335	u32 ctrl;
1336
1337	/* Walking r->domains, ensure it can't race with cpuhp */
1338	lockdep_assert_cpus_held();
1339
1340	list_for_each_entry(s, &resctrl_schema_all, list) {
1341		r = s->res;
1342		if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
1343			continue;
1344		has_cache = true;
1345		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1346			ctrl = resctrl_arch_get_config(r, d, closid,
1347						       s->conf_type);
1348			if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
1349				rdt_last_cmd_puts("Schemata overlaps\n");
1350				return false;
1351			}
1352		}
1353	}
1354
1355	if (!has_cache) {
1356		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1357		return false;
1358	}
1359
1360	return true;
1361}
1362
1363/*
1364 * rdtgroup_mode_write - Modify the resource group's mode
 
1365 */
1366static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1367				   char *buf, size_t nbytes, loff_t off)
1368{
1369	struct rdtgroup *rdtgrp;
1370	enum rdtgrp_mode mode;
1371	int ret = 0;
1372
1373	/* Valid input requires a trailing newline */
1374	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1375		return -EINVAL;
1376	buf[nbytes - 1] = '\0';
1377
1378	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1379	if (!rdtgrp) {
1380		rdtgroup_kn_unlock(of->kn);
1381		return -ENOENT;
1382	}
1383
1384	rdt_last_cmd_clear();
1385
1386	mode = rdtgrp->mode;
1387
1388	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1389	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1390	    (!strcmp(buf, "pseudo-locksetup") &&
1391	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1392	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1393		goto out;
1394
1395	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1396		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1397		ret = -EINVAL;
1398		goto out;
1399	}
1400
1401	if (!strcmp(buf, "shareable")) {
1402		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1403			ret = rdtgroup_locksetup_exit(rdtgrp);
1404			if (ret)
1405				goto out;
1406		}
1407		rdtgrp->mode = RDT_MODE_SHAREABLE;
1408	} else if (!strcmp(buf, "exclusive")) {
1409		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1410			ret = -EINVAL;
1411			goto out;
1412		}
1413		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1414			ret = rdtgroup_locksetup_exit(rdtgrp);
1415			if (ret)
1416				goto out;
1417		}
1418		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1419	} else if (!strcmp(buf, "pseudo-locksetup")) {
1420		ret = rdtgroup_locksetup_enter(rdtgrp);
1421		if (ret)
1422			goto out;
1423		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1424	} else {
1425		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1426		ret = -EINVAL;
1427	}
1428
1429out:
1430	rdtgroup_kn_unlock(of->kn);
1431	return ret ?: nbytes;
1432}
1433
1434/**
1435 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1436 * @r: RDT resource to which @d belongs.
1437 * @d: RDT domain instance.
1438 * @cbm: bitmask for which the size should be computed.
1439 *
1440 * The bitmask provided associated with the RDT domain instance @d will be
1441 * translated into how many bytes it represents. The size in bytes is
1442 * computed by first dividing the total cache size by the CBM length to
1443 * determine how many bytes each bit in the bitmask represents. The result
1444 * is multiplied with the number of bits set in the bitmask.
1445 *
1446 * @cbm is unsigned long, even if only 32 bits are used to make the
1447 * bitmap functions work correctly.
1448 */
1449unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1450				  struct rdt_ctrl_domain *d, unsigned long cbm)
1451{
 
1452	unsigned int size = 0;
1453	struct cacheinfo *ci;
1454	int num_b;
1455
1456	if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
1457		return size;
1458
1459	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1460	ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
1461	if (ci)
1462		size = ci->size / r->cache.cbm_len * num_b;
 
 
 
 
1463
1464	return size;
1465}
1466
1467/*
1468 * rdtgroup_size_show - Display size in bytes of allocated regions
1469 *
1470 * The "size" file mirrors the layout of the "schemata" file, printing the
1471 * size in bytes of each region instead of the capacity bitmask.
 
1472 */
1473static int rdtgroup_size_show(struct kernfs_open_file *of,
1474			      struct seq_file *s, void *v)
1475{
1476	struct resctrl_schema *schema;
1477	enum resctrl_conf_type type;
1478	struct rdt_ctrl_domain *d;
1479	struct rdtgroup *rdtgrp;
1480	struct rdt_resource *r;
 
1481	unsigned int size;
1482	int ret = 0;
1483	u32 closid;
1484	bool sep;
1485	u32 ctrl;
1486
1487	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1488	if (!rdtgrp) {
1489		rdtgroup_kn_unlock(of->kn);
1490		return -ENOENT;
1491	}
1492
1493	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1494		if (!rdtgrp->plr->d) {
1495			rdt_last_cmd_clear();
1496			rdt_last_cmd_puts("Cache domain offline\n");
1497			ret = -ENODEV;
1498		} else {
1499			seq_printf(s, "%*s:", max_name_width,
1500				   rdtgrp->plr->s->name);
1501			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
1502						    rdtgrp->plr->d,
1503						    rdtgrp->plr->cbm);
1504			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
1505		}
1506		goto out;
1507	}
1508
1509	closid = rdtgrp->closid;
1510
1511	list_for_each_entry(schema, &resctrl_schema_all, list) {
1512		r = schema->res;
1513		type = schema->conf_type;
1514		sep = false;
1515		seq_printf(s, "%*s:", max_name_width, schema->name);
1516		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1517			if (sep)
1518				seq_putc(s, ';');
1519			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1520				size = 0;
1521			} else {
1522				if (is_mba_sc(r))
1523					ctrl = d->mbps_val[closid];
1524				else
1525					ctrl = resctrl_arch_get_config(r, d,
1526								       closid,
1527								       type);
1528				if (r->rid == RDT_RESOURCE_MBA ||
1529				    r->rid == RDT_RESOURCE_SMBA)
1530					size = ctrl;
1531				else
1532					size = rdtgroup_cbm_to_size(r, d, ctrl);
1533			}
1534			seq_printf(s, "%d=%u", d->hdr.id, size);
1535			sep = true;
1536		}
1537		seq_putc(s, '\n');
1538	}
1539
1540out:
1541	rdtgroup_kn_unlock(of->kn);
1542
1543	return ret;
1544}
1545
1546struct mon_config_info {
1547	u32 evtid;
1548	u32 mon_config;
1549};
1550
1551#define INVALID_CONFIG_INDEX   UINT_MAX
1552
1553/**
1554 * mon_event_config_index_get - get the hardware index for the
1555 *                              configurable event
1556 * @evtid: event id.
1557 *
1558 * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
1559 *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
1560 *         INVALID_CONFIG_INDEX for invalid evtid
1561 */
1562static inline unsigned int mon_event_config_index_get(u32 evtid)
1563{
1564	switch (evtid) {
1565	case QOS_L3_MBM_TOTAL_EVENT_ID:
1566		return 0;
1567	case QOS_L3_MBM_LOCAL_EVENT_ID:
1568		return 1;
1569	default:
1570		/* Should never reach here */
1571		return INVALID_CONFIG_INDEX;
1572	}
1573}
1574
1575static void mon_event_config_read(void *info)
1576{
1577	struct mon_config_info *mon_info = info;
1578	unsigned int index;
1579	u64 msrval;
1580
1581	index = mon_event_config_index_get(mon_info->evtid);
1582	if (index == INVALID_CONFIG_INDEX) {
1583		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1584		return;
1585	}
1586	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
1587
1588	/* Report only the valid event configuration bits */
1589	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
1590}
1591
1592static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
1593{
1594	smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
1595}
1596
1597static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
1598{
1599	struct mon_config_info mon_info;
1600	struct rdt_mon_domain *dom;
1601	bool sep = false;
1602
1603	cpus_read_lock();
1604	mutex_lock(&rdtgroup_mutex);
1605
1606	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1607		if (sep)
1608			seq_puts(s, ";");
1609
1610		memset(&mon_info, 0, sizeof(struct mon_config_info));
1611		mon_info.evtid = evtid;
1612		mondata_config_read(dom, &mon_info);
1613
1614		seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
1615		sep = true;
1616	}
1617	seq_puts(s, "\n");
1618
1619	mutex_unlock(&rdtgroup_mutex);
1620	cpus_read_unlock();
1621
1622	return 0;
1623}
1624
1625static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
1626				       struct seq_file *seq, void *v)
1627{
1628	struct rdt_resource *r = of->kn->parent->priv;
1629
1630	mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
1631
1632	return 0;
1633}
1634
1635static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
1636				       struct seq_file *seq, void *v)
1637{
1638	struct rdt_resource *r = of->kn->parent->priv;
1639
1640	mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
1641
1642	return 0;
1643}
1644
1645static void mon_event_config_write(void *info)
1646{
1647	struct mon_config_info *mon_info = info;
1648	unsigned int index;
1649
1650	index = mon_event_config_index_get(mon_info->evtid);
1651	if (index == INVALID_CONFIG_INDEX) {
1652		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1653		return;
1654	}
1655	wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
1656}
1657
1658static void mbm_config_write_domain(struct rdt_resource *r,
1659				    struct rdt_mon_domain *d, u32 evtid, u32 val)
1660{
1661	struct mon_config_info mon_info = {0};
1662
1663	/*
1664	 * Read the current config value first. If both are the same then
1665	 * no need to write it again.
1666	 */
1667	mon_info.evtid = evtid;
1668	mondata_config_read(d, &mon_info);
1669	if (mon_info.mon_config == val)
1670		return;
1671
1672	mon_info.mon_config = val;
1673
1674	/*
1675	 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
1676	 * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE
1677	 * are scoped at the domain level. Writing any of these MSRs
1678	 * on one CPU is observed by all the CPUs in the domain.
1679	 */
1680	smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
1681			      &mon_info, 1);
1682
1683	/*
1684	 * When an Event Configuration is changed, the bandwidth counters
1685	 * for all RMIDs and Events will be cleared by the hardware. The
1686	 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
1687	 * every RMID on the next read to any event for every RMID.
1688	 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
1689	 * cleared while it is tracked by the hardware. Clear the
1690	 * mbm_local and mbm_total counts for all the RMIDs.
1691	 */
1692	resctrl_arch_reset_rmid_all(r, d);
1693}
1694
1695static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
1696{
1697	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
1698	char *dom_str = NULL, *id_str;
1699	unsigned long dom_id, val;
1700	struct rdt_mon_domain *d;
1701
1702	/* Walking r->domains, ensure it can't race with cpuhp */
1703	lockdep_assert_cpus_held();
1704
1705next:
1706	if (!tok || tok[0] == '\0')
1707		return 0;
1708
1709	/* Start processing the strings for each domain */
1710	dom_str = strim(strsep(&tok, ";"));
1711	id_str = strsep(&dom_str, "=");
1712
1713	if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1714		rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
1715		return -EINVAL;
1716	}
1717
1718	if (!dom_str || kstrtoul(dom_str, 16, &val)) {
1719		rdt_last_cmd_puts("Non-numeric event configuration value\n");
1720		return -EINVAL;
1721	}
1722
1723	/* Value from user cannot be more than the supported set of events */
1724	if ((val & hw_res->mbm_cfg_mask) != val) {
1725		rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
1726				    hw_res->mbm_cfg_mask);
1727		return -EINVAL;
1728	}
1729
1730	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1731		if (d->hdr.id == dom_id) {
1732			mbm_config_write_domain(r, d, evtid, val);
1733			goto next;
1734		}
1735	}
1736
1737	return -EINVAL;
1738}
1739
1740static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
1741					    char *buf, size_t nbytes,
1742					    loff_t off)
1743{
1744	struct rdt_resource *r = of->kn->parent->priv;
1745	int ret;
1746
1747	/* Valid input requires a trailing newline */
1748	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1749		return -EINVAL;
1750
1751	cpus_read_lock();
1752	mutex_lock(&rdtgroup_mutex);
1753
1754	rdt_last_cmd_clear();
1755
1756	buf[nbytes - 1] = '\0';
1757
1758	ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
1759
1760	mutex_unlock(&rdtgroup_mutex);
1761	cpus_read_unlock();
1762
1763	return ret ?: nbytes;
1764}
1765
1766static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
1767					    char *buf, size_t nbytes,
1768					    loff_t off)
1769{
1770	struct rdt_resource *r = of->kn->parent->priv;
1771	int ret;
1772
1773	/* Valid input requires a trailing newline */
1774	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1775		return -EINVAL;
1776
1777	cpus_read_lock();
1778	mutex_lock(&rdtgroup_mutex);
1779
1780	rdt_last_cmd_clear();
1781
1782	buf[nbytes - 1] = '\0';
1783
1784	ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
1785
1786	mutex_unlock(&rdtgroup_mutex);
1787	cpus_read_unlock();
1788
1789	return ret ?: nbytes;
1790}
1791
1792/* rdtgroup information files for one cache resource. */
1793static struct rftype res_common_files[] = {
1794	{
1795		.name		= "last_cmd_status",
1796		.mode		= 0444,
1797		.kf_ops		= &rdtgroup_kf_single_ops,
1798		.seq_show	= rdt_last_cmd_status_show,
1799		.fflags		= RFTYPE_TOP_INFO,
1800	},
1801	{
1802		.name		= "num_closids",
1803		.mode		= 0444,
1804		.kf_ops		= &rdtgroup_kf_single_ops,
1805		.seq_show	= rdt_num_closids_show,
1806		.fflags		= RFTYPE_CTRL_INFO,
1807	},
1808	{
1809		.name		= "mon_features",
1810		.mode		= 0444,
1811		.kf_ops		= &rdtgroup_kf_single_ops,
1812		.seq_show	= rdt_mon_features_show,
1813		.fflags		= RFTYPE_MON_INFO,
1814	},
1815	{
1816		.name		= "num_rmids",
1817		.mode		= 0444,
1818		.kf_ops		= &rdtgroup_kf_single_ops,
1819		.seq_show	= rdt_num_rmids_show,
1820		.fflags		= RFTYPE_MON_INFO,
1821	},
1822	{
1823		.name		= "cbm_mask",
1824		.mode		= 0444,
1825		.kf_ops		= &rdtgroup_kf_single_ops,
1826		.seq_show	= rdt_default_ctrl_show,
1827		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1828	},
1829	{
1830		.name		= "min_cbm_bits",
1831		.mode		= 0444,
1832		.kf_ops		= &rdtgroup_kf_single_ops,
1833		.seq_show	= rdt_min_cbm_bits_show,
1834		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1835	},
1836	{
1837		.name		= "shareable_bits",
1838		.mode		= 0444,
1839		.kf_ops		= &rdtgroup_kf_single_ops,
1840		.seq_show	= rdt_shareable_bits_show,
1841		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1842	},
1843	{
1844		.name		= "bit_usage",
1845		.mode		= 0444,
1846		.kf_ops		= &rdtgroup_kf_single_ops,
1847		.seq_show	= rdt_bit_usage_show,
1848		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1849	},
1850	{
1851		.name		= "min_bandwidth",
1852		.mode		= 0444,
1853		.kf_ops		= &rdtgroup_kf_single_ops,
1854		.seq_show	= rdt_min_bw_show,
1855		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1856	},
1857	{
1858		.name		= "bandwidth_gran",
1859		.mode		= 0444,
1860		.kf_ops		= &rdtgroup_kf_single_ops,
1861		.seq_show	= rdt_bw_gran_show,
1862		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1863	},
1864	{
1865		.name		= "delay_linear",
1866		.mode		= 0444,
1867		.kf_ops		= &rdtgroup_kf_single_ops,
1868		.seq_show	= rdt_delay_linear_show,
1869		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1870	},
1871	/*
1872	 * Platform specific which (if any) capabilities are provided by
1873	 * thread_throttle_mode. Defer "fflags" initialization to platform
1874	 * discovery.
1875	 */
1876	{
1877		.name		= "thread_throttle_mode",
1878		.mode		= 0444,
1879		.kf_ops		= &rdtgroup_kf_single_ops,
1880		.seq_show	= rdt_thread_throttle_mode_show,
1881	},
1882	{
1883		.name		= "max_threshold_occupancy",
1884		.mode		= 0644,
1885		.kf_ops		= &rdtgroup_kf_single_ops,
1886		.write		= max_threshold_occ_write,
1887		.seq_show	= max_threshold_occ_show,
1888		.fflags		= RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
1889	},
1890	{
1891		.name		= "mbm_total_bytes_config",
1892		.mode		= 0644,
1893		.kf_ops		= &rdtgroup_kf_single_ops,
1894		.seq_show	= mbm_total_bytes_config_show,
1895		.write		= mbm_total_bytes_config_write,
1896	},
1897	{
1898		.name		= "mbm_local_bytes_config",
1899		.mode		= 0644,
1900		.kf_ops		= &rdtgroup_kf_single_ops,
1901		.seq_show	= mbm_local_bytes_config_show,
1902		.write		= mbm_local_bytes_config_write,
1903	},
1904	{
1905		.name		= "cpus",
1906		.mode		= 0644,
1907		.kf_ops		= &rdtgroup_kf_single_ops,
1908		.write		= rdtgroup_cpus_write,
1909		.seq_show	= rdtgroup_cpus_show,
1910		.fflags		= RFTYPE_BASE,
1911	},
1912	{
1913		.name		= "cpus_list",
1914		.mode		= 0644,
1915		.kf_ops		= &rdtgroup_kf_single_ops,
1916		.write		= rdtgroup_cpus_write,
1917		.seq_show	= rdtgroup_cpus_show,
1918		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1919		.fflags		= RFTYPE_BASE,
1920	},
1921	{
1922		.name		= "tasks",
1923		.mode		= 0644,
1924		.kf_ops		= &rdtgroup_kf_single_ops,
1925		.write		= rdtgroup_tasks_write,
1926		.seq_show	= rdtgroup_tasks_show,
1927		.fflags		= RFTYPE_BASE,
1928	},
1929	{
1930		.name		= "mon_hw_id",
1931		.mode		= 0444,
1932		.kf_ops		= &rdtgroup_kf_single_ops,
1933		.seq_show	= rdtgroup_rmid_show,
1934		.fflags		= RFTYPE_MON_BASE | RFTYPE_DEBUG,
1935	},
1936	{
1937		.name		= "schemata",
1938		.mode		= 0644,
1939		.kf_ops		= &rdtgroup_kf_single_ops,
1940		.write		= rdtgroup_schemata_write,
1941		.seq_show	= rdtgroup_schemata_show,
1942		.fflags		= RFTYPE_CTRL_BASE,
1943	},
1944	{
1945		.name		= "mode",
1946		.mode		= 0644,
1947		.kf_ops		= &rdtgroup_kf_single_ops,
1948		.write		= rdtgroup_mode_write,
1949		.seq_show	= rdtgroup_mode_show,
1950		.fflags		= RFTYPE_CTRL_BASE,
1951	},
1952	{
1953		.name		= "size",
1954		.mode		= 0444,
1955		.kf_ops		= &rdtgroup_kf_single_ops,
1956		.seq_show	= rdtgroup_size_show,
1957		.fflags		= RFTYPE_CTRL_BASE,
1958	},
1959	{
1960		.name		= "sparse_masks",
1961		.mode		= 0444,
1962		.kf_ops		= &rdtgroup_kf_single_ops,
1963		.seq_show	= rdt_has_sparse_bitmasks_show,
1964		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1965	},
1966	{
1967		.name		= "ctrl_hw_id",
1968		.mode		= 0444,
1969		.kf_ops		= &rdtgroup_kf_single_ops,
1970		.seq_show	= rdtgroup_closid_show,
1971		.fflags		= RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
1972	},
1973
1974};
1975
1976static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1977{
1978	struct rftype *rfts, *rft;
1979	int ret, len;
1980
1981	rfts = res_common_files;
1982	len = ARRAY_SIZE(res_common_files);
1983
1984	lockdep_assert_held(&rdtgroup_mutex);
1985
1986	if (resctrl_debug)
1987		fflags |= RFTYPE_DEBUG;
1988
1989	for (rft = rfts; rft < rfts + len; rft++) {
1990		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
1991			ret = rdtgroup_add_file(kn, rft);
1992			if (ret)
1993				goto error;
1994		}
1995	}
1996
1997	return 0;
1998error:
1999	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
2000	while (--rft >= rfts) {
2001		if ((fflags & rft->fflags) == rft->fflags)
2002			kernfs_remove_by_name(kn, rft->name);
2003	}
2004	return ret;
2005}
2006
2007static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
2008{
2009	struct rftype *rfts, *rft;
2010	int len;
2011
2012	rfts = res_common_files;
2013	len = ARRAY_SIZE(res_common_files);
2014
2015	for (rft = rfts; rft < rfts + len; rft++) {
2016		if (!strcmp(rft->name, name))
2017			return rft;
2018	}
2019
2020	return NULL;
2021}
2022
2023void __init thread_throttle_mode_init(void)
2024{
2025	struct rftype *rft;
2026
2027	rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
2028	if (!rft)
2029		return;
2030
2031	rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB;
2032}
2033
2034void __init mbm_config_rftype_init(const char *config)
2035{
2036	struct rftype *rft;
2037
2038	rft = rdtgroup_get_rftype_by_name(config);
2039	if (rft)
2040		rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE;
2041}
2042
2043/**
2044 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
2045 * @r: The resource group with which the file is associated.
2046 * @name: Name of the file
2047 *
2048 * The permissions of named resctrl file, directory, or link are modified
2049 * to not allow read, write, or execute by any user.
2050 *
2051 * WARNING: This function is intended to communicate to the user that the
2052 * resctrl file has been locked down - that it is not relevant to the
2053 * particular state the system finds itself in. It should not be relied
2054 * on to protect from user access because after the file's permissions
2055 * are restricted the user can still change the permissions using chmod
2056 * from the command line.
2057 *
2058 * Return: 0 on success, <0 on failure.
2059 */
2060int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
2061{
2062	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2063	struct kernfs_node *kn;
2064	int ret = 0;
2065
2066	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2067	if (!kn)
2068		return -ENOENT;
2069
2070	switch (kernfs_type(kn)) {
2071	case KERNFS_DIR:
2072		iattr.ia_mode = S_IFDIR;
2073		break;
2074	case KERNFS_FILE:
2075		iattr.ia_mode = S_IFREG;
2076		break;
2077	case KERNFS_LINK:
2078		iattr.ia_mode = S_IFLNK;
2079		break;
2080	}
2081
2082	ret = kernfs_setattr(kn, &iattr);
2083	kernfs_put(kn);
2084	return ret;
2085}
2086
2087/**
2088 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
2089 * @r: The resource group with which the file is associated.
2090 * @name: Name of the file
2091 * @mask: Mask of permissions that should be restored
2092 *
2093 * Restore the permissions of the named file. If @name is a directory the
2094 * permissions of its parent will be used.
2095 *
2096 * Return: 0 on success, <0 on failure.
2097 */
2098int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
2099			     umode_t mask)
2100{
2101	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2102	struct kernfs_node *kn, *parent;
2103	struct rftype *rfts, *rft;
2104	int ret, len;
2105
2106	rfts = res_common_files;
2107	len = ARRAY_SIZE(res_common_files);
2108
2109	for (rft = rfts; rft < rfts + len; rft++) {
2110		if (!strcmp(rft->name, name))
2111			iattr.ia_mode = rft->mode & mask;
2112	}
2113
2114	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2115	if (!kn)
2116		return -ENOENT;
2117
2118	switch (kernfs_type(kn)) {
2119	case KERNFS_DIR:
2120		parent = kernfs_get_parent(kn);
2121		if (parent) {
2122			iattr.ia_mode |= parent->mode;
2123			kernfs_put(parent);
2124		}
2125		iattr.ia_mode |= S_IFDIR;
2126		break;
2127	case KERNFS_FILE:
2128		iattr.ia_mode |= S_IFREG;
2129		break;
2130	case KERNFS_LINK:
2131		iattr.ia_mode |= S_IFLNK;
2132		break;
2133	}
2134
2135	ret = kernfs_setattr(kn, &iattr);
2136	kernfs_put(kn);
2137	return ret;
2138}
2139
2140static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
2141				      unsigned long fflags)
2142{
2143	struct kernfs_node *kn_subdir;
2144	int ret;
2145
2146	kn_subdir = kernfs_create_dir(kn_info, name,
2147				      kn_info->mode, priv);
2148	if (IS_ERR(kn_subdir))
2149		return PTR_ERR(kn_subdir);
2150
 
2151	ret = rdtgroup_kn_set_ugid(kn_subdir);
2152	if (ret)
2153		return ret;
2154
2155	ret = rdtgroup_add_files(kn_subdir, fflags);
2156	if (!ret)
2157		kernfs_activate(kn_subdir);
2158
2159	return ret;
2160}
2161
2162static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
2163{
2164	struct resctrl_schema *s;
2165	struct rdt_resource *r;
2166	unsigned long fflags;
2167	char name[32];
2168	int ret;
2169
2170	/* create the directory */
2171	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
2172	if (IS_ERR(kn_info))
2173		return PTR_ERR(kn_info);
 
2174
2175	ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
2176	if (ret)
2177		goto out_destroy;
2178
2179	/* loop over enabled controls, these are all alloc_capable */
2180	list_for_each_entry(s, &resctrl_schema_all, list) {
2181		r = s->res;
2182		fflags = r->fflags | RFTYPE_CTRL_INFO;
2183		ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
2184		if (ret)
2185			goto out_destroy;
2186	}
2187
2188	for_each_mon_capable_rdt_resource(r) {
2189		fflags = r->fflags | RFTYPE_MON_INFO;
2190		sprintf(name, "%s_MON", r->name);
2191		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
2192		if (ret)
2193			goto out_destroy;
2194	}
2195
 
 
 
 
 
 
2196	ret = rdtgroup_kn_set_ugid(kn_info);
2197	if (ret)
2198		goto out_destroy;
2199
2200	kernfs_activate(kn_info);
2201
2202	return 0;
2203
2204out_destroy:
2205	kernfs_remove(kn_info);
2206	return ret;
2207}
2208
2209static int
2210mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
2211		    char *name, struct kernfs_node **dest_kn)
2212{
2213	struct kernfs_node *kn;
2214	int ret;
2215
2216	/* create the directory */
2217	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2218	if (IS_ERR(kn))
2219		return PTR_ERR(kn);
2220
2221	if (dest_kn)
2222		*dest_kn = kn;
2223
 
 
 
 
 
 
2224	ret = rdtgroup_kn_set_ugid(kn);
2225	if (ret)
2226		goto out_destroy;
2227
2228	kernfs_activate(kn);
2229
2230	return 0;
2231
2232out_destroy:
2233	kernfs_remove(kn);
2234	return ret;
2235}
2236
2237static void l3_qos_cfg_update(void *arg)
2238{
2239	bool *enable = arg;
2240
2241	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
2242}
2243
2244static void l2_qos_cfg_update(void *arg)
2245{
2246	bool *enable = arg;
2247
2248	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
2249}
2250
2251static inline bool is_mba_linear(void)
2252{
2253	return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
2254}
2255
2256static int set_cache_qos_cfg(int level, bool enable)
2257{
2258	void (*update)(void *arg);
2259	struct rdt_ctrl_domain *d;
2260	struct rdt_resource *r_l;
2261	cpumask_var_t cpu_mask;
 
2262	int cpu;
2263
2264	/* Walking r->domains, ensure it can't race with cpuhp */
2265	lockdep_assert_cpus_held();
2266
2267	if (level == RDT_RESOURCE_L3)
2268		update = l3_qos_cfg_update;
2269	else if (level == RDT_RESOURCE_L2)
2270		update = l2_qos_cfg_update;
2271	else
2272		return -EINVAL;
2273
2274	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2275		return -ENOMEM;
2276
2277	r_l = &rdt_resources_all[level].r_resctrl;
2278	list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
2279		if (r_l->cache.arch_has_per_cpu_cfg)
2280			/* Pick all the CPUs in the domain instance */
2281			for_each_cpu(cpu, &d->hdr.cpu_mask)
2282				cpumask_set_cpu(cpu, cpu_mask);
2283		else
2284			/* Pick one CPU from each domain instance to update MSR */
2285			cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
2286	}
2287
2288	/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
2289	on_each_cpu_mask(cpu_mask, update, &enable, 1);
2290
2291	free_cpumask_var(cpu_mask);
2292
2293	return 0;
2294}
2295
2296/* Restore the qos cfg state when a domain comes online */
2297void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
2298{
2299	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2300
2301	if (!r->cdp_capable)
2302		return;
2303
2304	if (r->rid == RDT_RESOURCE_L2)
2305		l2_qos_cfg_update(&hw_res->cdp_enabled);
2306
2307	if (r->rid == RDT_RESOURCE_L3)
2308		l3_qos_cfg_update(&hw_res->cdp_enabled);
2309}
2310
2311static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
2312{
2313	u32 num_closid = resctrl_arch_get_num_closid(r);
2314	int cpu = cpumask_any(&d->hdr.cpu_mask);
2315	int i;
2316
2317	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
2318				   GFP_KERNEL, cpu_to_node(cpu));
2319	if (!d->mbps_val)
2320		return -ENOMEM;
2321
2322	for (i = 0; i < num_closid; i++)
2323		d->mbps_val[i] = MBA_MAX_MBPS;
2324
2325	return 0;
2326}
2327
2328static void mba_sc_domain_destroy(struct rdt_resource *r,
2329				  struct rdt_ctrl_domain *d)
2330{
2331	kfree(d->mbps_val);
2332	d->mbps_val = NULL;
2333}
2334
2335/*
2336 * MBA software controller is supported only if
2337 * MBM is supported and MBA is in linear scale,
2338 * and the MBM monitor scope is the same as MBA
2339 * control scope.
2340 */
2341static bool supports_mba_mbps(void)
2342{
2343	struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2344	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2345
2346	return (is_mbm_local_enabled() &&
2347		r->alloc_capable && is_mba_linear() &&
2348		r->ctrl_scope == rmbm->mon_scope);
2349}
2350
2351/*
2352 * Enable or disable the MBA software controller
2353 * which helps user specify bandwidth in MBps.
 
 
2354 */
2355static int set_mba_sc(bool mba_sc)
2356{
2357	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2358	u32 num_closid = resctrl_arch_get_num_closid(r);
2359	struct rdt_ctrl_domain *d;
2360	int i;
2361
2362	if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
 
2363		return -EINVAL;
2364
2365	r->membw.mba_sc = mba_sc;
2366
2367	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2368		for (i = 0; i < num_closid; i++)
2369			d->mbps_val[i] = MBA_MAX_MBPS;
2370	}
2371
2372	return 0;
2373}
2374
2375static int cdp_enable(int level)
2376{
2377	struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
 
 
2378	int ret;
2379
2380	if (!r_l->alloc_capable)
 
2381		return -EINVAL;
2382
2383	ret = set_cache_qos_cfg(level, true);
2384	if (!ret)
2385		rdt_resources_all[level].cdp_enabled = true;
 
 
 
 
 
 
 
 
 
 
 
2386
2387	return ret;
 
 
 
2388}
2389
2390static void cdp_disable(int level)
2391{
2392	struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
 
 
2393
2394	if (r_hw->cdp_enabled) {
 
 
2395		set_cache_qos_cfg(level, false);
2396		r_hw->cdp_enabled = false;
2397	}
2398}
2399
2400int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
2401{
2402	struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
 
2403
2404	if (!hw_res->r_resctrl.cdp_capable)
2405		return -EINVAL;
 
 
2406
2407	if (enable)
2408		return cdp_enable(l);
2409
2410	cdp_disable(l);
2411
2412	return 0;
2413}
2414
2415/*
2416 * We don't allow rdtgroup directories to be created anywhere
2417 * except the root directory. Thus when looking for the rdtgroup
2418 * structure for a kernfs node we are either looking at a directory,
2419 * in which case the rdtgroup structure is pointed at by the "priv"
2420 * field, otherwise we have a file, and need only look to the parent
2421 * to find the rdtgroup.
2422 */
2423static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2424{
2425	if (kernfs_type(kn) == KERNFS_DIR) {
2426		/*
2427		 * All the resource directories use "kn->priv"
2428		 * to point to the "struct rdtgroup" for the
2429		 * resource. "info" and its subdirectories don't
2430		 * have rdtgroup structures, so return NULL here.
2431		 */
2432		if (kn == kn_info || kn->parent == kn_info)
2433			return NULL;
2434		else
2435			return kn->priv;
2436	} else {
2437		return kn->parent->priv;
2438	}
2439}
2440
2441static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2442{
2443	atomic_inc(&rdtgrp->waitcount);
2444	kernfs_break_active_protection(kn);
2445}
2446
2447static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2448{
2449	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2450	    (rdtgrp->flags & RDT_DELETED)) {
2451		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2452		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2453			rdtgroup_pseudo_lock_remove(rdtgrp);
2454		kernfs_unbreak_active_protection(kn);
2455		rdtgroup_remove(rdtgrp);
2456	} else {
2457		kernfs_unbreak_active_protection(kn);
2458	}
2459}
2460
2461struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2462{
2463	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2464
2465	if (!rdtgrp)
2466		return NULL;
2467
2468	rdtgroup_kn_get(rdtgrp, kn);
 
2469
2470	cpus_read_lock();
2471	mutex_lock(&rdtgroup_mutex);
2472
2473	/* Was this group deleted while we waited? */
2474	if (rdtgrp->flags & RDT_DELETED)
2475		return NULL;
2476
2477	return rdtgrp;
2478}
2479
2480void rdtgroup_kn_unlock(struct kernfs_node *kn)
2481{
2482	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2483
2484	if (!rdtgrp)
2485		return;
2486
2487	mutex_unlock(&rdtgroup_mutex);
2488	cpus_read_unlock();
2489
2490	rdtgroup_kn_put(rdtgrp, kn);
 
 
 
 
 
 
 
 
 
 
2491}
2492
/* Forward declaration: rdt_get_tree() uses this before its definition. */
static int mkdir_mondata_all(struct kernfs_node *parent_kn,
			     struct rdtgroup *prgrp,
			     struct kernfs_node **mon_data_kn);
2496
2497static void rdt_disable_ctx(void)
2498{
2499	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2500	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2501	set_mba_sc(false);
2502
2503	resctrl_debug = false;
2504}
2505
2506static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2507{
2508	int ret = 0;
2509
2510	if (ctx->enable_cdpl2) {
2511		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
2512		if (ret)
2513			goto out_done;
2514	}
2515
2516	if (ctx->enable_cdpl3) {
2517		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
2518		if (ret)
2519			goto out_cdpl2;
2520	}
2521
2522	if (ctx->enable_mba_mbps) {
2523		ret = set_mba_sc(true);
2524		if (ret)
2525			goto out_cdpl3;
2526	}
2527
2528	if (ctx->enable_debug)
2529		resctrl_debug = true;
2530
2531	return 0;
2532
2533out_cdpl3:
2534	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2535out_cdpl2:
2536	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2537out_done:
2538	return ret;
2539}
2540
2541static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
2542{
2543	struct resctrl_schema *s;
2544	const char *suffix = "";
2545	int ret, cl;
2546
2547	s = kzalloc(sizeof(*s), GFP_KERNEL);
2548	if (!s)
2549		return -ENOMEM;
2550
2551	s->res = r;
2552	s->num_closid = resctrl_arch_get_num_closid(r);
2553	if (resctrl_arch_get_cdp_enabled(r->rid))
2554		s->num_closid /= 2;
2555
2556	s->conf_type = type;
2557	switch (type) {
2558	case CDP_CODE:
2559		suffix = "CODE";
2560		break;
2561	case CDP_DATA:
2562		suffix = "DATA";
2563		break;
2564	case CDP_NONE:
2565		suffix = "";
2566		break;
2567	}
2568
2569	ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
2570	if (ret >= sizeof(s->name)) {
2571		kfree(s);
2572		return -EINVAL;
2573	}
2574
2575	cl = strlen(s->name);
2576
2577	/*
2578	 * If CDP is supported by this resource, but not enabled,
2579	 * include the suffix. This ensures the tabular format of the
2580	 * schemata file does not change between mounts of the filesystem.
2581	 */
2582	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
2583		cl += 4;
2584
2585	if (cl > max_name_width)
2586		max_name_width = cl;
2587
2588	INIT_LIST_HEAD(&s->list);
2589	list_add(&s->list, &resctrl_schema_all);
2590
2591	return 0;
2592}
2593
2594static int schemata_list_create(void)
2595{
2596	struct rdt_resource *r;
2597	int ret = 0;
2598
2599	for_each_alloc_capable_rdt_resource(r) {
2600		if (resctrl_arch_get_cdp_enabled(r->rid)) {
2601			ret = schemata_list_add(r, CDP_CODE);
2602			if (ret)
2603				break;
2604
2605			ret = schemata_list_add(r, CDP_DATA);
2606		} else {
2607			ret = schemata_list_add(r, CDP_NONE);
2608		}
2609
2610		if (ret)
2611			break;
2612	}
2613
2614	return ret;
2615}
2616
2617static void schemata_list_destroy(void)
2618{
2619	struct resctrl_schema *s, *tmp;
2620
2621	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2622		list_del(&s->list);
2623		kfree(s);
2624	}
2625}
2626
2627static int rdt_get_tree(struct fs_context *fc)
2628{
2629	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2630	unsigned long flags = RFTYPE_CTRL_BASE;
2631	struct rdt_mon_domain *dom;
2632	struct rdt_resource *r;
2633	int ret;
2634
2635	cpus_read_lock();
2636	mutex_lock(&rdtgroup_mutex);
2637	/*
2638	 * resctrl file system can only be mounted once.
2639	 */
2640	if (resctrl_mounted) {
2641		ret = -EBUSY;
2642		goto out;
2643	}
2644
2645	ret = rdtgroup_setup_root(ctx);
2646	if (ret)
2647		goto out;
2648
2649	ret = rdt_enable_ctx(ctx);
2650	if (ret)
2651		goto out_root;
2652
2653	ret = schemata_list_create();
2654	if (ret) {
2655		schemata_list_destroy();
2656		goto out_ctx;
2657	}
2658
2659	closid_init();
2660
2661	if (resctrl_arch_mon_capable())
2662		flags |= RFTYPE_MON;
2663
2664	ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
2665	if (ret)
2666		goto out_schemata_free;
2667
2668	kernfs_activate(rdtgroup_default.kn);
2669
2670	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2671	if (ret < 0)
2672		goto out_schemata_free;
2673
2674	if (resctrl_arch_mon_capable()) {
2675		ret = mongroup_create_dir(rdtgroup_default.kn,
2676					  &rdtgroup_default, "mon_groups",
2677					  &kn_mongrp);
2678		if (ret < 0)
2679			goto out_info;
 
2680
2681		ret = mkdir_mondata_all(rdtgroup_default.kn,
2682					&rdtgroup_default, &kn_mondata);
2683		if (ret < 0)
2684			goto out_mongrp;
 
2685		rdtgroup_default.mon.mon_data_kn = kn_mondata;
2686	}
2687
2688	ret = rdt_pseudo_lock_init();
2689	if (ret)
2690		goto out_mondata;
2691
2692	ret = kernfs_get_tree(fc);
2693	if (ret < 0)
2694		goto out_psl;
2695
2696	if (resctrl_arch_alloc_capable())
2697		resctrl_arch_enable_alloc();
2698	if (resctrl_arch_mon_capable())
2699		resctrl_arch_enable_mon();
2700
2701	if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
2702		resctrl_mounted = true;
2703
2704	if (is_mbm_enabled()) {
2705		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2706		list_for_each_entry(dom, &r->mon_domains, hdr.list)
2707			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
2708						   RESCTRL_PICK_ANY_CPU);
2709	}
2710
2711	goto out;
2712
2713out_psl:
2714	rdt_pseudo_lock_release();
2715out_mondata:
2716	if (resctrl_arch_mon_capable())
2717		kernfs_remove(kn_mondata);
2718out_mongrp:
2719	if (resctrl_arch_mon_capable())
2720		kernfs_remove(kn_mongrp);
2721out_info:
2722	kernfs_remove(kn_info);
2723out_schemata_free:
2724	schemata_list_destroy();
2725out_ctx:
2726	rdt_disable_ctx();
2727out_root:
2728	rdtgroup_destroy_root();
2729out:
2730	rdt_last_cmd_clear();
2731	mutex_unlock(&rdtgroup_mutex);
2732	cpus_read_unlock();
2733	return ret;
2734}
2735
/* Identifiers of the mount options listed in rdt_fs_parameters[]. */
enum rdt_param {
	Opt_cdp,	/* "cdp" */
	Opt_cdpl2,	/* "cdpl2" */
	Opt_mba_mbps,	/* "mba_MBps" */
	Opt_debug,	/* "debug" */
	nr__rdt_params
};
2743
2744static const struct fs_parameter_spec rdt_fs_parameters[] = {
2745	fsparam_flag("cdp",		Opt_cdp),
2746	fsparam_flag("cdpl2",		Opt_cdpl2),
2747	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2748	fsparam_flag("debug",		Opt_debug),
2749	{}
2750};
2751
 
 
 
 
 
2752static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2753{
2754	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2755	struct fs_parse_result result;
2756	const char *msg;
2757	int opt;
2758
2759	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2760	if (opt < 0)
2761		return opt;
2762
2763	switch (opt) {
2764	case Opt_cdp:
2765		ctx->enable_cdpl3 = true;
2766		return 0;
2767	case Opt_cdpl2:
2768		ctx->enable_cdpl2 = true;
2769		return 0;
2770	case Opt_mba_mbps:
2771		msg = "mba_MBps requires local MBM and linear scale MBA at L3 scope";
2772		if (!supports_mba_mbps())
2773			return invalfc(fc, msg);
2774		ctx->enable_mba_mbps = true;
2775		return 0;
2776	case Opt_debug:
2777		ctx->enable_debug = true;
2778		return 0;
2779	}
2780
2781	return -EINVAL;
2782}
2783
/*
 * fs_context .free callback: release the kernfs part of the context and
 * then the resctrl context allocated by rdt_init_fs_context().
 */
static void rdt_fs_context_free(struct fs_context *fc)
{
	struct rdt_fs_context *rdt_ctx = rdt_fc2context(fc);

	kernfs_free_fs_context(fc);

	kfree(rdt_ctx);
}
2791
2792static const struct fs_context_operations rdt_fs_context_ops = {
2793	.free		= rdt_fs_context_free,
2794	.parse_param	= rdt_parse_param,
2795	.get_tree	= rdt_get_tree,
2796};
2797
2798static int rdt_init_fs_context(struct fs_context *fc)
2799{
2800	struct rdt_fs_context *ctx;
2801
2802	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2803	if (!ctx)
2804		return -ENOMEM;
2805
 
2806	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2807	fc->fs_private = &ctx->kfc;
2808	fc->ops = &rdt_fs_context_ops;
2809	put_user_ns(fc->user_ns);
2810	fc->user_ns = get_user_ns(&init_user_ns);
2811	fc->global = true;
2812	return 0;
2813}
2814
2815static int reset_all_ctrls(struct rdt_resource *r)
2816{
2817	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2818	struct rdt_hw_ctrl_domain *hw_dom;
2819	struct msr_param msr_param;
2820	struct rdt_ctrl_domain *d;
2821	int i;
 
2822
2823	/* Walking r->domains, ensure it can't race with cpuhp */
2824	lockdep_assert_cpus_held();
2825
2826	msr_param.res = r;
2827	msr_param.low = 0;
2828	msr_param.high = hw_res->num_closid;
2829
2830	/*
2831	 * Disable resource control for this resource by setting all
2832	 * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
2833	 * from each domain to update the MSRs below.
2834	 */
2835	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2836		hw_dom = resctrl_to_arch_ctrl_dom(d);
2837
2838		for (i = 0; i < hw_res->num_closid; i++)
2839			hw_dom->ctrl_val[i] = r->default_ctrl;
2840		msr_param.dom = d;
2841		smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
2842	}
 
 
 
 
 
 
 
 
 
2843
2844	return 0;
2845}
2846
 
 
 
 
 
 
 
 
 
 
 
 
2847/*
2848 * Move tasks from one to the other group. If @from is NULL, then all tasks
2849 * in the systems are moved unconditionally (used for teardown).
2850 *
2851 * If @mask is not NULL the cpus on which moved tasks are running are set
2852 * in that mask so the update smp function call is restricted to affected
2853 * cpus.
2854 */
2855static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2856				 struct cpumask *mask)
2857{
2858	struct task_struct *p, *t;
2859
2860	read_lock(&tasklist_lock);
2861	for_each_process_thread(p, t) {
2862		if (!from || is_closid_match(t, from) ||
2863		    is_rmid_match(t, from)) {
2864			resctrl_arch_set_closid_rmid(t, to->closid,
2865						     to->mon.rmid);
2866
 
2867			/*
2868			 * Order the closid/rmid stores above before the loads
2869			 * in task_curr(). This pairs with the full barrier
2870			 * between the rq->curr update and resctrl_sched_in()
2871			 * during context switch.
2872			 */
2873			smp_mb();
2874
2875			/*
2876			 * If the task is on a CPU, set the CPU in the mask.
2877			 * The detection is inaccurate as tasks might move or
2878			 * schedule before the smp function call takes place.
2879			 * In such a case the function call is pointless, but
2880			 * there is no other side effect.
2881			 */
2882			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2883				cpumask_set_cpu(task_cpu(t), mask);
 
2884		}
2885	}
2886	read_unlock(&tasklist_lock);
2887}
2888
2889static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2890{
2891	struct rdtgroup *sentry, *stmp;
2892	struct list_head *head;
2893
2894	head = &rdtgrp->mon.crdtgrp_list;
2895	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2896		free_rmid(sentry->closid, sentry->mon.rmid);
2897		list_del(&sentry->mon.crdtgrp_list);
2898
2899		if (atomic_read(&sentry->waitcount) != 0)
2900			sentry->flags = RDT_DELETED;
2901		else
2902			rdtgroup_remove(sentry);
2903	}
2904}
2905
2906/*
2907 * Forcibly remove all of subdirectories under root.
2908 */
2909static void rmdir_all_sub(void)
2910{
2911	struct rdtgroup *rdtgrp, *tmp;
2912
2913	/* Move all tasks to the default resource group */
2914	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2915
2916	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2917		/* Free any child rmids */
2918		free_all_child_rdtgrp(rdtgrp);
2919
2920		/* Remove each rdtgroup other than root */
2921		if (rdtgrp == &rdtgroup_default)
2922			continue;
2923
2924		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2925		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2926			rdtgroup_pseudo_lock_remove(rdtgrp);
2927
2928		/*
2929		 * Give any CPUs back to the default group. We cannot copy
2930		 * cpu_online_mask because a CPU might have executed the
2931		 * offline callback already, but is still marked online.
2932		 */
2933		cpumask_or(&rdtgroup_default.cpu_mask,
2934			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2935
2936		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
2937
2938		kernfs_remove(rdtgrp->kn);
2939		list_del(&rdtgrp->rdtgroup_list);
2940
2941		if (atomic_read(&rdtgrp->waitcount) != 0)
2942			rdtgrp->flags = RDT_DELETED;
2943		else
2944			rdtgroup_remove(rdtgrp);
2945	}
2946	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2947	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2948
2949	kernfs_remove(kn_info);
2950	kernfs_remove(kn_mongrp);
2951	kernfs_remove(kn_mondata);
2952}
2953
2954static void rdt_kill_sb(struct super_block *sb)
2955{
2956	struct rdt_resource *r;
2957
2958	cpus_read_lock();
2959	mutex_lock(&rdtgroup_mutex);
2960
2961	rdt_disable_ctx();
2962
2963	/*Put everything back to default values. */
2964	for_each_alloc_capable_rdt_resource(r)
2965		reset_all_ctrls(r);
 
2966	rmdir_all_sub();
2967	rdt_pseudo_lock_release();
2968	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
2969	schemata_list_destroy();
2970	rdtgroup_destroy_root();
2971	if (resctrl_arch_alloc_capable())
2972		resctrl_arch_disable_alloc();
2973	if (resctrl_arch_mon_capable())
2974		resctrl_arch_disable_mon();
2975	resctrl_mounted = false;
2976	kernfs_kill_sb(sb);
2977	mutex_unlock(&rdtgroup_mutex);
2978	cpus_read_unlock();
2979}
2980
2981static struct file_system_type rdt_fs_type = {
2982	.name			= "resctrl",
2983	.init_fs_context	= rdt_init_fs_context,
2984	.parameters		= rdt_fs_parameters,
2985	.kill_sb		= rdt_kill_sb,
2986};
2987
2988static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2989		       void *priv)
2990{
2991	struct kernfs_node *kn;
2992	int ret = 0;
2993
2994	kn = __kernfs_create_file(parent_kn, name, 0444,
2995				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2996				  &kf_mondata_ops, priv, NULL, NULL);
2997	if (IS_ERR(kn))
2998		return PTR_ERR(kn);
2999
3000	ret = rdtgroup_kn_set_ugid(kn);
3001	if (ret) {
3002		kernfs_remove(kn);
3003		return ret;
3004	}
3005
3006	return ret;
3007}
3008
3009static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
3010{
3011	struct kernfs_node *kn;
3012
3013	kn = kernfs_find_and_get(pkn, name);
3014	if (!kn)
3015		return;
3016	kernfs_put(kn);
3017
3018	if (kn->dir.subdirs <= 1)
3019		kernfs_remove(kn);
3020	else
3021		kernfs_remove_by_name(kn, subname);
3022}
3023
3024/*
3025 * Remove all subdirectories of mon_data of ctrl_mon groups
3026 * and monitor groups for the given domain.
3027 * Remove files and directories containing "sum" of domain data
3028 * when last domain being summed is removed.
3029 */
3030static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3031					   struct rdt_mon_domain *d)
3032{
3033	struct rdtgroup *prgrp, *crgrp;
3034	char subname[32];
3035	bool snc_mode;
3036	char name[32];
3037
3038	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3039	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3040	if (snc_mode)
3041		sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
3042
3043	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3044		mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
 
3045
3046		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
3047			mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
3048	}
3049}
3050
3051static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
3052			     struct rdt_resource *r, struct rdtgroup *prgrp,
3053			     bool do_sum)
3054{
3055	struct rmid_read rr = {0};
3056	union mon_data_bits priv;
 
3057	struct mon_evt *mevt;
 
 
3058	int ret;
3059
3060	if (WARN_ON(list_empty(&r->evt_list)))
3061		return -EPERM;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3062
3063	priv.u.rid = r->rid;
3064	priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
3065	priv.u.sum = do_sum;
3066	list_for_each_entry(mevt, &r->evt_list, list) {
3067		priv.u.evtid = mevt->evtid;
3068		ret = mon_addfile(kn, mevt->name, priv.priv);
3069		if (ret)
3070			return ret;
3071
3072		if (!do_sum && is_mbm_event(mevt->evtid))
3073			mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
3074	}
3075
3076	return 0;
3077}
3078
3079static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
3080				struct rdt_mon_domain *d,
3081				struct rdt_resource *r, struct rdtgroup *prgrp)
3082{
3083	struct kernfs_node *kn, *ckn;
3084	char name[32];
3085	bool snc_mode;
3086	int ret = 0;
3087
3088	lockdep_assert_held(&rdtgroup_mutex);
3089
3090	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3091	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3092	kn = kernfs_find_and_get(parent_kn, name);
3093	if (kn) {
3094		/*
3095		 * rdtgroup_mutex will prevent this directory from being
3096		 * removed. No need to keep this hold.
3097		 */
3098		kernfs_put(kn);
3099	} else {
3100		kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
3101		if (IS_ERR(kn))
3102			return PTR_ERR(kn);
3103
3104		ret = rdtgroup_kn_set_ugid(kn);
3105		if (ret)
3106			goto out_destroy;
3107		ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
3108		if (ret)
3109			goto out_destroy;
3110	}
3111
3112	if (snc_mode) {
3113		sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
3114		ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
3115		if (IS_ERR(ckn)) {
3116			ret = -EINVAL;
3117			goto out_destroy;
3118		}
3119
3120		ret = rdtgroup_kn_set_ugid(ckn);
3121		if (ret)
3122			goto out_destroy;
3123
3124		ret = mon_add_all_files(ckn, d, r, prgrp, false);
3125		if (ret)
3126			goto out_destroy;
3127	}
3128
3129	kernfs_activate(kn);
3130	return 0;
3131
3132out_destroy:
3133	kernfs_remove(kn);
3134	return ret;
3135}
3136
3137/*
3138 * Add all subdirectories of mon_data for "ctrl_mon" groups
3139 * and "monitor" groups with given domain id.
3140 */
3141static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3142					   struct rdt_mon_domain *d)
3143{
3144	struct kernfs_node *parent_kn;
3145	struct rdtgroup *prgrp, *crgrp;
3146	struct list_head *head;
3147
 
 
 
3148	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3149		parent_kn = prgrp->mon.mon_data_kn;
3150		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
3151
3152		head = &prgrp->mon.crdtgrp_list;
3153		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
3154			parent_kn = crgrp->mon.mon_data_kn;
3155			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
3156		}
3157	}
3158}
3159
3160static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
3161				       struct rdt_resource *r,
3162				       struct rdtgroup *prgrp)
3163{
3164	struct rdt_mon_domain *dom;
3165	int ret;
3166
3167	/* Walking r->domains, ensure it can't race with cpuhp */
3168	lockdep_assert_cpus_held();
3169
3170	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
3171		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
3172		if (ret)
3173			return ret;
3174	}
3175
3176	return 0;
3177}
3178
3179/*
3180 * This creates a directory mon_data which contains the monitored data.
3181 *
3182 * mon_data has one directory for each domain which are named
3183 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
3184 * with L3 domain looks as below:
3185 * ./mon_data:
3186 * mon_L3_00
3187 * mon_L3_01
3188 * mon_L3_02
3189 * ...
3190 *
3191 * Each domain directory has one file per event:
3192 * ./mon_L3_00/:
3193 * llc_occupancy
3194 *
3195 */
3196static int mkdir_mondata_all(struct kernfs_node *parent_kn,
3197			     struct rdtgroup *prgrp,
3198			     struct kernfs_node **dest_kn)
3199{
3200	struct rdt_resource *r;
3201	struct kernfs_node *kn;
3202	int ret;
3203
3204	/*
3205	 * Create the mon_data directory first.
3206	 */
3207	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
3208	if (ret)
3209		return ret;
3210
3211	if (dest_kn)
3212		*dest_kn = kn;
3213
3214	/*
3215	 * Create the subdirectories for each domain. Note that all events
3216	 * in a domain like L3 are grouped into a resource whose domain is L3
3217	 */
3218	for_each_mon_capable_rdt_resource(r) {
3219		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
3220		if (ret)
3221			goto out_destroy;
3222	}
3223
3224	return 0;
3225
3226out_destroy:
3227	kernfs_remove(kn);
3228	return ret;
3229}
3230
3231/**
3232 * cbm_ensure_valid - Enforce validity on provided CBM
3233 * @_val:	Candidate CBM
3234 * @r:		RDT resource to which the CBM belongs
3235 *
3236 * The provided CBM represents all cache portions available for use. This
3237 * may be represented by a bitmap that does not consist of contiguous ones
3238 * and thus be an invalid CBM.
3239 * Here the provided CBM is forced to be a valid CBM by only considering
3240 * the first set of contiguous bits as valid and clearing all bits.
3241 * The intention here is to provide a valid default CBM with which a new
3242 * resource group is initialized. The user can follow this with a
3243 * modification to the CBM if the default does not satisfy the
3244 * requirements.
3245 */
3246static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
3247{
3248	unsigned int cbm_len = r->cache.cbm_len;
3249	unsigned long first_bit, zero_bit;
3250	unsigned long val = _val;
3251
3252	if (!val)
3253		return 0;
3254
3255	first_bit = find_first_bit(&val, cbm_len);
3256	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
3257
3258	/* Clear any remaining bits to ensure contiguous region */
3259	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
3260	return (u32)val;
3261}
3262
3263/*
3264 * Initialize cache resources per RDT domain
3265 *
3266 * Set the RDT domain up to start off with all usable allocations. That is,
3267 * all shareable and unused bits. All-zero CBM is invalid.
3268 */
3269static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
3270				 u32 closid)
3271{
3272	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
3273	enum resctrl_conf_type t = s->conf_type;
3274	struct resctrl_staged_config *cfg;
3275	struct rdt_resource *r = s->res;
3276	u32 used_b = 0, unused_b = 0;
3277	unsigned long tmp_cbm;
3278	enum rdtgrp_mode mode;
3279	u32 peer_ctl, ctrl_val;
3280	int i;
3281
3282	cfg = &d->staged_config[t];
3283	cfg->have_new_ctrl = false;
3284	cfg->new_ctrl = r->cache.shareable_bits;
3285	used_b = r->cache.shareable_bits;
3286	for (i = 0; i < closids_supported(); i++) {
 
3287		if (closid_allocated(i) && i != closid) {
3288			mode = rdtgroup_mode_by_closid(i);
3289			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
3290				/*
3291				 * ctrl values for locksetup aren't relevant
3292				 * until the schemata is written, and the mode
3293				 * becomes RDT_MODE_PSEUDO_LOCKED.
3294				 */
3295				continue;
3296			/*
3297			 * If CDP is active include peer domain's
3298			 * usage to ensure there is no overlap
3299			 * with an exclusive group.
3300			 */
3301			if (resctrl_arch_get_cdp_enabled(r->rid))
3302				peer_ctl = resctrl_arch_get_config(r, d, i,
3303								   peer_type);
3304			else
3305				peer_ctl = 0;
3306			ctrl_val = resctrl_arch_get_config(r, d, i,
3307							   s->conf_type);
3308			used_b |= ctrl_val | peer_ctl;
3309			if (mode == RDT_MODE_SHAREABLE)
3310				cfg->new_ctrl |= ctrl_val | peer_ctl;
3311		}
3312	}
3313	if (d->plr && d->plr->cbm > 0)
3314		used_b |= d->plr->cbm;
3315	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
3316	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
3317	cfg->new_ctrl |= unused_b;
3318	/*
3319	 * Force the initial CBM to be valid, user can
3320	 * modify the CBM based on system availability.
3321	 */
3322	cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
3323	/*
3324	 * Assign the u32 CBM to an unsigned long to ensure that
3325	 * bitmap_weight() does not access out-of-bound memory.
3326	 */
3327	tmp_cbm = cfg->new_ctrl;
3328	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
3329		rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
3330		return -ENOSPC;
3331	}
3332	cfg->have_new_ctrl = true;
3333
3334	return 0;
3335}
3336
3337/*
3338 * Initialize cache resources with default values.
3339 *
3340 * A new RDT group is being created on an allocation capable (CAT)
3341 * supporting system. Set this group up to start off with all usable
3342 * allocations.
3343 *
3344 * If there are no more shareable bits available on any domain then
3345 * the entire allocation will fail.
3346 */
3347static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
3348{
3349	struct rdt_ctrl_domain *d;
3350	int ret;
3351
3352	list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
3353		ret = __init_one_rdt_domain(d, s, closid);
3354		if (ret < 0)
3355			return ret;
3356	}
3357
3358	return 0;
3359}
3360
3361/* Initialize MBA resource with default values. */
3362static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
3363{
3364	struct resctrl_staged_config *cfg;
3365	struct rdt_ctrl_domain *d;
3366
3367	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
3368		if (is_mba_sc(r)) {
3369			d->mbps_val[closid] = MBA_MAX_MBPS;
3370			continue;
3371		}
3372
3373		cfg = &d->staged_config[CDP_NONE];
3374		cfg->new_ctrl = r->default_ctrl;
3375		cfg->have_new_ctrl = true;
3376	}
3377}
3378
3379/* Initialize the RDT group's allocations. */
3380static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
3381{
3382	struct resctrl_schema *s;
3383	struct rdt_resource *r;
3384	int ret = 0;
3385
3386	rdt_staged_configs_clear();
3387
3388	list_for_each_entry(s, &resctrl_schema_all, list) {
3389		r = s->res;
3390		if (r->rid == RDT_RESOURCE_MBA ||
3391		    r->rid == RDT_RESOURCE_SMBA) {
3392			rdtgroup_init_mba(r, rdtgrp->closid);
3393			if (is_mba_sc(r))
3394				continue;
3395		} else {
3396			ret = rdtgroup_init_cat(s, rdtgrp->closid);
3397			if (ret < 0)
3398				goto out;
3399		}
3400
3401		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
3402		if (ret < 0) {
3403			rdt_last_cmd_puts("Failed to initialize allocations\n");
3404			goto out;
3405		}
3406
3407	}
3408
3409	rdtgrp->mode = RDT_MODE_SHAREABLE;
3410
3411out:
3412	rdt_staged_configs_clear();
3413	return ret;
3414}
3415
3416static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
3417{
3418	int ret;
3419
3420	if (!resctrl_arch_mon_capable())
3421		return 0;
3422
3423	ret = alloc_rmid(rdtgrp->closid);
3424	if (ret < 0) {
3425		rdt_last_cmd_puts("Out of RMIDs\n");
3426		return ret;
3427	}
3428	rdtgrp->mon.rmid = ret;
3429
3430	ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
3431	if (ret) {
3432		rdt_last_cmd_puts("kernfs subdir error\n");
3433		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3434		return ret;
3435	}
3436
3437	return 0;
3438}
3439
3440static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
3441{
3442	if (resctrl_arch_mon_capable())
3443		free_rmid(rgrp->closid, rgrp->mon.rmid);
3444}
3445
3446static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 
3447			     const char *name, umode_t mode,
3448			     enum rdt_group_type rtype, struct rdtgroup **r)
3449{
3450	struct rdtgroup *prdtgrp, *rdtgrp;
3451	unsigned long files = 0;
3452	struct kernfs_node *kn;
 
3453	int ret;
3454
3455	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
3456	if (!prdtgrp) {
3457		ret = -ENODEV;
3458		goto out_unlock;
3459	}
3460
3461	if (rtype == RDTMON_GROUP &&
3462	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3463	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
3464		ret = -EINVAL;
3465		rdt_last_cmd_puts("Pseudo-locking in progress\n");
3466		goto out_unlock;
3467	}
3468
3469	/* allocate the rdtgroup. */
3470	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
3471	if (!rdtgrp) {
3472		ret = -ENOSPC;
3473		rdt_last_cmd_puts("Kernel out of memory\n");
3474		goto out_unlock;
3475	}
3476	*r = rdtgrp;
3477	rdtgrp->mon.parent = prdtgrp;
3478	rdtgrp->type = rtype;
3479	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
3480
3481	/* kernfs creates the directory for rdtgrp */
3482	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
3483	if (IS_ERR(kn)) {
3484		ret = PTR_ERR(kn);
3485		rdt_last_cmd_puts("kernfs create error\n");
3486		goto out_free_rgrp;
3487	}
3488	rdtgrp->kn = kn;
3489
3490	/*
3491	 * kernfs_remove() will drop the reference count on "kn" which
3492	 * will free it. But we still need it to stick around for the
3493	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
3494	 * which will be dropped by kernfs_put() in rdtgroup_remove().
3495	 */
3496	kernfs_get(kn);
3497
3498	ret = rdtgroup_kn_set_ugid(kn);
3499	if (ret) {
3500		rdt_last_cmd_puts("kernfs perm error\n");
3501		goto out_destroy;
3502	}
3503
3504	if (rtype == RDTCTRL_GROUP) {
3505		files = RFTYPE_BASE | RFTYPE_CTRL;
3506		if (resctrl_arch_mon_capable())
3507			files |= RFTYPE_MON;
3508	} else {
3509		files = RFTYPE_BASE | RFTYPE_MON;
3510	}
3511
3512	ret = rdtgroup_add_files(kn, files);
3513	if (ret) {
3514		rdt_last_cmd_puts("kernfs fill error\n");
3515		goto out_destroy;
3516	}
3517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3518	/*
3519	 * The caller unlocks the parent_kn upon success.
3520	 */
3521	return 0;
3522
 
 
3523out_destroy:
3524	kernfs_put(rdtgrp->kn);
3525	kernfs_remove(rdtgrp->kn);
3526out_free_rgrp:
3527	kfree(rdtgrp);
3528out_unlock:
3529	rdtgroup_kn_unlock(parent_kn);
3530	return ret;
3531}
3532
3533static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
3534{
3535	kernfs_remove(rgrp->kn);
3536	rdtgroup_remove(rgrp);
 
3537}
3538
3539/*
3540 * Create a monitor group under "mon_groups" directory of a control
3541 * and monitor group(ctrl_mon). This is a resource group
3542 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
3543 */
3544static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
3545			      const char *name, umode_t mode)
 
 
3546{
3547	struct rdtgroup *rdtgrp, *prgrp;
3548	int ret;
3549
3550	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
 
3551	if (ret)
3552		return ret;
3553
3554	prgrp = rdtgrp->mon.parent;
3555	rdtgrp->closid = prgrp->closid;
3556
3557	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3558	if (ret) {
3559		mkdir_rdt_prepare_clean(rdtgrp);
3560		goto out_unlock;
3561	}
3562
3563	kernfs_activate(rdtgrp->kn);
3564
3565	/*
3566	 * Add the rdtgrp to the list of rdtgrps the parent
3567	 * ctrl_mon group has to track.
3568	 */
3569	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
3570
3571out_unlock:
3572	rdtgroup_kn_unlock(parent_kn);
3573	return ret;
3574}
3575
3576/*
3577 * These are rdtgroups created under the root directory. Can be used
3578 * to allocate and monitor resources.
3579 */
3580static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 
3581				   const char *name, umode_t mode)
3582{
3583	struct rdtgroup *rdtgrp;
3584	struct kernfs_node *kn;
3585	u32 closid;
3586	int ret;
3587
3588	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
 
3589	if (ret)
3590		return ret;
3591
3592	kn = rdtgrp->kn;
3593	ret = closid_alloc();
3594	if (ret < 0) {
3595		rdt_last_cmd_puts("Out of CLOSIDs\n");
3596		goto out_common_fail;
3597	}
3598	closid = ret;
3599	ret = 0;
3600
3601	rdtgrp->closid = closid;
3602
3603	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3604	if (ret)
3605		goto out_closid_free;
3606
3607	kernfs_activate(rdtgrp->kn);
3608
3609	ret = rdtgroup_init_alloc(rdtgrp);
3610	if (ret < 0)
3611		goto out_rmid_free;
3612
3613	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
3614
3615	if (resctrl_arch_mon_capable()) {
3616		/*
3617		 * Create an empty mon_groups directory to hold the subset
3618		 * of tasks and cpus to monitor.
3619		 */
3620		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
3621		if (ret) {
3622			rdt_last_cmd_puts("kernfs subdir error\n");
3623			goto out_del_list;
3624		}
3625	}
3626
3627	goto out_unlock;
3628
3629out_del_list:
3630	list_del(&rdtgrp->rdtgroup_list);
3631out_rmid_free:
3632	mkdir_rdt_prepare_rmid_free(rdtgrp);
3633out_closid_free:
3634	closid_free(closid);
3635out_common_fail:
3636	mkdir_rdt_prepare_clean(rdtgrp);
3637out_unlock:
3638	rdtgroup_kn_unlock(parent_kn);
3639	return ret;
3640}
3641
3642/*
3643 * We allow creating mon groups only with in a directory called "mon_groups"
3644 * which is present in every ctrl_mon group. Check if this is a valid
3645 * "mon_groups" directory.
3646 *
3647 * 1. The directory should be named "mon_groups".
3648 * 2. The mon group itself should "not" be named "mon_groups".
3649 *   This makes sure "mon_groups" directory always has a ctrl_mon group
3650 *   as parent.
3651 */
3652static bool is_mon_groups(struct kernfs_node *kn, const char *name)
3653{
3654	return (!strcmp(kn->name, "mon_groups") &&
3655		strcmp(name, "mon_groups"));
3656}
3657
3658static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
3659			  umode_t mode)
3660{
3661	/* Do not accept '\n' to avoid unparsable situation. */
3662	if (strchr(name, '\n'))
3663		return -EINVAL;
3664
3665	/*
3666	 * If the parent directory is the root directory and RDT
3667	 * allocation is supported, add a control and monitoring
3668	 * subdirectory
3669	 */
3670	if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
3671		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3672
3673	/*
3674	 * If RDT monitoring is supported and the parent directory is a valid
3675	 * "mon_groups" directory, add a monitoring subdirectory.
3676	 */
3677	if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name))
3678		return rdtgroup_mkdir_mon(parent_kn, name, mode);
3679
3680	return -EPERM;
3681}
3682
3683static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
 
3684{
3685	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3686	int cpu;
3687
3688	/* Give any tasks back to the parent group */
3689	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3690
3691	/* Update per cpu rmid of the moved CPUs first */
3692	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3693		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
3694	/*
3695	 * Update the MSR on moved CPUs and CPUs which have moved
3696	 * task running on them.
3697	 */
3698	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3699	update_closid_rmid(tmpmask, NULL);
3700
3701	rdtgrp->flags = RDT_DELETED;
3702	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3703
3704	/*
3705	 * Remove the rdtgrp from the parent ctrl_mon group's list
3706	 */
3707	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3708	list_del(&rdtgrp->mon.crdtgrp_list);
3709
 
 
 
 
 
3710	kernfs_remove(rdtgrp->kn);
3711
3712	return 0;
3713}
3714
3715static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
 
3716{
3717	rdtgrp->flags = RDT_DELETED;
3718	list_del(&rdtgrp->rdtgroup_list);
3719
 
 
 
 
 
3720	kernfs_remove(rdtgrp->kn);
3721	return 0;
3722}
3723
3724static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
 
3725{
3726	int cpu;
3727
3728	/* Give any tasks back to the default group */
3729	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3730
3731	/* Give any CPUs back to the default group */
3732	cpumask_or(&rdtgroup_default.cpu_mask,
3733		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3734
3735	/* Update per cpu closid and rmid of the moved CPUs first */
3736	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
3737		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
3738		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
3739	}
3740
3741	/*
3742	 * Update the MSR on moved CPUs and CPUs which have moved
3743	 * task running on them.
3744	 */
3745	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3746	update_closid_rmid(tmpmask, NULL);
3747
3748	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3749	closid_free(rdtgrp->closid);
3750
3751	rdtgroup_ctrl_remove(rdtgrp);
3752
3753	/*
3754	 * Free all the child monitor group rmids.
3755	 */
3756	free_all_child_rdtgrp(rdtgrp);
3757
 
 
3758	return 0;
3759}
3760
3761static int rdtgroup_rmdir(struct kernfs_node *kn)
3762{
3763	struct kernfs_node *parent_kn = kn->parent;
3764	struct rdtgroup *rdtgrp;
3765	cpumask_var_t tmpmask;
3766	int ret = 0;
3767
3768	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3769		return -ENOMEM;
3770
3771	rdtgrp = rdtgroup_kn_lock_live(kn);
3772	if (!rdtgrp) {
3773		ret = -EPERM;
3774		goto out;
3775	}
3776
3777	/*
3778	 * If the rdtgroup is a ctrl_mon group and parent directory
3779	 * is the root directory, remove the ctrl_mon group.
3780	 *
3781	 * If the rdtgroup is a mon group and parent directory
3782	 * is a valid "mon_groups" directory, remove the mon group.
3783	 */
3784	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3785	    rdtgrp != &rdtgroup_default) {
3786		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3787		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3788			ret = rdtgroup_ctrl_remove(rdtgrp);
3789		} else {
3790			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3791		}
3792	} else if (rdtgrp->type == RDTMON_GROUP &&
3793		 is_mon_groups(parent_kn, kn->name)) {
3794		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3795	} else {
3796		ret = -EPERM;
3797	}
3798
3799out:
3800	rdtgroup_kn_unlock(kn);
3801	free_cpumask_var(tmpmask);
3802	return ret;
3803}
3804
3805/**
3806 * mongrp_reparent() - replace parent CTRL_MON group of a MON group
3807 * @rdtgrp:		the MON group whose parent should be replaced
3808 * @new_prdtgrp:	replacement parent CTRL_MON group for @rdtgrp
3809 * @cpus:		cpumask provided by the caller for use during this call
3810 *
3811 * Replaces the parent CTRL_MON group for a MON group, resulting in all member
3812 * tasks' CLOSID immediately changing to that of the new parent group.
3813 * Monitoring data for the group is unaffected by this operation.
3814 */
3815static void mongrp_reparent(struct rdtgroup *rdtgrp,
3816			    struct rdtgroup *new_prdtgrp,
3817			    cpumask_var_t cpus)
3818{
3819	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3820
3821	WARN_ON(rdtgrp->type != RDTMON_GROUP);
3822	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
3823
3824	/* Nothing to do when simply renaming a MON group. */
3825	if (prdtgrp == new_prdtgrp)
3826		return;
3827
3828	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3829	list_move_tail(&rdtgrp->mon.crdtgrp_list,
3830		       &new_prdtgrp->mon.crdtgrp_list);
3831
3832	rdtgrp->mon.parent = new_prdtgrp;
3833	rdtgrp->closid = new_prdtgrp->closid;
3834
3835	/* Propagate updated closid to all tasks in this group. */
3836	rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
3837
3838	update_closid_rmid(cpus, NULL);
3839}
3840
3841static int rdtgroup_rename(struct kernfs_node *kn,
3842			   struct kernfs_node *new_parent, const char *new_name)
3843{
3844	struct rdtgroup *new_prdtgrp;
3845	struct rdtgroup *rdtgrp;
3846	cpumask_var_t tmpmask;
3847	int ret;
3848
3849	rdtgrp = kernfs_to_rdtgroup(kn);
3850	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
3851	if (!rdtgrp || !new_prdtgrp)
3852		return -ENOENT;
3853
3854	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
3855	rdtgroup_kn_get(rdtgrp, kn);
3856	rdtgroup_kn_get(new_prdtgrp, new_parent);
3857
3858	mutex_lock(&rdtgroup_mutex);
3859
3860	rdt_last_cmd_clear();
3861
3862	/*
3863	 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
3864	 * either kernfs_node is a file.
3865	 */
3866	if (kernfs_type(kn) != KERNFS_DIR ||
3867	    kernfs_type(new_parent) != KERNFS_DIR) {
3868		rdt_last_cmd_puts("Source and destination must be directories");
3869		ret = -EPERM;
3870		goto out;
3871	}
3872
3873	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
3874		ret = -ENOENT;
3875		goto out;
3876	}
3877
3878	if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
3879	    !is_mon_groups(kn->parent, kn->name)) {
3880		rdt_last_cmd_puts("Source must be a MON group\n");
3881		ret = -EPERM;
3882		goto out;
3883	}
3884
3885	if (!is_mon_groups(new_parent, new_name)) {
3886		rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
3887		ret = -EPERM;
3888		goto out;
3889	}
3890
3891	/*
3892	 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
3893	 * current parent CTRL_MON group and therefore cannot be assigned to
3894	 * the new parent, making the move illegal.
3895	 */
3896	if (!cpumask_empty(&rdtgrp->cpu_mask) &&
3897	    rdtgrp->mon.parent != new_prdtgrp) {
3898		rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
3899		ret = -EPERM;
3900		goto out;
3901	}
3902
3903	/*
3904	 * Allocate the cpumask for use in mongrp_reparent() to avoid the
3905	 * possibility of failing to allocate it after kernfs_rename() has
3906	 * succeeded.
3907	 */
3908	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
3909		ret = -ENOMEM;
3910		goto out;
3911	}
3912
3913	/*
3914	 * Perform all input validation and allocations needed to ensure
3915	 * mongrp_reparent() will succeed before calling kernfs_rename(),
3916	 * otherwise it would be necessary to revert this call if
3917	 * mongrp_reparent() failed.
3918	 */
3919	ret = kernfs_rename(kn, new_parent, new_name);
3920	if (!ret)
3921		mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
3922
3923	free_cpumask_var(tmpmask);
3924
3925out:
3926	mutex_unlock(&rdtgroup_mutex);
3927	rdtgroup_kn_put(rdtgrp, kn);
3928	rdtgroup_kn_put(new_prdtgrp, new_parent);
3929	return ret;
3930}
3931
3932static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3933{
3934	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
3935		seq_puts(seq, ",cdp");
3936
3937	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
3938		seq_puts(seq, ",cdpl2");
3939
3940	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
3941		seq_puts(seq, ",mba_MBps");
3942
3943	if (resctrl_debug)
3944		seq_puts(seq, ",debug");
3945
3946	return 0;
3947}
3948
3949static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
3950	.mkdir		= rdtgroup_mkdir,
3951	.rmdir		= rdtgroup_rmdir,
3952	.rename		= rdtgroup_rename,
3953	.show_options	= rdtgroup_show_options,
3954};
3955
3956static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
3957{
 
 
3958	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3959				      KERNFS_ROOT_CREATE_DEACTIVATED |
3960				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3961				      &rdtgroup_default);
3962	if (IS_ERR(rdt_root))
3963		return PTR_ERR(rdt_root);
3964
3965	ctx->kfc.root = rdt_root;
3966	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
3967
3968	return 0;
3969}
3970
3971static void rdtgroup_destroy_root(void)
3972{
3973	kernfs_destroy_root(rdt_root);
3974	rdtgroup_default.kn = NULL;
3975}
3976
3977static void __init rdtgroup_setup_default(void)
3978{
3979	mutex_lock(&rdtgroup_mutex);
3980
3981	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
3982	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
3983	rdtgroup_default.type = RDTCTRL_GROUP;
3984	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3985
3986	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
3987
3988	mutex_unlock(&rdtgroup_mutex);
3989}
3990
3991static void domain_destroy_mon_state(struct rdt_mon_domain *d)
3992{
3993	bitmap_free(d->rmid_busy_llc);
3994	kfree(d->mbm_total);
3995	kfree(d->mbm_local);
3996}
3997
3998void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
3999{
4000	mutex_lock(&rdtgroup_mutex);
4001
4002	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
4003		mba_sc_domain_destroy(r, d);
4004
4005	mutex_unlock(&rdtgroup_mutex);
4006}
4007
4008void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4009{
4010	mutex_lock(&rdtgroup_mutex);
4011
4012	/*
4013	 * If resctrl is mounted, remove all the
4014	 * per domain monitor data directories.
4015	 */
4016	if (resctrl_mounted && resctrl_arch_mon_capable())
4017		rmdir_mondata_subdir_allrdtgrp(r, d);
4018
4019	if (is_mbm_enabled())
4020		cancel_delayed_work(&d->mbm_over);
4021	if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
4022		/*
4023		 * When a package is going down, forcefully
4024		 * decrement rmid->ebusy. There is no way to know
4025		 * that the L3 was flushed and hence may lead to
4026		 * incorrect counts in rare scenarios, but leaving
4027		 * the RMID as busy creates RMID leaks if the
4028		 * package never comes back.
4029		 */
4030		__check_limbo(d, true);
4031		cancel_delayed_work(&d->cqm_limbo);
4032	}
4033
4034	domain_destroy_mon_state(d);
 
4035
 
4036	mutex_unlock(&rdtgroup_mutex);
4037}
4038
4039static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
4040{
4041	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
4042	size_t tsize;
4043
4044	if (is_llc_occupancy_enabled()) {
4045		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
4046		if (!d->rmid_busy_llc)
4047			return -ENOMEM;
4048	}
4049	if (is_mbm_total_enabled()) {
4050		tsize = sizeof(*d->mbm_total);
4051		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
4052		if (!d->mbm_total) {
4053			bitmap_free(d->rmid_busy_llc);
4054			return -ENOMEM;
4055		}
4056	}
4057	if (is_mbm_local_enabled()) {
4058		tsize = sizeof(*d->mbm_local);
4059		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
4060		if (!d->mbm_local) {
4061			bitmap_free(d->rmid_busy_llc);
4062			kfree(d->mbm_total);
4063			return -ENOMEM;
4064		}
4065	}
4066
4067	return 0;
4068}
4069
4070int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
4071{
4072	int err = 0;
4073
4074	mutex_lock(&rdtgroup_mutex);
4075
4076	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
4077		/* RDT_RESOURCE_MBA is never mon_capable */
4078		err = mba_sc_domain_allocate(r, d);
4079	}
4080
4081	mutex_unlock(&rdtgroup_mutex);
4082
4083	return err;
4084}
4085
4086int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4087{
4088	int err;
4089
4090	mutex_lock(&rdtgroup_mutex);
4091
4092	err = domain_setup_mon_state(r, d);
4093	if (err)
4094		goto out_unlock;
4095
4096	if (is_mbm_enabled()) {
4097		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
4098		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
4099					   RESCTRL_PICK_ANY_CPU);
4100	}
4101
4102	if (is_llc_occupancy_enabled())
4103		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
4104
4105	/*
4106	 * If the filesystem is not mounted then only the default resource group
4107	 * exists. Creation of its directories is deferred until mount time
4108	 * by rdt_get_tree() calling mkdir_mondata_all().
4109	 * If resctrl is mounted, add per domain monitor data directories.
4110	 */
4111	if (resctrl_mounted && resctrl_arch_mon_capable())
4112		mkdir_mondata_subdir_allrdtgrp(r, d);
4113
4114out_unlock:
4115	mutex_unlock(&rdtgroup_mutex);
4116
4117	return err;
4118}
4119
4120void resctrl_online_cpu(unsigned int cpu)
4121{
4122	mutex_lock(&rdtgroup_mutex);
4123	/* The CPU is set in default rdtgroup after online. */
4124	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
4125	mutex_unlock(&rdtgroup_mutex);
4126}
4127
4128static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
4129{
4130	struct rdtgroup *cr;
4131
4132	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
4133		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
4134			break;
4135	}
4136}
4137
4138void resctrl_offline_cpu(unsigned int cpu)
4139{
4140	struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
4141	struct rdt_mon_domain *d;
4142	struct rdtgroup *rdtgrp;
4143
4144	mutex_lock(&rdtgroup_mutex);
4145	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
4146		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
4147			clear_childcpus(rdtgrp, cpu);
4148			break;
4149		}
4150	}
4151
4152	if (!l3->mon_capable)
4153		goto out_unlock;
4154
4155	d = get_mon_domain_from_cpu(cpu, l3);
4156	if (d) {
4157		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
4158			cancel_delayed_work(&d->mbm_over);
4159			mbm_setup_overflow_handler(d, 0, cpu);
4160		}
4161		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
4162		    has_busy_rmid(d)) {
4163			cancel_delayed_work(&d->cqm_limbo);
4164			cqm_setup_limbo_handler(d, 0, cpu);
4165		}
4166	}
4167
4168out_unlock:
4169	mutex_unlock(&rdtgroup_mutex);
4170}
4171
4172/*
4173 * rdtgroup_init - rdtgroup initialization
4174 *
4175 * Setup resctrl file system including set up root, create mount point,
4176 * register rdtgroup filesystem, and initialize files under root directory.
4177 *
4178 * Return: 0 on success or -errno
4179 */
4180int __init rdtgroup_init(void)
4181{
4182	int ret = 0;
4183
4184	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
4185		     sizeof(last_cmd_status_buf));
4186
4187	rdtgroup_setup_default();
 
 
4188
4189	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
4190	if (ret)
4191		return ret;
4192
4193	ret = register_filesystem(&rdt_fs_type);
4194	if (ret)
4195		goto cleanup_mountpoint;
4196
4197	/*
4198	 * Adding the resctrl debugfs directory here may not be ideal since
4199	 * it would let the resctrl debugfs directory appear on the debugfs
4200	 * filesystem before the resctrl filesystem is mounted.
4201	 * It may also be ok since that would enable debugging of RDT before
4202	 * resctrl is mounted.
4203	 * The reason why the debugfs directory is created here and not in
4204	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
4205	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
4206	 * (the lockdep class of inode->i_rwsem). Other filesystem
4207	 * interactions (eg. SyS_getdents) have the lock ordering:
4208	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
4209	 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
4210	 * is taken, thus creating dependency:
4211	 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
4212	 * issues considering the other two lock dependencies.
4213	 * By creating the debugfs directory here we avoid a dependency
4214	 * that may cause deadlock (even though file operations cannot
4215	 * occur until the filesystem is mounted, but I do not know how to
4216	 * tell lockdep that).
4217	 */
4218	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
4219
4220	return 0;
4221
4222cleanup_mountpoint:
4223	sysfs_remove_mount_point(fs_kobj, "resctrl");
 
 
4224
4225	return ret;
4226}
4227
4228void __exit rdtgroup_exit(void)
4229{
4230	debugfs_remove_recursive(debugfs_resctrl);
4231	unregister_filesystem(&rdt_fs_type);
4232	sysfs_remove_mount_point(fs_kobj, "resctrl");
 
4233}