   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * User interface for Resource Allocation in Resource Director Technology (RDT)
   4 *
   5 * Copyright (C) 2016 Intel Corporation
   6 *
   7 * Author: Fenghua Yu <fenghua.yu@intel.com>
   8 *
   9 * More information about RDT can be found in the Intel (R) x86 Architecture
  10 * Software Developer Manual.
  11 */
  12
  13#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
  14
  15#include <linux/cacheinfo.h>
  16#include <linux/cpu.h>
  17#include <linux/debugfs.h>
  18#include <linux/fs.h>
  19#include <linux/fs_parser.h>
  20#include <linux/sysfs.h>
  21#include <linux/kernfs.h>
  22#include <linux/seq_buf.h>
  23#include <linux/seq_file.h>
  24#include <linux/sched/signal.h>
  25#include <linux/sched/task.h>
  26#include <linux/slab.h>
  27#include <linux/task_work.h>
  28#include <linux/user_namespace.h>
  29
  30#include <uapi/linux/magic.h>
  31
  32#include <asm/resctrl_sched.h>
  33#include "internal.h"
  34
  35DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
  36DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  37DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
  38static struct kernfs_root *rdt_root;
  39struct rdtgroup rdtgroup_default;
  40LIST_HEAD(rdt_all_groups);
  41
  42/* Kernel fs node for "info" directory under root */
  43static struct kernfs_node *kn_info;
  44
  45/* Kernel fs node for "mon_groups" directory under root */
  46static struct kernfs_node *kn_mongrp;
  47
  48/* Kernel fs node for "mon_data" directory under root */
  49static struct kernfs_node *kn_mondata;
  50
  51static struct seq_buf last_cmd_status;
  52static char last_cmd_status_buf[512];
  53
  54struct dentry *debugfs_resctrl;
  55
  56void rdt_last_cmd_clear(void)
  57{
  58	lockdep_assert_held(&rdtgroup_mutex);
  59	seq_buf_clear(&last_cmd_status);
  60}
  61
  62void rdt_last_cmd_puts(const char *s)
  63{
  64	lockdep_assert_held(&rdtgroup_mutex);
  65	seq_buf_puts(&last_cmd_status, s);
  66}
  67
  68void rdt_last_cmd_printf(const char *fmt, ...)
  69{
  70	va_list ap;
  71
  72	va_start(ap, fmt);
  73	lockdep_assert_held(&rdtgroup_mutex);
  74	seq_buf_vprintf(&last_cmd_status, fmt, ap);
  75	va_end(ap);
  76}
  77
  78/*
  79 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  80 * we can keep a bitmap of free CLOSIDs in a single integer.
  81 *
  82 * Using a global CLOSID across all resources has some advantages and
  83 * some drawbacks:
  84 * + We can simply set "current->closid" to assign a task to a resource
  85 *   group.
  86 * + Context switch code can avoid extra memory references deciding which
  87 *   CLOSID to load into the PQR_ASSOC MSR
  88 * - We give up some options in configuring resource groups across multi-socket
  89 *   systems.
  90 * - Our choices on how to configure each resource become progressively more
  91 *   limited as the number of resources grows.
  92 */
  93static int closid_free_map;
  94static int closid_free_map_len;
  95
  96int closids_supported(void)
  97{
  98	return closid_free_map_len;
  99}
 100
 101static void closid_init(void)
 102{
 103	struct rdt_resource *r;
 104	int rdt_min_closid = 32;
 105
 106	/* Compute rdt_min_closid across all resources */
 107	for_each_alloc_enabled_rdt_resource(r)
 108		rdt_min_closid = min(rdt_min_closid, r->num_closid);
 109
 110	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
 111
 112	/* CLOSID 0 is always reserved for the default group */
 113	closid_free_map &= ~1;
 114	closid_free_map_len = rdt_min_closid;
 115}
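/*
 * Worked example (illustrative): if the smallest alloc-capable resource
 * supports 4 CLOSIDs, then rdt_min_closid = 4 and
 *   closid_free_map = BIT_MASK(4) - 1 = 0b1111;  &= ~1  ->  0b1110
 * so CLOSIDs 1-3 are free while CLOSID 0 stays reserved for the default
 * group, and closids_supported() reports 4.
 */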
 116
 117static int closid_alloc(void)
 118{
 119	u32 closid = ffs(closid_free_map);
 120
 121	if (closid == 0)
 122		return -ENOSPC;
 123	closid--;
 124	closid_free_map &= ~(1 << closid);
 125
 126	return closid;
 127}
 128
 129void closid_free(int closid)
 130{
 131	closid_free_map |= 1 << closid;
 132}
 133
 134/**
 135 * closid_allocated - test if provided closid is in use
 136 * @closid: closid to be tested
 137 *
 138 * Return: true if @closid is currently associated with a resource group,
 139 * false if @closid is free
 140 */
 141static bool closid_allocated(unsigned int closid)
 142{
 143	return (closid_free_map & (1 << closid)) == 0;
 144}
 145
 146/**
 147 * rdtgroup_mode_by_closid - Return mode of resource group with closid
  148 * @closid: closid of the resource group
 149 *
 150 * Each resource group is associated with a @closid. Here the mode
 151 * of a resource group can be queried by searching for it using its closid.
 152 *
 153 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 154 */
 155enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
 156{
 157	struct rdtgroup *rdtgrp;
 158
 159	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
 160		if (rdtgrp->closid == closid)
 161			return rdtgrp->mode;
 162	}
 163
 164	return RDT_NUM_MODES;
 165}
 166
 167static const char * const rdt_mode_str[] = {
 168	[RDT_MODE_SHAREABLE]		= "shareable",
 169	[RDT_MODE_EXCLUSIVE]		= "exclusive",
 170	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
 171	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
 172};
 173
 174/**
 175 * rdtgroup_mode_str - Return the string representation of mode
  176 * @mode: the resource group mode as &enum rdtgrp_mode
 177 *
 178 * Return: string representation of valid mode, "unknown" otherwise
 179 */
 180static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
 181{
 182	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
 183		return "unknown";
 184
 185	return rdt_mode_str[mode];
 186}
 187
 188/* set uid and gid of rdtgroup dirs and files to that of the creator */
 189static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 190{
 191	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
 192				.ia_uid = current_fsuid(),
 193				.ia_gid = current_fsgid(), };
 194
 195	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
 196	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
 197		return 0;
 198
 199	return kernfs_setattr(kn, &iattr);
 200}
 201
 202static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 203{
 204	struct kernfs_node *kn;
 205	int ret;
 206
 207	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
 208				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 209				  0, rft->kf_ops, rft, NULL, NULL);
 210	if (IS_ERR(kn))
 211		return PTR_ERR(kn);
 212
 213	ret = rdtgroup_kn_set_ugid(kn);
 214	if (ret) {
 215		kernfs_remove(kn);
 216		return ret;
 217	}
 218
 219	return 0;
 220}
 221
 222static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 223{
 224	struct kernfs_open_file *of = m->private;
 225	struct rftype *rft = of->kn->priv;
 226
 227	if (rft->seq_show)
 228		return rft->seq_show(of, m, arg);
 229	return 0;
 230}
 231
 232static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
 233				   size_t nbytes, loff_t off)
 234{
 235	struct rftype *rft = of->kn->priv;
 236
 237	if (rft->write)
 238		return rft->write(of, buf, nbytes, off);
 239
 240	return -EINVAL;
 241}
 242
 243static struct kernfs_ops rdtgroup_kf_single_ops = {
 244	.atomic_write_len	= PAGE_SIZE,
 245	.write			= rdtgroup_file_write,
 246	.seq_show		= rdtgroup_seqfile_show,
 247};
 248
 249static struct kernfs_ops kf_mondata_ops = {
 250	.atomic_write_len	= PAGE_SIZE,
 251	.seq_show		= rdtgroup_mondata_show,
 252};
 253
 254static bool is_cpu_list(struct kernfs_open_file *of)
 255{
 256	struct rftype *rft = of->kn->priv;
 257
 258	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
 259}
 260
 261static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 262			      struct seq_file *s, void *v)
 263{
 264	struct rdtgroup *rdtgrp;
 265	struct cpumask *mask;
 266	int ret = 0;
 267
 268	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 269
 270	if (rdtgrp) {
 271		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
 272			if (!rdtgrp->plr->d) {
 273				rdt_last_cmd_clear();
 274				rdt_last_cmd_puts("Cache domain offline\n");
 275				ret = -ENODEV;
 276			} else {
 277				mask = &rdtgrp->plr->d->cpu_mask;
 278				seq_printf(s, is_cpu_list(of) ?
 279					   "%*pbl\n" : "%*pb\n",
 280					   cpumask_pr_args(mask));
 281			}
 282		} else {
 283			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
 284				   cpumask_pr_args(&rdtgrp->cpu_mask));
 285		}
 286	} else {
 287		ret = -ENOENT;
 288	}
 289	rdtgroup_kn_unlock(of->kn);
 290
 291	return ret;
 292}
 293
 294/*
 295 * This is safe against resctrl_sched_in() called from __switch_to()
 296 * because __switch_to() is executed with interrupts disabled. A local call
  297 * from update_closid_rmid() is protected against __switch_to() because
 298 * preemption is disabled.
 299 */
 300static void update_cpu_closid_rmid(void *info)
 301{
 302	struct rdtgroup *r = info;
 303
 304	if (r) {
 305		this_cpu_write(pqr_state.default_closid, r->closid);
 306		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 307	}
 308
 309	/*
 310	 * We cannot unconditionally write the MSR because the current
 311	 * executing task might have its own closid selected. Just reuse
 312	 * the context switch code.
 313	 */
 314	resctrl_sched_in();
 315}
 316
 317/*
  318 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
 319 *
 320 * Per task closids/rmids must have been set up before calling this function.
 321 */
 322static void
 323update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
 324{
 325	int cpu = get_cpu();
 326
 327	if (cpumask_test_cpu(cpu, cpu_mask))
 328		update_cpu_closid_rmid(r);
 329	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
 330	put_cpu();
 331}
 332
 333static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 334			  cpumask_var_t tmpmask)
 335{
 336	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
 337	struct list_head *head;
 338
 339	/* Check whether cpus belong to parent ctrl group */
 340	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
 341	if (cpumask_weight(tmpmask)) {
 342		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
 343		return -EINVAL;
 344	}
 345
 346	/* Check whether cpus are dropped from this group */
 347	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 348	if (cpumask_weight(tmpmask)) {
 349		/* Give any dropped cpus to parent rdtgroup */
 350		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
 351		update_closid_rmid(tmpmask, prgrp);
 352	}
 353
 354	/*
 355	 * If we added cpus, remove them from previous group that owned them
 356	 * and update per-cpu rmid
 357	 */
 358	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 359	if (cpumask_weight(tmpmask)) {
 360		head = &prgrp->mon.crdtgrp_list;
 361		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 362			if (crgrp == rdtgrp)
 363				continue;
 364			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
 365				       tmpmask);
 366		}
 367		update_closid_rmid(tmpmask, rdtgrp);
 368	}
 369
 370	/* Done pushing/pulling - update this group with new mask */
 371	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 372
 373	return 0;
 374}
 375
 376static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
 377{
 378	struct rdtgroup *crgrp;
 379
 380	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
  381	/* update the child mon group masks as well */
 382	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
 383		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
 384}
 385
 386static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 387			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
 388{
 389	struct rdtgroup *r, *crgrp;
 390	struct list_head *head;
 391
 392	/* Check whether cpus are dropped from this group */
 393	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 394	if (cpumask_weight(tmpmask)) {
 395		/* Can't drop from default group */
 396		if (rdtgrp == &rdtgroup_default) {
 397			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
 398			return -EINVAL;
 399		}
 400
 401		/* Give any dropped cpus to rdtgroup_default */
 402		cpumask_or(&rdtgroup_default.cpu_mask,
 403			   &rdtgroup_default.cpu_mask, tmpmask);
 404		update_closid_rmid(tmpmask, &rdtgroup_default);
 405	}
 406
 407	/*
 408	 * If we added cpus, remove them from previous group and
 409	 * the prev group's child groups that owned them
 410	 * and update per-cpu closid/rmid.
 411	 */
 412	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 413	if (cpumask_weight(tmpmask)) {
 414		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
 415			if (r == rdtgrp)
 416				continue;
 417			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
 418			if (cpumask_weight(tmpmask1))
 419				cpumask_rdtgrp_clear(r, tmpmask1);
 420		}
 421		update_closid_rmid(tmpmask, rdtgrp);
 422	}
 423
 424	/* Done pushing/pulling - update this group with new mask */
 425	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 426
 427	/*
 428	 * Clear child mon group masks since there is a new parent mask
 429	 * now and update the rmid for the cpus the child lost.
 430	 */
 431	head = &rdtgrp->mon.crdtgrp_list;
 432	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 433		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
 434		update_closid_rmid(tmpmask, rdtgrp);
 435		cpumask_clear(&crgrp->cpu_mask);
 436	}
 437
 438	return 0;
 439}
 440
 441static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 442				   char *buf, size_t nbytes, loff_t off)
 443{
 444	cpumask_var_t tmpmask, newmask, tmpmask1;
 445	struct rdtgroup *rdtgrp;
 446	int ret;
 447
 448	if (!buf)
 449		return -EINVAL;
 450
 451	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
 452		return -ENOMEM;
 453	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
 454		free_cpumask_var(tmpmask);
 455		return -ENOMEM;
 456	}
 457	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
 458		free_cpumask_var(tmpmask);
 459		free_cpumask_var(newmask);
 460		return -ENOMEM;
 461	}
 462
 463	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 464	if (!rdtgrp) {
 465		ret = -ENOENT;
 466		goto unlock;
 467	}
 468
 469	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 470	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 471		ret = -EINVAL;
 472		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 473		goto unlock;
 474	}
 475
 476	if (is_cpu_list(of))
 477		ret = cpulist_parse(buf, newmask);
 478	else
 479		ret = cpumask_parse(buf, newmask);
 480
 481	if (ret) {
 482		rdt_last_cmd_puts("Bad CPU list/mask\n");
 483		goto unlock;
 484	}
 485
 486	/* check that user didn't specify any offline cpus */
 487	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
 488	if (cpumask_weight(tmpmask)) {
 489		ret = -EINVAL;
 490		rdt_last_cmd_puts("Can only assign online CPUs\n");
 491		goto unlock;
 492	}
 493
 494	if (rdtgrp->type == RDTCTRL_GROUP)
 495		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
 496	else if (rdtgrp->type == RDTMON_GROUP)
 497		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
 498	else
 499		ret = -EINVAL;
 500
 501unlock:
 502	rdtgroup_kn_unlock(of->kn);
 503	free_cpumask_var(tmpmask);
 504	free_cpumask_var(newmask);
 505	free_cpumask_var(tmpmask1);
 506
 507	return ret ?: nbytes;
 508}
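/*
 * For illustration, a typical user-space interaction with this handler
 * (paths assume resctrl mounted at /sys/fs/resctrl and a group "grp1"):
 *   # echo 0-3 > /sys/fs/resctrl/grp1/cpus_list
 *   # echo 00f > /sys/fs/resctrl/grp1/cpus
 * Both writes land here; is_cpu_list() selects cpulist_parse() or
 * cpumask_parse() for the respective file.
 */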
 509
 510struct task_move_callback {
 511	struct callback_head	work;
 512	struct rdtgroup		*rdtgrp;
 513};
 514
 515static void move_myself(struct callback_head *head)
 516{
 517	struct task_move_callback *callback;
 518	struct rdtgroup *rdtgrp;
 519
 520	callback = container_of(head, struct task_move_callback, work);
 521	rdtgrp = callback->rdtgrp;
 522
 523	/*
 524	 * If resource group was deleted before this task work callback
 525	 * was invoked, then assign the task to root group and free the
 526	 * resource group.
 527	 */
 528	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
 529	    (rdtgrp->flags & RDT_DELETED)) {
 530		current->closid = 0;
 531		current->rmid = 0;
 532		kfree(rdtgrp);
 533	}
 534
 535	preempt_disable();
 536	/* update PQR_ASSOC MSR to make resource group go into effect */
 537	resctrl_sched_in();
 538	preempt_enable();
 539
 540	kfree(callback);
 541}
 542
 543static int __rdtgroup_move_task(struct task_struct *tsk,
 544				struct rdtgroup *rdtgrp)
 545{
 546	struct task_move_callback *callback;
 547	int ret;
 548
 549	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
 550	if (!callback)
 551		return -ENOMEM;
 552	callback->work.func = move_myself;
 553	callback->rdtgrp = rdtgrp;
 554
 555	/*
 556	 * Take a refcount, so rdtgrp cannot be freed before the
 557	 * callback has been invoked.
 558	 */
 559	atomic_inc(&rdtgrp->waitcount);
 560	ret = task_work_add(tsk, &callback->work, true);
 561	if (ret) {
 562		/*
 563		 * Task is exiting. Drop the refcount and free the callback.
 564		 * No need to check the refcount as the group cannot be
 565		 * deleted before the write function unlocks rdtgroup_mutex.
 566		 */
 567		atomic_dec(&rdtgrp->waitcount);
 568		kfree(callback);
 569		rdt_last_cmd_puts("Task exited\n");
 570	} else {
 571		/*
 572		 * For ctrl_mon groups move both closid and rmid.
 573		 * For monitor groups, can move the tasks only from
 574		 * their parent CTRL group.
 575		 */
 576		if (rdtgrp->type == RDTCTRL_GROUP) {
 577			tsk->closid = rdtgrp->closid;
 578			tsk->rmid = rdtgrp->mon.rmid;
 579		} else if (rdtgrp->type == RDTMON_GROUP) {
 580			if (rdtgrp->mon.parent->closid == tsk->closid) {
 581				tsk->rmid = rdtgrp->mon.rmid;
 582			} else {
 583				rdt_last_cmd_puts("Can't move task to different control group\n");
 584				ret = -EINVAL;
 585			}
 586		}
 587	}
 588	return ret;
 589}
 590
 591/**
 592 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
 593 * @r: Resource group
 594 *
 595 * Return: 1 if tasks have been assigned to @r, 0 otherwise
 596 */
 597int rdtgroup_tasks_assigned(struct rdtgroup *r)
 598{
 599	struct task_struct *p, *t;
 600	int ret = 0;
 601
 602	lockdep_assert_held(&rdtgroup_mutex);
 603
 604	rcu_read_lock();
 605	for_each_process_thread(p, t) {
 606		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
 607		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
 608			ret = 1;
 609			break;
 610		}
 611	}
 612	rcu_read_unlock();
 613
 614	return ret;
 615}
 616
 617static int rdtgroup_task_write_permission(struct task_struct *task,
 618					  struct kernfs_open_file *of)
 619{
 620	const struct cred *tcred = get_task_cred(task);
 621	const struct cred *cred = current_cred();
 622	int ret = 0;
 623
 624	/*
 625	 * Even if we're attaching all tasks in the thread group, we only
 626	 * need to check permissions on one of them.
 627	 */
 628	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 629	    !uid_eq(cred->euid, tcred->uid) &&
 630	    !uid_eq(cred->euid, tcred->suid)) {
 631		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
 632		ret = -EPERM;
 633	}
 634
 635	put_cred(tcred);
 636	return ret;
 637}
 638
 639static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 640			      struct kernfs_open_file *of)
 641{
 642	struct task_struct *tsk;
 643	int ret;
 644
 645	rcu_read_lock();
 646	if (pid) {
 647		tsk = find_task_by_vpid(pid);
 648		if (!tsk) {
 649			rcu_read_unlock();
 650			rdt_last_cmd_printf("No task %d\n", pid);
 651			return -ESRCH;
 652		}
 653	} else {
 654		tsk = current;
 655	}
 656
 657	get_task_struct(tsk);
 658	rcu_read_unlock();
 659
 660	ret = rdtgroup_task_write_permission(tsk, of);
 661	if (!ret)
 662		ret = __rdtgroup_move_task(tsk, rdtgrp);
 663
 664	put_task_struct(tsk);
 665	return ret;
 666}
 667
 668static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 669				    char *buf, size_t nbytes, loff_t off)
 670{
 671	struct rdtgroup *rdtgrp;
 672	int ret = 0;
 673	pid_t pid;
 674
 675	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 676		return -EINVAL;
 677	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 678	if (!rdtgrp) {
 679		rdtgroup_kn_unlock(of->kn);
 680		return -ENOENT;
 681	}
 682	rdt_last_cmd_clear();
 683
 684	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 685	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 686		ret = -EINVAL;
 687		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 688		goto unlock;
 689	}
 690
 691	ret = rdtgroup_move_task(pid, rdtgrp, of);
 692
 693unlock:
 694	rdtgroup_kn_unlock(of->kn);
 695
 696	return ret ?: nbytes;
 697}
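/*
 * Illustrative usage: moving a task into this resource group from user
 * space (assuming resctrl is mounted at /sys/fs/resctrl):
 *   # echo 1234 > /sys/fs/resctrl/grp1/tasks
 * Writing "0" moves the writing task itself, since a zero pid selects
 * "current" in rdtgroup_move_task().
 */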
 698
 699static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 700{
 701	struct task_struct *p, *t;
 702
 703	rcu_read_lock();
 704	for_each_process_thread(p, t) {
 705		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
 706		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
 707			seq_printf(s, "%d\n", t->pid);
 708	}
 709	rcu_read_unlock();
 710}
 711
 712static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 713			       struct seq_file *s, void *v)
 714{
 715	struct rdtgroup *rdtgrp;
 716	int ret = 0;
 717
 718	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 719	if (rdtgrp)
 720		show_rdt_tasks(rdtgrp, s);
 721	else
 722		ret = -ENOENT;
 723	rdtgroup_kn_unlock(of->kn);
 724
 725	return ret;
 726}
 727
 728static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
 729				    struct seq_file *seq, void *v)
 730{
 731	int len;
 732
 733	mutex_lock(&rdtgroup_mutex);
 734	len = seq_buf_used(&last_cmd_status);
 735	if (len)
 736		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
 737	else
 738		seq_puts(seq, "ok\n");
 739	mutex_unlock(&rdtgroup_mutex);
 740	return 0;
 741}
 742
 743static int rdt_num_closids_show(struct kernfs_open_file *of,
 744				struct seq_file *seq, void *v)
 745{
 746	struct rdt_resource *r = of->kn->parent->priv;
 747
 748	seq_printf(seq, "%d\n", r->num_closid);
 749	return 0;
 750}
 751
 752static int rdt_default_ctrl_show(struct kernfs_open_file *of,
 753			     struct seq_file *seq, void *v)
 754{
 755	struct rdt_resource *r = of->kn->parent->priv;
 756
 757	seq_printf(seq, "%x\n", r->default_ctrl);
 758	return 0;
 759}
 760
 761static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 762			     struct seq_file *seq, void *v)
 763{
 764	struct rdt_resource *r = of->kn->parent->priv;
 765
 766	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
 767	return 0;
 768}
 769
 770static int rdt_shareable_bits_show(struct kernfs_open_file *of,
 771				   struct seq_file *seq, void *v)
 772{
 773	struct rdt_resource *r = of->kn->parent->priv;
 774
 775	seq_printf(seq, "%x\n", r->cache.shareable_bits);
 776	return 0;
 777}
 778
 779/**
 780 * rdt_bit_usage_show - Display current usage of resources
 781 *
 782 * A domain is a shared resource that can now be allocated differently. Here
 783 * we display the current regions of the domain as an annotated bitmask.
 784 * For each domain of this resource its allocation bitmask
 785 * is annotated as below to indicate the current usage of the corresponding bit:
 786 *   0 - currently unused
 787 *   X - currently available for sharing and used by software and hardware
 788 *   H - currently used by hardware only but available for software use
 789 *   S - currently used and shareable by software only
 790 *   E - currently used exclusively by one resource group
 791 *   P - currently pseudo-locked by one resource group
 792 */
 793static int rdt_bit_usage_show(struct kernfs_open_file *of,
 794			      struct seq_file *seq, void *v)
 795{
 796	struct rdt_resource *r = of->kn->parent->priv;
 797	/*
 798	 * Use unsigned long even though only 32 bits are used to ensure
 799	 * test_bit() is used safely.
 800	 */
 801	unsigned long sw_shareable = 0, hw_shareable = 0;
 802	unsigned long exclusive = 0, pseudo_locked = 0;
 803	struct rdt_domain *dom;
 804	int i, hwb, swb, excl, psl;
 805	enum rdtgrp_mode mode;
 806	bool sep = false;
 807	u32 *ctrl;
 808
 809	mutex_lock(&rdtgroup_mutex);
 810	hw_shareable = r->cache.shareable_bits;
 811	list_for_each_entry(dom, &r->domains, list) {
 812		if (sep)
 813			seq_putc(seq, ';');
 814		ctrl = dom->ctrl_val;
 815		sw_shareable = 0;
 816		exclusive = 0;
 817		seq_printf(seq, "%d=", dom->id);
 818		for (i = 0; i < closids_supported(); i++, ctrl++) {
 819			if (!closid_allocated(i))
 820				continue;
 821			mode = rdtgroup_mode_by_closid(i);
 822			switch (mode) {
 823			case RDT_MODE_SHAREABLE:
 824				sw_shareable |= *ctrl;
 825				break;
 826			case RDT_MODE_EXCLUSIVE:
 827				exclusive |= *ctrl;
 828				break;
 829			case RDT_MODE_PSEUDO_LOCKSETUP:
 830			/*
 831			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
 832			 * here but not included since the CBM
 833			 * associated with this CLOSID in this mode
 834			 * is not initialized and no task or cpu can be
 835			 * assigned this CLOSID.
 836			 */
 837				break;
 838			case RDT_MODE_PSEUDO_LOCKED:
 839			case RDT_NUM_MODES:
 840				WARN(1,
 841				     "invalid mode for closid %d\n", i);
 842				break;
 843			}
 844		}
 845		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 846			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
 847			hwb = test_bit(i, &hw_shareable);
 848			swb = test_bit(i, &sw_shareable);
 849			excl = test_bit(i, &exclusive);
 850			psl = test_bit(i, &pseudo_locked);
 851			if (hwb && swb)
 852				seq_putc(seq, 'X');
 853			else if (hwb && !swb)
 854				seq_putc(seq, 'H');
 855			else if (!hwb && swb)
 856				seq_putc(seq, 'S');
 857			else if (excl)
 858				seq_putc(seq, 'E');
 859			else if (psl)
 860				seq_putc(seq, 'P');
 861			else /* Unused bits remain */
 862				seq_putc(seq, '0');
 863		}
 864		sep = true;
 865	}
 866	seq_putc(seq, '\n');
 867	mutex_unlock(&rdtgroup_mutex);
 868	return 0;
 869}
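/*
 * Illustrative output: for an L3 resource with two domains and an
 * 11-bit CBM where only the low eight bits are software-shareable, the
 * file could read:
 *   0=000SSSSSSSS;1=000SSSSSSSS
 * One character per CBM bit, most significant bit first, domains
 * separated by ';'.
 */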
 870
 871static int rdt_min_bw_show(struct kernfs_open_file *of,
 872			     struct seq_file *seq, void *v)
 873{
 874	struct rdt_resource *r = of->kn->parent->priv;
 875
 876	seq_printf(seq, "%u\n", r->membw.min_bw);
 877	return 0;
 878}
 879
 880static int rdt_num_rmids_show(struct kernfs_open_file *of,
 881			      struct seq_file *seq, void *v)
 882{
 883	struct rdt_resource *r = of->kn->parent->priv;
 884
 885	seq_printf(seq, "%d\n", r->num_rmid);
 886
 887	return 0;
 888}
 889
 890static int rdt_mon_features_show(struct kernfs_open_file *of,
 891				 struct seq_file *seq, void *v)
 892{
 893	struct rdt_resource *r = of->kn->parent->priv;
 894	struct mon_evt *mevt;
 895
 896	list_for_each_entry(mevt, &r->evt_list, list)
 897		seq_printf(seq, "%s\n", mevt->name);
 898
 899	return 0;
 900}
 901
 902static int rdt_bw_gran_show(struct kernfs_open_file *of,
 903			     struct seq_file *seq, void *v)
 904{
 905	struct rdt_resource *r = of->kn->parent->priv;
 906
 907	seq_printf(seq, "%u\n", r->membw.bw_gran);
 908	return 0;
 909}
 910
 911static int rdt_delay_linear_show(struct kernfs_open_file *of,
 912			     struct seq_file *seq, void *v)
 913{
 914	struct rdt_resource *r = of->kn->parent->priv;
 915
 916	seq_printf(seq, "%u\n", r->membw.delay_linear);
 917	return 0;
 918}
 919
 920static int max_threshold_occ_show(struct kernfs_open_file *of,
 921				  struct seq_file *seq, void *v)
 922{
 923	struct rdt_resource *r = of->kn->parent->priv;
 924
 925	seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale);
 926
 927	return 0;
 928}
 929
 930static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
 931				       char *buf, size_t nbytes, loff_t off)
 932{
 933	struct rdt_resource *r = of->kn->parent->priv;
 934	unsigned int bytes;
 935	int ret;
 936
 937	ret = kstrtouint(buf, 0, &bytes);
 938	if (ret)
 939		return ret;
 940
 941	if (bytes > (boot_cpu_data.x86_cache_size * 1024))
 942		return -EINVAL;
 943
 944	resctrl_cqm_threshold = bytes / r->mon_scale;
 945
 946	return nbytes;
 947}
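/*
 * Illustrative example: if mon_scale is 1024 (each occupancy counter
 * unit corresponds to 1024 bytes), writing "65536" to
 * max_threshold_occupancy stores resctrl_cqm_threshold = 65536 / 1024
 * = 64 counter units.
 */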
 948
 949/*
 950 * rdtgroup_mode_show - Display mode of this resource group
 951 */
 952static int rdtgroup_mode_show(struct kernfs_open_file *of,
 953			      struct seq_file *s, void *v)
 954{
 955	struct rdtgroup *rdtgrp;
 956
 957	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 958	if (!rdtgrp) {
 959		rdtgroup_kn_unlock(of->kn);
 960		return -ENOENT;
 961	}
 962
 963	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
 964
 965	rdtgroup_kn_unlock(of->kn);
 966	return 0;
 967}
 968
 969/**
 970 * rdt_cdp_peer_get - Retrieve CDP peer if it exists
 971 * @r: RDT resource to which RDT domain @d belongs
 972 * @d: Cache instance for which a CDP peer is requested
 973 * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
 974 *         Used to return the result.
 975 * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
 976 *         Used to return the result.
 977 *
 978 * RDT resources are managed independently and by extension the RDT domains
 979 * (RDT resource instances) are managed independently also. The Code and
 980 * Data Prioritization (CDP) RDT resources, while managed independently,
 981 * could refer to the same underlying hardware. For example,
 982 * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
 983 *
 984 * When provided with an RDT resource @r and an instance of that RDT
  985 * resource @d, rdt_cdp_peer_get() will return whether there is a peer RDT
 986 * resource and the exact instance that shares the same hardware.
 987 *
 988 * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
 989 *         If a CDP peer was found, @r_cdp will point to the peer RDT resource
 990 *         and @d_cdp will point to the peer RDT domain.
 991 */
 992static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
 993			    struct rdt_resource **r_cdp,
 994			    struct rdt_domain **d_cdp)
 995{
 996	struct rdt_resource *_r_cdp = NULL;
 997	struct rdt_domain *_d_cdp = NULL;
 998	int ret = 0;
 999
1000	switch (r->rid) {
1001	case RDT_RESOURCE_L3DATA:
1002		_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
1003		break;
1004	case RDT_RESOURCE_L3CODE:
1005		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L3DATA];
1006		break;
1007	case RDT_RESOURCE_L2DATA:
1008		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2CODE];
1009		break;
1010	case RDT_RESOURCE_L2CODE:
1011		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2DATA];
1012		break;
1013	default:
1014		ret = -ENOENT;
1015		goto out;
1016	}
1017
1018	/*
1019	 * When a new CPU comes online and CDP is enabled then the new
1020	 * RDT domains (if any) associated with both CDP RDT resources
1021	 * are added in the same CPU online routine while the
1022	 * rdtgroup_mutex is held. It should thus not happen for one
1023	 * RDT domain to exist and be associated with its RDT CDP
1024	 * resource but there is no RDT domain associated with the
1025	 * peer RDT CDP resource. Hence the WARN.
1026	 */
1027	_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
1028	if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
1029		_r_cdp = NULL;
1030		ret = -EINVAL;
1031	}
1032
1033out:
1034	*r_cdp = _r_cdp;
1035	*d_cdp = _d_cdp;
1036
1037	return ret;
1038}
1039
1040/**
1041 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1042 * @r: Resource to which domain instance @d belongs.
1043 * @d: The domain instance for which @closid is being tested.
1044 * @cbm: Capacity bitmask being tested.
1045 * @closid: Intended closid for @cbm.
1046 * @exclusive: Only check if overlaps with exclusive resource groups
1047 *
1048 * Checks if provided @cbm intended to be used for @closid on domain
1049 * @d overlaps with any other closids or other hardware usage associated
1050 * with this domain. If @exclusive is true then only overlaps with
1051 * resource groups in exclusive mode will be considered. If @exclusive
1052 * is false then overlaps with any resource group or hardware entities
1053 * will be considered.
1054 *
1055 * @cbm is unsigned long, even if only 32 bits are used, to make the
1056 * bitmap functions work correctly.
1057 *
1058 * Return: false if CBM does not overlap, true if it does.
1059 */
1060static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1061				    unsigned long cbm, int closid, bool exclusive)
1062{
1063	enum rdtgrp_mode mode;
1064	unsigned long ctrl_b;
1065	u32 *ctrl;
1066	int i;
1067
1068	/* Check for any overlap with regions used by hardware directly */
1069	if (!exclusive) {
1070		ctrl_b = r->cache.shareable_bits;
1071		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1072			return true;
1073	}
1074
1075	/* Check for overlap with other resource groups */
1076	ctrl = d->ctrl_val;
1077	for (i = 0; i < closids_supported(); i++, ctrl++) {
1078		ctrl_b = *ctrl;
1079		mode = rdtgroup_mode_by_closid(i);
1080		if (closid_allocated(i) && i != closid &&
1081		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1082			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1083				if (exclusive) {
1084					if (mode == RDT_MODE_EXCLUSIVE)
1085						return true;
1086					continue;
1087				}
1088				return true;
1089			}
1090		}
1091	}
1092
1093	return false;
1094}
1095
1096/**
1097 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1098 * @r: Resource to which domain instance @d belongs.
1099 * @d: The domain instance for which @closid is being tested.
1100 * @cbm: Capacity bitmask being tested.
1101 * @closid: Intended closid for @cbm.
1102 * @exclusive: Only check if overlaps with exclusive resource groups
1103 *
1104 * Resources that can be allocated using a CBM can use the CBM to control
 1105 * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
1106 * for overlap. Overlap test is not limited to the specific resource for
1107 * which the CBM is intended though - when dealing with CDP resources that
1108 * share the underlying hardware the overlap check should be performed on
1109 * the CDP resource sharing the hardware also.
1110 *
1111 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1112 * overlap test.
1113 *
1114 * Return: true if CBM overlap detected, false if there is no overlap
1115 */
1116bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1117			   unsigned long cbm, int closid, bool exclusive)
1118{
1119	struct rdt_resource *r_cdp;
1120	struct rdt_domain *d_cdp;
1121
1122	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
1123		return true;
1124
1125	if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
1126		return false;
1127
1128	return  __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
1129}
1130
1131/**
1132 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1133 *
1134 * An exclusive resource group implies that there should be no sharing of
1135 * its allocated resources. At the time this group is considered to be
1136 * exclusive this test can determine if its current schemata supports this
1137 * setting by testing for overlap with all other resource groups.
1138 *
1139 * Return: true if resource group can be exclusive, false if there is overlap
1140 * with allocations of other resource groups and thus this resource group
1141 * cannot be exclusive.
1142 */
1143static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1144{
1145	int closid = rdtgrp->closid;
1146	struct rdt_resource *r;
1147	bool has_cache = false;
1148	struct rdt_domain *d;
1149
1150	for_each_alloc_enabled_rdt_resource(r) {
1151		if (r->rid == RDT_RESOURCE_MBA)
1152			continue;
1153		has_cache = true;
1154		list_for_each_entry(d, &r->domains, list) {
1155			if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1156						  rdtgrp->closid, false)) {
1157				rdt_last_cmd_puts("Schemata overlaps\n");
1158				return false;
1159			}
1160		}
1161	}
1162
1163	if (!has_cache) {
1164		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1165		return false;
1166	}
1167
1168	return true;
1169}
1170
1171/**
1172 * rdtgroup_mode_write - Modify the resource group's mode
1173 *
1174 */
1175static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1176				   char *buf, size_t nbytes, loff_t off)
1177{
1178	struct rdtgroup *rdtgrp;
1179	enum rdtgrp_mode mode;
1180	int ret = 0;
1181
1182	/* Valid input requires a trailing newline */
1183	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1184		return -EINVAL;
1185	buf[nbytes - 1] = '\0';
1186
1187	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1188	if (!rdtgrp) {
1189		rdtgroup_kn_unlock(of->kn);
1190		return -ENOENT;
1191	}
1192
1193	rdt_last_cmd_clear();
1194
1195	mode = rdtgrp->mode;
1196
1197	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1198	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1199	    (!strcmp(buf, "pseudo-locksetup") &&
1200	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1201	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1202		goto out;
1203
1204	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1205		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1206		ret = -EINVAL;
1207		goto out;
1208	}
1209
1210	if (!strcmp(buf, "shareable")) {
1211		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1212			ret = rdtgroup_locksetup_exit(rdtgrp);
1213			if (ret)
1214				goto out;
1215		}
1216		rdtgrp->mode = RDT_MODE_SHAREABLE;
1217	} else if (!strcmp(buf, "exclusive")) {
1218		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1219			ret = -EINVAL;
1220			goto out;
1221		}
1222		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1223			ret = rdtgroup_locksetup_exit(rdtgrp);
1224			if (ret)
1225				goto out;
1226		}
1227		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1228	} else if (!strcmp(buf, "pseudo-locksetup")) {
1229		ret = rdtgroup_locksetup_enter(rdtgrp);
1230		if (ret)
1231			goto out;
1232		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1233	} else {
1234		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1235		ret = -EINVAL;
1236	}
1237
1238out:
1239	rdtgroup_kn_unlock(of->kn);
1240	return ret ?: nbytes;
1241}
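/*
 * Illustrative usage: switching a group to exclusive mode from user
 * space (assuming resctrl is mounted at /sys/fs/resctrl):
 *   # echo exclusive > /sys/fs/resctrl/grp1/mode
 * The write fails, with "Schemata overlaps" left in last_cmd_status, if
 * the group's current schemata overlaps another group's allocations.
 */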
1242
1243/**
1244 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1245 * @r: RDT resource to which @d belongs.
1246 * @d: RDT domain instance.
1247 * @cbm: bitmask for which the size should be computed.
1248 *
 1249 * The bitmask provided, associated with the RDT domain instance @d, will be
1250 * translated into how many bytes it represents. The size in bytes is
1251 * computed by first dividing the total cache size by the CBM length to
1252 * determine how many bytes each bit in the bitmask represents. The result
1253 * is multiplied with the number of bits set in the bitmask.
1254 *
 1255 * @cbm is unsigned long, even if only 32 bits are used, to make the
1256 * bitmap functions work correctly.
1257 */
1258unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1259				  struct rdt_domain *d, unsigned long cbm)
1260{
1261	struct cpu_cacheinfo *ci;
1262	unsigned int size = 0;
1263	int num_b, i;
1264
1265	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1266	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1267	for (i = 0; i < ci->num_leaves; i++) {
1268		if (ci->info_list[i].level == r->cache_level) {
1269			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1270			break;
1271		}
1272	}
1273
1274	return size;
1275}
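/*
 * Worked example (illustrative): for a 16 MiB (16777216 byte) L3 cache
 * with a 16-bit CBM, each CBM bit represents 16777216 / 16 = 1 MiB, so
 * a bitmask with four bits set translates to 4 MiB. Note the integer
 * division happens before the multiplication by the bit count.
 */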
1276
1277/**
1278 * rdtgroup_size_show - Display size in bytes of allocated regions
1279 *
1280 * The "size" file mirrors the layout of the "schemata" file, printing the
1281 * size in bytes of each region instead of the capacity bitmask.
1282 *
1283 */
1284static int rdtgroup_size_show(struct kernfs_open_file *of,
1285			      struct seq_file *s, void *v)
1286{
1287	struct rdtgroup *rdtgrp;
1288	struct rdt_resource *r;
1289	struct rdt_domain *d;
1290	unsigned int size;
1291	int ret = 0;
1292	bool sep;
1293	u32 ctrl;
1294
1295	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1296	if (!rdtgrp) {
1297		rdtgroup_kn_unlock(of->kn);
1298		return -ENOENT;
1299	}
1300
1301	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1302		if (!rdtgrp->plr->d) {
1303			rdt_last_cmd_clear();
1304			rdt_last_cmd_puts("Cache domain offline\n");
1305			ret = -ENODEV;
1306		} else {
1307			seq_printf(s, "%*s:", max_name_width,
1308				   rdtgrp->plr->r->name);
1309			size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
1310						    rdtgrp->plr->d,
1311						    rdtgrp->plr->cbm);
1312			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1313		}
1314		goto out;
1315	}
1316
1317	for_each_alloc_enabled_rdt_resource(r) {
1318		sep = false;
1319		seq_printf(s, "%*s:", max_name_width, r->name);
1320		list_for_each_entry(d, &r->domains, list) {
1321			if (sep)
1322				seq_putc(s, ';');
1323			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1324				size = 0;
1325			} else {
1326				ctrl = (!is_mba_sc(r) ?
1327						d->ctrl_val[rdtgrp->closid] :
1328						d->mbps_val[rdtgrp->closid]);
1329				if (r->rid == RDT_RESOURCE_MBA)
1330					size = ctrl;
1331				else
1332					size = rdtgroup_cbm_to_size(r, d, ctrl);
1333			}
1334			seq_printf(s, "%d=%u", d->id, size);
1335			sep = true;
1336		}
1337		seq_putc(s, '\n');
1338	}
1339
1340out:
1341	rdtgroup_kn_unlock(of->kn);
1342
1343	return ret;
1344}
1345
1346/* rdtgroup information files for one cache resource. */
1347static struct rftype res_common_files[] = {
1348	{
1349		.name		= "last_cmd_status",
1350		.mode		= 0444,
1351		.kf_ops		= &rdtgroup_kf_single_ops,
1352		.seq_show	= rdt_last_cmd_status_show,
1353		.fflags		= RF_TOP_INFO,
1354	},
1355	{
1356		.name		= "num_closids",
1357		.mode		= 0444,
1358		.kf_ops		= &rdtgroup_kf_single_ops,
1359		.seq_show	= rdt_num_closids_show,
1360		.fflags		= RF_CTRL_INFO,
1361	},
1362	{
1363		.name		= "mon_features",
1364		.mode		= 0444,
1365		.kf_ops		= &rdtgroup_kf_single_ops,
1366		.seq_show	= rdt_mon_features_show,
1367		.fflags		= RF_MON_INFO,
1368	},
1369	{
1370		.name		= "num_rmids",
1371		.mode		= 0444,
1372		.kf_ops		= &rdtgroup_kf_single_ops,
1373		.seq_show	= rdt_num_rmids_show,
1374		.fflags		= RF_MON_INFO,
1375	},
1376	{
1377		.name		= "cbm_mask",
1378		.mode		= 0444,
1379		.kf_ops		= &rdtgroup_kf_single_ops,
1380		.seq_show	= rdt_default_ctrl_show,
1381		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1382	},
1383	{
1384		.name		= "min_cbm_bits",
1385		.mode		= 0444,
1386		.kf_ops		= &rdtgroup_kf_single_ops,
1387		.seq_show	= rdt_min_cbm_bits_show,
1388		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1389	},
1390	{
1391		.name		= "shareable_bits",
1392		.mode		= 0444,
1393		.kf_ops		= &rdtgroup_kf_single_ops,
1394		.seq_show	= rdt_shareable_bits_show,
1395		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1396	},
1397	{
1398		.name		= "bit_usage",
1399		.mode		= 0444,
1400		.kf_ops		= &rdtgroup_kf_single_ops,
1401		.seq_show	= rdt_bit_usage_show,
1402		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1403	},
1404	{
1405		.name		= "min_bandwidth",
1406		.mode		= 0444,
1407		.kf_ops		= &rdtgroup_kf_single_ops,
1408		.seq_show	= rdt_min_bw_show,
1409		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1410	},
1411	{
1412		.name		= "bandwidth_gran",
1413		.mode		= 0444,
1414		.kf_ops		= &rdtgroup_kf_single_ops,
1415		.seq_show	= rdt_bw_gran_show,
1416		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1417	},
1418	{
1419		.name		= "delay_linear",
1420		.mode		= 0444,
1421		.kf_ops		= &rdtgroup_kf_single_ops,
1422		.seq_show	= rdt_delay_linear_show,
1423		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1424	},
1425	{
1426		.name		= "max_threshold_occupancy",
1427		.mode		= 0644,
1428		.kf_ops		= &rdtgroup_kf_single_ops,
1429		.write		= max_threshold_occ_write,
1430		.seq_show	= max_threshold_occ_show,
1431		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
1432	},
1433	{
1434		.name		= "cpus",
1435		.mode		= 0644,
1436		.kf_ops		= &rdtgroup_kf_single_ops,
1437		.write		= rdtgroup_cpus_write,
1438		.seq_show	= rdtgroup_cpus_show,
1439		.fflags		= RFTYPE_BASE,
1440	},
1441	{
1442		.name		= "cpus_list",
1443		.mode		= 0644,
1444		.kf_ops		= &rdtgroup_kf_single_ops,
1445		.write		= rdtgroup_cpus_write,
1446		.seq_show	= rdtgroup_cpus_show,
1447		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1448		.fflags		= RFTYPE_BASE,
1449	},
1450	{
1451		.name		= "tasks",
1452		.mode		= 0644,
1453		.kf_ops		= &rdtgroup_kf_single_ops,
1454		.write		= rdtgroup_tasks_write,
1455		.seq_show	= rdtgroup_tasks_show,
1456		.fflags		= RFTYPE_BASE,
1457	},
1458	{
1459		.name		= "schemata",
1460		.mode		= 0644,
1461		.kf_ops		= &rdtgroup_kf_single_ops,
1462		.write		= rdtgroup_schemata_write,
1463		.seq_show	= rdtgroup_schemata_show,
1464		.fflags		= RF_CTRL_BASE,
1465	},
1466	{
1467		.name		= "mode",
1468		.mode		= 0644,
1469		.kf_ops		= &rdtgroup_kf_single_ops,
1470		.write		= rdtgroup_mode_write,
1471		.seq_show	= rdtgroup_mode_show,
1472		.fflags		= RF_CTRL_BASE,
1473	},
1474	{
1475		.name		= "size",
1476		.mode		= 0444,
1477		.kf_ops		= &rdtgroup_kf_single_ops,
1478		.seq_show	= rdtgroup_size_show,
1479		.fflags		= RF_CTRL_BASE,
1480	},
1481
1482};
1483
1484static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1485{
1486	struct rftype *rfts, *rft;
1487	int ret, len;
1488
1489	rfts = res_common_files;
1490	len = ARRAY_SIZE(res_common_files);
1491
1492	lockdep_assert_held(&rdtgroup_mutex);
1493
1494	for (rft = rfts; rft < rfts + len; rft++) {
1495		if ((fflags & rft->fflags) == rft->fflags) {
1496			ret = rdtgroup_add_file(kn, rft);
1497			if (ret)
1498				goto error;
1499		}
1500	}
1501
1502	return 0;
1503error:
1504	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
1505	while (--rft >= rfts) {
1506		if ((fflags & rft->fflags) == rft->fflags)
1507			kernfs_remove_by_name(kn, rft->name);
1508	}
1509	return ret;
1510}
1511
1512/**
1513 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1514 * @r: The resource group with which the file is associated.
1515 * @name: Name of the file
1516 *
1517 * The permissions of named resctrl file, directory, or link are modified
1518 * to not allow read, write, or execute by any user.
1519 *
1520 * WARNING: This function is intended to communicate to the user that the
1521 * resctrl file has been locked down - that it is not relevant to the
1522 * particular state the system finds itself in. It should not be relied
1523 * on to protect from user access because after the file's permissions
1524 * are restricted the user can still change the permissions using chmod
1525 * from the command line.
1526 *
1527 * Return: 0 on success, <0 on failure.
1528 */
1529int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1530{
1531	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1532	struct kernfs_node *kn;
1533	int ret = 0;
1534
1535	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1536	if (!kn)
1537		return -ENOENT;
1538
1539	switch (kernfs_type(kn)) {
1540	case KERNFS_DIR:
1541		iattr.ia_mode = S_IFDIR;
1542		break;
1543	case KERNFS_FILE:
1544		iattr.ia_mode = S_IFREG;
1545		break;
1546	case KERNFS_LINK:
1547		iattr.ia_mode = S_IFLNK;
1548		break;
1549	}
1550
1551	ret = kernfs_setattr(kn, &iattr);
1552	kernfs_put(kn);
1553	return ret;
1554}
1555
1556/**
1557 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1558 * @r: The resource group with which the file is associated.
1559 * @name: Name of the file
1560 * @mask: Mask of permissions that should be restored
1561 *
1562 * Restore the permissions of the named file. If @name is a directory the
1563 * permissions of its parent will be used.
1564 *
1565 * Return: 0 on success, <0 on failure.
1566 */
1567int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1568			     umode_t mask)
1569{
1570	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1571	struct kernfs_node *kn, *parent;
1572	struct rftype *rfts, *rft;
1573	int ret, len;
1574
1575	rfts = res_common_files;
1576	len = ARRAY_SIZE(res_common_files);
1577
1578	for (rft = rfts; rft < rfts + len; rft++) {
1579		if (!strcmp(rft->name, name))
1580			iattr.ia_mode = rft->mode & mask;
1581	}
1582
1583	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1584	if (!kn)
1585		return -ENOENT;
1586
1587	switch (kernfs_type(kn)) {
1588	case KERNFS_DIR:
1589		parent = kernfs_get_parent(kn);
1590		if (parent) {
1591			iattr.ia_mode |= parent->mode;
1592			kernfs_put(parent);
1593		}
1594		iattr.ia_mode |= S_IFDIR;
1595		break;
1596	case KERNFS_FILE:
1597		iattr.ia_mode |= S_IFREG;
1598		break;
1599	case KERNFS_LINK:
1600		iattr.ia_mode |= S_IFLNK;
1601		break;
1602	}
1603
1604	ret = kernfs_setattr(kn, &iattr);
1605	kernfs_put(kn);
1606	return ret;
1607}
1608
1609static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
1610				      unsigned long fflags)
1611{
1612	struct kernfs_node *kn_subdir;
1613	int ret;
1614
1615	kn_subdir = kernfs_create_dir(kn_info, name,
1616				      kn_info->mode, r);
1617	if (IS_ERR(kn_subdir))
1618		return PTR_ERR(kn_subdir);
1619
1620	kernfs_get(kn_subdir);
1621	ret = rdtgroup_kn_set_ugid(kn_subdir);
1622	if (ret)
1623		return ret;
1624
1625	ret = rdtgroup_add_files(kn_subdir, fflags);
1626	if (!ret)
1627		kernfs_activate(kn_subdir);
1628
1629	return ret;
1630}
1631
1632static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
1633{
1634	struct rdt_resource *r;
1635	unsigned long fflags;
1636	char name[32];
1637	int ret;
1638
1639	/* create the directory */
1640	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
1641	if (IS_ERR(kn_info))
1642		return PTR_ERR(kn_info);
1643	kernfs_get(kn_info);
1644
1645	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
1646	if (ret)
1647		goto out_destroy;
1648
1649	for_each_alloc_enabled_rdt_resource(r) {
1650		fflags =  r->fflags | RF_CTRL_INFO;
1651		ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
1652		if (ret)
1653			goto out_destroy;
1654	}
1655
1656	for_each_mon_enabled_rdt_resource(r) {
1657		fflags =  r->fflags | RF_MON_INFO;
1658		sprintf(name, "%s_MON", r->name);
1659		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
1660		if (ret)
1661			goto out_destroy;
1662	}
1663
1664	/*
1665	 * This extra ref will be put in kernfs_remove() and guarantees
1666	 * that @rdtgrp->kn is always accessible.
1667	 */
1668	kernfs_get(kn_info);
1669
1670	ret = rdtgroup_kn_set_ugid(kn_info);
1671	if (ret)
1672		goto out_destroy;
1673
1674	kernfs_activate(kn_info);
1675
1676	return 0;
1677
1678out_destroy:
1679	kernfs_remove(kn_info);
1680	return ret;
1681}
1682
1683static int
1684mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
1685		    char *name, struct kernfs_node **dest_kn)
1686{
1687	struct kernfs_node *kn;
1688	int ret;
1689
1690	/* create the directory */
1691	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
1692	if (IS_ERR(kn))
1693		return PTR_ERR(kn);
1694
1695	if (dest_kn)
1696		*dest_kn = kn;
1697
1698	/*
1699	 * This extra ref will be put in kernfs_remove() and guarantees
1700	 * that @rdtgrp->kn is always accessible.
1701	 */
1702	kernfs_get(kn);
1703
1704	ret = rdtgroup_kn_set_ugid(kn);
1705	if (ret)
1706		goto out_destroy;
1707
1708	kernfs_activate(kn);
1709
1710	return 0;
1711
1712out_destroy:
1713	kernfs_remove(kn);
1714	return ret;
1715}
1716
1717static void l3_qos_cfg_update(void *arg)
1718{
1719	bool *enable = arg;
1720
1721	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
1722}
1723
1724static void l2_qos_cfg_update(void *arg)
1725{
1726	bool *enable = arg;
1727
1728	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
1729}
1730
1731static inline bool is_mba_linear(void)
1732{
1733	return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
1734}
1735
1736static int set_cache_qos_cfg(int level, bool enable)
1737{
1738	void (*update)(void *arg);
1739	struct rdt_resource *r_l;
1740	cpumask_var_t cpu_mask;
1741	struct rdt_domain *d;
1742	int cpu;
1743
1744	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1745		return -ENOMEM;
1746
1747	if (level == RDT_RESOURCE_L3)
1748		update = l3_qos_cfg_update;
1749	else if (level == RDT_RESOURCE_L2)
1750		update = l2_qos_cfg_update;
1751	else
1752		return -EINVAL;
1753
 1753
1754	r_l = &rdt_resources_all[level];
1755	list_for_each_entry(d, &r_l->domains, list) {
1756		/* Pick one CPU from each domain instance to update MSR */
1757		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
1758	}
1759	cpu = get_cpu();
1760	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
1761	if (cpumask_test_cpu(cpu, cpu_mask))
1762		update(&enable);
1763	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
1764	smp_call_function_many(cpu_mask, update, &enable, 1);
1765	put_cpu();
1766
1767	free_cpumask_var(cpu_mask);
1768
1769	return 0;
1770}
1771
1772/*
1773 * Enable or disable the MBA software controller
 1774 * which helps the user specify bandwidth in MBps.
 1775 * The MBA software controller is supported only if
 1776 * MBM is supported and MBA is in linear scale.
1777 */
1778static int set_mba_sc(bool mba_sc)
1779{
1780	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
1781	struct rdt_domain *d;
1782
1783	if (!is_mbm_enabled() || !is_mba_linear() ||
1784	    mba_sc == is_mba_sc(r))
1785		return -EINVAL;
1786
1787	r->membw.mba_sc = mba_sc;
1788	list_for_each_entry(d, &r->domains, list)
1789		setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
1790
1791	return 0;
1792}
1793
1794static int cdp_enable(int level, int data_type, int code_type)
1795{
1796	struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
1797	struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
1798	struct rdt_resource *r_l = &rdt_resources_all[level];
1799	int ret;
1800
1801	if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
1802	    !r_lcode->alloc_capable)
1803		return -EINVAL;
1804
1805	ret = set_cache_qos_cfg(level, true);
1806	if (!ret) {
1807		r_l->alloc_enabled = false;
1808		r_ldata->alloc_enabled = true;
1809		r_lcode->alloc_enabled = true;
1810	}
1811	return ret;
1812}
1813
1814static int cdpl3_enable(void)
1815{
1816	return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
1817			  RDT_RESOURCE_L3CODE);
1818}
1819
1820static int cdpl2_enable(void)
1821{
1822	return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
1823			  RDT_RESOURCE_L2CODE);
1824}
1825
1826static void cdp_disable(int level, int data_type, int code_type)
1827{
1828	struct rdt_resource *r = &rdt_resources_all[level];
1829
1830	r->alloc_enabled = r->alloc_capable;
1831
1832	if (rdt_resources_all[data_type].alloc_enabled) {
1833		rdt_resources_all[data_type].alloc_enabled = false;
1834		rdt_resources_all[code_type].alloc_enabled = false;
1835		set_cache_qos_cfg(level, false);
1836	}
1837}
1838
1839static void cdpl3_disable(void)
1840{
1841	cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
1842}
1843
1844static void cdpl2_disable(void)
1845{
1846	cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
1847}
1848
1849static void cdp_disable_all(void)
1850{
1851	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
1852		cdpl3_disable();
1853	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
1854		cdpl2_disable();
1855}
1856
1857/*
1858 * We don't allow rdtgroup directories to be created anywhere
1859 * except the root directory. Thus when looking for the rdtgroup
1860 * structure for a kernfs node we are either looking at a directory,
1861 * in which case the rdtgroup structure is pointed at by the "priv"
 1862 * field, or at a file, in which case we need only look to the parent
1863 * to find the rdtgroup.
1864 */
1865static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
1866{
1867	if (kernfs_type(kn) == KERNFS_DIR) {
1868		/*
1869		 * All the resource directories use "kn->priv"
1870		 * to point to the "struct rdtgroup" for the
1871		 * resource. "info" and its subdirectories don't
1872		 * have rdtgroup structures, so return NULL here.
1873		 */
1874		if (kn == kn_info || kn->parent == kn_info)
1875			return NULL;
1876		else
1877			return kn->priv;
1878	} else {
1879		return kn->parent->priv;
1880	}
1881}
1882
1883struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
1884{
1885	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1886
1887	if (!rdtgrp)
1888		return NULL;
1889
1890	atomic_inc(&rdtgrp->waitcount);
1891	kernfs_break_active_protection(kn);
1892
1893	mutex_lock(&rdtgroup_mutex);
1894
1895	/* Was this group deleted while we waited? */
1896	if (rdtgrp->flags & RDT_DELETED)
1897		return NULL;
1898
1899	return rdtgrp;
1900}
1901
1902void rdtgroup_kn_unlock(struct kernfs_node *kn)
1903{
1904	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1905
1906	if (!rdtgrp)
1907		return;
1908
1909	mutex_unlock(&rdtgroup_mutex);
1910
1911	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
1912	    (rdtgrp->flags & RDT_DELETED)) {
1913		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
1914		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
1915			rdtgroup_pseudo_lock_remove(rdtgrp);
1916		kernfs_unbreak_active_protection(kn);
1917		kernfs_put(rdtgrp->kn);
1918		kfree(rdtgrp);
1919	} else {
1920		kernfs_unbreak_active_protection(kn);
1921	}
1922}
1923
1924static int mkdir_mondata_all(struct kernfs_node *parent_kn,
1925			     struct rdtgroup *prgrp,
1926			     struct kernfs_node **mon_data_kn);
1927
1928static int rdt_enable_ctx(struct rdt_fs_context *ctx)
1929{
1930	int ret = 0;
1931
1932	if (ctx->enable_cdpl2)
1933		ret = cdpl2_enable();
1934
1935	if (!ret && ctx->enable_cdpl3)
1936		ret = cdpl3_enable();
1937
1938	if (!ret && ctx->enable_mba_mbps)
1939		ret = set_mba_sc(true);
1940
1941	return ret;
1942}
1943
1944static int rdt_get_tree(struct fs_context *fc)
1945{
1946	struct rdt_fs_context *ctx = rdt_fc2context(fc);
1947	struct rdt_domain *dom;
1948	struct rdt_resource *r;
1949	int ret;
1950
1951	cpus_read_lock();
1952	mutex_lock(&rdtgroup_mutex);
1953	/*
1954	 * resctrl file system can only be mounted once.
1955	 */
1956	if (static_branch_unlikely(&rdt_enable_key)) {
1957		ret = -EBUSY;
1958		goto out;
1959	}
1960
1961	ret = rdt_enable_ctx(ctx);
1962	if (ret < 0)
1963		goto out_cdp;
1964
1965	closid_init();
1966
1967	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
1968	if (ret < 0)
1969		goto out_mba;
1970
1971	if (rdt_mon_capable) {
1972		ret = mongroup_create_dir(rdtgroup_default.kn,
1973					  NULL, "mon_groups",
1974					  &kn_mongrp);
1975		if (ret < 0)
1976			goto out_info;
1977		kernfs_get(kn_mongrp);
1978
1979		ret = mkdir_mondata_all(rdtgroup_default.kn,
1980					&rdtgroup_default, &kn_mondata);
1981		if (ret < 0)
1982			goto out_mongrp;
1983		kernfs_get(kn_mondata);
1984		rdtgroup_default.mon.mon_data_kn = kn_mondata;
1985	}
1986
1987	ret = rdt_pseudo_lock_init();
1988	if (ret)
1989		goto out_mondata;
1990
1991	ret = kernfs_get_tree(fc);
1992	if (ret < 0)
1993		goto out_psl;
1994
1995	if (rdt_alloc_capable)
1996		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
1997	if (rdt_mon_capable)
1998		static_branch_enable_cpuslocked(&rdt_mon_enable_key);
1999
2000	if (rdt_alloc_capable || rdt_mon_capable)
2001		static_branch_enable_cpuslocked(&rdt_enable_key);
2002
2003	if (is_mbm_enabled()) {
2004		r = &rdt_resources_all[RDT_RESOURCE_L3];
2005		list_for_each_entry(dom, &r->domains, list)
2006			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
2007	}
2008
2009	goto out;
2010
2011out_psl:
2012	rdt_pseudo_lock_release();
2013out_mondata:
2014	if (rdt_mon_capable)
2015		kernfs_remove(kn_mondata);
2016out_mongrp:
2017	if (rdt_mon_capable)
2018		kernfs_remove(kn_mongrp);
2019out_info:
2020	kernfs_remove(kn_info);
2021out_mba:
2022	if (ctx->enable_mba_mbps)
2023		set_mba_sc(false);
2024out_cdp:
2025	cdp_disable_all();
2026out:
2027	rdt_last_cmd_clear();
2028	mutex_unlock(&rdtgroup_mutex);
2029	cpus_read_unlock();
2030	return ret;
2031}
2032
2033enum rdt_param {
2034	Opt_cdp,
2035	Opt_cdpl2,
2036	Opt_mba_mbps,
2037	nr__rdt_params
2038};
2039
2040static const struct fs_parameter_spec rdt_param_specs[] = {
2041	fsparam_flag("cdp",		Opt_cdp),
2042	fsparam_flag("cdpl2",		Opt_cdpl2),
2043	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2044	{}
2045};
2046
2047static const struct fs_parameter_description rdt_fs_parameters = {
2048	.name		= "rdt",
2049	.specs		= rdt_param_specs,
2050};
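/*
 * For illustration only: the flags above correspond to the resctrl mount
 * options, e.g.
 *
 *	# mount -t resctrl resctrl -o cdp,mba_MBps /sys/fs/resctrl
 *
 * "cdp" enables L3 code/data prioritization, "cdpl2" does the same for L2,
 * and "mba_MBps" selects the software MBA controller operating in MBps.
 */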
2051
2052static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2053{
2054	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2055	struct fs_parse_result result;
2056	int opt;
2057
2058	opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
2059	if (opt < 0)
2060		return opt;
2061
2062	switch (opt) {
2063	case Opt_cdp:
2064		ctx->enable_cdpl3 = true;
2065		return 0;
2066	case Opt_cdpl2:
2067		ctx->enable_cdpl2 = true;
2068		return 0;
2069	case Opt_mba_mbps:
2070		if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
2071			return -EINVAL;
2072		ctx->enable_mba_mbps = true;
2073		return 0;
2074	}
2075
2076	return -EINVAL;
2077}
2078
2079static void rdt_fs_context_free(struct fs_context *fc)
2080{
2081	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2082
2083	kernfs_free_fs_context(fc);
2084	kfree(ctx);
2085}
2086
2087static const struct fs_context_operations rdt_fs_context_ops = {
2088	.free		= rdt_fs_context_free,
2089	.parse_param	= rdt_parse_param,
2090	.get_tree	= rdt_get_tree,
2091};
2092
2093static int rdt_init_fs_context(struct fs_context *fc)
2094{
2095	struct rdt_fs_context *ctx;
2096
2097	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2098	if (!ctx)
2099		return -ENOMEM;
2100
2101	ctx->kfc.root = rdt_root;
2102	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2103	fc->fs_private = &ctx->kfc;
2104	fc->ops = &rdt_fs_context_ops;
2105	put_user_ns(fc->user_ns);
2106	fc->user_ns = get_user_ns(&init_user_ns);
2107	fc->global = true;
2108	return 0;
2109}
2110
2111static int reset_all_ctrls(struct rdt_resource *r)
2112{
2113	struct msr_param msr_param;
2114	cpumask_var_t cpu_mask;
2115	struct rdt_domain *d;
2116	int i, cpu;
2117
2118	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2119		return -ENOMEM;
2120
2121	msr_param.res = r;
2122	msr_param.low = 0;
2123	msr_param.high = r->num_closid;
2124
2125	/*
2126	 * Disable resource control for this resource by setting all
2127	 * CBMs in all domains to the maximum mask value. Pick one CPU
2128	 * from each domain to update the MSRs below.
2129	 */
2130	list_for_each_entry(d, &r->domains, list) {
2131		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
2132
2133		for (i = 0; i < r->num_closid; i++)
2134			d->ctrl_val[i] = r->default_ctrl;
2135	}
2136	cpu = get_cpu();
2137	/* Update CBM on this cpu if it's in cpu_mask. */
2138	if (cpumask_test_cpu(cpu, cpu_mask))
2139		rdt_ctrl_update(&msr_param);
2140	/* Update CBM on all other cpus in cpu_mask. */
2141	smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
2142	put_cpu();
2143
2144	free_cpumask_var(cpu_mask);
2145
2146	return 0;
2147}
2148
2149static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
2150{
2151	return (rdt_alloc_capable &&
2152		(r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
2153}
2154
2155static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
2156{
2157	return (rdt_mon_capable &&
2158		(r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
2159}
2160
2161/*
2162 * Move tasks from one to the other group. If @from is NULL, then all tasks
2163 * in the system are moved unconditionally (used for teardown).
2164 *
2165 * If @mask is not NULL the cpus on which moved tasks are running are set
2166 * in that mask so the update smp function call is restricted to affected
2167 * cpus.
2168 */
2169static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2170				 struct cpumask *mask)
2171{
2172	struct task_struct *p, *t;
2173
2174	read_lock(&tasklist_lock);
2175	for_each_process_thread(p, t) {
2176		if (!from || is_closid_match(t, from) ||
2177		    is_rmid_match(t, from)) {
2178			t->closid = to->closid;
2179			t->rmid = to->mon.rmid;
2180
2181#ifdef CONFIG_SMP
2182			/*
2183			 * This is safe on x86 w/o barriers as the ordering
2184			 * of writing to task_cpu() and t->on_cpu is
2185			 * reverse to the reading here. The detection is
2186			 * inaccurate as tasks might move or schedule
2187			 * before the smp function call takes place. In
2188			 * such a case the function call is pointless, but
2189			 * there is no other side effect.
2190			 */
2191			if (mask && t->on_cpu)
2192				cpumask_set_cpu(task_cpu(t), mask);
2193#endif
2194		}
2195	}
2196	read_unlock(&tasklist_lock);
2197}
2198
2199static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2200{
2201	struct rdtgroup *sentry, *stmp;
2202	struct list_head *head;
2203
2204	head = &rdtgrp->mon.crdtgrp_list;
2205	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2206		free_rmid(sentry->mon.rmid);
2207		list_del(&sentry->mon.crdtgrp_list);
2208		kfree(sentry);
2209	}
2210}
2211
2212/*
2213 * Forcibly remove all subdirectories under root.
2214 */
2215static void rmdir_all_sub(void)
2216{
2217	struct rdtgroup *rdtgrp, *tmp;
2218
2219	/* Move all tasks to the default resource group */
2220	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2221
2222	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2223		/* Free any child rmids */
2224		free_all_child_rdtgrp(rdtgrp);
2225
2226		/* Remove each rdtgroup other than root */
2227		if (rdtgrp == &rdtgroup_default)
2228			continue;
2229
2230		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2231		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2232			rdtgroup_pseudo_lock_remove(rdtgrp);
2233
2234		/*
2235		 * Give any CPUs back to the default group. We cannot copy
2236		 * cpu_online_mask because a CPU might have executed the
2237		 * offline callback already, but is still marked online.
2238		 */
2239		cpumask_or(&rdtgroup_default.cpu_mask,
2240			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2241
2242		free_rmid(rdtgrp->mon.rmid);
2243
2244		kernfs_remove(rdtgrp->kn);
2245		list_del(&rdtgrp->rdtgroup_list);
2246		kfree(rdtgrp);
2247	}
2248	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2249	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2250
2251	kernfs_remove(kn_info);
2252	kernfs_remove(kn_mongrp);
2253	kernfs_remove(kn_mondata);
2254}
2255
2256static void rdt_kill_sb(struct super_block *sb)
2257{
2258	struct rdt_resource *r;
2259
2260	cpus_read_lock();
2261	mutex_lock(&rdtgroup_mutex);
2262
2263	set_mba_sc(false);
2264
2265	/* Put everything back to default values. */
2266	for_each_alloc_enabled_rdt_resource(r)
2267		reset_all_ctrls(r);
2268	cdp_disable_all();
2269	rmdir_all_sub();
2270	rdt_pseudo_lock_release();
2271	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
2272	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
2273	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
2274	static_branch_disable_cpuslocked(&rdt_enable_key);
2275	kernfs_kill_sb(sb);
2276	mutex_unlock(&rdtgroup_mutex);
2277	cpus_read_unlock();
2278}
2279
2280static struct file_system_type rdt_fs_type = {
2281	.name			= "resctrl",
2282	.init_fs_context	= rdt_init_fs_context,
2283	.parameters		= &rdt_fs_parameters,
2284	.kill_sb		= rdt_kill_sb,
2285};
2286
2287static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2288		       void *priv)
2289{
2290	struct kernfs_node *kn;
2291	int ret = 0;
2292
2293	kn = __kernfs_create_file(parent_kn, name, 0444,
2294				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2295				  &kf_mondata_ops, priv, NULL, NULL);
2296	if (IS_ERR(kn))
2297		return PTR_ERR(kn);
2298
2299	ret = rdtgroup_kn_set_ugid(kn);
2300	if (ret) {
2301		kernfs_remove(kn);
2302		return ret;
2303	}
2304
2305	return ret;
2306}
2307
2308/*
2309 * Remove all subdirectories of mon_data of ctrl_mon groups
2310 * and monitor groups with given domain id.
2311 */
2312void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
2313{
2314	struct rdtgroup *prgrp, *crgrp;
2315	char name[32];
2316
2317	if (!r->mon_enabled)
2318		return;
2319
2320	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2321		sprintf(name, "mon_%s_%02d", r->name, dom_id);
2322		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
2323
2324		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
2325			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
2326	}
2327}
2328
2329static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
2330				struct rdt_domain *d,
2331				struct rdt_resource *r, struct rdtgroup *prgrp)
2332{
2333	union mon_data_bits priv;
2334	struct kernfs_node *kn;
2335	struct mon_evt *mevt;
2336	struct rmid_read rr;
2337	char name[32];
2338	int ret;
2339
2340	sprintf(name, "mon_%s_%02d", r->name, d->id);
2341	/* create the directory */
2342	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2343	if (IS_ERR(kn))
2344		return PTR_ERR(kn);
2345
2346	/*
2347	 * This extra ref will be put in kernfs_remove() and guarantees
2348	 * that kn is always accessible.
2349	 */
2350	kernfs_get(kn);
2351	ret = rdtgroup_kn_set_ugid(kn);
2352	if (ret)
2353		goto out_destroy;
2354
2355	if (WARN_ON(list_empty(&r->evt_list))) {
2356		ret = -EPERM;
2357		goto out_destroy;
2358	}
2359
2360	priv.u.rid = r->rid;
2361	priv.u.domid = d->id;
2362	list_for_each_entry(mevt, &r->evt_list, list) {
2363		priv.u.evtid = mevt->evtid;
2364		ret = mon_addfile(kn, mevt->name, priv.priv);
2365		if (ret)
2366			goto out_destroy;
2367
2368		if (is_mbm_event(mevt->evtid))
2369			mon_event_read(&rr, d, prgrp, mevt->evtid, true);
2370	}
2371	kernfs_activate(kn);
2372	return 0;
2373
2374out_destroy:
2375	kernfs_remove(kn);
2376	return ret;
2377}
2378
2379/*
2380 * Add all subdirectories of mon_data for "ctrl_mon" groups
2381 * and "monitor" groups with given domain id.
2382 */
2383void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2384				    struct rdt_domain *d)
2385{
2386	struct kernfs_node *parent_kn;
2387	struct rdtgroup *prgrp, *crgrp;
2388	struct list_head *head;
2389
2390	if (!r->mon_enabled)
2391		return;
2392
2393	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2394		parent_kn = prgrp->mon.mon_data_kn;
2395		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
2396
2397		head = &prgrp->mon.crdtgrp_list;
2398		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
2399			parent_kn = crgrp->mon.mon_data_kn;
2400			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
2401		}
2402	}
2403}
2404
2405static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
2406				       struct rdt_resource *r,
2407				       struct rdtgroup *prgrp)
2408{
2409	struct rdt_domain *dom;
2410	int ret;
2411
2412	list_for_each_entry(dom, &r->domains, list) {
2413		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
2414		if (ret)
2415			return ret;
2416	}
2417
2418	return 0;
2419}
2420
2421/*
2422 * This creates a directory mon_data which contains the monitored data.
2423 *
2424 * mon_data has one directory for each domain, which is named
2425 * in the format mon_<domain_name>_<domain_id>. For example, a mon_data
2426 * directory with an L3 domain looks as below:
2427 * ./mon_data:
2428 * mon_L3_00
2429 * mon_L3_01
2430 * mon_L3_02
2431 * ...
2432 *
2433 * Each domain directory has one file per event:
2434 * ./mon_L3_00/:
2435 * llc_occupancy
2436 *
2437 */
2438static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2439			     struct rdtgroup *prgrp,
2440			     struct kernfs_node **dest_kn)
2441{
2442	struct rdt_resource *r;
2443	struct kernfs_node *kn;
2444	int ret;
2445
2446	/*
2447	 * Create the mon_data directory first.
2448	 */
2449	ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
2450	if (ret)
2451		return ret;
2452
2453	if (dest_kn)
2454		*dest_kn = kn;
2455
2456	/*
2457	 * Create the subdirectories for each domain. Note that all events
2458	 * in a domain like L3 are grouped into a resource whose domain is L3
2459	 */
2460	for_each_mon_enabled_rdt_resource(r) {
2461		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
2462		if (ret)
2463			goto out_destroy;
2464	}
2465
2466	return 0;
2467
2468out_destroy:
2469	kernfs_remove(kn);
2470	return ret;
2471}
2472
2473/**
2474 * cbm_ensure_valid - Enforce validity on provided CBM
2475 * @_val:	Candidate CBM
2476 * @r:		RDT resource to which the CBM belongs
2477 *
2478 * The provided CBM represents all cache portions available for use. This
2479 * may be represented by a bitmap that does not consist of contiguous ones
2480 * and thus be an invalid CBM.
2481 * Here the provided CBM is forced to be a valid CBM by only considering
2482 * the first set of contiguous bits as valid and clearing all other bits.
2483 * The intention here is to provide a valid default CBM with which a new
2484 * resource group is initialized. The user can follow this with a
2485 * modification to the CBM if the default does not satisfy the
2486 * requirements.
2487 */
2488static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
2489{
2490	unsigned int cbm_len = r->cache.cbm_len;
2491	unsigned long first_bit, zero_bit;
2492	unsigned long val = _val;
2493
2494	if (!val)
2495		return 0;
2496
2497	first_bit = find_first_bit(&val, cbm_len);
2498	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
2499
2500	/* Clear any remaining bits to ensure contiguous region */
2501	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
2502	return (u32)val;
2503}
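/*
 * Worked example for the function above (values for illustration only):
 * with cbm_len = 7 and _val = 0x5b (0b1011011), first_bit = 0 and
 * zero_bit = 2, so bits 2..6 are cleared and the returned CBM is 0x3
 * (0b0000011), i.e. the first contiguous run of ones.
 */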
2504
2505/*
2506 * Initialize cache resources per RDT domain
2507 *
2508 * Set the RDT domain up to start off with all usable allocations. That is,
2509 * all shareable and unused bits. All-zero CBM is invalid.
2510 */
2511static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
2512				 u32 closid)
2513{
2514	struct rdt_resource *r_cdp = NULL;
2515	struct rdt_domain *d_cdp = NULL;
2516	u32 used_b = 0, unused_b = 0;
2517	unsigned long tmp_cbm;
2518	enum rdtgrp_mode mode;
2519	u32 peer_ctl, *ctrl;
2520	int i;
2521
2522	rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
2523	d->have_new_ctrl = false;
2524	d->new_ctrl = r->cache.shareable_bits;
2525	used_b = r->cache.shareable_bits;
2526	ctrl = d->ctrl_val;
2527	for (i = 0; i < closids_supported(); i++, ctrl++) {
2528		if (closid_allocated(i) && i != closid) {
2529			mode = rdtgroup_mode_by_closid(i);
2530			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
2531				/*
2532				 * ctrl values for locksetup aren't relevant
2533				 * until the schemata is written, and the mode
2534				 * becomes RDT_MODE_PSEUDO_LOCKED.
2535				 */
2536				continue;
2537			/*
2538			 * If CDP is active include peer domain's
2539			 * usage to ensure there is no overlap
2540			 * with an exclusive group.
2541			 */
2542			if (d_cdp)
2543				peer_ctl = d_cdp->ctrl_val[i];
2544			else
2545				peer_ctl = 0;
2546			used_b |= *ctrl | peer_ctl;
2547			if (mode == RDT_MODE_SHAREABLE)
2548				d->new_ctrl |= *ctrl | peer_ctl;
2549		}
2550	}
2551	if (d->plr && d->plr->cbm > 0)
2552		used_b |= d->plr->cbm;
2553	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
2554	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
2555	d->new_ctrl |= unused_b;
2556	/*
2557	 * Force the initial CBM to be valid, user can
2558	 * modify the CBM based on system availability.
2559	 */
2560	d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
2561	/*
2562	 * Assign the u32 CBM to an unsigned long to ensure that
2563	 * bitmap_weight() does not access out-of-bound memory.
2564	 */
2565	tmp_cbm = d->new_ctrl;
2566	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
2567		rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
2568		return -ENOSPC;
2569	}
2570	d->have_new_ctrl = true;
2571
2572	return 0;
2573}
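/*
 * Worked example for the function above (values for illustration only,
 * no CDP): take cbm_len = 8, shareable_bits = 0x03, one other shareable
 * group with CBM 0x0f and one exclusive group with CBM 0x30. Then
 * used_b = 0x3f, new_ctrl accumulates 0x03 | 0x0f = 0x0f, unused_b = 0xc0
 * and new_ctrl |= unused_b gives 0xcf. cbm_ensure_valid() keeps only the
 * first contiguous run of ones, so the domain starts out with
 * new_ctrl = 0x0f, staying clear of the exclusive group's bits.
 */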
2574
2575/*
2576 * Initialize cache resources with default values.
2577 *
2578 * A new RDT group is being created on an allocation capable (CAT)
2579 * supporting system. Set this group up to start off with all usable
2580 * allocations.
2581 *
2582 * If there are no more shareable bits available on any domain then
2583 * the entire allocation will fail.
2584 */
2585static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
2586{
2587	struct rdt_domain *d;
2588	int ret;
2589
2590	list_for_each_entry(d, &r->domains, list) {
2591		ret = __init_one_rdt_domain(d, r, closid);
2592		if (ret < 0)
2593			return ret;
2594	}
2595
2596	return 0;
2597}
2598
2599/* Initialize MBA resource with default values. */
2600static void rdtgroup_init_mba(struct rdt_resource *r)
2601{
2602	struct rdt_domain *d;
2603
2604	list_for_each_entry(d, &r->domains, list) {
2605		d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
2606		d->have_new_ctrl = true;
2607	}
2608}
2609
2610/* Initialize the RDT group's allocations. */
2611static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2612{
2613	struct rdt_resource *r;
2614	int ret;
2615
2616	for_each_alloc_enabled_rdt_resource(r) {
2617		if (r->rid == RDT_RESOURCE_MBA) {
2618			rdtgroup_init_mba(r);
2619		} else {
2620			ret = rdtgroup_init_cat(r, rdtgrp->closid);
2621			if (ret < 0)
2622				return ret;
2623		}
2624
2625		ret = update_domains(r, rdtgrp->closid);
2626		if (ret < 0) {
2627			rdt_last_cmd_puts("Failed to initialize allocations\n");
2628			return ret;
2629		}
2630
2631	}
2632
2633	rdtgrp->mode = RDT_MODE_SHAREABLE;
2634
2635	return 0;
2636}
2637
2638static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
2639			     struct kernfs_node *prgrp_kn,
2640			     const char *name, umode_t mode,
2641			     enum rdt_group_type rtype, struct rdtgroup **r)
2642{
2643	struct rdtgroup *prdtgrp, *rdtgrp;
2644	struct kernfs_node *kn;
2645	uint files = 0;
2646	int ret;
2647
2648	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
2649	if (!prdtgrp) {
2650		ret = -ENODEV;
2651		goto out_unlock;
2652	}
2653
2654	if (rtype == RDTMON_GROUP &&
2655	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2656	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
2657		ret = -EINVAL;
2658		rdt_last_cmd_puts("Pseudo-locking in progress\n");
2659		goto out_unlock;
2660	}
2661
2662	/* allocate the rdtgroup. */
2663	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
2664	if (!rdtgrp) {
2665		ret = -ENOSPC;
2666		rdt_last_cmd_puts("Kernel out of memory\n");
2667		goto out_unlock;
2668	}
2669	*r = rdtgrp;
2670	rdtgrp->mon.parent = prdtgrp;
2671	rdtgrp->type = rtype;
2672	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
2673
2674	/* kernfs creates the directory for rdtgrp */
2675	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
2676	if (IS_ERR(kn)) {
2677		ret = PTR_ERR(kn);
2678		rdt_last_cmd_puts("kernfs create error\n");
2679		goto out_free_rgrp;
2680	}
2681	rdtgrp->kn = kn;
2682
2683	/*
2684	 * kernfs_remove() will drop the reference count on "kn" which
2685	 * will free it. But we still need it to stick around for the
2686	 * rdtgroup_kn_unlock(kn) call below. Take one extra reference
2687	 * here, which will be dropped inside rdtgroup_kn_unlock().
2688	 */
2689	kernfs_get(kn);
2690
2691	ret = rdtgroup_kn_set_ugid(kn);
2692	if (ret) {
2693		rdt_last_cmd_puts("kernfs perm error\n");
2694		goto out_destroy;
2695	}
2696
2697	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
2698	ret = rdtgroup_add_files(kn, files);
2699	if (ret) {
2700		rdt_last_cmd_puts("kernfs fill error\n");
2701		goto out_destroy;
2702	}
2703
2704	if (rdt_mon_capable) {
2705		ret = alloc_rmid();
2706		if (ret < 0) {
2707			rdt_last_cmd_puts("Out of RMIDs\n");
2708			goto out_destroy;
2709		}
2710		rdtgrp->mon.rmid = ret;
2711
2712		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
2713		if (ret) {
2714			rdt_last_cmd_puts("kernfs subdir error\n");
2715			goto out_idfree;
2716		}
2717	}
2718	kernfs_activate(kn);
2719
2720	/*
2721	 * The caller unlocks the prgrp_kn upon success.
2722	 */
2723	return 0;
2724
2725out_idfree:
2726	free_rmid(rdtgrp->mon.rmid);
2727out_destroy:
2728	kernfs_remove(rdtgrp->kn);
2729out_free_rgrp:
2730	kfree(rdtgrp);
2731out_unlock:
2732	rdtgroup_kn_unlock(prgrp_kn);
2733	return ret;
2734}
2735
2736static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
2737{
2738	kernfs_remove(rgrp->kn);
2739	free_rmid(rgrp->mon.rmid);
2740	kfree(rgrp);
2741}
2742
2743/*
2744 * Create a monitor group under "mon_groups" directory of a control
2745 * and monitor group (ctrl_mon). This is a resource group
2746 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
2747 */
2748static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
2749			      struct kernfs_node *prgrp_kn,
2750			      const char *name,
2751			      umode_t mode)
2752{
2753	struct rdtgroup *rdtgrp, *prgrp;
2754	int ret;
2755
2756	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
2757				&rdtgrp);
2758	if (ret)
2759		return ret;
2760
2761	prgrp = rdtgrp->mon.parent;
2762	rdtgrp->closid = prgrp->closid;
2763
2764	/*
2765	 * Add the rdtgrp to the list of rdtgrps the parent
2766	 * ctrl_mon group has to track.
2767	 */
2768	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
2769
2770	rdtgroup_kn_unlock(prgrp_kn);
2771	return ret;
2772}
2773
2774/*
2775 * These are rdtgroups created under the root directory. Can be used
2776 * to allocate and monitor resources.
2777 */
2778static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
2779				   struct kernfs_node *prgrp_kn,
2780				   const char *name, umode_t mode)
2781{
2782	struct rdtgroup *rdtgrp;
2783	struct kernfs_node *kn;
2784	u32 closid;
2785	int ret;
2786
2787	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
2788				&rdtgrp);
2789	if (ret)
2790		return ret;
2791
2792	kn = rdtgrp->kn;
2793	ret = closid_alloc();
2794	if (ret < 0) {
2795		rdt_last_cmd_puts("Out of CLOSIDs\n");
2796		goto out_common_fail;
2797	}
2798	closid = ret;
2799	ret = 0;
2800
2801	rdtgrp->closid = closid;
2802	ret = rdtgroup_init_alloc(rdtgrp);
2803	if (ret < 0)
2804		goto out_id_free;
2805
2806	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
2807
2808	if (rdt_mon_capable) {
2809		/*
2810		 * Create an empty mon_groups directory to hold the subset
2811		 * of tasks and cpus to monitor.
2812		 */
2813		ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
2814		if (ret) {
2815			rdt_last_cmd_puts("kernfs subdir error\n");
2816			goto out_del_list;
2817		}
2818	}
2819
2820	goto out_unlock;
2821
2822out_del_list:
2823	list_del(&rdtgrp->rdtgroup_list);
2824out_id_free:
2825	closid_free(closid);
2826out_common_fail:
2827	mkdir_rdt_prepare_clean(rdtgrp);
2828out_unlock:
2829	rdtgroup_kn_unlock(prgrp_kn);
2830	return ret;
2831}
2832
2833/*
2834 * We allow creating mon groups only within a directory called "mon_groups"
2835 * which is present in every ctrl_mon group. Check if this is a valid
2836 * "mon_groups" directory.
2837 *
2838 * 1. The directory should be named "mon_groups".
2839 * 2. The mon group itself should "not" be named "mon_groups".
2840 *   This makes sure "mon_groups" directory always has a ctrl_mon group
2841 *   as parent.
2842 */
2843static bool is_mon_groups(struct kernfs_node *kn, const char *name)
2844{
2845	return (!strcmp(kn->name, "mon_groups") &&
2846		strcmp(name, "mon_groups"));
2847}
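/*
 * Illustration (assuming the default /sys/fs/resctrl mount point):
 * "mkdir /sys/fs/resctrl/grp0/mon_groups/mon1" satisfies both checks,
 * while "mkdir /sys/fs/resctrl/grp0/mon_groups/mon_groups" fails the
 * second one and is rejected.
 */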
2848
2849static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
2850			  umode_t mode)
2851{
2852	/* Do not accept '\n' to avoid unparsable situation. */
2853	if (strchr(name, '\n'))
2854		return -EINVAL;
2855
2856	/*
2857	 * If the parent directory is the root directory and RDT
2858	 * allocation is supported, add a control and monitoring
2859	 * subdirectory
2860	 */
2861	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
2862		return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
2863
2864	/*
2865	 * If RDT monitoring is supported and the parent directory is a valid
2866	 * "mon_groups" directory, add a monitoring subdirectory.
2867	 */
2868	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
2869		return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
2870
2871	return -EPERM;
2872}
2873
2874static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
2875			      cpumask_var_t tmpmask)
2876{
2877	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
2878	int cpu;
2879
2880	/* Give any tasks back to the parent group */
2881	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
2882
2883	/* Update per cpu rmid of the moved CPUs first */
2884	for_each_cpu(cpu, &rdtgrp->cpu_mask)
2885		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
2886	/*
2887	 * Update the MSR on moved CPUs and CPUs which have a moved
2888	 * task running on them.
2889	 */
2890	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
2891	update_closid_rmid(tmpmask, NULL);
2892
2893	rdtgrp->flags = RDT_DELETED;
2894	free_rmid(rdtgrp->mon.rmid);
2895
2896	/*
2897	 * Remove the rdtgrp from the parent ctrl_mon group's list
2898	 */
2899	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
2900	list_del(&rdtgrp->mon.crdtgrp_list);
2901
2902	/*
2903	 * one extra hold on this, will drop when we kfree(rdtgrp)
2904	 * in rdtgroup_kn_unlock()
2905	 */
2906	kernfs_get(kn);
2907	kernfs_remove(rdtgrp->kn);
2908
2909	return 0;
2910}
2911
2912static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
2913				struct rdtgroup *rdtgrp)
2914{
2915	rdtgrp->flags = RDT_DELETED;
2916	list_del(&rdtgrp->rdtgroup_list);
2917
2918	/*
2919	 * one extra hold on this, will drop when we kfree(rdtgrp)
2920	 * in rdtgroup_kn_unlock()
2921	 */
2922	kernfs_get(kn);
2923	kernfs_remove(rdtgrp->kn);
2924	return 0;
2925}
2926
2927static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
2928			       cpumask_var_t tmpmask)
2929{
2930	int cpu;
2931
2932	/* Give any tasks back to the default group */
2933	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
2934
2935	/* Give any CPUs back to the default group */
2936	cpumask_or(&rdtgroup_default.cpu_mask,
2937		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2938
2939	/* Update per cpu closid and rmid of the moved CPUs first */
2940	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
2941		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
2942		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
2943	}
2944
2945	/*
2946	 * Update the MSR on moved CPUs and CPUs which have a moved
2947	 * task running on them.
2948	 */
2949	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
2950	update_closid_rmid(tmpmask, NULL);
2951
2952	closid_free(rdtgrp->closid);
2953	free_rmid(rdtgrp->mon.rmid);
2954
2955	/*
2956	 * Free all the child monitor group rmids.
2957	 */
2958	free_all_child_rdtgrp(rdtgrp);
2959
2960	rdtgroup_ctrl_remove(kn, rdtgrp);
2961
2962	return 0;
2963}
2964
2965static int rdtgroup_rmdir(struct kernfs_node *kn)
2966{
2967	struct kernfs_node *parent_kn = kn->parent;
2968	struct rdtgroup *rdtgrp;
2969	cpumask_var_t tmpmask;
2970	int ret = 0;
2971
2972	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
2973		return -ENOMEM;
2974
2975	rdtgrp = rdtgroup_kn_lock_live(kn);
2976	if (!rdtgrp) {
2977		ret = -EPERM;
2978		goto out;
2979	}
2980
2981	/*
2982	 * If the rdtgroup is a ctrl_mon group and parent directory
2983	 * is the root directory, remove the ctrl_mon group.
2984	 *
2985	 * If the rdtgroup is a mon group and parent directory
2986	 * is a valid "mon_groups" directory, remove the mon group.
2987	 */
2988	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
2989		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2990		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
2991			ret = rdtgroup_ctrl_remove(kn, rdtgrp);
2992		} else {
2993			ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
2994		}
2995	} else if (rdtgrp->type == RDTMON_GROUP &&
2996		 is_mon_groups(parent_kn, kn->name)) {
2997		ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
2998	} else {
2999		ret = -EPERM;
3000	}
3001
3002out:
3003	rdtgroup_kn_unlock(kn);
3004	free_cpumask_var(tmpmask);
3005	return ret;
3006}
3007
3008static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3009{
3010	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
3011		seq_puts(seq, ",cdp");
3012
3013	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
3014		seq_puts(seq, ",cdpl2");
3015
3016	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
3017		seq_puts(seq, ",mba_MBps");
3018
3019	return 0;
3020}
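/*
 * The options emitted above show up in the mount information for resctrl,
 * e.g. a /proc/mounts line along the lines of (illustrative, exact fields
 * vary with how the filesystem was mounted):
 *
 *	resctrl /sys/fs/resctrl resctrl rw,relatime,cdp 0 0
 *
 * when CDP on L3 is enabled.
 */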
3021
3022static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
3023	.mkdir		= rdtgroup_mkdir,
3024	.rmdir		= rdtgroup_rmdir,
3025	.show_options	= rdtgroup_show_options,
3026};
3027
3028static int __init rdtgroup_setup_root(void)
3029{
3030	int ret;
3031
3032	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3033				      KERNFS_ROOT_CREATE_DEACTIVATED |
3034				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3035				      &rdtgroup_default);
3036	if (IS_ERR(rdt_root))
3037		return PTR_ERR(rdt_root);
3038
3039	mutex_lock(&rdtgroup_mutex);
3040
3041	rdtgroup_default.closid = 0;
3042	rdtgroup_default.mon.rmid = 0;
3043	rdtgroup_default.type = RDTCTRL_GROUP;
3044	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3045
3046	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
3047
3048	ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
3049	if (ret) {
3050		kernfs_destroy_root(rdt_root);
3051		goto out;
3052	}
3053
3054	rdtgroup_default.kn = rdt_root->kn;
3055	kernfs_activate(rdtgroup_default.kn);
3056
3057out:
3058	mutex_unlock(&rdtgroup_mutex);
3059
3060	return ret;
3061}
3062
3063/*
3064 * rdtgroup_init - rdtgroup initialization
3065 *
3066 * Setup resctrl file system including set up root, create mount point,
3067 * register rdtgroup filesystem, and initialize files under root directory.
3068 *
3069 * Return: 0 on success or -errno
3070 */
3071int __init rdtgroup_init(void)
3072{
3073	int ret = 0;
3074
3075	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
3076		     sizeof(last_cmd_status_buf));
3077
3078	ret = rdtgroup_setup_root();
3079	if (ret)
3080		return ret;
3081
3082	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
3083	if (ret)
3084		goto cleanup_root;
3085
3086	ret = register_filesystem(&rdt_fs_type);
3087	if (ret)
3088		goto cleanup_mountpoint;
3089
3090	/*
3091	 * Adding the resctrl debugfs directory here may not be ideal since
3092	 * it would let the resctrl debugfs directory appear on the debugfs
3093	 * filesystem before the resctrl filesystem is mounted.
3094	 * It may also be ok since that would enable debugging of RDT before
3095	 * resctrl is mounted.
3096	 * The reason why the debugfs directory is created here and not in
3097	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
3098	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
3099	 * (the lockdep class of inode->i_rwsem). Other filesystem
3100	 * interactions (eg. SyS_getdents) have the lock ordering:
3101	 * &sb->s_type->i_mutex_key --> &mm->mmap_sem
3102	 * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
3103	 * is taken, thus creating dependency:
3104	 * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause
3105	 * issues considering the other two lock dependencies.
3106	 * By creating the debugfs directory here we avoid a dependency
3107	 * that may cause deadlock (even though file operations cannot
3108	 * occur until the filesystem is mounted, I do not know how to
3109	 * tell lockdep that).
3110	 */
3111	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
3112
3113	return 0;
3114
3115cleanup_mountpoint:
3116	sysfs_remove_mount_point(fs_kobj, "resctrl");
3117cleanup_root:
3118	kernfs_destroy_root(rdt_root);
3119
3120	return ret;
3121}
3122
3123void __exit rdtgroup_exit(void)
3124{
3125	debugfs_remove_recursive(debugfs_resctrl);
3126	unregister_filesystem(&rdt_fs_type);
3127	sysfs_remove_mount_point(fs_kobj, "resctrl");
3128	kernfs_destroy_root(rdt_root);
3129}
v5.14.15
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * User interface for Resource Allocation in Resource Director Technology (RDT)
   4 *
   5 * Copyright (C) 2016 Intel Corporation
   6 *
   7 * Author: Fenghua Yu <fenghua.yu@intel.com>
   8 *
   9 * More information about RDT can be found in the Intel (R) x86 Architecture
  10 * Software Developer Manual.
  11 */
  12
  13#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
  14
  15#include <linux/cacheinfo.h>
  16#include <linux/cpu.h>
  17#include <linux/debugfs.h>
  18#include <linux/fs.h>
  19#include <linux/fs_parser.h>
  20#include <linux/sysfs.h>
  21#include <linux/kernfs.h>
  22#include <linux/seq_buf.h>
  23#include <linux/seq_file.h>
  24#include <linux/sched/signal.h>
  25#include <linux/sched/task.h>
  26#include <linux/slab.h>
  27#include <linux/task_work.h>
  28#include <linux/user_namespace.h>
  29
  30#include <uapi/linux/magic.h>
  31
  32#include <asm/resctrl.h>
  33#include "internal.h"
  34
  35DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
  36DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  37DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
  38static struct kernfs_root *rdt_root;
  39struct rdtgroup rdtgroup_default;
  40LIST_HEAD(rdt_all_groups);
  41
  42/* Kernel fs node for "info" directory under root */
  43static struct kernfs_node *kn_info;
  44
  45/* Kernel fs node for "mon_groups" directory under root */
  46static struct kernfs_node *kn_mongrp;
  47
  48/* Kernel fs node for "mon_data" directory under root */
  49static struct kernfs_node *kn_mondata;
  50
  51static struct seq_buf last_cmd_status;
  52static char last_cmd_status_buf[512];
  53
  54struct dentry *debugfs_resctrl;
  55
  56void rdt_last_cmd_clear(void)
  57{
  58	lockdep_assert_held(&rdtgroup_mutex);
  59	seq_buf_clear(&last_cmd_status);
  60}
  61
  62void rdt_last_cmd_puts(const char *s)
  63{
  64	lockdep_assert_held(&rdtgroup_mutex);
  65	seq_buf_puts(&last_cmd_status, s);
  66}
  67
  68void rdt_last_cmd_printf(const char *fmt, ...)
  69{
  70	va_list ap;
  71
  72	va_start(ap, fmt);
  73	lockdep_assert_held(&rdtgroup_mutex);
  74	seq_buf_vprintf(&last_cmd_status, fmt, ap);
  75	va_end(ap);
  76}
  77
  78/*
  79 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  80 * we can keep a bitmap of free CLOSIDs in a single integer.
  81 *
  82 * Using a global CLOSID across all resources has some advantages and
  83 * some drawbacks:
  84 * + We can simply set "current->closid" to assign a task to a resource
  85 *   group.
  86 * + Context switch code can avoid extra memory references deciding which
  87 *   CLOSID to load into the PQR_ASSOC MSR
  88 * - We give up some options in configuring resource groups across multi-socket
  89 *   systems.
  90 * - Our choices on how to configure each resource become progressively more
  91 *   limited as the number of resources grows.
  92 */
  93static int closid_free_map;
  94static int closid_free_map_len;
  95
  96int closids_supported(void)
  97{
  98	return closid_free_map_len;
  99}
 100
 101static void closid_init(void)
 102{
 103	struct rdt_resource *r;
 104	int rdt_min_closid = 32;
 105
 106	/* Compute rdt_min_closid across all resources */
 107	for_each_alloc_enabled_rdt_resource(r)
 108		rdt_min_closid = min(rdt_min_closid, r->num_closid);
 109
 110	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
 111
 112	/* CLOSID 0 is always reserved for the default group */
 113	closid_free_map &= ~1;
 114	closid_free_map_len = rdt_min_closid;
 115}
 116
 117static int closid_alloc(void)
 118{
 119	u32 closid = ffs(closid_free_map);
 120
 121	if (closid == 0)
 122		return -ENOSPC;
 123	closid--;
 124	closid_free_map &= ~(1 << closid);
 125
 126	return closid;
 127}
 128
 129void closid_free(int closid)
 130{
 131	closid_free_map |= 1 << closid;
 132}
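/*
 * Worked example (numbers for illustration): if closid_init() ran with
 * rdt_min_closid = 4, closid_free_map ends up as 0b1110 (CLOSID 0 reserved
 * for the default group). closid_alloc() then returns 1 (ffs() finds bit 1)
 * and leaves the map as 0b1100; closid_free(1) restores it to 0b1110.
 */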
 133
 134/**
 135 * closid_allocated - test if provided closid is in use
 136 * @closid: closid to be tested
 137 *
 138 * Return: true if @closid is currently associated with a resource group,
 139 * false if @closid is free
 140 */
 141static bool closid_allocated(unsigned int closid)
 142{
 143	return (closid_free_map & (1 << closid)) == 0;
 144}
 145
 146/**
 147 * rdtgroup_mode_by_closid - Return mode of resource group with closid
 148 * @closid: closid of the resource group
 149 *
 150 * Each resource group is associated with a @closid. Here the mode
 151 * of a resource group can be queried by searching for it using its closid.
 152 *
 153 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
 154 */
 155enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
 156{
 157	struct rdtgroup *rdtgrp;
 158
 159	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
 160		if (rdtgrp->closid == closid)
 161			return rdtgrp->mode;
 162	}
 163
 164	return RDT_NUM_MODES;
 165}
 166
 167static const char * const rdt_mode_str[] = {
 168	[RDT_MODE_SHAREABLE]		= "shareable",
 169	[RDT_MODE_EXCLUSIVE]		= "exclusive",
 170	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
 171	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
 172};
 173
 174/**
 175 * rdtgroup_mode_str - Return the string representation of mode
 176 * @mode: the resource group mode as &enum rdtgroup_mode
 177 *
 178 * Return: string representation of valid mode, "unknown" otherwise
 179 */
 180static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
 181{
 182	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
 183		return "unknown";
 184
 185	return rdt_mode_str[mode];
 186}
 187
 188/* set uid and gid of rdtgroup dirs and files to that of the creator */
 189static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 190{
 191	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
 192				.ia_uid = current_fsuid(),
 193				.ia_gid = current_fsgid(), };
 194
 195	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
 196	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
 197		return 0;
 198
 199	return kernfs_setattr(kn, &iattr);
 200}
 201
 202static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 203{
 204	struct kernfs_node *kn;
 205	int ret;
 206
 207	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
 208				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 209				  0, rft->kf_ops, rft, NULL, NULL);
 210	if (IS_ERR(kn))
 211		return PTR_ERR(kn);
 212
 213	ret = rdtgroup_kn_set_ugid(kn);
 214	if (ret) {
 215		kernfs_remove(kn);
 216		return ret;
 217	}
 218
 219	return 0;
 220}
 221
 222static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
 223{
 224	struct kernfs_open_file *of = m->private;
 225	struct rftype *rft = of->kn->priv;
 226
 227	if (rft->seq_show)
 228		return rft->seq_show(of, m, arg);
 229	return 0;
 230}
 231
 232static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
 233				   size_t nbytes, loff_t off)
 234{
 235	struct rftype *rft = of->kn->priv;
 236
 237	if (rft->write)
 238		return rft->write(of, buf, nbytes, off);
 239
 240	return -EINVAL;
 241}
 242
 243static const struct kernfs_ops rdtgroup_kf_single_ops = {
 244	.atomic_write_len	= PAGE_SIZE,
 245	.write			= rdtgroup_file_write,
 246	.seq_show		= rdtgroup_seqfile_show,
 247};
 248
 249static const struct kernfs_ops kf_mondata_ops = {
 250	.atomic_write_len	= PAGE_SIZE,
 251	.seq_show		= rdtgroup_mondata_show,
 252};
 253
 254static bool is_cpu_list(struct kernfs_open_file *of)
 255{
 256	struct rftype *rft = of->kn->priv;
 257
 258	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
 259}
 260
 261static int rdtgroup_cpus_show(struct kernfs_open_file *of,
 262			      struct seq_file *s, void *v)
 263{
 264	struct rdtgroup *rdtgrp;
 265	struct cpumask *mask;
 266	int ret = 0;
 267
 268	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 269
 270	if (rdtgrp) {
 271		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
 272			if (!rdtgrp->plr->d) {
 273				rdt_last_cmd_clear();
 274				rdt_last_cmd_puts("Cache domain offline\n");
 275				ret = -ENODEV;
 276			} else {
 277				mask = &rdtgrp->plr->d->cpu_mask;
 278				seq_printf(s, is_cpu_list(of) ?
 279					   "%*pbl\n" : "%*pb\n",
 280					   cpumask_pr_args(mask));
 281			}
 282		} else {
 283			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
 284				   cpumask_pr_args(&rdtgrp->cpu_mask));
 285		}
 286	} else {
 287		ret = -ENOENT;
 288	}
 289	rdtgroup_kn_unlock(of->kn);
 290
 291	return ret;
 292}
 293
 294/*
 295 * This is safe against resctrl_sched_in() called from __switch_to()
 296 * because __switch_to() is executed with interrupts disabled. A local call
 297 * from update_closid_rmid() is protected against __switch_to() because
 298 * preemption is disabled.
 299 */
 300static void update_cpu_closid_rmid(void *info)
 301{
 302	struct rdtgroup *r = info;
 303
 304	if (r) {
 305		this_cpu_write(pqr_state.default_closid, r->closid);
 306		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
 307	}
 308
 309	/*
 310	 * We cannot unconditionally write the MSR because the current
 311	 * executing task might have its own closid selected. Just reuse
 312	 * the context switch code.
 313	 */
 314	resctrl_sched_in();
 315}
 316
 317/*
 318 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
 319 *
 320 * Per task closids/rmids must have been set up before calling this function.
 321 */
 322static void
 323update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
 324{
 325	int cpu = get_cpu();
 326
 327	if (cpumask_test_cpu(cpu, cpu_mask))
 328		update_cpu_closid_rmid(r);
 329	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
 330	put_cpu();
 331}
 332
 333static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 334			  cpumask_var_t tmpmask)
 335{
 336	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
 337	struct list_head *head;
 338
 339	/* Check whether cpus belong to parent ctrl group */
 340	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
 341	if (cpumask_weight(tmpmask)) {
 342		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
 343		return -EINVAL;
 344	}
 345
 346	/* Check whether cpus are dropped from this group */
 347	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 348	if (cpumask_weight(tmpmask)) {
 349		/* Give any dropped cpus to parent rdtgroup */
 350		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
 351		update_closid_rmid(tmpmask, prgrp);
 352	}
 353
 354	/*
 355	 * If we added cpus, remove them from previous group that owned them
 356	 * and update per-cpu rmid
 357	 */
 358	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 359	if (cpumask_weight(tmpmask)) {
 360		head = &prgrp->mon.crdtgrp_list;
 361		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 362			if (crgrp == rdtgrp)
 363				continue;
 364			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
 365				       tmpmask);
 366		}
 367		update_closid_rmid(tmpmask, rdtgrp);
 368	}
 369
 370	/* Done pushing/pulling - update this group with new mask */
 371	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 372
 373	return 0;
 374}
 375
 376static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
 377{
 378	struct rdtgroup *crgrp;
 379
 380	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
 381	/* update the child mon group masks as well */
 382	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
 383		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
 384}
 385
 386static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
 387			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
 388{
 389	struct rdtgroup *r, *crgrp;
 390	struct list_head *head;
 391
 392	/* Check whether cpus are dropped from this group */
 393	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
 394	if (cpumask_weight(tmpmask)) {
 395		/* Can't drop from default group */
 396		if (rdtgrp == &rdtgroup_default) {
 397			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
 398			return -EINVAL;
 399		}
 400
 401		/* Give any dropped cpus to rdtgroup_default */
 402		cpumask_or(&rdtgroup_default.cpu_mask,
 403			   &rdtgroup_default.cpu_mask, tmpmask);
 404		update_closid_rmid(tmpmask, &rdtgroup_default);
 405	}
 406
 407	/*
 408	 * If we added cpus, remove them from previous group and
 409	 * the prev group's child groups that owned them
 410	 * and update per-cpu closid/rmid.
 411	 */
 412	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
 413	if (cpumask_weight(tmpmask)) {
 414		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
 415			if (r == rdtgrp)
 416				continue;
 417			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
 418			if (cpumask_weight(tmpmask1))
 419				cpumask_rdtgrp_clear(r, tmpmask1);
 420		}
 421		update_closid_rmid(tmpmask, rdtgrp);
 422	}
 423
 424	/* Done pushing/pulling - update this group with new mask */
 425	cpumask_copy(&rdtgrp->cpu_mask, newmask);
 426
 427	/*
 428	 * Clear child mon group masks since there is a new parent mask
 429	 * now and update the rmid for the cpus the child lost.
 430	 */
 431	head = &rdtgrp->mon.crdtgrp_list;
 432	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
 433		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
 434		update_closid_rmid(tmpmask, rdtgrp);
 435		cpumask_clear(&crgrp->cpu_mask);
 436	}
 437
 438	return 0;
 439}
 440
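/*
 * The handler below backs the "cpus" and "cpus_list" files of a resource
 * group (standard resctrl file names assumed). For illustration, with the
 * default mount point:
 *
 *	# echo 0-3 > /sys/fs/resctrl/grp0/cpus_list
 *	# echo f > /sys/fs/resctrl/grp0/cpus
 *
 * both assign CPUs 0-3 to grp0; the list or mask form is chosen by the
 * RFTYPE_FLAGS_CPUS_LIST flag checked in is_cpu_list().
 */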
 441static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
 442				   char *buf, size_t nbytes, loff_t off)
 443{
 444	cpumask_var_t tmpmask, newmask, tmpmask1;
 445	struct rdtgroup *rdtgrp;
 446	int ret;
 447
 448	if (!buf)
 449		return -EINVAL;
 450
 451	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
 452		return -ENOMEM;
 453	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
 454		free_cpumask_var(tmpmask);
 455		return -ENOMEM;
 456	}
 457	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
 458		free_cpumask_var(tmpmask);
 459		free_cpumask_var(newmask);
 460		return -ENOMEM;
 461	}
 462
 463	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 464	if (!rdtgrp) {
 465		ret = -ENOENT;
 466		goto unlock;
 467	}
 468
 469	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 470	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 471		ret = -EINVAL;
 472		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 473		goto unlock;
 474	}
 475
 476	if (is_cpu_list(of))
 477		ret = cpulist_parse(buf, newmask);
 478	else
 479		ret = cpumask_parse(buf, newmask);
 480
 481	if (ret) {
 482		rdt_last_cmd_puts("Bad CPU list/mask\n");
 483		goto unlock;
 484	}
 485
 486	/* check that user didn't specify any offline cpus */
 487	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
 488	if (cpumask_weight(tmpmask)) {
 489		ret = -EINVAL;
 490		rdt_last_cmd_puts("Can only assign online CPUs\n");
 491		goto unlock;
 492	}
 493
 494	if (rdtgrp->type == RDTCTRL_GROUP)
 495		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
 496	else if (rdtgrp->type == RDTMON_GROUP)
 497		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
 498	else
 499		ret = -EINVAL;
 500
 501unlock:
 502	rdtgroup_kn_unlock(of->kn);
 503	free_cpumask_var(tmpmask);
 504	free_cpumask_var(newmask);
 505	free_cpumask_var(tmpmask1);
 506
 507	return ret ?: nbytes;
 508}
 509
 510/**
 511 * rdtgroup_remove - the helper to remove resource group safely
 512 * @rdtgrp: resource group to remove
 513 *
 514 * On resource group creation via a mkdir, an extra kernfs_node reference is
 515 * taken to ensure that the rdtgroup structure remains accessible for the
 516 * rdtgroup_kn_unlock() calls where it is removed.
 517 *
 518 * Drop the extra reference here, then free the rdtgroup structure.
 519 *
 520 * Return: void
 521 */
 522static void rdtgroup_remove(struct rdtgroup *rdtgrp)
 523{
 524	kernfs_put(rdtgrp->kn);
 525	kfree(rdtgrp);
 526}
 527
 528static void _update_task_closid_rmid(void *task)
 529{
 530	/*
 531	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
 532	 * Otherwise, the MSR is updated when the task is scheduled in.
 533	 */
 534	if (task == current)
 535		resctrl_sched_in();
 536}
 537
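/*
 * On SMP, if the task is currently running on a CPU, make that CPU execute
 * _update_task_closid_rmid() via an IPI so PQR_ASSOC is refreshed right
 * away. Otherwise run it locally, where it is a no-op unless the task is
 * current; a task that is not running picks up the new closid/rmid when it
 * is next scheduled in.
 */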
 538static void update_task_closid_rmid(struct task_struct *t)
 539{
 540	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
 541		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
 542	else
 543		_update_task_closid_rmid(t);
 544}
 545
 546static int __rdtgroup_move_task(struct task_struct *tsk,
 547				struct rdtgroup *rdtgrp)
 548{
 549	/* If the task is already in rdtgrp, no need to move the task. */
 550	if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
 551	     tsk->rmid == rdtgrp->mon.rmid) ||
 552	    (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
 553	     tsk->closid == rdtgrp->mon.parent->closid))
 554		return 0;
 555
 556	/*
 557	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
 558	 * updated by them.
 559	 *
 560	 * For ctrl_mon groups, move both closid and rmid.
 561	 * For monitor groups, can move the tasks only from
 562	 * their parent CTRL group.
 563	 */
 564
 565	if (rdtgrp->type == RDTCTRL_GROUP) {
 566		WRITE_ONCE(tsk->closid, rdtgrp->closid);
 567		WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
 568	} else if (rdtgrp->type == RDTMON_GROUP) {
 569		if (rdtgrp->mon.parent->closid == tsk->closid) {
 570			WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
 571		} else {
 572			rdt_last_cmd_puts("Can't move task to different control group\n");
 573			return -EINVAL;
 574		}
 575	}
 576
 577	/*
 578	 * Ensure the task's closid and rmid are written before determining if
 579	 * the task is current, which will decide if it will be interrupted.
 580	 */
 581	barrier();
 582
 583	/*
 584	 * By now, the task's closid and rmid are set. If the task is current
 585	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
 586	 * group go into effect. If the task is not current, the MSR will be
 587	 * updated when the task is scheduled in.
 588	 */
 589	update_task_closid_rmid(tsk);
 590
 591	return 0;
 592}
 593
 594static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
 595{
 596	return (rdt_alloc_capable &&
 597	       (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
 598}
 599
 600static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
 601{
 602	return (rdt_mon_capable &&
 603	       (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
 604}
 605
 606/**
 607 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
 608 * @r: Resource group
 609 *
 610 * Return: 1 if tasks have been assigned to @r, 0 otherwise
 611 */
 612int rdtgroup_tasks_assigned(struct rdtgroup *r)
 613{
 614	struct task_struct *p, *t;
 615	int ret = 0;
 616
 617	lockdep_assert_held(&rdtgroup_mutex);
 618
 619	rcu_read_lock();
 620	for_each_process_thread(p, t) {
 621		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
 622			ret = 1;
 623			break;
 624		}
 625	}
 626	rcu_read_unlock();
 627
 628	return ret;
 629}
 630
 631static int rdtgroup_task_write_permission(struct task_struct *task,
 632					  struct kernfs_open_file *of)
 633{
 634	const struct cred *tcred = get_task_cred(task);
 635	const struct cred *cred = current_cred();
 636	int ret = 0;
 637
 638	/*
 639	 * Even if we're attaching all tasks in the thread group, we only
 640	 * need to check permissions on one of them.
 641	 */
 642	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
 643	    !uid_eq(cred->euid, tcred->uid) &&
 644	    !uid_eq(cred->euid, tcred->suid)) {
 645		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
 646		ret = -EPERM;
 647	}
 648
 649	put_cred(tcred);
 650	return ret;
 651}
 652
 653static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
 654			      struct kernfs_open_file *of)
 655{
 656	struct task_struct *tsk;
 657	int ret;
 658
 659	rcu_read_lock();
 660	if (pid) {
 661		tsk = find_task_by_vpid(pid);
 662		if (!tsk) {
 663			rcu_read_unlock();
 664			rdt_last_cmd_printf("No task %d\n", pid);
 665			return -ESRCH;
 666		}
 667	} else {
 668		tsk = current;
 669	}
 670
 671	get_task_struct(tsk);
 672	rcu_read_unlock();
 673
 674	ret = rdtgroup_task_write_permission(tsk, of);
 675	if (!ret)
 676		ret = __rdtgroup_move_task(tsk, rdtgrp);
 677
 678	put_task_struct(tsk);
 679	return ret;
 680}
 681
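/*
 * The handler below backs the "tasks" file. For illustration (default
 * mount point assumed), moving PID 1234 into group grp0 is done with:
 *
 *	# echo 1234 > /sys/fs/resctrl/grp0/tasks
 *
 * Writing a PID of 0 moves the writing task itself.
 */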
 682static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
 683				    char *buf, size_t nbytes, loff_t off)
 684{
 685	struct rdtgroup *rdtgrp;
 686	int ret = 0;
 687	pid_t pid;
 688
 689	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 690		return -EINVAL;
 691	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 692	if (!rdtgrp) {
 693		rdtgroup_kn_unlock(of->kn);
 694		return -ENOENT;
 695	}
 696	rdt_last_cmd_clear();
 697
 698	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
 699	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
 700		ret = -EINVAL;
 701		rdt_last_cmd_puts("Pseudo-locking in progress\n");
 702		goto unlock;
 703	}
 704
 705	ret = rdtgroup_move_task(pid, rdtgrp, of);
 706
 707unlock:
 708	rdtgroup_kn_unlock(of->kn);
 709
 710	return ret ?: nbytes;
 711}
 712
 713static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 714{
 715	struct task_struct *p, *t;
 716
 717	rcu_read_lock();
 718	for_each_process_thread(p, t) {
 719		if (is_closid_match(t, r) || is_rmid_match(t, r))
 720			seq_printf(s, "%d\n", t->pid);
 721	}
 722	rcu_read_unlock();
 723}
 724
 725static int rdtgroup_tasks_show(struct kernfs_open_file *of,
 726			       struct seq_file *s, void *v)
 727{
 728	struct rdtgroup *rdtgrp;
 729	int ret = 0;
 730
 731	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 732	if (rdtgrp)
 733		show_rdt_tasks(rdtgrp, s);
 734	else
 735		ret = -ENOENT;
 736	rdtgroup_kn_unlock(of->kn);
 737
 738	return ret;
 739}
 740
 741#ifdef CONFIG_PROC_CPU_RESCTRL
 742
 743/*
 744 * A task can only be part of one resctrl control group and of one monitor
 745 * group which is associated with that control group.
 746 *
 747 * 1)   res:
 748 *      mon:
 749 *
 750 *    resctrl is not available.
 751 *
 752 * 2)   res:/
 753 *      mon:
 754 *
 755 *    Task is part of the root resctrl control group, and it is not associated
 756 *    to any monitor group.
 757 *
 758 * 3)  res:/
 759 *     mon:mon0
 760 *
 761 *    Task is part of the root resctrl control group and monitor group mon0.
 762 *
 763 * 4)  res:group0
 764 *     mon:
 765 *
 766 *    Task is part of resctrl control group group0, and it is not associated
 767 *    to any monitor group.
 768 *
 769 * 5) res:group0
 770 *    mon:mon1
 771 *
 772 *    Task is part of resctrl control group group0 and monitor group mon1.
 773 */
 774int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
 775		      struct pid *pid, struct task_struct *tsk)
 776{
 777	struct rdtgroup *rdtg;
 778	int ret = 0;
 779
 780	mutex_lock(&rdtgroup_mutex);
 781
 782	/* Return empty if resctrl has not been mounted. */
 783	if (!static_branch_unlikely(&rdt_enable_key)) {
 784		seq_puts(s, "res:\nmon:\n");
 785		goto unlock;
 786	}
 787
 788	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
 789		struct rdtgroup *crg;
 790
 791		/*
 792		 * Task information is only relevant for shareable
 793		 * and exclusive groups.
 794		 */
 795		if (rdtg->mode != RDT_MODE_SHAREABLE &&
 796		    rdtg->mode != RDT_MODE_EXCLUSIVE)
 797			continue;
 798
 799		if (rdtg->closid != tsk->closid)
 800			continue;
 801
 802		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
 803			   rdtg->kn->name);
 804		seq_puts(s, "mon:");
 805		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
 806				    mon.crdtgrp_list) {
 807			if (tsk->rmid != crg->mon.rmid)
 808				continue;
 809			seq_printf(s, "%s", crg->kn->name);
 810			break;
 811		}
 812		seq_putc(s, '\n');
 813		goto unlock;
 814	}
 815	/*
 816	 * The above search should succeed. Otherwise return
 817	 * with an error.
 818	 */
 819	ret = -ENOENT;
 820unlock:
 821	mutex_unlock(&rdtgroup_mutex);
 822
 823	return ret;
 824}
 825#endif
 826
 827static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
 828				    struct seq_file *seq, void *v)
 829{
 830	int len;
 831
 832	mutex_lock(&rdtgroup_mutex);
 833	len = seq_buf_used(&last_cmd_status);
 834	if (len)
 835		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
 836	else
 837		seq_puts(seq, "ok\n");
 838	mutex_unlock(&rdtgroup_mutex);
 839	return 0;
 840}
 841
 842static int rdt_num_closids_show(struct kernfs_open_file *of,
 843				struct seq_file *seq, void *v)
 844{
 845	struct rdt_resource *r = of->kn->parent->priv;
 846
 847	seq_printf(seq, "%d\n", r->num_closid);
 848	return 0;
 849}
 850
 851static int rdt_default_ctrl_show(struct kernfs_open_file *of,
 852			     struct seq_file *seq, void *v)
 853{
 854	struct rdt_resource *r = of->kn->parent->priv;
 855
 856	seq_printf(seq, "%x\n", r->default_ctrl);
 857	return 0;
 858}
 859
 860static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
 861			     struct seq_file *seq, void *v)
 862{
 863	struct rdt_resource *r = of->kn->parent->priv;
 864
 865	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
 866	return 0;
 867}
 868
 869static int rdt_shareable_bits_show(struct kernfs_open_file *of,
 870				   struct seq_file *seq, void *v)
 871{
 872	struct rdt_resource *r = of->kn->parent->priv;
 873
 874	seq_printf(seq, "%x\n", r->cache.shareable_bits);
 875	return 0;
 876}
 877
 878/**
 879 * rdt_bit_usage_show - Display current usage of resources
 880 *
 881 * A domain is a shared resource that can now be allocated differently. Here
 882 * we display the current regions of the domain as an annotated bitmask.
 883 * For each domain of this resource its allocation bitmask
 884 * is annotated as below to indicate the current usage of the corresponding bit:
 885 *   0 - currently unused
 886 *   X - currently available for sharing and used by software and hardware
 887 *   H - currently used by hardware only but available for software use
 888 *   S - currently used and shareable by software only
 889 *   E - currently used exclusively by one resource group
 890 *   P - currently pseudo-locked by one resource group
 891 */
 892static int rdt_bit_usage_show(struct kernfs_open_file *of,
 893			      struct seq_file *seq, void *v)
 894{
 895	struct rdt_resource *r = of->kn->parent->priv;
 896	/*
 897	 * Use unsigned long even though only 32 bits are used to ensure
 898	 * test_bit() is used safely.
 899	 */
 900	unsigned long sw_shareable = 0, hw_shareable = 0;
 901	unsigned long exclusive = 0, pseudo_locked = 0;
 902	struct rdt_domain *dom;
 903	int i, hwb, swb, excl, psl;
 904	enum rdtgrp_mode mode;
 905	bool sep = false;
 906	u32 *ctrl;
 907
 908	mutex_lock(&rdtgroup_mutex);
 909	hw_shareable = r->cache.shareable_bits;
 910	list_for_each_entry(dom, &r->domains, list) {
 911		if (sep)
 912			seq_putc(seq, ';');
 913		ctrl = dom->ctrl_val;
 914		sw_shareable = 0;
 915		exclusive = 0;
 916		seq_printf(seq, "%d=", dom->id);
 917		for (i = 0; i < closids_supported(); i++, ctrl++) {
 918			if (!closid_allocated(i))
 919				continue;
 920			mode = rdtgroup_mode_by_closid(i);
 921			switch (mode) {
 922			case RDT_MODE_SHAREABLE:
 923				sw_shareable |= *ctrl;
 924				break;
 925			case RDT_MODE_EXCLUSIVE:
 926				exclusive |= *ctrl;
 927				break;
 928			case RDT_MODE_PSEUDO_LOCKSETUP:
 929			/*
 930			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
 931			 * here but not included since the CBM
 932			 * associated with this CLOSID in this mode
 933			 * is not initialized and no task or cpu can be
 934			 * assigned this CLOSID.
 935			 */
 936				break;
 937			case RDT_MODE_PSEUDO_LOCKED:
 938			case RDT_NUM_MODES:
 939				WARN(1,
 940				     "invalid mode for closid %d\n", i);
 941				break;
 942			}
 943		}
 944		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
 945			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
 946			hwb = test_bit(i, &hw_shareable);
 947			swb = test_bit(i, &sw_shareable);
 948			excl = test_bit(i, &exclusive);
 949			psl = test_bit(i, &pseudo_locked);
 950			if (hwb && swb)
 951				seq_putc(seq, 'X');
 952			else if (hwb && !swb)
 953				seq_putc(seq, 'H');
 954			else if (!hwb && swb)
 955				seq_putc(seq, 'S');
 956			else if (excl)
 957				seq_putc(seq, 'E');
 958			else if (psl)
 959				seq_putc(seq, 'P');
 960			else /* Unused bits remain */
 961				seq_putc(seq, '0');
 962		}
 963		sep = true;
 964	}
 965	seq_putc(seq, '\n');
 966	mutex_unlock(&rdtgroup_mutex);
 967	return 0;
 968}
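/*
 * Output illustration (domains and bitmask values are hypothetical): for an
 * L3 resource with an 11-bit CBM, rdt_bit_usage_show() above could emit
 *
 *   0=SSSSSSS00XX;1=SSSSSSSSSSS
 *
 * Bits are printed from the most significant down: on domain 0 the two low
 * bits are shared by hardware and software ('X'), two bits are unused ('0')
 * and the rest belong to shareable software allocations ('S'); domain 1 is
 * fully software shareable. Domains are separated by ';'.
 */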
 969
 970static int rdt_min_bw_show(struct kernfs_open_file *of,
 971			     struct seq_file *seq, void *v)
 972{
 973	struct rdt_resource *r = of->kn->parent->priv;
 974
 975	seq_printf(seq, "%u\n", r->membw.min_bw);
 976	return 0;
 977}
 978
 979static int rdt_num_rmids_show(struct kernfs_open_file *of,
 980			      struct seq_file *seq, void *v)
 981{
 982	struct rdt_resource *r = of->kn->parent->priv;
 983
 984	seq_printf(seq, "%d\n", r->num_rmid);
 985
 986	return 0;
 987}
 988
 989static int rdt_mon_features_show(struct kernfs_open_file *of,
 990				 struct seq_file *seq, void *v)
 991{
 992	struct rdt_resource *r = of->kn->parent->priv;
 993	struct mon_evt *mevt;
 994
 995	list_for_each_entry(mevt, &r->evt_list, list)
 996		seq_printf(seq, "%s\n", mevt->name);
 997
 998	return 0;
 999}
1000
1001static int rdt_bw_gran_show(struct kernfs_open_file *of,
1002			     struct seq_file *seq, void *v)
1003{
1004	struct rdt_resource *r = of->kn->parent->priv;
1005
1006	seq_printf(seq, "%u\n", r->membw.bw_gran);
1007	return 0;
1008}
1009
1010static int rdt_delay_linear_show(struct kernfs_open_file *of,
1011			     struct seq_file *seq, void *v)
1012{
1013	struct rdt_resource *r = of->kn->parent->priv;
1014
1015	seq_printf(seq, "%u\n", r->membw.delay_linear);
1016	return 0;
1017}
1018
1019static int max_threshold_occ_show(struct kernfs_open_file *of,
1020				  struct seq_file *seq, void *v)
1021{
1022	struct rdt_resource *r = of->kn->parent->priv;
1023
1024	seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale);
1025
1026	return 0;
1027}
1028
1029static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1030					 struct seq_file *seq, void *v)
1031{
1032	struct rdt_resource *r = of->kn->parent->priv;
1033
1034	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
1035		seq_puts(seq, "per-thread\n");
1036	else
1037		seq_puts(seq, "max\n");
1038
1039	return 0;
1040}
1041
1042static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1043				       char *buf, size_t nbytes, loff_t off)
1044{
1045	struct rdt_resource *r = of->kn->parent->priv;
1046	unsigned int bytes;
1047	int ret;
1048
1049	ret = kstrtouint(buf, 0, &bytes);
1050	if (ret)
1051		return ret;
1052
1053	if (bytes > (boot_cpu_data.x86_cache_size * 1024))
1054		return -EINVAL;
1055
1056	resctrl_cqm_threshold = bytes / r->mon_scale;
1057
1058	return nbytes;
1059}
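/*
 * Rounding illustration (the mon_scale value is hypothetical): the threshold
 * is stored in units of r->mon_scale, so a written byte count is rounded
 * down to a multiple of that scale. With mon_scale = 64, writing 1000 stores
 * 1000 / 64 = 15, which max_threshold_occ_show() reports back as
 * 15 * 64 = 960 bytes.
 */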
1060
1061/*
1062 * rdtgroup_mode_show - Display mode of this resource group
1063 */
1064static int rdtgroup_mode_show(struct kernfs_open_file *of,
1065			      struct seq_file *s, void *v)
1066{
1067	struct rdtgroup *rdtgrp;
1068
1069	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1070	if (!rdtgrp) {
1071		rdtgroup_kn_unlock(of->kn);
1072		return -ENOENT;
1073	}
1074
1075	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1076
1077	rdtgroup_kn_unlock(of->kn);
1078	return 0;
1079}
1080
1081/**
1082 * rdt_cdp_peer_get - Retrieve CDP peer if it exists
1083 * @r: RDT resource to which RDT domain @d belongs
1084 * @d: Cache instance for which a CDP peer is requested
1085 * @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
1086 *         Used to return the result.
1087 * @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
1088 *         Used to return the result.
1089 *
1090 * RDT resources are managed independently and by extension the RDT domains
1091 * (RDT resource instances) are managed independently also. The Code and
1092 * Data Prioritization (CDP) RDT resources, while managed independently,
1093 * could refer to the same underlying hardware. For example,
1094 * RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
1095 *
1096 * When provided with an RDT resource @r and an instance of that RDT
1097 * resource @d, rdt_cdp_peer_get() will determine whether there is a peer RDT
1098 * resource and, if so, the exact instance that shares the same hardware.
1099 *
1100 * Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
1101 *         If a CDP peer was found, @r_cdp will point to the peer RDT resource
1102 *         and @d_cdp will point to the peer RDT domain.
1103 */
1104static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
1105			    struct rdt_resource **r_cdp,
1106			    struct rdt_domain **d_cdp)
1107{
1108	struct rdt_resource *_r_cdp = NULL;
1109	struct rdt_domain *_d_cdp = NULL;
1110	int ret = 0;
1111
1112	switch (r->rid) {
1113	case RDT_RESOURCE_L3DATA:
1114		_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
1115		break;
1116	case RDT_RESOURCE_L3CODE:
1117		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L3DATA];
1118		break;
1119	case RDT_RESOURCE_L2DATA:
1120		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2CODE];
1121		break;
1122	case RDT_RESOURCE_L2CODE:
1123		_r_cdp =  &rdt_resources_all[RDT_RESOURCE_L2DATA];
1124		break;
1125	default:
1126		ret = -ENOENT;
1127		goto out;
1128	}
1129
1130	/*
1131	 * When a new CPU comes online and CDP is enabled then the new
1132	 * RDT domains (if any) associated with both CDP RDT resources
1133	 * are added in the same CPU online routine while the
1134	 * rdtgroup_mutex is held. It should thus not happen for one
1135	 * RDT domain to exist and be associated with its RDT CDP
1136	 * resource but there is no RDT domain associated with the
1137	 * peer RDT CDP resource. Hence the WARN.
1138	 */
1139	_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
1140	if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
1141		_r_cdp = NULL;
1142		_d_cdp = NULL;
1143		ret = -EINVAL;
1144	}
1145
1146out:
1147	*r_cdp = _r_cdp;
1148	*d_cdp = _d_cdp;
1149
1150	return ret;
1151}
1152
1153/**
1154 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1155 * @r: Resource to which domain instance @d belongs.
1156 * @d: The domain instance for which @closid is being tested.
1157 * @cbm: Capacity bitmask being tested.
1158 * @closid: Intended closid for @cbm.
1159 * @exclusive: Only check if overlaps with exclusive resource groups
1160 *
1161 * Checks if provided @cbm intended to be used for @closid on domain
1162 * @d overlaps with any other closids or other hardware usage associated
1163 * with this domain. If @exclusive is true then only overlaps with
1164 * resource groups in exclusive mode will be considered. If @exclusive
1165 * is false then overlaps with any resource group or hardware entities
1166 * will be considered.
1167 *
1168 * @cbm is unsigned long, even if only 32 bits are used, to make the
1169 * bitmap functions work correctly.
1170 *
1171 * Return: false if CBM does not overlap, true if it does.
1172 */
1173static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1174				    unsigned long cbm, int closid, bool exclusive)
1175{
1176	enum rdtgrp_mode mode;
1177	unsigned long ctrl_b;
1178	u32 *ctrl;
1179	int i;
1180
1181	/* Check for any overlap with regions used by hardware directly */
1182	if (!exclusive) {
1183		ctrl_b = r->cache.shareable_bits;
1184		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1185			return true;
1186	}
1187
1188	/* Check for overlap with other resource groups */
1189	ctrl = d->ctrl_val;
1190	for (i = 0; i < closids_supported(); i++, ctrl++) {
1191		ctrl_b = *ctrl;
1192		mode = rdtgroup_mode_by_closid(i);
1193		if (closid_allocated(i) && i != closid &&
1194		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1195			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1196				if (exclusive) {
1197					if (mode == RDT_MODE_EXCLUSIVE)
1198						return true;
1199					continue;
1200				}
1201				return true;
1202			}
1203		}
1204	}
1205
1206	return false;
1207}
1208
1209/**
1210 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1211 * @r: Resource to which domain instance @d belongs.
1212 * @d: The domain instance for which @closid is being tested.
1213 * @cbm: Capacity bitmask being tested.
1214 * @closid: Intended closid for @cbm.
1215 * @exclusive: Only check if overlaps with exclusive resource groups
1216 *
1217 * Resources that can be allocated using a CBM can use the CBM to control
1219 * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
1219 * for overlap. Overlap test is not limited to the specific resource for
1220 * which the CBM is intended though - when dealing with CDP resources that
1221 * share the underlying hardware the overlap check should be performed on
1222 * the CDP resource sharing the hardware also.
1223 *
1224 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1225 * overlap test.
1226 *
1227 * Return: true if CBM overlap detected, false if there is no overlap
1228 */
1229bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1230			   unsigned long cbm, int closid, bool exclusive)
1231{
1232	struct rdt_resource *r_cdp;
1233	struct rdt_domain *d_cdp;
1234
1235	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
1236		return true;
1237
1238	if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
1239		return false;
1240
1241	return  __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
1242}
1243
1244/**
1245 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1246 *
1247 * An exclusive resource group implies that there should be no sharing of
1248 * its allocated resources. At the time this group is considered to be
1249 * exclusive this test can determine if its current schemata supports this
1250 * setting by testing for overlap with all other resource groups.
1251 *
1252 * Return: true if resource group can be exclusive, false if there is overlap
1253 * with allocations of other resource groups and thus this resource group
1254 * cannot be exclusive.
1255 */
1256static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1257{
1258	int closid = rdtgrp->closid;
1259	struct rdt_resource *r;
1260	bool has_cache = false;
1261	struct rdt_domain *d;
1262
1263	for_each_alloc_enabled_rdt_resource(r) {
1264		if (r->rid == RDT_RESOURCE_MBA)
1265			continue;
1266		has_cache = true;
1267		list_for_each_entry(d, &r->domains, list) {
1268			if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1269						  rdtgrp->closid, false)) {
1270				rdt_last_cmd_puts("Schemata overlaps\n");
1271				return false;
1272			}
1273		}
1274	}
1275
1276	if (!has_cache) {
1277		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1278		return false;
1279	}
1280
1281	return true;
1282}
1283
1284/**
1285 * rdtgroup_mode_write - Modify the resource group's mode
1286 *
1287 */
1288static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1289				   char *buf, size_t nbytes, loff_t off)
1290{
1291	struct rdtgroup *rdtgrp;
1292	enum rdtgrp_mode mode;
1293	int ret = 0;
1294
1295	/* Valid input requires a trailing newline */
1296	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1297		return -EINVAL;
1298	buf[nbytes - 1] = '\0';
1299
1300	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1301	if (!rdtgrp) {
1302		rdtgroup_kn_unlock(of->kn);
1303		return -ENOENT;
1304	}
1305
1306	rdt_last_cmd_clear();
1307
1308	mode = rdtgrp->mode;
1309
1310	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1311	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1312	    (!strcmp(buf, "pseudo-locksetup") &&
1313	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1314	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1315		goto out;
1316
1317	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1318		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1319		ret = -EINVAL;
1320		goto out;
1321	}
1322
1323	if (!strcmp(buf, "shareable")) {
1324		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1325			ret = rdtgroup_locksetup_exit(rdtgrp);
1326			if (ret)
1327				goto out;
1328		}
1329		rdtgrp->mode = RDT_MODE_SHAREABLE;
1330	} else if (!strcmp(buf, "exclusive")) {
1331		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1332			ret = -EINVAL;
1333			goto out;
1334		}
1335		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1336			ret = rdtgroup_locksetup_exit(rdtgrp);
1337			if (ret)
1338				goto out;
1339		}
1340		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1341	} else if (!strcmp(buf, "pseudo-locksetup")) {
1342		ret = rdtgroup_locksetup_enter(rdtgrp);
1343		if (ret)
1344			goto out;
1345		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1346	} else {
1347		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1348		ret = -EINVAL;
1349	}
1350
1351out:
1352	rdtgroup_kn_unlock(of->kn);
1353	return ret ?: nbytes;
1354}
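/*
 * Usage illustration (the group path is hypothetical): the keywords accepted
 * by rdtgroup_mode_write() are exactly the strings compared above, so a
 * group is switched to exclusive mode with e.g.
 *
 *   # echo exclusive > /sys/fs/resctrl/group0/mode
 *
 * A trailing newline is required, which echo supplies by default.
 */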
1355
1356/**
1357 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1358 * @r: RDT resource to which @d belongs.
1359 * @d: RDT domain instance.
1360 * @cbm: bitmask for which the size should be computed.
1361 *
1362 * The provided bitmask associated with the RDT domain instance @d will be
1363 * translated into how many bytes it represents. The size in bytes is
1364 * computed by first dividing the total cache size by the CBM length to
1365 * determine how many bytes each bit in the bitmask represents. The result
1366 * is then multiplied by the number of bits set in the bitmask.
1367 *
1368 * @cbm is unsigned long, even if only 32 bits are used, to make the
1369 * bitmap functions work correctly.
1370 */
1371unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1372				  struct rdt_domain *d, unsigned long cbm)
1373{
1374	struct cpu_cacheinfo *ci;
1375	unsigned int size = 0;
1376	int num_b, i;
1377
1378	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1379	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1380	for (i = 0; i < ci->num_leaves; i++) {
1381		if (ci->info_list[i].level == r->cache_level) {
1382			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1383			break;
1384		}
1385	}
1386
1387	return size;
1388}
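/*
 * Worked example (the cache geometry is hypothetical): for a 16 MiB cache
 * (16777216 bytes) with an 11-bit CBM, each bit accounts for
 * 16777216 / 11 = 1525201 bytes after the integer division above, so a CBM
 * with 4 bits set is reported as 4 * 1525201 = 6100804 bytes. The reported
 * size is therefore an approximation of the covered capacity.
 */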
1389
1390/**
1391 * rdtgroup_size_show - Display size in bytes of allocated regions
1392 *
1393 * The "size" file mirrors the layout of the "schemata" file, printing the
1394 * size in bytes of each region instead of the capacity bitmask.
1395 *
1396 */
1397static int rdtgroup_size_show(struct kernfs_open_file *of,
1398			      struct seq_file *s, void *v)
1399{
1400	struct rdtgroup *rdtgrp;
1401	struct rdt_resource *r;
1402	struct rdt_domain *d;
1403	unsigned int size;
1404	int ret = 0;
1405	bool sep;
1406	u32 ctrl;
1407
1408	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1409	if (!rdtgrp) {
1410		rdtgroup_kn_unlock(of->kn);
1411		return -ENOENT;
1412	}
1413
1414	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1415		if (!rdtgrp->plr->d) {
1416			rdt_last_cmd_clear();
1417			rdt_last_cmd_puts("Cache domain offline\n");
1418			ret = -ENODEV;
1419		} else {
1420			seq_printf(s, "%*s:", max_name_width,
1421				   rdtgrp->plr->r->name);
1422			size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
1423						    rdtgrp->plr->d,
1424						    rdtgrp->plr->cbm);
1425			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1426		}
1427		goto out;
1428	}
1429
1430	for_each_alloc_enabled_rdt_resource(r) {
1431		sep = false;
1432		seq_printf(s, "%*s:", max_name_width, r->name);
1433		list_for_each_entry(d, &r->domains, list) {
1434			if (sep)
1435				seq_putc(s, ';');
1436			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1437				size = 0;
1438			} else {
1439				ctrl = (!is_mba_sc(r) ?
1440						d->ctrl_val[rdtgrp->closid] :
1441						d->mbps_val[rdtgrp->closid]);
1442				if (r->rid == RDT_RESOURCE_MBA)
1443					size = ctrl;
1444				else
1445					size = rdtgroup_cbm_to_size(r, d, ctrl);
1446			}
1447			seq_printf(s, "%d=%u", d->id, size);
1448			sep = true;
1449		}
1450		seq_putc(s, '\n');
1451	}
1452
1453out:
1454	rdtgroup_kn_unlock(of->kn);
1455
1456	return ret;
1457}
1458
1459/* rdtgroup information files for one cache resource. */
1460static struct rftype res_common_files[] = {
1461	{
1462		.name		= "last_cmd_status",
1463		.mode		= 0444,
1464		.kf_ops		= &rdtgroup_kf_single_ops,
1465		.seq_show	= rdt_last_cmd_status_show,
1466		.fflags		= RF_TOP_INFO,
1467	},
1468	{
1469		.name		= "num_closids",
1470		.mode		= 0444,
1471		.kf_ops		= &rdtgroup_kf_single_ops,
1472		.seq_show	= rdt_num_closids_show,
1473		.fflags		= RF_CTRL_INFO,
1474	},
1475	{
1476		.name		= "mon_features",
1477		.mode		= 0444,
1478		.kf_ops		= &rdtgroup_kf_single_ops,
1479		.seq_show	= rdt_mon_features_show,
1480		.fflags		= RF_MON_INFO,
1481	},
1482	{
1483		.name		= "num_rmids",
1484		.mode		= 0444,
1485		.kf_ops		= &rdtgroup_kf_single_ops,
1486		.seq_show	= rdt_num_rmids_show,
1487		.fflags		= RF_MON_INFO,
1488	},
1489	{
1490		.name		= "cbm_mask",
1491		.mode		= 0444,
1492		.kf_ops		= &rdtgroup_kf_single_ops,
1493		.seq_show	= rdt_default_ctrl_show,
1494		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1495	},
1496	{
1497		.name		= "min_cbm_bits",
1498		.mode		= 0444,
1499		.kf_ops		= &rdtgroup_kf_single_ops,
1500		.seq_show	= rdt_min_cbm_bits_show,
1501		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1502	},
1503	{
1504		.name		= "shareable_bits",
1505		.mode		= 0444,
1506		.kf_ops		= &rdtgroup_kf_single_ops,
1507		.seq_show	= rdt_shareable_bits_show,
1508		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1509	},
1510	{
1511		.name		= "bit_usage",
1512		.mode		= 0444,
1513		.kf_ops		= &rdtgroup_kf_single_ops,
1514		.seq_show	= rdt_bit_usage_show,
1515		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1516	},
1517	{
1518		.name		= "min_bandwidth",
1519		.mode		= 0444,
1520		.kf_ops		= &rdtgroup_kf_single_ops,
1521		.seq_show	= rdt_min_bw_show,
1522		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1523	},
1524	{
1525		.name		= "bandwidth_gran",
1526		.mode		= 0444,
1527		.kf_ops		= &rdtgroup_kf_single_ops,
1528		.seq_show	= rdt_bw_gran_show,
1529		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1530	},
1531	{
1532		.name		= "delay_linear",
1533		.mode		= 0444,
1534		.kf_ops		= &rdtgroup_kf_single_ops,
1535		.seq_show	= rdt_delay_linear_show,
1536		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1537	},
1538	/*
1539	 * It is platform specific which (if any) capabilities are provided by
1540	 * thread_throttle_mode. Defer "fflags" initialization to platform
1541	 * discovery.
1542	 */
1543	{
1544		.name		= "thread_throttle_mode",
1545		.mode		= 0444,
1546		.kf_ops		= &rdtgroup_kf_single_ops,
1547		.seq_show	= rdt_thread_throttle_mode_show,
1548	},
1549	{
1550		.name		= "max_threshold_occupancy",
1551		.mode		= 0644,
1552		.kf_ops		= &rdtgroup_kf_single_ops,
1553		.write		= max_threshold_occ_write,
1554		.seq_show	= max_threshold_occ_show,
1555		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
1556	},
1557	{
1558		.name		= "cpus",
1559		.mode		= 0644,
1560		.kf_ops		= &rdtgroup_kf_single_ops,
1561		.write		= rdtgroup_cpus_write,
1562		.seq_show	= rdtgroup_cpus_show,
1563		.fflags		= RFTYPE_BASE,
1564	},
1565	{
1566		.name		= "cpus_list",
1567		.mode		= 0644,
1568		.kf_ops		= &rdtgroup_kf_single_ops,
1569		.write		= rdtgroup_cpus_write,
1570		.seq_show	= rdtgroup_cpus_show,
1571		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1572		.fflags		= RFTYPE_BASE,
1573	},
1574	{
1575		.name		= "tasks",
1576		.mode		= 0644,
1577		.kf_ops		= &rdtgroup_kf_single_ops,
1578		.write		= rdtgroup_tasks_write,
1579		.seq_show	= rdtgroup_tasks_show,
1580		.fflags		= RFTYPE_BASE,
1581	},
1582	{
1583		.name		= "schemata",
1584		.mode		= 0644,
1585		.kf_ops		= &rdtgroup_kf_single_ops,
1586		.write		= rdtgroup_schemata_write,
1587		.seq_show	= rdtgroup_schemata_show,
1588		.fflags		= RF_CTRL_BASE,
1589	},
1590	{
1591		.name		= "mode",
1592		.mode		= 0644,
1593		.kf_ops		= &rdtgroup_kf_single_ops,
1594		.write		= rdtgroup_mode_write,
1595		.seq_show	= rdtgroup_mode_show,
1596		.fflags		= RF_CTRL_BASE,
1597	},
1598	{
1599		.name		= "size",
1600		.mode		= 0444,
1601		.kf_ops		= &rdtgroup_kf_single_ops,
1602		.seq_show	= rdtgroup_size_show,
1603		.fflags		= RF_CTRL_BASE,
1604	},
1605
1606};
1607
1608static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1609{
1610	struct rftype *rfts, *rft;
1611	int ret, len;
1612
1613	rfts = res_common_files;
1614	len = ARRAY_SIZE(res_common_files);
1615
1616	lockdep_assert_held(&rdtgroup_mutex);
1617
1618	for (rft = rfts; rft < rfts + len; rft++) {
1619		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
1620			ret = rdtgroup_add_file(kn, rft);
1621			if (ret)
1622				goto error;
1623		}
1624	}
1625
1626	return 0;
1627error:
1628	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
1629	while (--rft >= rfts) {
1630		if ((fflags & rft->fflags) == rft->fflags)
1631			kernfs_remove_by_name(kn, rft->name);
1632	}
1633	return ret;
1634}
1635
1636static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
1637{
1638	struct rftype *rfts, *rft;
1639	int len;
1640
1641	rfts = res_common_files;
1642	len = ARRAY_SIZE(res_common_files);
1643
1644	for (rft = rfts; rft < rfts + len; rft++) {
1645		if (!strcmp(rft->name, name))
1646			return rft;
1647	}
1648
1649	return NULL;
1650}
1651
1652void __init thread_throttle_mode_init(void)
1653{
1654	struct rftype *rft;
1655
1656	rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
1657	if (!rft)
1658		return;
1659
1660	rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
1661}
1662
1663/**
1664 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1665 * @r: The resource group with which the file is associated.
1666 * @name: Name of the file
1667 *
1668 * The permissions of named resctrl file, directory, or link are modified
1669 * to not allow read, write, or execute by any user.
1670 *
1671 * WARNING: This function is intended to communicate to the user that the
1672 * resctrl file has been locked down - that it is not relevant to the
1673 * particular state the system finds itself in. It should not be relied
1674 * on to protect from user access because after the file's permissions
1675 * are restricted the user can still change the permissions using chmod
1676 * from the command line.
1677 *
1678 * Return: 0 on success, <0 on failure.
1679 */
1680int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1681{
1682	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1683	struct kernfs_node *kn;
1684	int ret = 0;
1685
1686	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1687	if (!kn)
1688		return -ENOENT;
1689
1690	switch (kernfs_type(kn)) {
1691	case KERNFS_DIR:
1692		iattr.ia_mode = S_IFDIR;
1693		break;
1694	case KERNFS_FILE:
1695		iattr.ia_mode = S_IFREG;
1696		break;
1697	case KERNFS_LINK:
1698		iattr.ia_mode = S_IFLNK;
1699		break;
1700	}
1701
1702	ret = kernfs_setattr(kn, &iattr);
1703	kernfs_put(kn);
1704	return ret;
1705}
1706
1707/**
1708 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1709 * @r: The resource group with which the file is associated.
1710 * @name: Name of the file
1711 * @mask: Mask of permissions that should be restored
1712 *
1713 * Restore the permissions of the named file. If @name is a directory the
1714 * permissions of its parent will be used.
1715 *
1716 * Return: 0 on success, <0 on failure.
1717 */
1718int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1719			     umode_t mask)
1720{
1721	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1722	struct kernfs_node *kn, *parent;
1723	struct rftype *rfts, *rft;
1724	int ret, len;
1725
1726	rfts = res_common_files;
1727	len = ARRAY_SIZE(res_common_files);
1728
1729	for (rft = rfts; rft < rfts + len; rft++) {
1730		if (!strcmp(rft->name, name))
1731			iattr.ia_mode = rft->mode & mask;
1732	}
1733
1734	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1735	if (!kn)
1736		return -ENOENT;
1737
1738	switch (kernfs_type(kn)) {
1739	case KERNFS_DIR:
1740		parent = kernfs_get_parent(kn);
1741		if (parent) {
1742			iattr.ia_mode |= parent->mode;
1743			kernfs_put(parent);
1744		}
1745		iattr.ia_mode |= S_IFDIR;
1746		break;
1747	case KERNFS_FILE:
1748		iattr.ia_mode |= S_IFREG;
1749		break;
1750	case KERNFS_LINK:
1751		iattr.ia_mode |= S_IFLNK;
1752		break;
1753	}
1754
1755	ret = kernfs_setattr(kn, &iattr);
1756	kernfs_put(kn);
1757	return ret;
1758}
1759
1760static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
1761				      unsigned long fflags)
1762{
1763	struct kernfs_node *kn_subdir;
1764	int ret;
1765
1766	kn_subdir = kernfs_create_dir(kn_info, name,
1767				      kn_info->mode, r);
1768	if (IS_ERR(kn_subdir))
1769		return PTR_ERR(kn_subdir);
1770
1771	ret = rdtgroup_kn_set_ugid(kn_subdir);
1772	if (ret)
1773		return ret;
1774
1775	ret = rdtgroup_add_files(kn_subdir, fflags);
1776	if (!ret)
1777		kernfs_activate(kn_subdir);
1778
1779	return ret;
1780}
1781
1782static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
1783{
1784	struct rdt_resource *r;
1785	unsigned long fflags;
1786	char name[32];
1787	int ret;
1788
1789	/* create the directory */
1790	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
1791	if (IS_ERR(kn_info))
1792		return PTR_ERR(kn_info);
1793
1794	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
1795	if (ret)
1796		goto out_destroy;
1797
1798	for_each_alloc_enabled_rdt_resource(r) {
1799		fflags =  r->fflags | RF_CTRL_INFO;
1800		ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
1801		if (ret)
1802			goto out_destroy;
1803	}
1804
1805	for_each_mon_enabled_rdt_resource(r) {
1806		fflags =  r->fflags | RF_MON_INFO;
1807		sprintf(name, "%s_MON", r->name);
1808		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
1809		if (ret)
1810			goto out_destroy;
1811	}
1812
1813	ret = rdtgroup_kn_set_ugid(kn_info);
1814	if (ret)
1815		goto out_destroy;
1816
1817	kernfs_activate(kn_info);
1818
1819	return 0;
1820
1821out_destroy:
1822	kernfs_remove(kn_info);
1823	return ret;
1824}
1825
1826static int
1827mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
1828		    char *name, struct kernfs_node **dest_kn)
1829{
1830	struct kernfs_node *kn;
1831	int ret;
1832
1833	/* create the directory */
1834	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
1835	if (IS_ERR(kn))
1836		return PTR_ERR(kn);
1837
1838	if (dest_kn)
1839		*dest_kn = kn;
1840
1841	ret = rdtgroup_kn_set_ugid(kn);
1842	if (ret)
1843		goto out_destroy;
1844
1845	kernfs_activate(kn);
1846
1847	return 0;
1848
1849out_destroy:
1850	kernfs_remove(kn);
1851	return ret;
1852}
1853
1854static void l3_qos_cfg_update(void *arg)
1855{
1856	bool *enable = arg;
1857
1858	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
1859}
1860
1861static void l2_qos_cfg_update(void *arg)
1862{
1863	bool *enable = arg;
1864
1865	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
1866}
1867
1868static inline bool is_mba_linear(void)
1869{
1870	return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
1871}
1872
1873static int set_cache_qos_cfg(int level, bool enable)
1874{
1875	void (*update)(void *arg);
1876	struct rdt_resource *r_l;
1877	cpumask_var_t cpu_mask;
1878	struct rdt_domain *d;
1879	int cpu;
1880
1881	if (level == RDT_RESOURCE_L3)
1882		update = l3_qos_cfg_update;
1883	else if (level == RDT_RESOURCE_L2)
1884		update = l2_qos_cfg_update;
1885	else
1886		return -EINVAL;
1887
1888	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1889		return -ENOMEM;
1890
1891	r_l = &rdt_resources_all[level];
1892	list_for_each_entry(d, &r_l->domains, list) {
1893		if (r_l->cache.arch_has_per_cpu_cfg)
1894			/* Pick all the CPUs in the domain instance */
1895			for_each_cpu(cpu, &d->cpu_mask)
1896				cpumask_set_cpu(cpu, cpu_mask);
1897		else
1898			/* Pick one CPU from each domain instance to update MSR */
1899			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
1900	}
1901	cpu = get_cpu();
1902	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
1903	if (cpumask_test_cpu(cpu, cpu_mask))
1904		update(&enable);
1905	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
1906	smp_call_function_many(cpu_mask, update, &enable, 1);
1907	put_cpu();
1908
1909	free_cpumask_var(cpu_mask);
1910
1911	return 0;
1912}
1913
1914/* Restore the qos cfg state when a domain comes online */
1915void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
1916{
1917	if (!r->alloc_capable)
1918		return;
1919
1920	if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA])
1921		l2_qos_cfg_update(&r->alloc_enabled);
1922
1923	if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA])
1924		l3_qos_cfg_update(&r->alloc_enabled);
1925}
1926
1927/*
1928 * Enable or disable the MBA software controller
1929 * which helps user specify bandwidth in MBps.
1930 * MBA software controller is supported only if
1931 * MBM is supported and MBA is in linear scale.
1932 */
1933static int set_mba_sc(bool mba_sc)
1934{
1935	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
1936	struct rdt_domain *d;
1937
1938	if (!is_mbm_enabled() || !is_mba_linear() ||
1939	    mba_sc == is_mba_sc(r))
1940		return -EINVAL;
1941
1942	r->membw.mba_sc = mba_sc;
1943	list_for_each_entry(d, &r->domains, list)
1944		setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
1945
1946	return 0;
1947}
1948
1949static int cdp_enable(int level, int data_type, int code_type)
1950{
1951	struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
1952	struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
1953	struct rdt_resource *r_l = &rdt_resources_all[level];
1954	int ret;
1955
1956	if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
1957	    !r_lcode->alloc_capable)
1958		return -EINVAL;
1959
1960	ret = set_cache_qos_cfg(level, true);
1961	if (!ret) {
1962		r_l->alloc_enabled = false;
1963		r_ldata->alloc_enabled = true;
1964		r_lcode->alloc_enabled = true;
1965	}
1966	return ret;
1967}
1968
1969static int cdpl3_enable(void)
1970{
1971	return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
1972			  RDT_RESOURCE_L3CODE);
1973}
1974
1975static int cdpl2_enable(void)
1976{
1977	return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
1978			  RDT_RESOURCE_L2CODE);
1979}
1980
1981static void cdp_disable(int level, int data_type, int code_type)
1982{
1983	struct rdt_resource *r = &rdt_resources_all[level];
1984
1985	r->alloc_enabled = r->alloc_capable;
1986
1987	if (rdt_resources_all[data_type].alloc_enabled) {
1988		rdt_resources_all[data_type].alloc_enabled = false;
1989		rdt_resources_all[code_type].alloc_enabled = false;
1990		set_cache_qos_cfg(level, false);
1991	}
1992}
1993
1994static void cdpl3_disable(void)
1995{
1996	cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
1997}
1998
1999static void cdpl2_disable(void)
2000{
2001	cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
2002}
2003
2004static void cdp_disable_all(void)
2005{
2006	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
2007		cdpl3_disable();
2008	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
2009		cdpl2_disable();
2010}
2011
2012/*
2013 * We don't allow rdtgroup directories to be created anywhere
2014 * except the root directory. Thus when looking for the rdtgroup
2015 * structure for a kernfs node we are either looking at a directory,
2016 * in which case the rdtgroup structure is pointed at by the "priv"
2017 * field, or at a file, in which case we need only look to the parent
2018 * to find the rdtgroup.
2019 */
2020static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2021{
2022	if (kernfs_type(kn) == KERNFS_DIR) {
2023		/*
2024		 * All the resource directories use "kn->priv"
2025		 * to point to the "struct rdtgroup" for the
2026		 * resource. "info" and its subdirectories don't
2027		 * have rdtgroup structures, so return NULL here.
2028		 */
2029		if (kn == kn_info || kn->parent == kn_info)
2030			return NULL;
2031		else
2032			return kn->priv;
2033	} else {
2034		return kn->parent->priv;
2035	}
2036}
2037
2038struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2039{
2040	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2041
2042	if (!rdtgrp)
2043		return NULL;
2044
2045	atomic_inc(&rdtgrp->waitcount);
2046	kernfs_break_active_protection(kn);
2047
2048	mutex_lock(&rdtgroup_mutex);
2049
2050	/* Was this group deleted while we waited? */
2051	if (rdtgrp->flags & RDT_DELETED)
2052		return NULL;
2053
2054	return rdtgrp;
2055}
2056
2057void rdtgroup_kn_unlock(struct kernfs_node *kn)
2058{
2059	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2060
2061	if (!rdtgrp)
2062		return;
2063
2064	mutex_unlock(&rdtgroup_mutex);
2065
2066	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2067	    (rdtgrp->flags & RDT_DELETED)) {
2068		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2069		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2070			rdtgroup_pseudo_lock_remove(rdtgrp);
2071		kernfs_unbreak_active_protection(kn);
2072		rdtgroup_remove(rdtgrp);
2073	} else {
2074		kernfs_unbreak_active_protection(kn);
2075	}
2076}
2077
2078static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2079			     struct rdtgroup *prgrp,
2080			     struct kernfs_node **mon_data_kn);
2081
2082static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2083{
2084	int ret = 0;
2085
2086	if (ctx->enable_cdpl2)
2087		ret = cdpl2_enable();
2088
2089	if (!ret && ctx->enable_cdpl3)
2090		ret = cdpl3_enable();
2091
2092	if (!ret && ctx->enable_mba_mbps)
2093		ret = set_mba_sc(true);
2094
2095	return ret;
2096}
2097
2098static int rdt_get_tree(struct fs_context *fc)
2099{
2100	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2101	struct rdt_domain *dom;
2102	struct rdt_resource *r;
2103	int ret;
2104
2105	cpus_read_lock();
2106	mutex_lock(&rdtgroup_mutex);
2107	/*
2108	 * resctrl file system can only be mounted once.
2109	 */
2110	if (static_branch_unlikely(&rdt_enable_key)) {
2111		ret = -EBUSY;
2112		goto out;
2113	}
2114
2115	ret = rdt_enable_ctx(ctx);
2116	if (ret < 0)
2117		goto out_cdp;
2118
2119	closid_init();
2120
2121	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2122	if (ret < 0)
2123		goto out_mba;
2124
2125	if (rdt_mon_capable) {
2126		ret = mongroup_create_dir(rdtgroup_default.kn,
2127					  &rdtgroup_default, "mon_groups",
2128					  &kn_mongrp);
2129		if (ret < 0)
2130			goto out_info;
2131
2132		ret = mkdir_mondata_all(rdtgroup_default.kn,
2133					&rdtgroup_default, &kn_mondata);
2134		if (ret < 0)
2135			goto out_mongrp;
2136		rdtgroup_default.mon.mon_data_kn = kn_mondata;
2137	}
2138
2139	ret = rdt_pseudo_lock_init();
2140	if (ret)
2141		goto out_mondata;
2142
2143	ret = kernfs_get_tree(fc);
2144	if (ret < 0)
2145		goto out_psl;
2146
2147	if (rdt_alloc_capable)
2148		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
2149	if (rdt_mon_capable)
2150		static_branch_enable_cpuslocked(&rdt_mon_enable_key);
2151
2152	if (rdt_alloc_capable || rdt_mon_capable)
2153		static_branch_enable_cpuslocked(&rdt_enable_key);
2154
2155	if (is_mbm_enabled()) {
2156		r = &rdt_resources_all[RDT_RESOURCE_L3];
2157		list_for_each_entry(dom, &r->domains, list)
2158			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
2159	}
2160
2161	goto out;
2162
2163out_psl:
2164	rdt_pseudo_lock_release();
2165out_mondata:
2166	if (rdt_mon_capable)
2167		kernfs_remove(kn_mondata);
2168out_mongrp:
2169	if (rdt_mon_capable)
2170		kernfs_remove(kn_mongrp);
2171out_info:
2172	kernfs_remove(kn_info);
2173out_mba:
2174	if (ctx->enable_mba_mbps)
2175		set_mba_sc(false);
2176out_cdp:
2177	cdp_disable_all();
2178out:
2179	rdt_last_cmd_clear();
2180	mutex_unlock(&rdtgroup_mutex);
2181	cpus_read_unlock();
2182	return ret;
2183}
2184
2185enum rdt_param {
2186	Opt_cdp,
2187	Opt_cdpl2,
2188	Opt_mba_mbps,
2189	nr__rdt_params
2190};
2191
2192static const struct fs_parameter_spec rdt_fs_parameters[] = {
2193	fsparam_flag("cdp",		Opt_cdp),
2194	fsparam_flag("cdpl2",		Opt_cdpl2),
2195	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2196	{}
2197};
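/*
 * Usage illustration: these parameters correspond to the resctrl mount
 * options, e.g.
 *
 *   # mount -t resctrl resctrl -o cdp,mba_MBps /sys/fs/resctrl
 *
 * "cdp" enables L3 code/data prioritization, "cdpl2" the L2 equivalent and
 * "mba_MBps" the MBA software controller applied in rdt_enable_ctx().
 */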
2198
2199static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2200{
2201	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2202	struct fs_parse_result result;
2203	int opt;
2204
2205	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2206	if (opt < 0)
2207		return opt;
2208
2209	switch (opt) {
2210	case Opt_cdp:
2211		ctx->enable_cdpl3 = true;
2212		return 0;
2213	case Opt_cdpl2:
2214		ctx->enable_cdpl2 = true;
2215		return 0;
2216	case Opt_mba_mbps:
2217		if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
2218			return -EINVAL;
2219		ctx->enable_mba_mbps = true;
2220		return 0;
2221	}
2222
2223	return -EINVAL;
2224}
2225
2226static void rdt_fs_context_free(struct fs_context *fc)
2227{
2228	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2229
2230	kernfs_free_fs_context(fc);
2231	kfree(ctx);
2232}
2233
2234static const struct fs_context_operations rdt_fs_context_ops = {
2235	.free		= rdt_fs_context_free,
2236	.parse_param	= rdt_parse_param,
2237	.get_tree	= rdt_get_tree,
2238};
2239
2240static int rdt_init_fs_context(struct fs_context *fc)
2241{
2242	struct rdt_fs_context *ctx;
2243
2244	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2245	if (!ctx)
2246		return -ENOMEM;
2247
2248	ctx->kfc.root = rdt_root;
2249	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2250	fc->fs_private = &ctx->kfc;
2251	fc->ops = &rdt_fs_context_ops;
2252	put_user_ns(fc->user_ns);
2253	fc->user_ns = get_user_ns(&init_user_ns);
2254	fc->global = true;
2255	return 0;
2256}
2257
2258static int reset_all_ctrls(struct rdt_resource *r)
2259{
2260	struct msr_param msr_param;
2261	cpumask_var_t cpu_mask;
2262	struct rdt_domain *d;
2263	int i, cpu;
2264
2265	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2266		return -ENOMEM;
2267
2268	msr_param.res = r;
2269	msr_param.low = 0;
2270	msr_param.high = r->num_closid;
2271
2272	/*
2273	 * Disable resource control for this resource by setting all
2274	 * CBMs in all domains to the maximum mask value. Pick one CPU
2275	 * from each domain to update the MSRs below.
2276	 */
2277	list_for_each_entry(d, &r->domains, list) {
2278		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
2279
2280		for (i = 0; i < r->num_closid; i++)
2281			d->ctrl_val[i] = r->default_ctrl;
2282	}
2283	cpu = get_cpu();
2284	/* Update CBM on this cpu if it's in cpu_mask. */
2285	if (cpumask_test_cpu(cpu, cpu_mask))
2286		rdt_ctrl_update(&msr_param);
2287	/* Update CBM on all other cpus in cpu_mask. */
2288	smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
2289	put_cpu();
2290
2291	free_cpumask_var(cpu_mask);
2292
2293	return 0;
2294}
2295
2296/*
2297 * Move tasks from one to the other group. If @from is NULL, then all tasks
2298 * in the system are moved unconditionally (used for teardown).
2299 *
2300 * If @mask is not NULL the cpus on which moved tasks are running are set
2301 * in that mask so the update smp function call is restricted to affected
2302 * cpus.
2303 */
2304static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2305				 struct cpumask *mask)
2306{
2307	struct task_struct *p, *t;
2308
2309	read_lock(&tasklist_lock);
2310	for_each_process_thread(p, t) {
2311		if (!from || is_closid_match(t, from) ||
2312		    is_rmid_match(t, from)) {
2313			WRITE_ONCE(t->closid, to->closid);
2314			WRITE_ONCE(t->rmid, to->mon.rmid);
2315
2316			/*
2317			 * If the task is on a CPU, set the CPU in the mask.
2318			 * The detection is inaccurate as tasks might move or
2319			 * schedule before the smp function call takes place.
2320			 * In such a case the function call is pointless, but
2321			 * there is no other side effect.
2322			 */
2323			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2324				cpumask_set_cpu(task_cpu(t), mask);
2325		}
2326	}
2327	read_unlock(&tasklist_lock);
2328}
2329
2330static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2331{
2332	struct rdtgroup *sentry, *stmp;
2333	struct list_head *head;
2334
2335	head = &rdtgrp->mon.crdtgrp_list;
2336	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2337		free_rmid(sentry->mon.rmid);
2338		list_del(&sentry->mon.crdtgrp_list);
2339
2340		if (atomic_read(&sentry->waitcount) != 0)
2341			sentry->flags = RDT_DELETED;
2342		else
2343			rdtgroup_remove(sentry);
2344	}
2345}
2346
2347/*
2348 * Forcibly remove all of subdirectories under root.
2349 */
2350static void rmdir_all_sub(void)
2351{
2352	struct rdtgroup *rdtgrp, *tmp;
2353
2354	/* Move all tasks to the default resource group */
2355	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2356
2357	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2358		/* Free any child rmids */
2359		free_all_child_rdtgrp(rdtgrp);
2360
2361		/* Remove each rdtgroup other than root */
2362		if (rdtgrp == &rdtgroup_default)
2363			continue;
2364
2365		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2366		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2367			rdtgroup_pseudo_lock_remove(rdtgrp);
2368
2369		/*
2370		 * Give any CPUs back to the default group. We cannot copy
2371		 * cpu_online_mask because a CPU might have executed the
2372		 * offline callback already, but is still marked online.
2373		 */
2374		cpumask_or(&rdtgroup_default.cpu_mask,
2375			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2376
2377		free_rmid(rdtgrp->mon.rmid);
2378
2379		kernfs_remove(rdtgrp->kn);
2380		list_del(&rdtgrp->rdtgroup_list);
2381
2382		if (atomic_read(&rdtgrp->waitcount) != 0)
2383			rdtgrp->flags = RDT_DELETED;
2384		else
2385			rdtgroup_remove(rdtgrp);
2386	}
2387	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2388	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2389
2390	kernfs_remove(kn_info);
2391	kernfs_remove(kn_mongrp);
2392	kernfs_remove(kn_mondata);
2393}
2394
2395static void rdt_kill_sb(struct super_block *sb)
2396{
2397	struct rdt_resource *r;
2398
2399	cpus_read_lock();
2400	mutex_lock(&rdtgroup_mutex);
2401
2402	set_mba_sc(false);
2403
2404	/* Put everything back to default values. */
2405	for_each_alloc_enabled_rdt_resource(r)
2406		reset_all_ctrls(r);
2407	cdp_disable_all();
2408	rmdir_all_sub();
2409	rdt_pseudo_lock_release();
2410	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
2411	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
2412	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
2413	static_branch_disable_cpuslocked(&rdt_enable_key);
2414	kernfs_kill_sb(sb);
2415	mutex_unlock(&rdtgroup_mutex);
2416	cpus_read_unlock();
2417}
2418
2419static struct file_system_type rdt_fs_type = {
2420	.name			= "resctrl",
2421	.init_fs_context	= rdt_init_fs_context,
2422	.parameters		= rdt_fs_parameters,
2423	.kill_sb		= rdt_kill_sb,
2424};
2425
2426static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2427		       void *priv)
2428{
2429	struct kernfs_node *kn;
2430	int ret = 0;
2431
2432	kn = __kernfs_create_file(parent_kn, name, 0444,
2433				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2434				  &kf_mondata_ops, priv, NULL, NULL);
2435	if (IS_ERR(kn))
2436		return PTR_ERR(kn);
2437
2438	ret = rdtgroup_kn_set_ugid(kn);
2439	if (ret) {
2440		kernfs_remove(kn);
2441		return ret;
2442	}
2443
2444	return ret;
2445}
2446
2447/*
2448 * Remove all subdirectories of mon_data of ctrl_mon groups
2449 * and monitor groups with given domain id.
2450 */
2451void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
2452{
2453	struct rdtgroup *prgrp, *crgrp;
2454	char name[32];
2455
2456	if (!r->mon_enabled)
2457		return;
2458
2459	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2460		sprintf(name, "mon_%s_%02d", r->name, dom_id);
2461		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
2462
2463		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
2464			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
2465	}
2466}
2467
2468static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
2469				struct rdt_domain *d,
2470				struct rdt_resource *r, struct rdtgroup *prgrp)
2471{
2472	union mon_data_bits priv;
2473	struct kernfs_node *kn;
2474	struct mon_evt *mevt;
2475	struct rmid_read rr;
2476	char name[32];
2477	int ret;
2478
2479	sprintf(name, "mon_%s_%02d", r->name, d->id);
2480	/* create the directory */
2481	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2482	if (IS_ERR(kn))
2483		return PTR_ERR(kn);
2484
2485	ret = rdtgroup_kn_set_ugid(kn);
2486	if (ret)
2487		goto out_destroy;
2488
2489	if (WARN_ON(list_empty(&r->evt_list))) {
2490		ret = -EPERM;
2491		goto out_destroy;
2492	}
2493
2494	priv.u.rid = r->rid;
2495	priv.u.domid = d->id;
2496	list_for_each_entry(mevt, &r->evt_list, list) {
2497		priv.u.evtid = mevt->evtid;
2498		ret = mon_addfile(kn, mevt->name, priv.priv);
2499		if (ret)
2500			goto out_destroy;
2501
2502		if (is_mbm_event(mevt->evtid))
2503			mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
2504	}
2505	kernfs_activate(kn);
2506	return 0;
2507
2508out_destroy:
2509	kernfs_remove(kn);
2510	return ret;
2511}
2512
2513/*
2514 * Add all subdirectories of mon_data for "ctrl_mon" groups
2515 * and "monitor" groups with given domain id.
2516 */
2517void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2518				    struct rdt_domain *d)
2519{
2520	struct kernfs_node *parent_kn;
2521	struct rdtgroup *prgrp, *crgrp;
2522	struct list_head *head;
2523
2524	if (!r->mon_enabled)
2525		return;
2526
2527	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2528		parent_kn = prgrp->mon.mon_data_kn;
2529		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
2530
2531		head = &prgrp->mon.crdtgrp_list;
2532		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
2533			parent_kn = crgrp->mon.mon_data_kn;
2534			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
2535		}
2536	}
2537}
2538
2539static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
2540				       struct rdt_resource *r,
2541				       struct rdtgroup *prgrp)
2542{
2543	struct rdt_domain *dom;
2544	int ret;
2545
2546	list_for_each_entry(dom, &r->domains, list) {
2547		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
2548		if (ret)
2549			return ret;
2550	}
2551
2552	return 0;
2553}
2554
2555/*
2556 * This creates a directory mon_data which contains the monitored data.
2557 *
2558 * mon_data has one directory for each domain which are named
2559 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
2560 * with L3 domain looks as below:
2561 * ./mon_data:
2562 * mon_L3_00
2563 * mon_L3_01
2564 * mon_L3_02
2565 * ...
2566 *
2567 * Each domain directory has one file per event:
2568 * ./mon_L3_00/:
2569 * llc_occupancy
2570 *
2571 */
2572static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2573			     struct rdtgroup *prgrp,
2574			     struct kernfs_node **dest_kn)
2575{
2576	struct rdt_resource *r;
2577	struct kernfs_node *kn;
2578	int ret;
2579
2580	/*
2581	 * Create the mon_data directory first.
2582	 */
2583	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
2584	if (ret)
2585		return ret;
2586
2587	if (dest_kn)
2588		*dest_kn = kn;
2589
2590	/*
2591	 * Create the subdirectories for each domain. Note that all events
2592	 * in a domain like L3 are grouped into a resource whose domain is L3
2593	 */
2594	for_each_mon_enabled_rdt_resource(r) {
2595		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
2596		if (ret)
2597			goto out_destroy;
2598	}
2599
2600	return 0;
2601
2602out_destroy:
2603	kernfs_remove(kn);
2604	return ret;
2605}
2606
2607/**
2608 * cbm_ensure_valid - Enforce validity on provided CBM
2609 * @_val:	Candidate CBM
2610 * @r:		RDT resource to which the CBM belongs
2611 *
2612 * The provided CBM represents all cache portions available for use. This
2613 * may be represented by a bitmap that does not consist of contiguous ones
2614 * and thus be an invalid CBM.
2615 * Here the provided CBM is forced to be a valid CBM by only considering
2616 * the first set of contiguous bits as valid and clearing all remaining bits.
2617 * The intention here is to provide a valid default CBM with which a new
2618 * resource group is initialized. The user can follow this with a
2619 * modification to the CBM if the default does not satisfy the
2620 * requirements.
2621 */
2622static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
2623{
2624	unsigned int cbm_len = r->cache.cbm_len;
2625	unsigned long first_bit, zero_bit;
2626	unsigned long val = _val;
2627
2628	if (!val)
2629		return 0;
2630
2631	first_bit = find_first_bit(&val, cbm_len);
2632	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
2633
2634	/* Clear any remaining bits to ensure contiguous region */
2635	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
2636	return (u32)val;
2637}
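/*
 * Worked example (the input value is hypothetical): with an 8-bit CBM and
 * _val = 0x36 (0b00110110), the first set bit is bit 1 and the next zero
 * bit above it is bit 3, so bits 3..7 are cleared and the function returns
 * 0x06 (0b00000110), the lowest run of contiguous set bits.
 */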
2638
2639/*
2640 * Initialize cache resources per RDT domain
2641 *
2642 * Set the RDT domain up to start off with all usable allocations. That is,
2643 * all shareable and unused bits. All-zero CBM is invalid.
2644 */
2645static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
2646				 u32 closid)
2647{
2648	struct rdt_resource *r_cdp = NULL;
2649	struct rdt_domain *d_cdp = NULL;
2650	u32 used_b = 0, unused_b = 0;
2651	unsigned long tmp_cbm;
2652	enum rdtgrp_mode mode;
2653	u32 peer_ctl, *ctrl;
2654	int i;
2655
2656	rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
2657	d->have_new_ctrl = false;
2658	d->new_ctrl = r->cache.shareable_bits;
2659	used_b = r->cache.shareable_bits;
2660	ctrl = d->ctrl_val;
2661	for (i = 0; i < closids_supported(); i++, ctrl++) {
2662		if (closid_allocated(i) && i != closid) {
2663			mode = rdtgroup_mode_by_closid(i);
2664			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
2665				/*
2666				 * ctrl values for locksetup aren't relevant
2667				 * until the schemata is written, and the mode
2668				 * becomes RDT_MODE_PSEUDO_LOCKED.
2669				 */
2670				continue;
2671			/*
2672			 * If CDP is active include peer domain's
2673			 * usage to ensure there is no overlap
2674			 * with an exclusive group.
2675			 */
2676			if (d_cdp)
2677				peer_ctl = d_cdp->ctrl_val[i];
2678			else
2679				peer_ctl = 0;
2680			used_b |= *ctrl | peer_ctl;
2681			if (mode == RDT_MODE_SHAREABLE)
2682				d->new_ctrl |= *ctrl | peer_ctl;
2683		}
2684	}
2685	if (d->plr && d->plr->cbm > 0)
2686		used_b |= d->plr->cbm;
2687	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
2688	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
2689	d->new_ctrl |= unused_b;
2690	/*
2691	 * Force the initial CBM to be valid, user can
2692	 * modify the CBM based on system availability.
2693	 */
2694	d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
2695	/*
2696	 * Assign the u32 CBM to an unsigned long to ensure that
2697	 * bitmap_weight() does not access out-of-bound memory.
2698	 */
2699	tmp_cbm = d->new_ctrl;
2700	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
2701		rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
2702		return -ENOSPC;
2703	}
2704	d->have_new_ctrl = true;
2705
2706	return 0;
2707}
2708
2709/*
2710 * Initialize cache resources with default values.
2711 *
2712 * A new RDT group is being created on an allocation capable (CAT)
2713 * supporting system. Set this group up to start off with all usable
2714 * allocations.
2715 *
2716 * If there are no more shareable bits available on any domain then
2717 * the entire allocation will fail.
2718 */
2719static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
2720{
2721	struct rdt_domain *d;
2722	int ret;
2723
2724	list_for_each_entry(d, &r->domains, list) {
2725		ret = __init_one_rdt_domain(d, r, closid);
2726		if (ret < 0)
2727			return ret;
2728	}
2729
2730	return 0;
2731}
2732
2733/* Initialize MBA resource with default values. */
2734static void rdtgroup_init_mba(struct rdt_resource *r)
2735{
2736	struct rdt_domain *d;
2737
2738	list_for_each_entry(d, &r->domains, list) {
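		/*
		 * In "mba_MBps" (software controller) mode the control value
		 * is a bandwidth cap in MBps, so start with no cap; otherwise
		 * use the resource's hardware default control value.
		 */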
2739		d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
2740		d->have_new_ctrl = true;
2741	}
2742}
2743
2744/* Initialize the RDT group's allocations. */
2745static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2746{
2747	struct rdt_resource *r;
2748	int ret;
2749
2750	for_each_alloc_enabled_rdt_resource(r) {
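		/*
		 * MBA has no CBM to compute; cache resources get a default
		 * CBM derived from what is shareable and unused per domain.
		 */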
2751		if (r->rid == RDT_RESOURCE_MBA) {
2752			rdtgroup_init_mba(r);
2753		} else {
2754			ret = rdtgroup_init_cat(r, rdtgrp->closid);
2755			if (ret < 0)
2756				return ret;
2757		}
2758
2759		ret = update_domains(r, rdtgrp->closid);
2760		if (ret < 0) {
2761			rdt_last_cmd_puts("Failed to initialize allocations\n");
2762			return ret;
2763		}
2764
2765	}
2766
2767	rdtgrp->mode = RDT_MODE_SHAREABLE;
2768
2769	return 0;
2770}
2771
2772static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
2773			     const char *name, umode_t mode,
2774			     enum rdt_group_type rtype, struct rdtgroup **r)
2775{
2776	struct rdtgroup *prdtgrp, *rdtgrp;
2777	struct kernfs_node *kn;
2778	uint files = 0;
2779	int ret;
2780
2781	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
2782	if (!prdtgrp) {
2783		ret = -ENODEV;
2784		goto out_unlock;
2785	}
2786
2787	if (rtype == RDTMON_GROUP &&
2788	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2789	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
2790		ret = -EINVAL;
2791		rdt_last_cmd_puts("Pseudo-locking in progress\n");
2792		goto out_unlock;
2793	}
2794
2795	/* allocate the rdtgroup. */
2796	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
2797	if (!rdtgrp) {
2798		ret = -ENOSPC;
2799		rdt_last_cmd_puts("Kernel out of memory\n");
2800		goto out_unlock;
2801	}
2802	*r = rdtgrp;
2803	rdtgrp->mon.parent = prdtgrp;
2804	rdtgrp->type = rtype;
2805	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
2806
2807	/* kernfs creates the directory for rdtgrp */
2808	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
2809	if (IS_ERR(kn)) {
2810		ret = PTR_ERR(kn);
2811		rdt_last_cmd_puts("kernfs create error\n");
2812		goto out_free_rgrp;
2813	}
2814	rdtgrp->kn = kn;
2815
2816	/*
2817	 * kernfs_remove() will drop the reference count on "kn" which
2818	 * will free it. But we still need it to stick around for the
2819	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
2820	 * which will be dropped by kernfs_put() in rdtgroup_remove().
2821	 */
2822	kernfs_get(kn);
2823
2824	ret = rdtgroup_kn_set_ugid(kn);
2825	if (ret) {
2826		rdt_last_cmd_puts("kernfs perm error\n");
2827		goto out_destroy;
2828	}
2829
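	/* Pick the base files plus the files specific to this group type. */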
2830	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
2831	ret = rdtgroup_add_files(kn, files);
2832	if (ret) {
2833		rdt_last_cmd_puts("kernfs fill error\n");
2834		goto out_destroy;
2835	}
2836
2837	if (rdt_mon_capable) {
2838		ret = alloc_rmid();
2839		if (ret < 0) {
2840			rdt_last_cmd_puts("Out of RMIDs\n");
2841			goto out_destroy;
2842		}
2843		rdtgrp->mon.rmid = ret;
2844
2845		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
2846		if (ret) {
2847			rdt_last_cmd_puts("kernfs subdir error\n");
2848			goto out_idfree;
2849		}
2850	}
2851	kernfs_activate(kn);
2852
2853	/*
2854	 * The caller unlocks the parent_kn upon success.
2855	 */
2856	return 0;
2857
2858out_idfree:
2859	free_rmid(rdtgrp->mon.rmid);
2860out_destroy:
2861	kernfs_put(rdtgrp->kn);
2862	kernfs_remove(rdtgrp->kn);
2863out_free_rgrp:
2864	kfree(rdtgrp);
2865out_unlock:
2866	rdtgroup_kn_unlock(parent_kn);
2867	return ret;
2868}
2869
2870static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
2871{
2872	kernfs_remove(rgrp->kn);
2873	free_rmid(rgrp->mon.rmid);
2874	rdtgroup_remove(rgrp);
2875}
2876
2877/*
2878 * Create a monitor group under the "mon_groups" directory of a control
2879 * and monitor group (ctrl_mon). This is a resource group used
2880 * to monitor a subset of the tasks and CPUs in its parent ctrl_mon group.
2881 */
2882static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
2883			      const char *name, umode_t mode)
2884{
2885	struct rdtgroup *rdtgrp, *prgrp;
2886	int ret;
2887
2888	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
2889	if (ret)
2890		return ret;
2891
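	/*
	 * A mon group performs no allocation of its own; it monitors under
	 * the CLOSID of its parent ctrl_mon group.
	 */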
2892	prgrp = rdtgrp->mon.parent;
2893	rdtgrp->closid = prgrp->closid;
2894
2895	/*
2896	 * Add the rdtgrp to the list of rdtgrps the parent
2897	 * ctrl_mon group has to track.
2898	 */
2899	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
2900
2901	rdtgroup_kn_unlock(parent_kn);
2902	return ret;
2903}
2904
2905/*
2906 * These are rdtgroups created under the root directory. Can be used
2907 * to allocate and monitor resources.
2908 */
2909static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
2910				   const char *name, umode_t mode)
2911{
2912	struct rdtgroup *rdtgrp;
2913	struct kernfs_node *kn;
2914	u32 closid;
2915	int ret;
2916
2917	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
2918	if (ret)
2919		return ret;
2920
2921	kn = rdtgrp->kn;
2922	ret = closid_alloc();
2923	if (ret < 0) {
2924		rdt_last_cmd_puts("Out of CLOSIDs\n");
2925		goto out_common_fail;
2926	}
2927	closid = ret;
2928	ret = 0;
2929
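	/*
	 * Each ctrl_mon group gets its own CLOSID; its default allocations
	 * are computed and written to hardware below.
	 */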
2930	rdtgrp->closid = closid;
2931	ret = rdtgroup_init_alloc(rdtgrp);
2932	if (ret < 0)
2933		goto out_id_free;
2934
2935	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
2936
2937	if (rdt_mon_capable) {
2938		/*
2939		 * Create an empty mon_groups directory to hold the subset
2940		 * of tasks and cpus to monitor.
2941		 */
2942		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
2943		if (ret) {
2944			rdt_last_cmd_puts("kernfs subdir error\n");
2945			goto out_del_list;
2946		}
2947	}
2948
2949	goto out_unlock;
2950
2951out_del_list:
2952	list_del(&rdtgrp->rdtgroup_list);
2953out_id_free:
2954	closid_free(closid);
2955out_common_fail:
2956	mkdir_rdt_prepare_clean(rdtgrp);
2957out_unlock:
2958	rdtgroup_kn_unlock(parent_kn);
2959	return ret;
2960}
2961
2962/*
2963 * We allow creating mon groups only within a directory called "mon_groups"
2964 * which is present in every ctrl_mon group. Check if this is a valid
2965 * "mon_groups" directory.
2966 *
2967 * 1. The directory should be named "mon_groups".
2968 * 2. The mon group itself should "not" be named "mon_groups".
2969 *   This makes sure the "mon_groups" directory always has a ctrl_mon group
2970 *   as its parent.
2971 */
2972static bool is_mon_groups(struct kernfs_node *kn, const char *name)
2973{
2974	return (!strcmp(kn->name, "mon_groups") &&
2975		strcmp(name, "mon_groups"));
2976}
2977
2978static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
2979			  umode_t mode)
2980{
2981	/* Do not accept '\n' to avoid an unparsable situation. */
2982	if (strchr(name, '\n'))
2983		return -EINVAL;
2984
2985	/*
2986	 * If the parent directory is the root directory and RDT
2987	 * allocation is supported, add a control and monitoring
2988	 * subdirectory.
2989	 */
2990	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
2991		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
2992
2993	/*
2994	 * If RDT monitoring is supported and the parent directory is a valid
2995	 * "mon_groups" directory, add a monitoring subdirectory.
2996	 */
2997	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
2998		return rdtgroup_mkdir_mon(parent_kn, name, mode);
2999
3000	return -EPERM;
3001}
3002
3003static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3004{
3005	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3006	int cpu;
3007
3008	/* Give any tasks back to the parent group */
3009	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3010
3011	/* Update per cpu rmid of the moved CPUs first */
3012	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3013		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
3014	/*
3015	 * Update the MSR on the moved CPUs and on CPUs which have a
3016	 * moved task running on them.
3017	 */
3018	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3019	update_closid_rmid(tmpmask, NULL);
3020
3021	rdtgrp->flags = RDT_DELETED;
3022	free_rmid(rdtgrp->mon.rmid);
3023
3024	/*
3025	 * Remove the rdtgrp from the parent ctrl_mon group's list
3026	 */
3027	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3028	list_del(&rdtgrp->mon.crdtgrp_list);
3029
3030	kernfs_remove(rdtgrp->kn);
3031
3032	return 0;
3033}
3034
3035static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
3036{
3037	rdtgrp->flags = RDT_DELETED;
3038	list_del(&rdtgrp->rdtgroup_list);
3039
3040	kernfs_remove(rdtgrp->kn);
3041	return 0;
3042}
3043
3044static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3045{
3046	int cpu;
3047
3048	/* Give any tasks back to the default group */
3049	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3050
3051	/* Give any CPUs back to the default group */
3052	cpumask_or(&rdtgroup_default.cpu_mask,
3053		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3054
3055	/* Update per cpu closid and rmid of the moved CPUs first */
3056	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
3057		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
3058		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
3059	}
3060
3061	/*
3062	 * Update the MSR on the moved CPUs and on CPUs which have a
3063	 * moved task running on them.
3064	 */
3065	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3066	update_closid_rmid(tmpmask, NULL);
3067
3068	closid_free(rdtgrp->closid);
3069	free_rmid(rdtgrp->mon.rmid);
3070
3071	rdtgroup_ctrl_remove(rdtgrp);
3072
3073	/*
3074	 * Free all the child monitor group rmids.
3075	 */
3076	free_all_child_rdtgrp(rdtgrp);
3077
3078	return 0;
3079}
3080
3081static int rdtgroup_rmdir(struct kernfs_node *kn)
3082{
3083	struct kernfs_node *parent_kn = kn->parent;
3084	struct rdtgroup *rdtgrp;
3085	cpumask_var_t tmpmask;
3086	int ret = 0;
3087
3088	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3089		return -ENOMEM;
3090
3091	rdtgrp = rdtgroup_kn_lock_live(kn);
3092	if (!rdtgrp) {
3093		ret = -EPERM;
3094		goto out;
3095	}
3096
3097	/*
3098	 * If the rdtgroup is a ctrl_mon group and parent directory
3099	 * is the root directory, remove the ctrl_mon group.
3100	 *
3101	 * If the rdtgroup is a mon group and parent directory
3102	 * is a valid "mon_groups" directory, remove the mon group.
3103	 */
3104	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3105	    rdtgrp != &rdtgroup_default) {
3106		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3107		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3108			ret = rdtgroup_ctrl_remove(rdtgrp);
3109		} else {
3110			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3111		}
3112	} else if (rdtgrp->type == RDTMON_GROUP &&
3113		 is_mon_groups(parent_kn, kn->name)) {
3114		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3115	} else {
3116		ret = -EPERM;
3117	}
3118
3119out:
3120	rdtgroup_kn_unlock(kn);
3121	free_cpumask_var(tmpmask);
3122	return ret;
3123}
3124
3125static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3126{
3127	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
3128		seq_puts(seq, ",cdp");
3129
3130	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
3131		seq_puts(seq, ",cdpl2");
3132
3133	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
3134		seq_puts(seq, ",mba_MBps");
3135
3136	return 0;
3137}
3138
3139static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
3140	.mkdir		= rdtgroup_mkdir,
3141	.rmdir		= rdtgroup_rmdir,
3142	.show_options	= rdtgroup_show_options,
3143};
3144
3145static int __init rdtgroup_setup_root(void)
3146{
3147	int ret;
3148
3149	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3150				      KERNFS_ROOT_CREATE_DEACTIVATED |
3151				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3152				      &rdtgroup_default);
3153	if (IS_ERR(rdt_root))
3154		return PTR_ERR(rdt_root);
3155
3156	mutex_lock(&rdtgroup_mutex);
3157
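	/* The default group owns CLOSID 0 and RMID 0 and is never removed. */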
3158	rdtgroup_default.closid = 0;
3159	rdtgroup_default.mon.rmid = 0;
3160	rdtgroup_default.type = RDTCTRL_GROUP;
3161	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3162
3163	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
3164
3165	ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
3166	if (ret) {
3167		kernfs_destroy_root(rdt_root);
3168		goto out;
3169	}
3170
3171	rdtgroup_default.kn = rdt_root->kn;
3172	kernfs_activate(rdtgroup_default.kn);
3173
3174out:
3175	mutex_unlock(&rdtgroup_mutex);
3176
3177	return ret;
3178}
3179
3180/*
3181 * rdtgroup_init - rdtgroup initialization
3182 *
3183 * Set up the resctrl filesystem: set up the root, create the mount point,
3184 * register the rdtgroup filesystem, and initialize files under the root directory.
3185 *
3186 * Return: 0 on success or -errno
3187 */
3188int __init rdtgroup_init(void)
3189{
3190	int ret = 0;
3191
3192	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
3193		     sizeof(last_cmd_status_buf));
3194
3195	ret = rdtgroup_setup_root();
3196	if (ret)
3197		return ret;
3198
3199	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
3200	if (ret)
3201		goto cleanup_root;
3202
3203	ret = register_filesystem(&rdt_fs_type);
3204	if (ret)
3205		goto cleanup_mountpoint;
3206
3207	/*
3208	 * Adding the resctrl debugfs directory here may not be ideal since
3209	 * it would let the resctrl debugfs directory appear on the debugfs
3210	 * filesystem before the resctrl filesystem is mounted.
3211	 * It may also be ok since that would enable debugging of RDT before
3212	 * resctrl is mounted.
3213	 * The debugfs directory is created here rather than in
3214	 * rdt_get_tree() because rdt_get_tree() takes rdtgroup_mutex and,
3215	 * during the debugfs directory creation, also &sb->s_type->i_mutex_key
3216	 * (the lockdep class of inode->i_rwsem). Other filesystem
3217	 * interactions (e.g. SyS_getdents) have the lock ordering:
3218	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
3219	 * During mmap(), called with &mm->mmap_lock held, rdtgroup_mutex
3220	 * is taken, creating the dependency:
3221	 * &mm->mmap_lock --> rdtgroup_mutex, which together with the other
3222	 * two orderings can cause a deadlock.
3223	 * By creating the debugfs directory here we avoid a dependency
3224	 * that may cause deadlock (file operations cannot occur until the
3225	 * filesystem is mounted, but there is no obvious way to tell
3226	 * lockdep that).
3227	 */
3228	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
3229
3230	return 0;
3231
3232cleanup_mountpoint:
3233	sysfs_remove_mount_point(fs_kobj, "resctrl");
3234cleanup_root:
3235	kernfs_destroy_root(rdt_root);
3236
3237	return ret;
3238}
3239
3240void __exit rdtgroup_exit(void)
3241{
3242	debugfs_remove_recursive(debugfs_resctrl);
3243	unregister_filesystem(&rdt_fs_type);
3244	sysfs_remove_mount_point(fs_kobj, "resctrl");
3245	kernfs_destroy_root(rdt_root);
3246}