v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *	Routines to identify caches on Intel CPU.
   4 *
   5 *	Changes:
   6 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   7 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   8 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   9 */
  10
  11#include <linux/slab.h>
  12#include <linux/cacheinfo.h>
  13#include <linux/cpu.h>
  14#include <linux/cpuhotplug.h>
  15#include <linux/sched.h>
  16#include <linux/capability.h>
  17#include <linux/sysfs.h>
  18#include <linux/pci.h>
  19#include <linux/stop_machine.h>
  20
  21#include <asm/cpufeature.h>
  22#include <asm/cacheinfo.h>
  23#include <asm/amd_nb.h>
  24#include <asm/smp.h>
  25#include <asm/mtrr.h>
  26#include <asm/tlbflush.h>
  27
  28#include "cpu.h"
  29
  30#define LVL_1_INST	1
  31#define LVL_1_DATA	2
  32#define LVL_2		3
  33#define LVL_3		4
  34#define LVL_TRACE	5
  35
  36/* Shared last level cache maps */
  37DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
  38
  39/* Shared L2 cache maps */
  40DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
  41
  42static cpumask_var_t cpu_cacheinfo_mask;
  43
  44/* Kernel controls MTRR and/or PAT MSRs. */
  45unsigned int memory_caching_control __ro_after_init;
  46
  47struct _cache_table {
  48	unsigned char descriptor;
  49	char cache_type;
  50	short size;
  51};
  52
  53#define MB(x)	((x) * 1024)
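
/* Note: the size field in cache_table[] below is in KB, so MB(2) expands to 2048, i.e. 2 MB expressed in KB. */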
  54
  55/* All the cache descriptor types we care about (no TLB or
  56   trace cache entries) */
  57
  58static const struct _cache_table cache_table[] =
  59{
  60	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  61	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  62	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  63	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  64	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  65	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  66	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  67	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  68	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  69	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  70	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  71	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  72	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  73	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  74	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  75	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  76	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  77	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  79	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  80	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  81	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  82	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  83	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  84	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  85	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  86	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  87	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  88	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  89	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  90	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  91	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  92	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  93	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  94	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  95	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  96	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  97	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  98	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  99	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 100	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 101	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 102	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 103	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 104	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 105	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 106	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 107	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 108	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 109	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 110	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 111	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 112	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 113	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 114	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 115	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 116	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 117	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 118	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 119	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 120	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 121	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 122	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 123	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 124	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 125	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 126	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 127	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 128	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 129	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 130	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 131	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 132	{ 0x00, 0, 0}
 133};
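
/*
 * Worked example of how this table is used: a CPUID leaf 2 descriptor byte
 * of 0x43 is looked up above and contributes 512 (KB) to the L2 total in
 * init_intel_cacheinfo(), while 0x23 contributes MB(1) == 1024 KB to L3.
 */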
 134
 135
 136enum _cache_type {
 137	CTYPE_NULL = 0,
 138	CTYPE_DATA = 1,
 139	CTYPE_INST = 2,
 140	CTYPE_UNIFIED = 3
 141};
 142
 143union _cpuid4_leaf_eax {
 144	struct {
 145		enum _cache_type	type:5;
 146		unsigned int		level:3;
 147		unsigned int		is_self_initializing:1;
 148		unsigned int		is_fully_associative:1;
 149		unsigned int		reserved:4;
 150		unsigned int		num_threads_sharing:12;
 151		unsigned int		num_cores_on_die:6;
 152	} split;
 153	u32 full;
 154};
 155
 156union _cpuid4_leaf_ebx {
 157	struct {
 158		unsigned int		coherency_line_size:12;
 159		unsigned int		physical_line_partition:10;
 160		unsigned int		ways_of_associativity:10;
 161	} split;
 162	u32 full;
 163};
 164
 165union _cpuid4_leaf_ecx {
 166	struct {
 167		unsigned int		number_of_sets:32;
 168	} split;
 169	u32 full;
 170};
 171
 172struct _cpuid4_info_regs {
 173	union _cpuid4_leaf_eax eax;
 174	union _cpuid4_leaf_ebx ebx;
 175	union _cpuid4_leaf_ecx ecx;
 176	unsigned int id;
 177	unsigned long size;
 178	struct amd_northbridge *nb;
 179};
  180
 181/* AMD doesn't have CPUID4. Emulate it here to report the same
 182   information to the user.  This makes some assumptions about the machine:
  183   L2 not shared, no SMT, etc., which are currently true on AMD CPUs.
 184
 185   In theory the TLBs could be reported as fake type (they are in "dummy").
 186   Maybe later */
 187union l1_cache {
 188	struct {
 189		unsigned line_size:8;
 190		unsigned lines_per_tag:8;
 191		unsigned assoc:8;
 192		unsigned size_in_kb:8;
 193	};
 194	unsigned val;
 195};
 196
 197union l2_cache {
 198	struct {
 199		unsigned line_size:8;
 200		unsigned lines_per_tag:4;
 201		unsigned assoc:4;
 202		unsigned size_in_kb:16;
 203	};
 204	unsigned val;
 205};
 206
 207union l3_cache {
 208	struct {
 209		unsigned line_size:8;
 210		unsigned lines_per_tag:4;
 211		unsigned assoc:4;
 212		unsigned res:2;
 213		unsigned size_encoded:14;
 214	};
 215	unsigned val;
 216};
 217
 218static const unsigned short assocs[] = {
 219	[1] = 1,
 220	[2] = 2,
 221	[4] = 4,
 222	[6] = 8,
 223	[8] = 16,
 224	[0xa] = 32,
 225	[0xb] = 48,
 226	[0xc] = 64,
 227	[0xd] = 96,
 228	[0xe] = 128,
 229	[0xf] = 0xffff /* fully associative - no way to show this currently */
 230};
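
/*
 * Worked example: the 4-bit associativity fields of CPUID 0x80000006 use
 * this encoding, e.g. a raw value of 0x6 maps to assocs[0x6] == 8 ways,
 * and 0xf means fully associative (represented as 0xffff above).
 */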
 231
 232static const unsigned char levels[] = { 1, 1, 2, 3 };
 233static const unsigned char types[] = { 1, 2, 3, 3 };
 234
 235static const enum cache_type cache_type_map[] = {
 236	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
 237	[CTYPE_DATA] = CACHE_TYPE_DATA,
 238	[CTYPE_INST] = CACHE_TYPE_INST,
 239	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
 240};
 241
 242static void
 243amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 244		     union _cpuid4_leaf_ebx *ebx,
 245		     union _cpuid4_leaf_ecx *ecx)
 246{
 247	unsigned dummy;
 248	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 249	union l1_cache l1i, l1d;
 250	union l2_cache l2;
 251	union l3_cache l3;
 252	union l1_cache *l1 = &l1d;
 253
 254	eax->full = 0;
 255	ebx->full = 0;
 256	ecx->full = 0;
 257
 258	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 259	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 260
 261	switch (leaf) {
 262	case 1:
 263		l1 = &l1i;
 264		fallthrough;
 265	case 0:
 266		if (!l1->val)
 267			return;
 268		assoc = assocs[l1->assoc];
 269		line_size = l1->line_size;
 270		lines_per_tag = l1->lines_per_tag;
 271		size_in_kb = l1->size_in_kb;
 272		break;
 273	case 2:
 274		if (!l2.val)
 275			return;
 276		assoc = assocs[l2.assoc];
 277		line_size = l2.line_size;
 278		lines_per_tag = l2.lines_per_tag;
 279		/* cpu_data has errata corrections for K7 applied */
 280		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 281		break;
 282	case 3:
 283		if (!l3.val)
 284			return;
 285		assoc = assocs[l3.assoc];
 286		line_size = l3.line_size;
 287		lines_per_tag = l3.lines_per_tag;
 288		size_in_kb = l3.size_encoded * 512;
 289		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 290			size_in_kb = size_in_kb >> 1;
 291			assoc = assoc >> 1;
 292		}
 293		break;
 294	default:
 295		return;
 296	}
 297
 298	eax->split.is_self_initializing = 1;
 299	eax->split.type = types[leaf];
 300	eax->split.level = levels[leaf];
 301	eax->split.num_threads_sharing = 0;
 302	eax->split.num_cores_on_die = topology_num_cores_per_package();
 303
 304
 305	if (assoc == 0xffff)
 306		eax->split.is_fully_associative = 1;
 307	ebx->split.coherency_line_size = line_size - 1;
 308	ebx->split.ways_of_associativity = assoc - 1;
 309	ebx->split.physical_line_partition = lines_per_tag - 1;
 310	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 311		(ebx->split.ways_of_associativity + 1) - 1;
 312}
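
/*
 * Worked example of the arithmetic above (illustrative numbers): for a
 * 512 KB, 16-way L2 with a 64-byte line, ways_of_associativity is stored
 * as 15 and number_of_sets = (512 * 1024) / 64 / 16 - 1 = 511, i.e. the
 * fields hold "value - 1", matching real CPUID(4) output.
 */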
 313
 314#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 315
 316/*
 317 * L3 cache descriptors
 318 */
 319static void amd_calc_l3_indices(struct amd_northbridge *nb)
 320{
 321	struct amd_l3_cache *l3 = &nb->l3_cache;
 322	unsigned int sc0, sc1, sc2, sc3;
 323	u32 val = 0;
 324
 325	pci_read_config_dword(nb->misc, 0x1C4, &val);
 326
 327	/* calculate subcache sizes */
 328	l3->subcaches[0] = sc0 = !(val & BIT(0));
 329	l3->subcaches[1] = sc1 = !(val & BIT(4));
 330
 331	if (boot_cpu_data.x86 == 0x15) {
 332		l3->subcaches[0] = sc0 += !(val & BIT(1));
 333		l3->subcaches[1] = sc1 += !(val & BIT(5));
 334	}
 335
 336	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 337	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 338
 339	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 340}
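
/*
 * Worked example: with none of the subcache-disable bits set in the NB
 * register read above (offset 0x1C4) on a family 0x15 part, sc0..sc3 all
 * evaluate to 2, so l3->indices = (2 << 10) - 1 = 2047.
 */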
 341
 342/*
 343 * check whether a slot used for disabling an L3 index is occupied.
 344 * @l3: L3 cache descriptor
 345 * @slot: slot number (0..1)
 346 *
  347 * @returns: the disabled index if used, or a negative value if the slot is free.
 348 */
 349static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 350{
 351	unsigned int reg = 0;
 352
 353	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 354
 355	/* check whether this slot is activated already */
 356	if (reg & (3UL << 30))
 357		return reg & 0xfff;
 358
 359	return -1;
 360}
 361
 362static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 363				  unsigned int slot)
 364{
 365	int index;
 366	struct amd_northbridge *nb = this_leaf->priv;
 367
 368	index = amd_get_l3_disable_slot(nb, slot);
 369	if (index >= 0)
 370		return sprintf(buf, "%d\n", index);
 371
 372	return sprintf(buf, "FREE\n");
 373}
 374
 375#define SHOW_CACHE_DISABLE(slot)					\
 376static ssize_t								\
 377cache_disable_##slot##_show(struct device *dev,				\
 378			    struct device_attribute *attr, char *buf)	\
 379{									\
 380	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 381	return show_cache_disable(this_leaf, buf, slot);		\
 382}
 383SHOW_CACHE_DISABLE(0)
 384SHOW_CACHE_DISABLE(1)
 385
 386static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 387				 unsigned slot, unsigned long idx)
 388{
 389	int i;
 390
 391	idx |= BIT(30);
 392
 393	/*
 394	 *  disable index in all 4 subcaches
 395	 */
 396	for (i = 0; i < 4; i++) {
 397		u32 reg = idx | (i << 20);
 398
 399		if (!nb->l3_cache.subcaches[i])
 400			continue;
 401
 402		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 403
 404		/*
 405		 * We need to WBINVD on a core on the node containing the L3
  406		 * cache whose indices we disable; therefore a simple wbinvd()
  407		 * is not sufficient.
 408		 */
 409		wbinvd_on_cpu(cpu);
 410
 411		reg |= BIT(31);
 412		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 413	}
 414}
 415
 416/*
  417 * disable an L3 cache index using a disable-slot
 418 *
 419 * @l3:    L3 cache descriptor
 420 * @cpu:   A CPU on the node containing the L3 cache
 421 * @slot:  slot number (0..1)
 422 * @index: index to disable
 423 *
 424 * @return: 0 on success, error status on failure
 425 */
 426static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
 427			    unsigned slot, unsigned long index)
 428{
 429	int ret = 0;
 430
 431	/*  check if @slot is already used or the index is already disabled */
 432	ret = amd_get_l3_disable_slot(nb, slot);
 433	if (ret >= 0)
 434		return -EEXIST;
 435
 436	if (index > nb->l3_cache.indices)
 437		return -EINVAL;
 438
 439	/* check whether the other slot has disabled the same index already */
 440	if (index == amd_get_l3_disable_slot(nb, !slot))
 441		return -EEXIST;
 442
 443	amd_l3_disable_index(nb, cpu, slot, index);
 444
 445	return 0;
 446}
 447
 448static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
 449				   const char *buf, size_t count,
 450				   unsigned int slot)
 451{
 452	unsigned long val = 0;
 453	int cpu, err = 0;
 454	struct amd_northbridge *nb = this_leaf->priv;
 455
 456	if (!capable(CAP_SYS_ADMIN))
 457		return -EPERM;
 458
 459	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 460
 461	if (kstrtoul(buf, 10, &val) < 0)
 462		return -EINVAL;
 463
 464	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 465	if (err) {
 466		if (err == -EEXIST)
 467			pr_warn("L3 slot %d in use/index already disabled!\n",
 468				   slot);
 469		return err;
 470	}
 471	return count;
 472}
 473
 474#define STORE_CACHE_DISABLE(slot)					\
 475static ssize_t								\
 476cache_disable_##slot##_store(struct device *dev,			\
 477			     struct device_attribute *attr,		\
 478			     const char *buf, size_t count)		\
 479{									\
 480	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 481	return store_cache_disable(this_leaf, buf, count, slot);	\
 482}
 483STORE_CACHE_DISABLE(0)
 484STORE_CACHE_DISABLE(1)
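
/*
 * Usage sketch for the two attributes above (assuming the L3 is exposed as
 * index3 in the cacheinfo sysfs hierarchy on the target system):
 *
 *   # disable L3 cache index 42 via disable slot 0 of cpu0's L3
 *   echo 42 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *
 *   # read the slot back; prints the disabled index, or "FREE"
 *   cat /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 */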
 485
 486static ssize_t subcaches_show(struct device *dev,
 487			      struct device_attribute *attr, char *buf)
 488{
 489	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 490	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 491
 492	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 493}
 494
 495static ssize_t subcaches_store(struct device *dev,
 496			       struct device_attribute *attr,
 497			       const char *buf, size_t count)
 498{
 499	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 500	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 501	unsigned long val;
 502
 503	if (!capable(CAP_SYS_ADMIN))
 504		return -EPERM;
 505
 506	if (kstrtoul(buf, 16, &val) < 0)
 507		return -EINVAL;
 508
 509	if (amd_set_subcaches(cpu, val))
 510		return -EINVAL;
 511
 512	return count;
 513}
 514
 515static DEVICE_ATTR_RW(cache_disable_0);
 516static DEVICE_ATTR_RW(cache_disable_1);
 517static DEVICE_ATTR_RW(subcaches);
 518
 519static umode_t
 520cache_private_attrs_is_visible(struct kobject *kobj,
 521			       struct attribute *attr, int unused)
 522{
 523	struct device *dev = kobj_to_dev(kobj);
 524	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 525	umode_t mode = attr->mode;
 526
 527	if (!this_leaf->priv)
 528		return 0;
 529
 530	if ((attr == &dev_attr_subcaches.attr) &&
 531	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 532		return mode;
 533
 534	if ((attr == &dev_attr_cache_disable_0.attr ||
 535	     attr == &dev_attr_cache_disable_1.attr) &&
 536	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 537		return mode;
 538
 539	return 0;
 540}
 541
 542static struct attribute_group cache_private_group = {
 543	.is_visible = cache_private_attrs_is_visible,
 544};
 545
 546static void init_amd_l3_attrs(void)
 547{
 548	int n = 1;
 549	static struct attribute **amd_l3_attrs;
 550
 551	if (amd_l3_attrs) /* already initialized */
 552		return;
 553
 554	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 555		n += 2;
 556	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 557		n += 1;
 558
 559	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
 560	if (!amd_l3_attrs)
 561		return;
 562
 563	n = 0;
 564	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 565		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
 566		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
 567	}
 568	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 569		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 570
 571	cache_private_group.attrs = amd_l3_attrs;
 572}
 573
 574const struct attribute_group *
 575cache_get_priv_group(struct cacheinfo *this_leaf)
 576{
 577	struct amd_northbridge *nb = this_leaf->priv;
 578
 579	if (this_leaf->level < 3 || !nb)
 580		return NULL;
 581
 582	if (nb && nb->l3_cache.indices)
 583		init_amd_l3_attrs();
 584
 585	return &cache_private_group;
 586}
 587
 588static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 589{
 590	int node;
 591
 592	/* only for L3, and not in virtualized environments */
 593	if (index < 3)
 594		return;
 595
 596	node = topology_amd_node_id(smp_processor_id());
 597	this_leaf->nb = node_to_amd_nb(node);
 598	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 599		amd_calc_l3_indices(this_leaf->nb);
 600}
 601#else
 602#define amd_init_l3_cache(x, y)
 603#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 604
 605static int
 606cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 607{
 608	union _cpuid4_leaf_eax	eax;
 609	union _cpuid4_leaf_ebx	ebx;
 610	union _cpuid4_leaf_ecx	ecx;
 611	unsigned		edx;
 612
 613	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 614		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
 615			cpuid_count(0x8000001d, index, &eax.full,
 616				    &ebx.full, &ecx.full, &edx);
 617		else
 618			amd_cpuid4(index, &eax, &ebx, &ecx);
 619		amd_init_l3_cache(this_leaf, index);
 620	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
 621		cpuid_count(0x8000001d, index, &eax.full,
 622			    &ebx.full, &ecx.full, &edx);
 623		amd_init_l3_cache(this_leaf, index);
 624	} else {
 625		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 626	}
 627
 628	if (eax.split.type == CTYPE_NULL)
 629		return -EIO; /* better error ? */
 630
 631	this_leaf->eax = eax;
 632	this_leaf->ebx = ebx;
 633	this_leaf->ecx = ecx;
 634	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 635			  (ebx.split.coherency_line_size     + 1) *
 636			  (ebx.split.physical_line_partition + 1) *
 637			  (ebx.split.ways_of_associativity   + 1);
 638	return 0;
 639}
 640
 641static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 642{
 643	unsigned int		eax, ebx, ecx, edx, op;
 644	union _cpuid4_leaf_eax	cache_eax;
 645	int 			i = -1;
 646
 647	if (c->x86_vendor == X86_VENDOR_AMD ||
 648	    c->x86_vendor == X86_VENDOR_HYGON)
 649		op = 0x8000001d;
 650	else
 651		op = 4;
 652
 653	do {
 654		++i;
 655		/* Do cpuid(op) loop to find out num_cache_leaves */
 656		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 657		cache_eax.full = eax;
 658	} while (cache_eax.split.type != CTYPE_NULL);
 659	return i;
 660}
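
/*
 * Example: on a CPU whose deterministic cache leaf enumerates L1d, L1i,
 * L2 and L3, the loop above hits CTYPE_NULL at sub-leaf 4 and the
 * function returns 4.
 */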
 661
 662void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
 663{
 664	/*
 665	 * We may have multiple LLCs if L3 caches exist, so check if we
 666	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 667	 */
 668	if (!cpuid_edx(0x80000006))
 669		return;
 670
 671	if (c->x86 < 0x17) {
 672		/* LLC is at the node level. */
 673		c->topo.llc_id = die_id;
 674	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
 675		/*
 676		 * LLC is at the core complex level.
 677		 * Core complex ID is ApicId[3] for these processors.
 678		 */
 679		c->topo.llc_id = c->topo.apicid >> 3;
 680	} else {
 681		/*
 682		 * LLC ID is calculated from the number of threads sharing the
  683		 * cache.
  684		 */
 685		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
 686		u32 llc_index = find_num_cache_leaves(c) - 1;
 687
 688		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
 689		if (eax)
 690			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 691
 692		if (num_sharing_cache) {
 693			int bits = get_count_order(num_sharing_cache);
 694
 695			c->topo.llc_id = c->topo.apicid >> bits;
 696		}
 697	}
 698}
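
/*
 * Worked example for the last branch above: if the L3 sub-leaf of CPUID
 * 0x8000001d reports EAX[25:14] = 15, then num_sharing_cache = 16,
 * get_count_order(16) = 4, and every group of 16 APIC IDs shares one
 * llc_id (llc_id = apicid >> 4).
 */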
 699
 700void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
 701{
 702	/*
 703	 * We may have multiple LLCs if L3 caches exist, so check if we
 704	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 705	 */
 706	if (!cpuid_edx(0x80000006))
 707		return;
 708
 709	/*
 710	 * LLC is at the core complex level.
 711	 * Core complex ID is ApicId[3] for these processors.
 712	 */
 713	c->topo.llc_id = c->topo.apicid >> 3;
 714}
 715
 716void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 717{
 718	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
 719
 720	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 721		ci->num_leaves = find_num_cache_leaves(c);
 722	} else if (c->extended_cpuid_level >= 0x80000006) {
 723		if (cpuid_edx(0x80000006) & 0xf000)
 724			ci->num_leaves = 4;
 725		else
 726			ci->num_leaves = 3;
 727	}
 728}
 729
 730void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 731{
 732	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
 733
 734	ci->num_leaves = find_num_cache_leaves(c);
 735}
 736
 737void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 738{
 739	/* Cache sizes */
 740	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 741	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 742	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 743	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 744	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
 745
 746	if (c->cpuid_level > 3) {
 747		/*
 748		 * There should be at least one leaf. A non-zero value means
 749		 * that the number of leaves has been initialized.
 750		 */
 751		if (!ci->num_leaves)
  752			ci->num_leaves = find_num_cache_leaves(c);
 753
 754		/*
 755		 * Whenever possible use cpuid(4), deterministic cache
 756		 * parameters cpuid leaf to find the cache details
 757		 */
 758		for (i = 0; i < ci->num_leaves; i++) {
 759			struct _cpuid4_info_regs this_leaf = {};
 760			int retval;
 761
 762			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 763			if (retval < 0)
 764				continue;
 765
 766			switch (this_leaf.eax.split.level) {
 767			case 1:
 768				if (this_leaf.eax.split.type == CTYPE_DATA)
 769					new_l1d = this_leaf.size/1024;
 770				else if (this_leaf.eax.split.type == CTYPE_INST)
 771					new_l1i = this_leaf.size/1024;
 772				break;
 773			case 2:
 774				new_l2 = this_leaf.size/1024;
 775				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 776				index_msb = get_count_order(num_threads_sharing);
 777				l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
 778				break;
 779			case 3:
 780				new_l3 = this_leaf.size/1024;
 781				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 782				index_msb = get_count_order(num_threads_sharing);
 783				l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
 784				break;
 785			default:
 786				break;
 787			}
 788		}
 789	}
 790	/*
 791	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 792	 * trace cache
 793	 */
 794	if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) {
 795		/* supports eax=2  call */
 796		int j, n;
 797		unsigned int regs[4];
 798		unsigned char *dp = (unsigned char *)regs;
 799		int only_trace = 0;
 800
 801		if (ci->num_leaves && c->x86 == 15)
 802			only_trace = 1;
 803
 804		/* Number of times to iterate */
 805		n = cpuid_eax(2) & 0xFF;
 806
 807		for (i = 0 ; i < n ; i++) {
 808			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 809
 810			/* If bit 31 is set, this is an unknown format */
 811			for (j = 0 ; j < 4 ; j++)
 812				if (regs[j] & (1 << 31))
 813					regs[j] = 0;
 814
 815			/* Byte 0 is level count, not a descriptor */
 816			for (j = 1 ; j < 16 ; j++) {
 817				unsigned char des = dp[j];
 818				unsigned char k = 0;
 819
 820				/* look up this descriptor in the table */
 821				while (cache_table[k].descriptor != 0) {
 822					if (cache_table[k].descriptor == des) {
 823						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 824							break;
 825						switch (cache_table[k].cache_type) {
 826						case LVL_1_INST:
 827							l1i += cache_table[k].size;
 828							break;
 829						case LVL_1_DATA:
 830							l1d += cache_table[k].size;
 831							break;
 832						case LVL_2:
 833							l2 += cache_table[k].size;
 834							break;
 835						case LVL_3:
 836							l3 += cache_table[k].size;
 837							break;
 838						}
 839
 840						break;
 841					}
 842
 843					k++;
 844				}
 845			}
 846		}
 847	}
 848
 849	if (new_l1d)
 850		l1d = new_l1d;
 851
 852	if (new_l1i)
 853		l1i = new_l1i;
 854
 855	if (new_l2) {
 856		l2 = new_l2;
 857		c->topo.llc_id = l2_id;
 858		c->topo.l2c_id = l2_id;
 859	}
 860
 861	if (new_l3) {
 862		l3 = new_l3;
 863		c->topo.llc_id = l3_id;
 864	}
 865
 866	/*
 867	 * If llc_id is not yet set, this means cpuid_level < 4 which in
 868	 * turns means that the only possibility is SMT (as indicated in
 869	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
 870	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
 871	 * c->topo.pkg_id.
 872	 */
 873	if (c->topo.llc_id == BAD_APICID)
 874		c->topo.llc_id = c->topo.pkg_id;
 875
 876	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 877
 878	if (!l2)
 879		cpu_detect_cache_sizes(c);
 880}
 881
 882static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 883				    struct _cpuid4_info_regs *base)
 884{
 885	struct cpu_cacheinfo *this_cpu_ci;
 886	struct cacheinfo *this_leaf;
 887	int i, sibling;
 888
 889	/*
 890	 * For L3, always use the pre-calculated cpu_llc_shared_mask
 891	 * to derive shared_cpu_map.
 892	 */
 893	if (index == 3) {
 894		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 895			this_cpu_ci = get_cpu_cacheinfo(i);
 896			if (!this_cpu_ci->info_list)
 897				continue;
 898			this_leaf = this_cpu_ci->info_list + index;
 899			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 900				if (!cpu_online(sibling))
 901					continue;
 902				cpumask_set_cpu(sibling,
 903						&this_leaf->shared_cpu_map);
 904			}
 905		}
 906	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 907		unsigned int apicid, nshared, first, last;
 908
 909		nshared = base->eax.split.num_threads_sharing + 1;
 910		apicid = cpu_data(cpu).topo.apicid;
 911		first = apicid - (apicid % nshared);
 912		last = first + nshared - 1;
 913
 914		for_each_online_cpu(i) {
 915			this_cpu_ci = get_cpu_cacheinfo(i);
 916			if (!this_cpu_ci->info_list)
 917				continue;
 918
 919			apicid = cpu_data(i).topo.apicid;
 920			if ((apicid < first) || (apicid > last))
 921				continue;
 922
 923			this_leaf = this_cpu_ci->info_list + index;
 924
 925			for_each_online_cpu(sibling) {
 926				apicid = cpu_data(sibling).topo.apicid;
 927				if ((apicid < first) || (apicid > last))
 928					continue;
 929				cpumask_set_cpu(sibling,
 930						&this_leaf->shared_cpu_map);
 931			}
 932		}
 933	} else
 934		return 0;
 935
 936	return 1;
 937}
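
/*
 * Worked example for the TOPOEXT path above: with num_threads_sharing = 3
 * (i.e. nshared = 4) and a CPU whose APIC ID is 5, first = 5 - (5 % 4) = 4
 * and last = 7, so shared_cpu_map collects all online CPUs whose APIC IDs
 * fall in 4..7.
 */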
 938
 939static void __cache_cpumap_setup(unsigned int cpu, int index,
 940				 struct _cpuid4_info_regs *base)
 941{
 942	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 943	struct cacheinfo *this_leaf, *sibling_leaf;
 944	unsigned long num_threads_sharing;
 945	int index_msb, i;
 946	struct cpuinfo_x86 *c = &cpu_data(cpu);
 947
 948	if (c->x86_vendor == X86_VENDOR_AMD ||
 949	    c->x86_vendor == X86_VENDOR_HYGON) {
 950		if (__cache_amd_cpumap_setup(cpu, index, base))
 951			return;
 952	}
 953
 954	this_leaf = this_cpu_ci->info_list + index;
 955	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 956
 957	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 958	if (num_threads_sharing == 1)
 959		return;
 960
 961	index_msb = get_count_order(num_threads_sharing);
 962
 963	for_each_online_cpu(i)
 964		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
 965			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 966
 967			if (i == cpu || !sib_cpu_ci->info_list)
 968				continue;/* skip if itself or no cacheinfo */
 969			sibling_leaf = sib_cpu_ci->info_list + index;
 970			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 971			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 972		}
 973}
 974
 975static void ci_leaf_init(struct cacheinfo *this_leaf,
 976			 struct _cpuid4_info_regs *base)
 977{
 978	this_leaf->id = base->id;
 979	this_leaf->attributes = CACHE_ID;
 980	this_leaf->level = base->eax.split.level;
 981	this_leaf->type = cache_type_map[base->eax.split.type];
 982	this_leaf->coherency_line_size =
 983				base->ebx.split.coherency_line_size + 1;
 984	this_leaf->ways_of_associativity =
 985				base->ebx.split.ways_of_associativity + 1;
 986	this_leaf->size = base->size;
 987	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
 988	this_leaf->physical_line_partition =
 989				base->ebx.split.physical_line_partition + 1;
 990	this_leaf->priv = base->nb;
 991}
 992
 993int init_cache_level(unsigned int cpu)
 994{
 995	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
 996
 997	/* There should be at least one leaf. */
 998	if (!ci->num_leaves)
 999		return -ENOENT;
 1000
1001	return 0;
1002}
1003
1004/*
1005 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1006 * ECX as cache index. Then right shift apicid by the number's order to get
1007 * cache id for this cache node.
1008 */
1009static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1010{
1011	struct cpuinfo_x86 *c = &cpu_data(cpu);
1012	unsigned long num_threads_sharing;
1013	int index_msb;
1014
1015	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1016	index_msb = get_count_order(num_threads_sharing);
1017	id4_regs->id = c->topo.apicid >> index_msb;
1018}
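
/*
 * Worked example: if CPUID.4:EAX[25:14] = 7 for this leaf, then
 * num_threads_sharing = 8, get_count_order(8) = 3, and the cache id is
 * apicid >> 3, so APIC IDs 0..7 map to cache id 0, 8..15 to id 1, etc.
 */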
1019
1020int populate_cache_leaves(unsigned int cpu)
1021{
1022	unsigned int idx, ret;
1023	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1024	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1025	struct _cpuid4_info_regs id4_regs = {};
1026
1027	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1028		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1029		if (ret)
1030			return ret;
1031		get_cache_id(cpu, &id4_regs);
1032		ci_leaf_init(this_leaf++, &id4_regs);
1033		__cache_cpumap_setup(cpu, idx, &id4_regs);
1034	}
1035	this_cpu_ci->cpu_map_populated = true;
1036
1037	return 0;
1038}
1039
1040/*
1041 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
1042 *
 1043 * Since we are disabling the cache, don't allow any interrupts;
 1044 * they would run extremely slowly and only increase the pain.
1045 *
1046 * The caller must ensure that local interrupts are disabled and
1047 * are reenabled after cache_enable() has been called.
1048 */
1049static unsigned long saved_cr4;
1050static DEFINE_RAW_SPINLOCK(cache_disable_lock);
1051
1052void cache_disable(void) __acquires(cache_disable_lock)
1053{
1054	unsigned long cr0;
1055
1056	/*
1057	 * Note that this is not ideal
1058	 * since the cache is only flushed/disabled for this CPU while the
1059	 * MTRRs are changed, but changing this requires more invasive
1060	 * changes to the way the kernel boots
1061	 */
1062
1063	raw_spin_lock(&cache_disable_lock);
1064
1065	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
1066	cr0 = read_cr0() | X86_CR0_CD;
1067	write_cr0(cr0);
1068
1069	/*
1070	 * Cache flushing is the most time-consuming step when programming
1071	 * the MTRRs. Fortunately, as per the Intel Software Development
1072	 * Manual, we can skip it if the processor supports cache self-
1073	 * snooping.
1074	 */
1075	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1076		wbinvd();
1077
1078	/* Save value of CR4 and clear Page Global Enable (bit 7) */
1079	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
1080		saved_cr4 = __read_cr4();
1081		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
1082	}
1083
1084	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
1085	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1086	flush_tlb_local();
1087
1088	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1089		mtrr_disable();
1090
1091	/* Again, only flush caches if we have to. */
1092	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1093		wbinvd();
1094}
1095
1096void cache_enable(void) __releases(cache_disable_lock)
1097{
1098	/* Flush TLBs (no need to flush caches - they are disabled) */
1099	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1100	flush_tlb_local();
1101
1102	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1103		mtrr_enable();
1104
1105	/* Enable caches */
1106	write_cr0(read_cr0() & ~X86_CR0_CD);
1107
1108	/* Restore value of CR4 */
1109	if (cpu_feature_enabled(X86_FEATURE_PGE))
1110		__write_cr4(saved_cr4);
1111
1112	raw_spin_unlock(&cache_disable_lock);
1113}
1114
1115static void cache_cpu_init(void)
1116{
1117	unsigned long flags;
1118
1119	local_irq_save(flags);
1120
1121	if (memory_caching_control & CACHE_MTRR) {
1122		cache_disable();
1123		mtrr_generic_set_state();
1124		cache_enable();
1125	}
1126
1127	if (memory_caching_control & CACHE_PAT)
1128		pat_cpu_init();
1129
1130	local_irq_restore(flags);
1131}
1132
1133static bool cache_aps_delayed_init = true;
1134
1135void set_cache_aps_delayed_init(bool val)
1136{
1137	cache_aps_delayed_init = val;
1138}
1139
1140bool get_cache_aps_delayed_init(void)
1141{
1142	return cache_aps_delayed_init;
1143}
1144
1145static int cache_rendezvous_handler(void *unused)
1146{
1147	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
1148		cache_cpu_init();
1149
1150	return 0;
1151}
1152
1153void __init cache_bp_init(void)
1154{
1155	mtrr_bp_init();
1156	pat_bp_init();
1157
1158	if (memory_caching_control)
1159		cache_cpu_init();
1160}
1161
1162void cache_bp_restore(void)
1163{
1164	if (memory_caching_control)
1165		cache_cpu_init();
1166}
1167
1168static int cache_ap_online(unsigned int cpu)
1169{
1170	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);
1171
1172	if (!memory_caching_control || get_cache_aps_delayed_init())
1173		return 0;
1174
1175	/*
 1176	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
 1177	 * being changed, but this routine is called at CPU boot time and
 1178	 * holding the lock breaks it.
1179	 *
1180	 * This routine is called in two cases:
1181	 *
 1182	 *   1. very early during software resume, when there are
 1183	 *      absolutely no MTRR entry changes;
1184	 *
1185	 *   2. CPU hotadd time. We let mtrr_add/del_page hold cpuhotplug
1186	 *      lock to prevent MTRR entry changes
1187	 */
1188	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
1189				       cpu_cacheinfo_mask);
1190
1191	return 0;
1192}
1193
1194static int cache_ap_offline(unsigned int cpu)
1195{
1196	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
1197	return 0;
1198}
1199
1200/*
 1201 * Delayed cache initialization for all APs
1202 */
1203void cache_aps_init(void)
1204{
1205	if (!memory_caching_control || !get_cache_aps_delayed_init())
1206		return;
1207
1208	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
1209	set_cache_aps_delayed_init(false);
1210}
1211
1212static int __init cache_ap_register(void)
1213{
1214	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
1215	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);
1216
1217	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
1218				  "x86/cachectrl:starting",
1219				  cache_ap_online, cache_ap_offline);
1220	return 0;
1221}
1222early_initcall(cache_ap_register);
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *	Routines to identify caches on Intel CPU.
   4 *
   5 *	Changes:
   6 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   7 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   8 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   9 */
  10
  11#include <linux/slab.h>
  12#include <linux/cacheinfo.h>
  13#include <linux/cpu.h>
  14#include <linux/cpuhotplug.h>
  15#include <linux/sched.h>
  16#include <linux/capability.h>
  17#include <linux/sysfs.h>
  18#include <linux/pci.h>
  19#include <linux/stop_machine.h>
  20
  21#include <asm/cpufeature.h>
  22#include <asm/cacheinfo.h>
  23#include <asm/amd_nb.h>
  24#include <asm/smp.h>
  25#include <asm/mtrr.h>
  26#include <asm/tlbflush.h>
  27
  28#include "cpu.h"
  29
  30#define LVL_1_INST	1
  31#define LVL_1_DATA	2
  32#define LVL_2		3
  33#define LVL_3		4
  34#define LVL_TRACE	5
  35
  36/* Shared last level cache maps */
  37DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
  38
  39/* Shared L2 cache maps */
  40DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
  41
  42static cpumask_var_t cpu_cacheinfo_mask;
  43
  44/* Kernel controls MTRR and/or PAT MSRs. */
  45unsigned int memory_caching_control __ro_after_init;
  46
  47struct _cache_table {
  48	unsigned char descriptor;
  49	char cache_type;
  50	short size;
  51};
  52
  53#define MB(x)	((x) * 1024)
  54
  55/* All the cache descriptor types we care about (no TLB or
  56   trace cache entries) */
  57
  58static const struct _cache_table cache_table[] =
  59{
  60	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  61	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  62	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  63	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  64	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  65	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  66	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  67	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  68	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  69	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  70	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  71	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  72	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  73	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  74	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  75	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  76	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  77	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  79	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  80	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  81	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  82	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  83	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  84	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  85	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  86	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  87	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  88	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  89	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  90	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  91	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  92	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  93	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  94	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  95	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  96	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  97	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  98	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  99	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 100	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 101	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 102	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 103	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 104	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 105	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 106	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 107	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 108	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 109	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 110	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 111	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 112	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 113	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 114	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 115	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 116	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 117	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 118	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 119	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 120	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 121	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 122	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 123	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 124	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 125	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 126	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 127	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 128	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 129	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 130	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 131	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 132	{ 0x00, 0, 0}
 133};
 134
 135
 136enum _cache_type {
 137	CTYPE_NULL = 0,
 138	CTYPE_DATA = 1,
 139	CTYPE_INST = 2,
 140	CTYPE_UNIFIED = 3
 141};
 142
 143union _cpuid4_leaf_eax {
 144	struct {
 145		enum _cache_type	type:5;
 146		unsigned int		level:3;
 147		unsigned int		is_self_initializing:1;
 148		unsigned int		is_fully_associative:1;
 149		unsigned int		reserved:4;
 150		unsigned int		num_threads_sharing:12;
 151		unsigned int		num_cores_on_die:6;
 152	} split;
 153	u32 full;
 154};
 155
 156union _cpuid4_leaf_ebx {
 157	struct {
 158		unsigned int		coherency_line_size:12;
 159		unsigned int		physical_line_partition:10;
 160		unsigned int		ways_of_associativity:10;
 161	} split;
 162	u32 full;
 163};
 164
 165union _cpuid4_leaf_ecx {
 166	struct {
 167		unsigned int		number_of_sets:32;
 168	} split;
 169	u32 full;
 170};
 171
 172struct _cpuid4_info_regs {
 173	union _cpuid4_leaf_eax eax;
 174	union _cpuid4_leaf_ebx ebx;
 175	union _cpuid4_leaf_ecx ecx;
 176	unsigned int id;
 177	unsigned long size;
 178	struct amd_northbridge *nb;
 179};
 180
 181static unsigned short num_cache_leaves;
 182
 183/* AMD doesn't have CPUID4. Emulate it here to report the same
 184   information to the user.  This makes some assumptions about the machine:
 185   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 186
 187   In theory the TLBs could be reported as fake type (they are in "dummy").
 188   Maybe later */
 189union l1_cache {
 190	struct {
 191		unsigned line_size:8;
 192		unsigned lines_per_tag:8;
 193		unsigned assoc:8;
 194		unsigned size_in_kb:8;
 195	};
 196	unsigned val;
 197};
 198
 199union l2_cache {
 200	struct {
 201		unsigned line_size:8;
 202		unsigned lines_per_tag:4;
 203		unsigned assoc:4;
 204		unsigned size_in_kb:16;
 205	};
 206	unsigned val;
 207};
 208
 209union l3_cache {
 210	struct {
 211		unsigned line_size:8;
 212		unsigned lines_per_tag:4;
 213		unsigned assoc:4;
 214		unsigned res:2;
 215		unsigned size_encoded:14;
 216	};
 217	unsigned val;
 218};
 219
 220static const unsigned short assocs[] = {
 221	[1] = 1,
 222	[2] = 2,
 223	[4] = 4,
 224	[6] = 8,
 225	[8] = 16,
 226	[0xa] = 32,
 227	[0xb] = 48,
 228	[0xc] = 64,
 229	[0xd] = 96,
 230	[0xe] = 128,
 231	[0xf] = 0xffff /* fully associative - no way to show this currently */
 232};
 233
 234static const unsigned char levels[] = { 1, 1, 2, 3 };
 235static const unsigned char types[] = { 1, 2, 3, 3 };
 236
 237static const enum cache_type cache_type_map[] = {
 238	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
 239	[CTYPE_DATA] = CACHE_TYPE_DATA,
 240	[CTYPE_INST] = CACHE_TYPE_INST,
 241	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
 242};
 243
 244static void
 245amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 246		     union _cpuid4_leaf_ebx *ebx,
 247		     union _cpuid4_leaf_ecx *ecx)
 248{
 249	unsigned dummy;
 250	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 251	union l1_cache l1i, l1d;
 252	union l2_cache l2;
 253	union l3_cache l3;
 254	union l1_cache *l1 = &l1d;
 255
 256	eax->full = 0;
 257	ebx->full = 0;
 258	ecx->full = 0;
 259
 260	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 261	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 262
 263	switch (leaf) {
 264	case 1:
 265		l1 = &l1i;
 266		fallthrough;
 267	case 0:
 268		if (!l1->val)
 269			return;
 270		assoc = assocs[l1->assoc];
 271		line_size = l1->line_size;
 272		lines_per_tag = l1->lines_per_tag;
 273		size_in_kb = l1->size_in_kb;
 274		break;
 275	case 2:
 276		if (!l2.val)
 277			return;
 278		assoc = assocs[l2.assoc];
 279		line_size = l2.line_size;
 280		lines_per_tag = l2.lines_per_tag;
 281		/* cpu_data has errata corrections for K7 applied */
 282		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 283		break;
 284	case 3:
 285		if (!l3.val)
 286			return;
 287		assoc = assocs[l3.assoc];
 288		line_size = l3.line_size;
 289		lines_per_tag = l3.lines_per_tag;
 290		size_in_kb = l3.size_encoded * 512;
 291		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 292			size_in_kb = size_in_kb >> 1;
 293			assoc = assoc >> 1;
 294		}
 295		break;
 296	default:
 297		return;
 298	}
 299
 300	eax->split.is_self_initializing = 1;
 301	eax->split.type = types[leaf];
 302	eax->split.level = levels[leaf];
 303	eax->split.num_threads_sharing = 0;
 304	eax->split.num_cores_on_die = topology_num_cores_per_package();
 305
 306
 307	if (assoc == 0xffff)
 308		eax->split.is_fully_associative = 1;
 309	ebx->split.coherency_line_size = line_size - 1;
 310	ebx->split.ways_of_associativity = assoc - 1;
 311	ebx->split.physical_line_partition = lines_per_tag - 1;
 312	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 313		(ebx->split.ways_of_associativity + 1) - 1;
 314}
 315
 316#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 317
 318/*
 319 * L3 cache descriptors
 320 */
 321static void amd_calc_l3_indices(struct amd_northbridge *nb)
 322{
 323	struct amd_l3_cache *l3 = &nb->l3_cache;
 324	unsigned int sc0, sc1, sc2, sc3;
 325	u32 val = 0;
 326
 327	pci_read_config_dword(nb->misc, 0x1C4, &val);
 328
 329	/* calculate subcache sizes */
 330	l3->subcaches[0] = sc0 = !(val & BIT(0));
 331	l3->subcaches[1] = sc1 = !(val & BIT(4));
 332
 333	if (boot_cpu_data.x86 == 0x15) {
 334		l3->subcaches[0] = sc0 += !(val & BIT(1));
 335		l3->subcaches[1] = sc1 += !(val & BIT(5));
 336	}
 337
 338	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 339	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 340
 341	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 342}
 343
 344/*
 345 * check whether a slot used for disabling an L3 index is occupied.
 346 * @l3: L3 cache descriptor
 347 * @slot: slot number (0..1)
 348 *
 349 * @returns: the disabled index if used or negative value if slot free.
 350 */
 351static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 352{
 353	unsigned int reg = 0;
 354
 355	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 356
 357	/* check whether this slot is activated already */
 358	if (reg & (3UL << 30))
 359		return reg & 0xfff;
 360
 361	return -1;
 362}
 363
 364static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 365				  unsigned int slot)
 366{
 367	int index;
 368	struct amd_northbridge *nb = this_leaf->priv;
 369
 370	index = amd_get_l3_disable_slot(nb, slot);
 371	if (index >= 0)
 372		return sprintf(buf, "%d\n", index);
 373
 374	return sprintf(buf, "FREE\n");
 375}
 376
 377#define SHOW_CACHE_DISABLE(slot)					\
 378static ssize_t								\
 379cache_disable_##slot##_show(struct device *dev,				\
 380			    struct device_attribute *attr, char *buf)	\
 381{									\
 382	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 383	return show_cache_disable(this_leaf, buf, slot);		\
 384}
 385SHOW_CACHE_DISABLE(0)
 386SHOW_CACHE_DISABLE(1)
 387
 388static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 389				 unsigned slot, unsigned long idx)
 390{
 391	int i;
 392
 393	idx |= BIT(30);
 394
 395	/*
 396	 *  disable index in all 4 subcaches
 397	 */
 398	for (i = 0; i < 4; i++) {
 399		u32 reg = idx | (i << 20);
 400
 401		if (!nb->l3_cache.subcaches[i])
 402			continue;
 403
 404		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 405
 406		/*
 407		 * We need to WBINVD on a core on the node containing the L3
 408		 * cache which indices we disable therefore a simple wbinvd()
 409		 * is not sufficient.
 410		 */
 411		wbinvd_on_cpu(cpu);
 412
 413		reg |= BIT(31);
 414		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 415	}
 416}
 417
 418/*
 419 * disable a L3 cache index by using a disable-slot
 420 *
 421 * @l3:    L3 cache descriptor
 422 * @cpu:   A CPU on the node containing the L3 cache
 423 * @slot:  slot number (0..1)
 424 * @index: index to disable
 425 *
 426 * @return: 0 on success, error status on failure
 427 */
 428static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
 429			    unsigned slot, unsigned long index)
 430{
 431	int ret = 0;
 432
 433	/*  check if @slot is already used or the index is already disabled */
 434	ret = amd_get_l3_disable_slot(nb, slot);
 435	if (ret >= 0)
 436		return -EEXIST;
 437
 438	if (index > nb->l3_cache.indices)
 439		return -EINVAL;
 440
 441	/* check whether the other slot has disabled the same index already */
 442	if (index == amd_get_l3_disable_slot(nb, !slot))
 443		return -EEXIST;
 444
 445	amd_l3_disable_index(nb, cpu, slot, index);
 446
 447	return 0;
 448}
 449
 450static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
 451				   const char *buf, size_t count,
 452				   unsigned int slot)
 453{
 454	unsigned long val = 0;
 455	int cpu, err = 0;
 456	struct amd_northbridge *nb = this_leaf->priv;
 457
 458	if (!capable(CAP_SYS_ADMIN))
 459		return -EPERM;
 460
 461	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 462
 463	if (kstrtoul(buf, 10, &val) < 0)
 464		return -EINVAL;
 465
 466	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 467	if (err) {
 468		if (err == -EEXIST)
 469			pr_warn("L3 slot %d in use/index already disabled!\n",
 470				   slot);
 471		return err;
 472	}
 473	return count;
 474}
 475
 476#define STORE_CACHE_DISABLE(slot)					\
 477static ssize_t								\
 478cache_disable_##slot##_store(struct device *dev,			\
 479			     struct device_attribute *attr,		\
 480			     const char *buf, size_t count)		\
 481{									\
 482	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 483	return store_cache_disable(this_leaf, buf, count, slot);	\
 484}
 485STORE_CACHE_DISABLE(0)
 486STORE_CACHE_DISABLE(1)
 487
 488static ssize_t subcaches_show(struct device *dev,
 489			      struct device_attribute *attr, char *buf)
 490{
 491	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 492	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 493
 494	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 495}
 496
 497static ssize_t subcaches_store(struct device *dev,
 498			       struct device_attribute *attr,
 499			       const char *buf, size_t count)
 500{
 501	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 502	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 503	unsigned long val;
 504
 505	if (!capable(CAP_SYS_ADMIN))
 506		return -EPERM;
 507
 508	if (kstrtoul(buf, 16, &val) < 0)
 509		return -EINVAL;
 510
 511	if (amd_set_subcaches(cpu, val))
 512		return -EINVAL;
 513
 514	return count;
 515}
 516
 517static DEVICE_ATTR_RW(cache_disable_0);
 518static DEVICE_ATTR_RW(cache_disable_1);
 519static DEVICE_ATTR_RW(subcaches);
 520
 521static umode_t
 522cache_private_attrs_is_visible(struct kobject *kobj,
 523			       struct attribute *attr, int unused)
 524{
 525	struct device *dev = kobj_to_dev(kobj);
 526	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 527	umode_t mode = attr->mode;
 528
 529	if (!this_leaf->priv)
 530		return 0;
 531
 532	if ((attr == &dev_attr_subcaches.attr) &&
 533	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 534		return mode;
 535
 536	if ((attr == &dev_attr_cache_disable_0.attr ||
 537	     attr == &dev_attr_cache_disable_1.attr) &&
 538	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 539		return mode;
 540
 541	return 0;
 542}
 543
 544static struct attribute_group cache_private_group = {
 545	.is_visible = cache_private_attrs_is_visible,
 546};
 547
 548static void init_amd_l3_attrs(void)
 549{
 550	int n = 1;
 551	static struct attribute **amd_l3_attrs;
 552
 553	if (amd_l3_attrs) /* already initialized */
 554		return;
 555
 556	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 557		n += 2;
 558	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 559		n += 1;
 560
 561	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
 562	if (!amd_l3_attrs)
 563		return;
 564
 565	n = 0;
 566	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 567		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
 568		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
 569	}
 570	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 571		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 572
 573	cache_private_group.attrs = amd_l3_attrs;
 574}
 575
 576const struct attribute_group *
 577cache_get_priv_group(struct cacheinfo *this_leaf)
 578{
 579	struct amd_northbridge *nb = this_leaf->priv;
 580
 581	if (this_leaf->level < 3 || !nb)
 582		return NULL;
 583
 584	if (nb && nb->l3_cache.indices)
 585		init_amd_l3_attrs();
 586
 587	return &cache_private_group;
 588}
 589
 590static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 591{
 592	int node;
 593
 594	/* only for L3, and not in virtualized environments */
 595	if (index < 3)
 596		return;
 597
 598	node = topology_amd_node_id(smp_processor_id());
 599	this_leaf->nb = node_to_amd_nb(node);
 600	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 601		amd_calc_l3_indices(this_leaf->nb);
 602}
 603#else
 604#define amd_init_l3_cache(x, y)
 605#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 606
 607static int
 608cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 609{
 610	union _cpuid4_leaf_eax	eax;
 611	union _cpuid4_leaf_ebx	ebx;
 612	union _cpuid4_leaf_ecx	ecx;
 613	unsigned		edx;
 614
 615	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 616		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
 617			cpuid_count(0x8000001d, index, &eax.full,
 618				    &ebx.full, &ecx.full, &edx);
 619		else
 620			amd_cpuid4(index, &eax, &ebx, &ecx);
 621		amd_init_l3_cache(this_leaf, index);
 622	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
 623		cpuid_count(0x8000001d, index, &eax.full,
 624			    &ebx.full, &ecx.full, &edx);
 625		amd_init_l3_cache(this_leaf, index);
 626	} else {
 627		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 628	}
 629
 630	if (eax.split.type == CTYPE_NULL)
 631		return -EIO; /* better error ? */
 632
 633	this_leaf->eax = eax;
 634	this_leaf->ebx = ebx;
 635	this_leaf->ecx = ecx;
 636	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 637			  (ebx.split.coherency_line_size     + 1) *
 638			  (ebx.split.physical_line_partition + 1) *
 639			  (ebx.split.ways_of_associativity   + 1);
 640	return 0;
 641}
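/*
 * Worked example for the size computation above (illustrative values):
 * a leaf reporting number_of_sets = 1023, coherency_line_size = 63,
 * physical_line_partition = 0 and ways_of_associativity = 15 describes
 * 1024 sets * 64 B * 1 partition * 16 ways = 1 MiB.
 */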
 642
 643static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 644{
 645	unsigned int		eax, ebx, ecx, edx, op;
 646	union _cpuid4_leaf_eax	cache_eax;
 647	int 			i = -1;
 648
 649	if (c->x86_vendor == X86_VENDOR_AMD ||
 650	    c->x86_vendor == X86_VENDOR_HYGON)
 651		op = 0x8000001d;
 652	else
 653		op = 4;
 654
 655	do {
 656		++i;
 657		/* Do cpuid(op) loop to find out num_cache_leaves */
 658		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 659		cache_eax.full = eax;
 660	} while (cache_eax.split.type != CTYPE_NULL);
 661	return i;
 662}
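/*
 * Example (illustrative): a CPU exposing L1d, L1i, L2 and L3 through the
 * deterministic leaf reports CTYPE_NULL at subleaf 4, so the loop above
 * returns 4.
 */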
 663
 664void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
 665{
 666	/*
 667	 * We may have multiple LLCs if L3 caches exist, so check if we
 668	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 669	 */
 670	if (!cpuid_edx(0x80000006))
 671		return;
 672
 673	if (c->x86 < 0x17) {
 674		/* LLC is at the node level. */
 675		c->topo.llc_id = die_id;
 676	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
 677		/*
 678		 * LLC is at the core complex level.
 679		 * Core complex ID is ApicId[3] for these processors.
 680		 */
 681		c->topo.llc_id = c->topo.apicid >> 3;
 682	} else {
 683		/*
 684		 * LLC ID is calculated from the number of threads sharing the
 685		 * cache.
  686		 */
 687		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
 688		u32 llc_index = find_num_cache_leaves(c) - 1;
 689
 690		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
 691		if (eax)
 692			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 693
 694		if (num_sharing_cache) {
 695			int bits = get_count_order(num_sharing_cache);
 696
 697			c->topo.llc_id = c->topo.apicid >> bits;
 698		}
 699	}
 700}
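/*
 * Illustrative numbers for the last branch above: if EAX[25:14] of the
 * highest cache leaf reads 15, then 16 threads share the LLC,
 * get_count_order(16) is 4, and every CPU with APIC ID 0x00-0x0f gets
 * llc_id 0.
 */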
 701
 702void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
 703{
 704	/*
 705	 * We may have multiple LLCs if L3 caches exist, so check if we
 706	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 707	 */
 708	if (!cpuid_edx(0x80000006))
 709		return;
 710
 711	/*
 712	 * LLC is at the core complex level.
 713	 * Core complex ID is ApicId[3] for these processors.
 714	 */
 715	c->topo.llc_id = c->topo.apicid >> 3;
 716}
 717
 718void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 719{
 720
 721	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 722		num_cache_leaves = find_num_cache_leaves(c);
 723	} else if (c->extended_cpuid_level >= 0x80000006) {
 724		if (cpuid_edx(0x80000006) & 0xf000)
 725			num_cache_leaves = 4;
 726		else
 727			num_cache_leaves = 3;
 728	}
 729}
 730
 731void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 732{
 733	num_cache_leaves = find_num_cache_leaves(c);
 734}
 735
 736void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 737{
 738	/* Cache sizes */
 739	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 740	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 741	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 742	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 743
 744	if (c->cpuid_level > 3) {
 745		static int is_initialized;
 746
 747		if (is_initialized == 0) {
 748			/* Init num_cache_leaves from boot CPU */
 749			num_cache_leaves = find_num_cache_leaves(c);
 750			is_initialized++;
 751		}
 752
 753		/*
 754		 * Whenever possible use cpuid(4), deterministic cache
 755		 * parameters cpuid leaf to find the cache details
 756		 */
 757		for (i = 0; i < num_cache_leaves; i++) {
 758			struct _cpuid4_info_regs this_leaf = {};
 759			int retval;
 760
 761			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 762			if (retval < 0)
 763				continue;
 764
 765			switch (this_leaf.eax.split.level) {
 766			case 1:
 767				if (this_leaf.eax.split.type == CTYPE_DATA)
 768					new_l1d = this_leaf.size/1024;
 769				else if (this_leaf.eax.split.type == CTYPE_INST)
 770					new_l1i = this_leaf.size/1024;
 771				break;
 772			case 2:
 773				new_l2 = this_leaf.size/1024;
 774				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 775				index_msb = get_count_order(num_threads_sharing);
 776				l2_id = c->topo.apicid & ~((1 << index_msb) - 1);
 777				break;
 778			case 3:
 779				new_l3 = this_leaf.size/1024;
 780				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 781				index_msb = get_count_order(num_threads_sharing);
 782				l3_id = c->topo.apicid & ~((1 << index_msb) - 1);
 783				break;
 784			default:
 785				break;
 786			}
 787		}
 788	}
 789	/*
 790	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 791	 * trace cache
 792	 */
 793	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
  794		/* supports eax=2 call */
 795		int j, n;
 796		unsigned int regs[4];
 797		unsigned char *dp = (unsigned char *)regs;
 798		int only_trace = 0;
 799
 800		if (num_cache_leaves != 0 && c->x86 == 15)
 801			only_trace = 1;
 802
 803		/* Number of times to iterate */
 804		n = cpuid_eax(2) & 0xFF;
 805
 806		for (i = 0 ; i < n ; i++) {
 807			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 808
 809			/* If bit 31 is set, this is an unknown format */
 810			for (j = 0 ; j < 3 ; j++)
 811				if (regs[j] & (1 << 31))
 812					regs[j] = 0;
 813
 814			/* Byte 0 is level count, not a descriptor */
 815			for (j = 1 ; j < 16 ; j++) {
 816				unsigned char des = dp[j];
 817				unsigned char k = 0;
 818
 819				/* look up this descriptor in the table */
 820				while (cache_table[k].descriptor != 0) {
 821					if (cache_table[k].descriptor == des) {
 822						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 823							break;
 824						switch (cache_table[k].cache_type) {
 825						case LVL_1_INST:
 826							l1i += cache_table[k].size;
 827							break;
 828						case LVL_1_DATA:
 829							l1d += cache_table[k].size;
 830							break;
 831						case LVL_2:
 832							l2 += cache_table[k].size;
 833							break;
 834						case LVL_3:
 835							l3 += cache_table[k].size;
 836							break;
 837						}
 838
 839						break;
 840					}
 841
 842					k++;
 843				}
 844			}
 845		}
 846	}
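	/*
	 * The legacy path above iterates CPUID(2) (byte 0 of EAX is the
	 * repeat count) and accumulates the size of every descriptor byte
	 * it finds in cache_table[] into l1i/l1d/l2/l3, in KB. It is only
	 * taken when the deterministic leaf is unavailable, or on family
	 * 0xf where CPUID(2) is still consulted for the trace cache
	 * descriptor.
	 */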
 847
 848	if (new_l1d)
 849		l1d = new_l1d;
 850
 851	if (new_l1i)
 852		l1i = new_l1i;
 853
 854	if (new_l2) {
 855		l2 = new_l2;
 856		c->topo.llc_id = l2_id;
 857		c->topo.l2c_id = l2_id;
 858	}
 859
 860	if (new_l3) {
 861		l3 = new_l3;
 862		c->topo.llc_id = l3_id;
 863	}
 864
 865	/*
 866	 * If llc_id is not yet set, this means cpuid_level < 4 which in
  867	 * turn means that the only possibility is SMT (as indicated in
 868	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
 869	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
 870	 * c->topo.pkg_id.
 871	 */
 872	if (c->topo.llc_id == BAD_APICID)
 873		c->topo.llc_id = c->topo.pkg_id;
 874
 875	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 876
 877	if (!l2)
 878		cpu_detect_cache_sizes(c);
 879}
 880
 881static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 882				    struct _cpuid4_info_regs *base)
 883{
 884	struct cpu_cacheinfo *this_cpu_ci;
 885	struct cacheinfo *this_leaf;
 886	int i, sibling;
 887
 888	/*
 889	 * For L3, always use the pre-calculated cpu_llc_shared_mask
 890	 * to derive shared_cpu_map.
 891	 */
 892	if (index == 3) {
 893		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 894			this_cpu_ci = get_cpu_cacheinfo(i);
 895			if (!this_cpu_ci->info_list)
 896				continue;
 897			this_leaf = this_cpu_ci->info_list + index;
 898			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 899				if (!cpu_online(sibling))
 900					continue;
 901				cpumask_set_cpu(sibling,
 902						&this_leaf->shared_cpu_map);
 903			}
 904		}
 905	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 906		unsigned int apicid, nshared, first, last;
 907
 908		nshared = base->eax.split.num_threads_sharing + 1;
 909		apicid = cpu_data(cpu).topo.apicid;
 910		first = apicid - (apicid % nshared);
 911		last = first + nshared - 1;
 912
 913		for_each_online_cpu(i) {
 914			this_cpu_ci = get_cpu_cacheinfo(i);
 915			if (!this_cpu_ci->info_list)
 916				continue;
 917
 918			apicid = cpu_data(i).topo.apicid;
 919			if ((apicid < first) || (apicid > last))
 920				continue;
 921
 922			this_leaf = this_cpu_ci->info_list + index;
 923
 924			for_each_online_cpu(sibling) {
 925				apicid = cpu_data(sibling).topo.apicid;
 926				if ((apicid < first) || (apicid > last))
 927					continue;
 928				cpumask_set_cpu(sibling,
 929						&this_leaf->shared_cpu_map);
 930			}
 931		}
 932	} else
 933		return 0;
 934
 935	return 1;
 936}
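/*
 * Illustrative TOPOEXT example: with num_threads_sharing + 1 == 8 and
 * APIC ID 0x0b, the window above becomes first = 0x08, last = 0x0f, so
 * the shared_cpu_map of this leaf collects every online CPU whose APIC
 * ID falls in 0x08-0x0f.
 */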
 937
 938static void __cache_cpumap_setup(unsigned int cpu, int index,
 939				 struct _cpuid4_info_regs *base)
 940{
 941	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 942	struct cacheinfo *this_leaf, *sibling_leaf;
 943	unsigned long num_threads_sharing;
 944	int index_msb, i;
 945	struct cpuinfo_x86 *c = &cpu_data(cpu);
 946
 947	if (c->x86_vendor == X86_VENDOR_AMD ||
 948	    c->x86_vendor == X86_VENDOR_HYGON) {
 949		if (__cache_amd_cpumap_setup(cpu, index, base))
 950			return;
 951	}
 952
 953	this_leaf = this_cpu_ci->info_list + index;
 954	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 955
 956	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 957	if (num_threads_sharing == 1)
 958		return;
 959
 960	index_msb = get_count_order(num_threads_sharing);
 961
 962	for_each_online_cpu(i)
 963		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
 964			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 965
 966			if (i == cpu || !sib_cpu_ci->info_list)
  967				continue; /* skip if itself or no cacheinfo */
 968			sibling_leaf = sib_cpu_ci->info_list + index;
 969			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 970			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 971		}
 972}
 973
 974static void ci_leaf_init(struct cacheinfo *this_leaf,
 975			 struct _cpuid4_info_regs *base)
 976{
 977	this_leaf->id = base->id;
 978	this_leaf->attributes = CACHE_ID;
 979	this_leaf->level = base->eax.split.level;
 980	this_leaf->type = cache_type_map[base->eax.split.type];
 981	this_leaf->coherency_line_size =
 982				base->ebx.split.coherency_line_size + 1;
 983	this_leaf->ways_of_associativity =
 984				base->ebx.split.ways_of_associativity + 1;
 985	this_leaf->size = base->size;
 986	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
 987	this_leaf->physical_line_partition =
 988				base->ebx.split.physical_line_partition + 1;
 989	this_leaf->priv = base->nb;
 990}
 991
 992int init_cache_level(unsigned int cpu)
 993{
 994	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 995
 996	if (!num_cache_leaves)
 997		return -ENOENT;
 998	if (!this_cpu_ci)
 999		return -EINVAL;
1000	this_cpu_ci->num_levels = 3;
1001	this_cpu_ci->num_leaves = num_cache_leaves;
1002	return 0;
1003}
1004
1005/*
1006 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1007 * ECX as cache index. Then right shift apicid by the number's order to get
1008 * cache id for this cache node.
1009 */
1010static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1011{
1012	struct cpuinfo_x86 *c = &cpu_data(cpu);
1013	unsigned long num_threads_sharing;
1014	int index_msb;
1015
1016	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1017	index_msb = get_count_order(num_threads_sharing);
1018	id4_regs->id = c->topo.apicid >> index_msb;
1019}
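/*
 * Example (illustrative): with num_threads_sharing reported as 7 the
 * cache is shared by 8 threads, get_count_order(8) is 3, and APIC IDs
 * 0x08-0x0f all yield cache id 1.
 */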
1020
1021int populate_cache_leaves(unsigned int cpu)
1022{
1023	unsigned int idx, ret;
1024	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1025	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1026	struct _cpuid4_info_regs id4_regs = {};
1027
1028	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1029		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1030		if (ret)
1031			return ret;
1032		get_cache_id(cpu, &id4_regs);
1033		ci_leaf_init(this_leaf++, &id4_regs);
1034		__cache_cpumap_setup(cpu, idx, &id4_regs);
1035	}
1036	this_cpu_ci->cpu_map_populated = true;
1037
1038	return 0;
1039}
1040
1041/*
1042 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
1043 *
1044 * Since we are disabling the cache don't allow any interrupts,
1045 * they would run extremely slow and would only increase the pain.
1046 *
1047 * The caller must ensure that local interrupts are disabled and
1048 * are reenabled after cache_enable() has been called.
1049 */
1050static unsigned long saved_cr4;
1051static DEFINE_RAW_SPINLOCK(cache_disable_lock);
1052
1053void cache_disable(void) __acquires(cache_disable_lock)
1054{
1055	unsigned long cr0;
1056
1057	/*
 1058	 * Note that this is not ideal since the cache is only
 1059	 * flushed/disabled for this CPU while the MTRRs are changed, but
 1060	 * changing this requires more invasive changes to the way the
 1061	 * kernel boots.
 1062	 */
1063
1064	raw_spin_lock(&cache_disable_lock);
1065
1066	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
1067	cr0 = read_cr0() | X86_CR0_CD;
1068	write_cr0(cr0);
1069
1070	/*
1071	 * Cache flushing is the most time-consuming step when programming
1072	 * the MTRRs. Fortunately, as per the Intel Software Development
1073	 * Manual, we can skip it if the processor supports cache self-
1074	 * snooping.
1075	 */
1076	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1077		wbinvd();
1078
1079	/* Save value of CR4 and clear Page Global Enable (bit 7) */
1080	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
1081		saved_cr4 = __read_cr4();
1082		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
1083	}
1084
1085	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
1086	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1087	flush_tlb_local();
1088
1089	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1090		mtrr_disable();
1091
1092	/* Again, only flush caches if we have to. */
1093	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1094		wbinvd();
1095}
1096
1097void cache_enable(void) __releases(cache_disable_lock)
1098{
1099	/* Flush TLBs (no need to flush caches - they are disabled) */
1100	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1101	flush_tlb_local();
1102
1103	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1104		mtrr_enable();
1105
1106	/* Enable caches */
1107	write_cr0(read_cr0() & ~X86_CR0_CD);
1108
1109	/* Restore value of CR4 */
1110	if (cpu_feature_enabled(X86_FEATURE_PGE))
1111		__write_cr4(saved_cr4);
1112
1113	raw_spin_unlock(&cache_disable_lock);
1114}
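/*
 * Sketch of the required calling pattern (mirrors cache_cpu_init()
 * below; interrupts must stay off for the whole sequence):
 *
 *	local_irq_save(flags);
 *	cache_disable();
 *	... reprogram MTRRs and/or PAT ...
 *	cache_enable();
 *	local_irq_restore(flags);
 */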
1115
1116static void cache_cpu_init(void)
1117{
1118	unsigned long flags;
1119
1120	local_irq_save(flags);
1121
1122	if (memory_caching_control & CACHE_MTRR) {
1123		cache_disable();
1124		mtrr_generic_set_state();
1125		cache_enable();
1126	}
1127
1128	if (memory_caching_control & CACHE_PAT)
1129		pat_cpu_init();
1130
1131	local_irq_restore(flags);
1132}
1133
1134static bool cache_aps_delayed_init = true;
1135
1136void set_cache_aps_delayed_init(bool val)
1137{
1138	cache_aps_delayed_init = val;
1139}
1140
1141bool get_cache_aps_delayed_init(void)
1142{
1143	return cache_aps_delayed_init;
1144}
1145
1146static int cache_rendezvous_handler(void *unused)
1147{
1148	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
1149		cache_cpu_init();
1150
1151	return 0;
1152}
1153
1154void __init cache_bp_init(void)
1155{
1156	mtrr_bp_init();
1157	pat_bp_init();
1158
1159	if (memory_caching_control)
1160		cache_cpu_init();
1161}
1162
1163void cache_bp_restore(void)
1164{
1165	if (memory_caching_control)
1166		cache_cpu_init();
1167}
1168
1169static int cache_ap_online(unsigned int cpu)
1170{
1171	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);
1172
1173	if (!memory_caching_control || get_cache_aps_delayed_init())
1174		return 0;
1175
1176	/*
1177	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
1178	 * changed, but this routine will be called in CPU boot time,
1179	 * holding the lock breaks it.
1180	 *
1181	 * This routine is called in two cases:
1182	 *
1183	 *   1. very early time of software resume, when there absolutely
1184	 *      isn't MTRR entry changes;
1185	 *
1186	 *   2. CPU hotadd time. We let mtrr_add/del_page hold cpuhotplug
1187	 *      lock to prevent MTRR entry changes
1188	 */
1189	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
1190				       cpu_cacheinfo_mask);
1191
1192	return 0;
1193}
1194
1195static int cache_ap_offline(unsigned int cpu)
1196{
1197	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
1198	return 0;
1199}
1200
1201/*
 1202 * Delayed cache initialization for all APs
1203 */
1204void cache_aps_init(void)
1205{
1206	if (!memory_caching_control || !get_cache_aps_delayed_init())
1207		return;
1208
1209	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
1210	set_cache_aps_delayed_init(false);
1211}
1212
1213static int __init cache_ap_register(void)
1214{
1215	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
1216	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);
1217
1218	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
1219				  "x86/cachectrl:starting",
1220				  cache_ap_online, cache_ap_offline);
1221	return 0;
1222}
1223early_initcall(cache_ap_register);