v5.4
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *	Routines to identify caches on Intel CPU.
   4 *
   5 *	Changes:
   6 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   7 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   8 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   9 */
  10
  11#include <linux/slab.h>
  12#include <linux/cacheinfo.h>
  13#include <linux/cpu.h>
  14#include <linux/sched.h>
  15#include <linux/capability.h>
  16#include <linux/sysfs.h>
  17#include <linux/pci.h>
  18
  19#include <asm/cpufeature.h>
  20#include <asm/cacheinfo.h>
  21#include <asm/amd_nb.h>
  22#include <asm/smp.h>
  23
  24#include "cpu.h"
  25
  26#define LVL_1_INST	1
  27#define LVL_1_DATA	2
  28#define LVL_2		3
  29#define LVL_3		4
  30#define LVL_TRACE	5
  31
  32struct _cache_table {
  33	unsigned char descriptor;
  34	char cache_type;
  35	short size;
  36};
  37
  38#define MB(x)	((x) * 1024)
  39
  40/* All the cache descriptor types we care about (no TLB or
  41   trace cache entries) */
  42
  43static const struct _cache_table cache_table[] =
  44{
  45	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  46	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  47	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  48	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  49	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  50	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  51	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  52	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  53	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  54	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  55	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  56	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  57	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  58	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  59	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  60	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  61	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  62	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  63	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  64	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  65	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  66	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  67	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  68	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  69	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  70	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  71	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  72	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  73	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  74	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  75	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  76	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  77	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  78	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  79	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  80	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  81	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  82	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  83	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  84	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
  85	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
  86	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
  87	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
  88	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
  89	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  90	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  91	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  92	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  93	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
  94	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
  95	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
  96	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
  97	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
  98	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
  99	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 100	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 101	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 102	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 103	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 104	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 105	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 106	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 107	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 108	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 109	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 110	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 111	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 112	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 113	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 114	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 115	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 116	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 117	{ 0x00, 0, 0}
 118};
 119
 120
 121enum _cache_type {
 122	CTYPE_NULL = 0,
 123	CTYPE_DATA = 1,
 124	CTYPE_INST = 2,
 125	CTYPE_UNIFIED = 3
 126};
 127
 128union _cpuid4_leaf_eax {
 129	struct {
 130		enum _cache_type	type:5;
 131		unsigned int		level:3;
 132		unsigned int		is_self_initializing:1;
 133		unsigned int		is_fully_associative:1;
 134		unsigned int		reserved:4;
 135		unsigned int		num_threads_sharing:12;
 136		unsigned int		num_cores_on_die:6;
 137	} split;
 138	u32 full;
 139};
 140
 141union _cpuid4_leaf_ebx {
 142	struct {
 143		unsigned int		coherency_line_size:12;
 144		unsigned int		physical_line_partition:10;
 145		unsigned int		ways_of_associativity:10;
 146	} split;
 147	u32 full;
 148};
 149
 150union _cpuid4_leaf_ecx {
 151	struct {
 152		unsigned int		number_of_sets:32;
 153	} split;
 154	u32 full;
 155};
 156
 157struct _cpuid4_info_regs {
 158	union _cpuid4_leaf_eax eax;
 159	union _cpuid4_leaf_ebx ebx;
 160	union _cpuid4_leaf_ecx ecx;
 161	unsigned int id;
 162	unsigned long size;
 163	struct amd_northbridge *nb;
 164};
 165
 166static unsigned short num_cache_leaves;
 167
 168/* AMD doesn't have CPUID4. Emulate it here to report the same
 169   information to the user.  This makes some assumptions about the machine:
 170   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 171
 172   In theory the TLBs could be reported as fake type (they are in "dummy").
 173   Maybe later */
 174union l1_cache {
 175	struct {
 176		unsigned line_size:8;
 177		unsigned lines_per_tag:8;
 178		unsigned assoc:8;
 179		unsigned size_in_kb:8;
 180	};
 181	unsigned val;
 182};
 183
 184union l2_cache {
 185	struct {
 186		unsigned line_size:8;
 187		unsigned lines_per_tag:4;
 188		unsigned assoc:4;
 189		unsigned size_in_kb:16;
 190	};
 191	unsigned val;
 192};
 193
 194union l3_cache {
 195	struct {
 196		unsigned line_size:8;
 197		unsigned lines_per_tag:4;
 198		unsigned assoc:4;
 199		unsigned res:2;
 200		unsigned size_encoded:14;
 201	};
 202	unsigned val;
 203};
 204
 205static const unsigned short assocs[] = {
 206	[1] = 1,
 207	[2] = 2,
 208	[4] = 4,
 209	[6] = 8,
 210	[8] = 16,
 211	[0xa] = 32,
 212	[0xb] = 48,
 213	[0xc] = 64,
 214	[0xd] = 96,
 215	[0xe] = 128,
 216	[0xf] = 0xffff /* fully associative - no way to show this currently */
 217};
 218
 219static const unsigned char levels[] = { 1, 1, 2, 3 };
 220static const unsigned char types[] = { 1, 2, 3, 3 };
 221
 222static const enum cache_type cache_type_map[] = {
 223	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
 224	[CTYPE_DATA] = CACHE_TYPE_DATA,
 225	[CTYPE_INST] = CACHE_TYPE_INST,
 226	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
 227};
 228
 229static void
 230amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 231		     union _cpuid4_leaf_ebx *ebx,
 232		     union _cpuid4_leaf_ecx *ecx)
 233{
 234	unsigned dummy;
 235	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 236	union l1_cache l1i, l1d;
 237	union l2_cache l2;
 238	union l3_cache l3;
 239	union l1_cache *l1 = &l1d;
 240
 241	eax->full = 0;
 242	ebx->full = 0;
 243	ecx->full = 0;
 244
 245	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 246	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 247
 248	switch (leaf) {
 249	case 1:
 250		l1 = &l1i;
 251		/* fall through */
 252	case 0:
 253		if (!l1->val)
 254			return;
 255		assoc = assocs[l1->assoc];
 256		line_size = l1->line_size;
 257		lines_per_tag = l1->lines_per_tag;
 258		size_in_kb = l1->size_in_kb;
 259		break;
 260	case 2:
 261		if (!l2.val)
 262			return;
 263		assoc = assocs[l2.assoc];
 264		line_size = l2.line_size;
 265		lines_per_tag = l2.lines_per_tag;
 266		/* cpu_data has errata corrections for K7 applied */
 267		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 268		break;
 269	case 3:
 270		if (!l3.val)
 271			return;
 272		assoc = assocs[l3.assoc];
 273		line_size = l3.line_size;
 274		lines_per_tag = l3.lines_per_tag;
 275		size_in_kb = l3.size_encoded * 512;
 276		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 277			size_in_kb = size_in_kb >> 1;
 278			assoc = assoc >> 1;
 279		}
 280		break;
 281	default:
 282		return;
 283	}
 284
 285	eax->split.is_self_initializing = 1;
 286	eax->split.type = types[leaf];
 287	eax->split.level = levels[leaf];
 288	eax->split.num_threads_sharing = 0;
 289	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 290
 291
 292	if (assoc == 0xffff)
 293		eax->split.is_fully_associative = 1;
 294	ebx->split.coherency_line_size = line_size - 1;
 295	ebx->split.ways_of_associativity = assoc - 1;
 296	ebx->split.physical_line_partition = lines_per_tag - 1;
 297	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 298		(ebx->split.ways_of_associativity + 1) - 1;
 299}
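/*
 * Worked example of the emulation above, using illustrative numbers rather
 * than any particular CPU: a 512 KB, 16-way L2 with 64-byte lines gives
 *   number_of_sets = (512 * 1024) / 64 / 16 - 1 = 511,
 * i.e. 512 sets, which is what a native CPUID leaf 4 would report in ECX.
 */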
 300
 301#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 302
 303/*
 304 * L3 cache descriptors
 305 */
 306static void amd_calc_l3_indices(struct amd_northbridge *nb)
 307{
 308	struct amd_l3_cache *l3 = &nb->l3_cache;
 309	unsigned int sc0, sc1, sc2, sc3;
 310	u32 val = 0;
 311
 312	pci_read_config_dword(nb->misc, 0x1C4, &val);
 313
 314	/* calculate subcache sizes */
 315	l3->subcaches[0] = sc0 = !(val & BIT(0));
 316	l3->subcaches[1] = sc1 = !(val & BIT(4));
 317
 318	if (boot_cpu_data.x86 == 0x15) {
 319		l3->subcaches[0] = sc0 += !(val & BIT(1));
 320		l3->subcaches[1] = sc1 += !(val & BIT(5));
 321	}
 322
 323	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 324	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 325
 326	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 327}
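/*
 * Example of the index calculation above, assuming none of the subcache
 * disable bits in the 0x1C4 register are set: sc2 and sc3 each evaluate
 * to 2, so l3->indices = (2 << 10) - 1 = 2047.
 */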
 328
 329/*
 330 * check whether a slot used for disabling an L3 index is occupied.
 331 * @l3: L3 cache descriptor
 332 * @slot: slot number (0..1)
 333 *
 334 * @returns: the disabled index if used or negative value if slot free.
 335 */
 336static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 337{
 338	unsigned int reg = 0;
 339
 340	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 341
 342	/* check whether this slot is activated already */
 343	if (reg & (3UL << 30))
 344		return reg & 0xfff;
 345
 346	return -1;
 347}
 348
 349static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 350				  unsigned int slot)
 351{
 352	int index;
 353	struct amd_northbridge *nb = this_leaf->priv;
 354
 355	index = amd_get_l3_disable_slot(nb, slot);
 356	if (index >= 0)
 357		return sprintf(buf, "%d\n", index);
 358
 359	return sprintf(buf, "FREE\n");
 360}
 361
 362#define SHOW_CACHE_DISABLE(slot)					\
 363static ssize_t								\
 364cache_disable_##slot##_show(struct device *dev,				\
 365			    struct device_attribute *attr, char *buf)	\
 366{									\
 367	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 368	return show_cache_disable(this_leaf, buf, slot);		\
 369}
 370SHOW_CACHE_DISABLE(0)
 371SHOW_CACHE_DISABLE(1)
 372
 373static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 374				 unsigned slot, unsigned long idx)
 375{
 376	int i;
 377
 378	idx |= BIT(30);
 379
 380	/*
 381	 *  disable index in all 4 subcaches
 382	 */
 383	for (i = 0; i < 4; i++) {
 384		u32 reg = idx | (i << 20);
 385
 386		if (!nb->l3_cache.subcaches[i])
 387			continue;
 388
 389		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 390
 391		/*
 392		 * We need to WBINVD on a core on the node containing the L3
  393	 * cache whose indices we disable, so a simple wbinvd()
 394		 * is not sufficient.
 395		 */
 396		wbinvd_on_cpu(cpu);
 397
 398		reg |= BIT(31);
 399		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 400	}
 401}
 402
 403/*
 404 * disable a L3 cache index by using a disable-slot
 405 *
 406 * @l3:    L3 cache descriptor
 407 * @cpu:   A CPU on the node containing the L3 cache
 408 * @slot:  slot number (0..1)
 409 * @index: index to disable
 410 *
 411 * @return: 0 on success, error status on failure
 412 */
 413static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
 414			    unsigned slot, unsigned long index)
 415{
 416	int ret = 0;
 417
 418	/*  check if @slot is already used or the index is already disabled */
 419	ret = amd_get_l3_disable_slot(nb, slot);
 420	if (ret >= 0)
 421		return -EEXIST;
 422
 423	if (index > nb->l3_cache.indices)
 424		return -EINVAL;
 425
 426	/* check whether the other slot has disabled the same index already */
 427	if (index == amd_get_l3_disable_slot(nb, !slot))
 428		return -EEXIST;
 429
 430	amd_l3_disable_index(nb, cpu, slot, index);
 431
 432	return 0;
 433}
 434
 435static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
 436				   const char *buf, size_t count,
 437				   unsigned int slot)
 438{
 439	unsigned long val = 0;
 440	int cpu, err = 0;
 441	struct amd_northbridge *nb = this_leaf->priv;
 442
 443	if (!capable(CAP_SYS_ADMIN))
 444		return -EPERM;
 445
 446	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 447
 448	if (kstrtoul(buf, 10, &val) < 0)
 449		return -EINVAL;
 450
 451	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 452	if (err) {
 453		if (err == -EEXIST)
 454			pr_warn("L3 slot %d in use/index already disabled!\n",
 455				   slot);
 456		return err;
 457	}
 458	return count;
 459}
 460
 461#define STORE_CACHE_DISABLE(slot)					\
 462static ssize_t								\
 463cache_disable_##slot##_store(struct device *dev,			\
 464			     struct device_attribute *attr,		\
 465			     const char *buf, size_t count)		\
 466{									\
 467	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 468	return store_cache_disable(this_leaf, buf, count, slot);	\
 469}
 470STORE_CACHE_DISABLE(0)
 471STORE_CACHE_DISABLE(1)
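/*
 * Sketch of how these attributes are exercised from user space (paths are
 * illustrative; the L3 leaf usually sits at index 3 under cacheinfo sysfs):
 *
 *   cat  /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *   echo 42 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *
 * reads back either "FREE" or the currently disabled index, and writes a
 * decimal cache index into a free slot via store_cache_disable() above.
 */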
 472
 473static ssize_t subcaches_show(struct device *dev,
 474			      struct device_attribute *attr, char *buf)
 475{
 476	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 477	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 478
 479	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 480}
 481
 482static ssize_t subcaches_store(struct device *dev,
 483			       struct device_attribute *attr,
 484			       const char *buf, size_t count)
 485{
 486	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 487	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 488	unsigned long val;
 489
 490	if (!capable(CAP_SYS_ADMIN))
 491		return -EPERM;
 492
 493	if (kstrtoul(buf, 16, &val) < 0)
 494		return -EINVAL;
 495
 496	if (amd_set_subcaches(cpu, val))
 497		return -EINVAL;
 498
 499	return count;
 500}
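/*
 * The subcaches attribute, by contrast, takes a hexadecimal mask that is
 * handed to amd_set_subcaches(); e.g. writing "f" asks for all four L3
 * subcaches (illustrative value, only meaningful when
 * AMD_NB_L3_PARTITIONING is available).
 */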
 501
 502static DEVICE_ATTR_RW(cache_disable_0);
 503static DEVICE_ATTR_RW(cache_disable_1);
 504static DEVICE_ATTR_RW(subcaches);
 505
 506static umode_t
 507cache_private_attrs_is_visible(struct kobject *kobj,
 508			       struct attribute *attr, int unused)
 509{
 510	struct device *dev = kobj_to_dev(kobj);
 511	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 512	umode_t mode = attr->mode;
 513
 514	if (!this_leaf->priv)
 515		return 0;
 516
 517	if ((attr == &dev_attr_subcaches.attr) &&
 518	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 519		return mode;
 520
 521	if ((attr == &dev_attr_cache_disable_0.attr ||
 522	     attr == &dev_attr_cache_disable_1.attr) &&
 523	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 524		return mode;
 525
 526	return 0;
 527}
 528
 529static struct attribute_group cache_private_group = {
 530	.is_visible = cache_private_attrs_is_visible,
 531};
 532
 533static void init_amd_l3_attrs(void)
 534{
 535	int n = 1;
 536	static struct attribute **amd_l3_attrs;
 537
 538	if (amd_l3_attrs) /* already initialized */
 539		return;
 540
 541	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 542		n += 2;
 543	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 544		n += 1;
 545
 546	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
 547	if (!amd_l3_attrs)
 548		return;
 549
 550	n = 0;
 551	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 552		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
 553		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
 554	}
 555	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 556		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 557
 558	cache_private_group.attrs = amd_l3_attrs;
 559}
 560
 561const struct attribute_group *
 562cache_get_priv_group(struct cacheinfo *this_leaf)
 563{
 564	struct amd_northbridge *nb = this_leaf->priv;
 565
 566	if (this_leaf->level < 3 || !nb)
 567		return NULL;
 568
 569	if (nb && nb->l3_cache.indices)
 570		init_amd_l3_attrs();
 571
 572	return &cache_private_group;
 573}
 574
 575static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 576{
 577	int node;
 578
 579	/* only for L3, and not in virtualized environments */
 580	if (index < 3)
 581		return;
 582
 583	node = amd_get_nb_id(smp_processor_id());
 584	this_leaf->nb = node_to_amd_nb(node);
 585	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 586		amd_calc_l3_indices(this_leaf->nb);
 587}
 588#else
 589#define amd_init_l3_cache(x, y)
 590#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 591
 592static int
 593cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 594{
 595	union _cpuid4_leaf_eax	eax;
 596	union _cpuid4_leaf_ebx	ebx;
 597	union _cpuid4_leaf_ecx	ecx;
 598	unsigned		edx;
 599
 600	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 601		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
 602			cpuid_count(0x8000001d, index, &eax.full,
 603				    &ebx.full, &ecx.full, &edx);
 604		else
 605			amd_cpuid4(index, &eax, &ebx, &ecx);
 606		amd_init_l3_cache(this_leaf, index);
 607	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
 608		cpuid_count(0x8000001d, index, &eax.full,
 609			    &ebx.full, &ecx.full, &edx);
 610		amd_init_l3_cache(this_leaf, index);
 611	} else {
 612		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 613	}
 614
 615	if (eax.split.type == CTYPE_NULL)
 616		return -EIO; /* better error ? */
 617
 618	this_leaf->eax = eax;
 619	this_leaf->ebx = ebx;
 620	this_leaf->ecx = ecx;
 621	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 622			  (ebx.split.coherency_line_size     + 1) *
 623			  (ebx.split.physical_line_partition + 1) *
 624			  (ebx.split.ways_of_associativity   + 1);
 625	return 0;
 626}
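/*
 * Worked example of the size computation above, with illustrative leaf
 * values: a 32 KB, 8-way cache with 64-byte lines and no line partitioning
 * reports number_of_sets = 63, coherency_line_size = 63,
 * physical_line_partition = 0 and ways_of_associativity = 7, so
 *   size = 64 * 64 * 1 * 8 = 32768 bytes.
 */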
 627
 628static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 629{
 630	unsigned int		eax, ebx, ecx, edx, op;
 631	union _cpuid4_leaf_eax	cache_eax;
 632	int 			i = -1;
 633
 634	if (c->x86_vendor == X86_VENDOR_AMD ||
 635	    c->x86_vendor == X86_VENDOR_HYGON)
 636		op = 0x8000001d;
 637	else
 638		op = 4;
 639
 640	do {
 641		++i;
 642		/* Do cpuid(op) loop to find out num_cache_leaves */
 643		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 644		cache_eax.full = eax;
 645	} while (cache_eax.split.type != CTYPE_NULL);
 646	return i;
 647}
 648
 649void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 650{
 651	/*
 652	 * We may have multiple LLCs if L3 caches exist, so check if we
 653	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 654	 */
 655	if (!cpuid_edx(0x80000006))
 656		return;
 657
 658	if (c->x86 < 0x17) {
 659		/* LLC is at the node level. */
 660		per_cpu(cpu_llc_id, cpu) = node_id;
 661	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
 662		/*
 663		 * LLC is at the core complex level.
 664		 * Core complex ID is ApicId[3] for these processors.
 665		 */
 666		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 667	} else {
 668		/*
 669		 * LLC ID is calculated from the number of threads sharing the
 670		 * cache.
  671		 */
 672		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
 673		u32 llc_index = find_num_cache_leaves(c) - 1;
 674
 675		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
 676		if (eax)
 677			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 678
 679		if (num_sharing_cache) {
 680			int bits = get_count_order(num_sharing_cache);
 681
 682			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
 683		}
 684	}
 685}
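/*
 * Example for the last branch above, with an illustrative topology: if
 * CPUID 0x8000001d reports 16 threads sharing the last-level cache,
 * get_count_order(16) is 4, so every CPU whose APIC ID differs only in the
 * low 4 bits ends up with the same cpu_llc_id.
 */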
 686
 687void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 688{
 689	/*
 690	 * We may have multiple LLCs if L3 caches exist, so check if we
 691	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 692	 */
 693	if (!cpuid_edx(0x80000006))
 694		return;
 695
 696	/*
 697	 * LLC is at the core complex level.
 698	 * Core complex ID is ApicId[3] for these processors.
 699	 */
 700	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 701}
 702
 703void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 704{
 705
 706	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 707		num_cache_leaves = find_num_cache_leaves(c);
 708	} else if (c->extended_cpuid_level >= 0x80000006) {
 709		if (cpuid_edx(0x80000006) & 0xf000)
 710			num_cache_leaves = 4;
 711		else
 712			num_cache_leaves = 3;
 713	}
 714}
 715
 716void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 717{
 718	num_cache_leaves = find_num_cache_leaves(c);
 719}
 720
 721void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 722{
 723	/* Cache sizes */
 724	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 725	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 726	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 727	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 728#ifdef CONFIG_SMP
 729	unsigned int cpu = c->cpu_index;
 730#endif
 731
 732	if (c->cpuid_level > 3) {
 733		static int is_initialized;
 734
 735		if (is_initialized == 0) {
 736			/* Init num_cache_leaves from boot CPU */
 737			num_cache_leaves = find_num_cache_leaves(c);
 738			is_initialized++;
 739		}
 740
 741		/*
 742		 * Whenever possible use cpuid(4), deterministic cache
 743		 * parameters cpuid leaf to find the cache details
 744		 */
 745		for (i = 0; i < num_cache_leaves; i++) {
 746			struct _cpuid4_info_regs this_leaf = {};
 747			int retval;
 748
 749			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 750			if (retval < 0)
 751				continue;
 752
 753			switch (this_leaf.eax.split.level) {
 754			case 1:
 755				if (this_leaf.eax.split.type == CTYPE_DATA)
 756					new_l1d = this_leaf.size/1024;
 757				else if (this_leaf.eax.split.type == CTYPE_INST)
 758					new_l1i = this_leaf.size/1024;
 759				break;
 760			case 2:
 761				new_l2 = this_leaf.size/1024;
 762				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 763				index_msb = get_count_order(num_threads_sharing);
 764				l2_id = c->apicid & ~((1 << index_msb) - 1);
 765				break;
 766			case 3:
 767				new_l3 = this_leaf.size/1024;
 768				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 769				index_msb = get_count_order(num_threads_sharing);
 770				l3_id = c->apicid & ~((1 << index_msb) - 1);
 771				break;
 772			default:
 773				break;
 774			}
 775		}
 776	}
 777	/*
 778	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 779	 * trace cache
 780	 */
 781	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 782		/* supports eax=2  call */
 783		int j, n;
 784		unsigned int regs[4];
 785		unsigned char *dp = (unsigned char *)regs;
 786		int only_trace = 0;
 787
 788		if (num_cache_leaves != 0 && c->x86 == 15)
 789			only_trace = 1;
 790
 791		/* Number of times to iterate */
 792		n = cpuid_eax(2) & 0xFF;
 793
 794		for (i = 0 ; i < n ; i++) {
 795			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 796
 797			/* If bit 31 is set, this is an unknown format */
 798			for (j = 0 ; j < 3 ; j++)
 799				if (regs[j] & (1 << 31))
 800					regs[j] = 0;
 801
 802			/* Byte 0 is level count, not a descriptor */
 803			for (j = 1 ; j < 16 ; j++) {
 804				unsigned char des = dp[j];
 805				unsigned char k = 0;
 806
 807				/* look up this descriptor in the table */
 808				while (cache_table[k].descriptor != 0) {
 809					if (cache_table[k].descriptor == des) {
 810						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 811							break;
 812						switch (cache_table[k].cache_type) {
 813						case LVL_1_INST:
 814							l1i += cache_table[k].size;
 815							break;
 816						case LVL_1_DATA:
 817							l1d += cache_table[k].size;
 818							break;
 819						case LVL_2:
 820							l2 += cache_table[k].size;
 821							break;
 822						case LVL_3:
 823							l3 += cache_table[k].size;
 824							break;
 825						case LVL_TRACE:
 826							trace += cache_table[k].size;
 827							break;
 828						}
 829
 830						break;
 831					}
 832
 833					k++;
 834				}
 835			}
 836		}
 837	}
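	/*
	 * Example of the descriptor walk above: if one of the CPUID(2) bytes
	 * is 0x43, the cache_table[] lookup classifies it as a 512 KB L2 and
	 * adds it to l2; descriptors not present in the table are ignored.
	 */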
 838
 839	if (new_l1d)
 840		l1d = new_l1d;
 841
 842	if (new_l1i)
 843		l1i = new_l1i;
 844
 845	if (new_l2) {
 846		l2 = new_l2;
 847#ifdef CONFIG_SMP
 848		per_cpu(cpu_llc_id, cpu) = l2_id;
 849#endif
 850	}
 851
 852	if (new_l3) {
 853		l3 = new_l3;
 854#ifdef CONFIG_SMP
 855		per_cpu(cpu_llc_id, cpu) = l3_id;
 856#endif
 857	}
 858
 859#ifdef CONFIG_SMP
 860	/*
 861	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
 862	 * turns means that the only possibility is SMT (as indicated in
 863	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
 864	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
 865	 * c->phys_proc_id.
 866	 */
 867	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
 868		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 869#endif
 870
 871	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 872
 873	if (!l2)
 874		cpu_detect_cache_sizes(c);
 875}
 876
 877static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 878				    struct _cpuid4_info_regs *base)
 879{
 880	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 881	struct cacheinfo *this_leaf;
 882	int i, sibling;
 883
 884	/*
 885	 * For L3, always use the pre-calculated cpu_llc_shared_mask
 886	 * to derive shared_cpu_map.
 887	 */
 888	if (index == 3) {
 889		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 890			this_cpu_ci = get_cpu_cacheinfo(i);
 891			if (!this_cpu_ci->info_list)
 892				continue;
 893			this_leaf = this_cpu_ci->info_list + index;
 894			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 895				if (!cpu_online(sibling))
 896					continue;
 897				cpumask_set_cpu(sibling,
 898						&this_leaf->shared_cpu_map);
 899			}
 900		}
 901	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 902		unsigned int apicid, nshared, first, last;
 903
 904		nshared = base->eax.split.num_threads_sharing + 1;
 905		apicid = cpu_data(cpu).apicid;
 906		first = apicid - (apicid % nshared);
 907		last = first + nshared - 1;
 908
 909		for_each_online_cpu(i) {
 910			this_cpu_ci = get_cpu_cacheinfo(i);
 911			if (!this_cpu_ci->info_list)
 912				continue;
 913
 914			apicid = cpu_data(i).apicid;
 915			if ((apicid < first) || (apicid > last))
 916				continue;
 917
 918			this_leaf = this_cpu_ci->info_list + index;
 919
 920			for_each_online_cpu(sibling) {
 921				apicid = cpu_data(sibling).apicid;
 922				if ((apicid < first) || (apicid > last))
 923					continue;
 924				cpumask_set_cpu(sibling,
 925						&this_leaf->shared_cpu_map);
 926			}
 927		}
 928	} else
 929		return 0;
 930
 931	return 1;
 932}
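/*
 * Example for the TOPOEXT path above, with illustrative APIC IDs: if the
 * leaf reports 4 threads sharing the cache and the current CPU's APIC ID
 * is 5, then nshared = 4, first = 5 - (5 % 4) = 4 and last = 7, so APIC
 * IDs 4..7 are marked in shared_cpu_map.
 */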
 933
 934static void __cache_cpumap_setup(unsigned int cpu, int index,
 935				 struct _cpuid4_info_regs *base)
 936{
 937	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 938	struct cacheinfo *this_leaf, *sibling_leaf;
 939	unsigned long num_threads_sharing;
 940	int index_msb, i;
 941	struct cpuinfo_x86 *c = &cpu_data(cpu);
 942
 943	if (c->x86_vendor == X86_VENDOR_AMD ||
 944	    c->x86_vendor == X86_VENDOR_HYGON) {
 945		if (__cache_amd_cpumap_setup(cpu, index, base))
 946			return;
 947	}
 948
 949	this_leaf = this_cpu_ci->info_list + index;
 950	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 951
 952	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 953	if (num_threads_sharing == 1)
 954		return;
 955
 956	index_msb = get_count_order(num_threads_sharing);
 957
 958	for_each_online_cpu(i)
 959		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
 960			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 961
 962			if (i == cpu || !sib_cpu_ci->info_list)
 963				continue;/* skip if itself or no cacheinfo */
 964			sibling_leaf = sib_cpu_ci->info_list + index;
 965			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 966			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 967		}
 968}
 969
 970static void ci_leaf_init(struct cacheinfo *this_leaf,
 971			 struct _cpuid4_info_regs *base)
 972{
 973	this_leaf->id = base->id;
 974	this_leaf->attributes = CACHE_ID;
 975	this_leaf->level = base->eax.split.level;
 976	this_leaf->type = cache_type_map[base->eax.split.type];
 977	this_leaf->coherency_line_size =
 978				base->ebx.split.coherency_line_size + 1;
 979	this_leaf->ways_of_associativity =
 980				base->ebx.split.ways_of_associativity + 1;
 981	this_leaf->size = base->size;
 982	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
 983	this_leaf->physical_line_partition =
 984				base->ebx.split.physical_line_partition + 1;
 985	this_leaf->priv = base->nb;
 986}
 987
 988static int __init_cache_level(unsigned int cpu)
 989{
 990	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 991
 992	if (!num_cache_leaves)
 993		return -ENOENT;
 994	if (!this_cpu_ci)
 995		return -EINVAL;
 996	this_cpu_ci->num_levels = 3;
 997	this_cpu_ci->num_leaves = num_cache_leaves;
 998	return 0;
 999}
1000
1001/*
1002 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1003 * ECX as cache index. Then right shift apicid by the number's order to get
1004 * cache id for this cache node.
1005 */
1006static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1007{
1008	struct cpuinfo_x86 *c = &cpu_data(cpu);
1009	unsigned long num_threads_sharing;
1010	int index_msb;
1011
1012	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1013	index_msb = get_count_order(num_threads_sharing);
1014	id4_regs->id = c->apicid >> index_msb;
1015}
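/*
 * Example of the id derivation above: a leaf shared only by the two SMT
 * siblings of a core has num_threads_sharing = 2, get_count_order(2) = 1,
 * so both siblings compute the same id = apicid >> 1.
 */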
1016
1017static int __populate_cache_leaves(unsigned int cpu)
1018{
1019	unsigned int idx, ret;
1020	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1021	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1022	struct _cpuid4_info_regs id4_regs = {};
1023
1024	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1025		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1026		if (ret)
1027			return ret;
1028		get_cache_id(cpu, &id4_regs);
1029		ci_leaf_init(this_leaf++, &id4_regs);
1030		__cache_cpumap_setup(cpu, idx, &id4_regs);
1031	}
1032	this_cpu_ci->cpu_map_populated = true;
1033
1034	return 0;
1035}
1036
1037DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
1038DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *	Routines to identify caches on Intel CPU.
   4 *
   5 *	Changes:
   6 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   7 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   8 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   9 */
  10
  11#include <linux/slab.h>
  12#include <linux/cacheinfo.h>
  13#include <linux/cpu.h>
  14#include <linux/cpuhotplug.h>
  15#include <linux/sched.h>
  16#include <linux/capability.h>
  17#include <linux/sysfs.h>
  18#include <linux/pci.h>
  19#include <linux/stop_machine.h>
  20
  21#include <asm/cpufeature.h>
  22#include <asm/cacheinfo.h>
  23#include <asm/amd_nb.h>
  24#include <asm/smp.h>
  25#include <asm/mtrr.h>
  26#include <asm/tlbflush.h>
  27
  28#include "cpu.h"
  29
  30#define LVL_1_INST	1
  31#define LVL_1_DATA	2
  32#define LVL_2		3
  33#define LVL_3		4
  34#define LVL_TRACE	5
  35
  36/* Shared last level cache maps */
  37DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
  38
  39/* Shared L2 cache maps */
  40DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
  41
  42/* Kernel controls MTRR and/or PAT MSRs. */
  43unsigned int memory_caching_control __ro_after_init;
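/*
 * The two per-CPU masks above back cpu_llc_shared_mask() and
 * cpu_l2c_shared_mask(), and memory_caching_control is the flag word the
 * MTRR/PAT code consults to decide whether the kernel programs those MSRs
 * (illustrative summary; see the MTRR/PAT code for the actual users).
 */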
  44
  45struct _cache_table {
  46	unsigned char descriptor;
  47	char cache_type;
  48	short size;
  49};
  50
  51#define MB(x)	((x) * 1024)
  52
  53/* All the cache descriptor types we care about (no TLB or
  54   trace cache entries) */
  55
  56static const struct _cache_table cache_table[] =
  57{
  58	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  59	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  60	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  61	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  62	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  63	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  64	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  65	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  66	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  67	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  68	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  69	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  70	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  71	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  72	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  73	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  74	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  75	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  76	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  77	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  79	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  80	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  81	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  82	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  83	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  84	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  85	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  86	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  87	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  88	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  89	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  90	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  91	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  92	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  93	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  94	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  95	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  96	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  97	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
  98	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
  99	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 100	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 101	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 102	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 103	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 104	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 105	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 106	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 107	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 108	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 109	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 110	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 111	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 112	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 113	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 114	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 115	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 116	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 117	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 118	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 119	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 120	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 121	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 122	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 123	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 124	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 125	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 126	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 127	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 128	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 129	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 130	{ 0x00, 0, 0}
 131};
 132
 133
 134enum _cache_type {
 135	CTYPE_NULL = 0,
 136	CTYPE_DATA = 1,
 137	CTYPE_INST = 2,
 138	CTYPE_UNIFIED = 3
 139};
 140
 141union _cpuid4_leaf_eax {
 142	struct {
 143		enum _cache_type	type:5;
 144		unsigned int		level:3;
 145		unsigned int		is_self_initializing:1;
 146		unsigned int		is_fully_associative:1;
 147		unsigned int		reserved:4;
 148		unsigned int		num_threads_sharing:12;
 149		unsigned int		num_cores_on_die:6;
 150	} split;
 151	u32 full;
 152};
 153
 154union _cpuid4_leaf_ebx {
 155	struct {
 156		unsigned int		coherency_line_size:12;
 157		unsigned int		physical_line_partition:10;
 158		unsigned int		ways_of_associativity:10;
 159	} split;
 160	u32 full;
 161};
 162
 163union _cpuid4_leaf_ecx {
 164	struct {
 165		unsigned int		number_of_sets:32;
 166	} split;
 167	u32 full;
 168};
 169
 170struct _cpuid4_info_regs {
 171	union _cpuid4_leaf_eax eax;
 172	union _cpuid4_leaf_ebx ebx;
 173	union _cpuid4_leaf_ecx ecx;
 174	unsigned int id;
 175	unsigned long size;
 176	struct amd_northbridge *nb;
 177};
 178
 179static unsigned short num_cache_leaves;
 180
 181/* AMD doesn't have CPUID4. Emulate it here to report the same
 182   information to the user.  This makes some assumptions about the machine:
 183   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 184
 185   In theory the TLBs could be reported as fake type (they are in "dummy").
 186   Maybe later */
 187union l1_cache {
 188	struct {
 189		unsigned line_size:8;
 190		unsigned lines_per_tag:8;
 191		unsigned assoc:8;
 192		unsigned size_in_kb:8;
 193	};
 194	unsigned val;
 195};
 196
 197union l2_cache {
 198	struct {
 199		unsigned line_size:8;
 200		unsigned lines_per_tag:4;
 201		unsigned assoc:4;
 202		unsigned size_in_kb:16;
 203	};
 204	unsigned val;
 205};
 206
 207union l3_cache {
 208	struct {
 209		unsigned line_size:8;
 210		unsigned lines_per_tag:4;
 211		unsigned assoc:4;
 212		unsigned res:2;
 213		unsigned size_encoded:14;
 214	};
 215	unsigned val;
 216};
 217
 218static const unsigned short assocs[] = {
 219	[1] = 1,
 220	[2] = 2,
 221	[4] = 4,
 222	[6] = 8,
 223	[8] = 16,
 224	[0xa] = 32,
 225	[0xb] = 48,
 226	[0xc] = 64,
 227	[0xd] = 96,
 228	[0xe] = 128,
 229	[0xf] = 0xffff /* fully associative - no way to show this currently */
 230};
 231
 232static const unsigned char levels[] = { 1, 1, 2, 3 };
 233static const unsigned char types[] = { 1, 2, 3, 3 };
 234
 235static const enum cache_type cache_type_map[] = {
 236	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
 237	[CTYPE_DATA] = CACHE_TYPE_DATA,
 238	[CTYPE_INST] = CACHE_TYPE_INST,
 239	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
 240};
 241
 242static void
 243amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 244		     union _cpuid4_leaf_ebx *ebx,
 245		     union _cpuid4_leaf_ecx *ecx)
 246{
 247	unsigned dummy;
 248	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 249	union l1_cache l1i, l1d;
 250	union l2_cache l2;
 251	union l3_cache l3;
 252	union l1_cache *l1 = &l1d;
 253
 254	eax->full = 0;
 255	ebx->full = 0;
 256	ecx->full = 0;
 257
 258	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 259	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 260
 261	switch (leaf) {
 262	case 1:
 263		l1 = &l1i;
 264		fallthrough;
 265	case 0:
 266		if (!l1->val)
 267			return;
 268		assoc = assocs[l1->assoc];
 269		line_size = l1->line_size;
 270		lines_per_tag = l1->lines_per_tag;
 271		size_in_kb = l1->size_in_kb;
 272		break;
 273	case 2:
 274		if (!l2.val)
 275			return;
 276		assoc = assocs[l2.assoc];
 277		line_size = l2.line_size;
 278		lines_per_tag = l2.lines_per_tag;
 279		/* cpu_data has errata corrections for K7 applied */
 280		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 281		break;
 282	case 3:
 283		if (!l3.val)
 284			return;
 285		assoc = assocs[l3.assoc];
 286		line_size = l3.line_size;
 287		lines_per_tag = l3.lines_per_tag;
 288		size_in_kb = l3.size_encoded * 512;
 289		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 290			size_in_kb = size_in_kb >> 1;
 291			assoc = assoc >> 1;
 292		}
 293		break;
 294	default:
 295		return;
 296	}
 297
 298	eax->split.is_self_initializing = 1;
 299	eax->split.type = types[leaf];
 300	eax->split.level = levels[leaf];
 301	eax->split.num_threads_sharing = 0;
 302	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 303
 304
 305	if (assoc == 0xffff)
 306		eax->split.is_fully_associative = 1;
 307	ebx->split.coherency_line_size = line_size - 1;
 308	ebx->split.ways_of_associativity = assoc - 1;
 309	ebx->split.physical_line_partition = lines_per_tag - 1;
 310	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 311		(ebx->split.ways_of_associativity + 1) - 1;
 312}
 313
 314#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 315
 316/*
 317 * L3 cache descriptors
 318 */
 319static void amd_calc_l3_indices(struct amd_northbridge *nb)
 320{
 321	struct amd_l3_cache *l3 = &nb->l3_cache;
 322	unsigned int sc0, sc1, sc2, sc3;
 323	u32 val = 0;
 324
 325	pci_read_config_dword(nb->misc, 0x1C4, &val);
 326
 327	/* calculate subcache sizes */
 328	l3->subcaches[0] = sc0 = !(val & BIT(0));
 329	l3->subcaches[1] = sc1 = !(val & BIT(4));
 330
 331	if (boot_cpu_data.x86 == 0x15) {
 332		l3->subcaches[0] = sc0 += !(val & BIT(1));
 333		l3->subcaches[1] = sc1 += !(val & BIT(5));
 334	}
 335
 336	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 337	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 338
 339	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 340}
 341
 342/*
 343 * check whether a slot used for disabling an L3 index is occupied.
 344 * @l3: L3 cache descriptor
 345 * @slot: slot number (0..1)
 346 *
 347 * @returns: the disabled index if used or negative value if slot free.
 348 */
 349static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 350{
 351	unsigned int reg = 0;
 352
 353	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 354
 355	/* check whether this slot is activated already */
 356	if (reg & (3UL << 30))
 357		return reg & 0xfff;
 358
 359	return -1;
 360}
 361
 362static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
 363				  unsigned int slot)
 364{
 365	int index;
 366	struct amd_northbridge *nb = this_leaf->priv;
 367
 368	index = amd_get_l3_disable_slot(nb, slot);
 369	if (index >= 0)
 370		return sprintf(buf, "%d\n", index);
 371
 372	return sprintf(buf, "FREE\n");
 373}
 374
 375#define SHOW_CACHE_DISABLE(slot)					\
 376static ssize_t								\
 377cache_disable_##slot##_show(struct device *dev,				\
 378			    struct device_attribute *attr, char *buf)	\
 379{									\
 380	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 381	return show_cache_disable(this_leaf, buf, slot);		\
 382}
 383SHOW_CACHE_DISABLE(0)
 384SHOW_CACHE_DISABLE(1)
 385
 386static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 387				 unsigned slot, unsigned long idx)
 388{
 389	int i;
 390
 391	idx |= BIT(30);
 392
 393	/*
 394	 *  disable index in all 4 subcaches
 395	 */
 396	for (i = 0; i < 4; i++) {
 397		u32 reg = idx | (i << 20);
 398
 399		if (!nb->l3_cache.subcaches[i])
 400			continue;
 401
 402		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 403
 404		/*
 405		 * We need to WBINVD on a core on the node containing the L3
  406	 * cache whose indices we disable, so a simple wbinvd()
 407		 * is not sufficient.
 408		 */
 409		wbinvd_on_cpu(cpu);
 410
 411		reg |= BIT(31);
 412		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 413	}
 414}
 415
 416/*
 417 * disable a L3 cache index by using a disable-slot
 418 *
 419 * @l3:    L3 cache descriptor
 420 * @cpu:   A CPU on the node containing the L3 cache
 421 * @slot:  slot number (0..1)
 422 * @index: index to disable
 423 *
 424 * @return: 0 on success, error status on failure
 425 */
 426static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
 427			    unsigned slot, unsigned long index)
 428{
 429	int ret = 0;
 430
 431	/*  check if @slot is already used or the index is already disabled */
 432	ret = amd_get_l3_disable_slot(nb, slot);
 433	if (ret >= 0)
 434		return -EEXIST;
 435
 436	if (index > nb->l3_cache.indices)
 437		return -EINVAL;
 438
 439	/* check whether the other slot has disabled the same index already */
 440	if (index == amd_get_l3_disable_slot(nb, !slot))
 441		return -EEXIST;
 442
 443	amd_l3_disable_index(nb, cpu, slot, index);
 444
 445	return 0;
 446}
 447
 448static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
 449				   const char *buf, size_t count,
 450				   unsigned int slot)
 451{
 452	unsigned long val = 0;
 453	int cpu, err = 0;
 454	struct amd_northbridge *nb = this_leaf->priv;
 455
 456	if (!capable(CAP_SYS_ADMIN))
 457		return -EPERM;
 458
 459	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 460
 461	if (kstrtoul(buf, 10, &val) < 0)
 462		return -EINVAL;
 463
 464	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 465	if (err) {
 466		if (err == -EEXIST)
 467			pr_warn("L3 slot %d in use/index already disabled!\n",
 468				   slot);
 469		return err;
 470	}
 471	return count;
 472}
 473
 474#define STORE_CACHE_DISABLE(slot)					\
 475static ssize_t								\
 476cache_disable_##slot##_store(struct device *dev,			\
 477			     struct device_attribute *attr,		\
 478			     const char *buf, size_t count)		\
 479{									\
 480	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
 481	return store_cache_disable(this_leaf, buf, count, slot);	\
 482}
 483STORE_CACHE_DISABLE(0)
 484STORE_CACHE_DISABLE(1)
 485
 486static ssize_t subcaches_show(struct device *dev,
 487			      struct device_attribute *attr, char *buf)
 488{
 489	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 490	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 491
 492	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 493}
 494
 495static ssize_t subcaches_store(struct device *dev,
 496			       struct device_attribute *attr,
 497			       const char *buf, size_t count)
 498{
 499	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 500	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 501	unsigned long val;
 502
 503	if (!capable(CAP_SYS_ADMIN))
 504		return -EPERM;
 505
 506	if (kstrtoul(buf, 16, &val) < 0)
 507		return -EINVAL;
 508
 509	if (amd_set_subcaches(cpu, val))
 510		return -EINVAL;
 511
 512	return count;
 513}
 514
 515static DEVICE_ATTR_RW(cache_disable_0);
 516static DEVICE_ATTR_RW(cache_disable_1);
 517static DEVICE_ATTR_RW(subcaches);
 518
 519static umode_t
 520cache_private_attrs_is_visible(struct kobject *kobj,
 521			       struct attribute *attr, int unused)
 522{
 523	struct device *dev = kobj_to_dev(kobj);
 524	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
 525	umode_t mode = attr->mode;
 526
 527	if (!this_leaf->priv)
 528		return 0;
 529
 530	if ((attr == &dev_attr_subcaches.attr) &&
 531	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 532		return mode;
 533
 534	if ((attr == &dev_attr_cache_disable_0.attr ||
 535	     attr == &dev_attr_cache_disable_1.attr) &&
 536	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 537		return mode;
 538
 539	return 0;
 540}
 541
 542static struct attribute_group cache_private_group = {
 543	.is_visible = cache_private_attrs_is_visible,
 544};
 545
 546static void init_amd_l3_attrs(void)
 547{
 548	int n = 1;
 549	static struct attribute **amd_l3_attrs;
 550
 551	if (amd_l3_attrs) /* already initialized */
 552		return;
 553
 554	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 555		n += 2;
 556	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 557		n += 1;
 558
 559	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
 560	if (!amd_l3_attrs)
 561		return;
 562
 563	n = 0;
 564	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 565		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
 566		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
 567	}
 568	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 569		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 570
 571	cache_private_group.attrs = amd_l3_attrs;
 572}
 573
 574const struct attribute_group *
 575cache_get_priv_group(struct cacheinfo *this_leaf)
 576{
 577	struct amd_northbridge *nb = this_leaf->priv;
 578
 579	if (this_leaf->level < 3 || !nb)
 580		return NULL;
 581
 582	if (nb && nb->l3_cache.indices)
 583		init_amd_l3_attrs();
 584
 585	return &cache_private_group;
 586}
 587
 588static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 589{
 590	int node;
 591
 592	/* only for L3, and not in virtualized environments */
 593	if (index < 3)
 594		return;
 595
 596	node = topology_die_id(smp_processor_id());
 597	this_leaf->nb = node_to_amd_nb(node);
 598	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 599		amd_calc_l3_indices(this_leaf->nb);
 600}
 601#else
 602#define amd_init_l3_cache(x, y)
 603#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 604
 605static int
 606cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 607{
 608	union _cpuid4_leaf_eax	eax;
 609	union _cpuid4_leaf_ebx	ebx;
 610	union _cpuid4_leaf_ecx	ecx;
 611	unsigned		edx;
 612
 613	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 614		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
 615			cpuid_count(0x8000001d, index, &eax.full,
 616				    &ebx.full, &ecx.full, &edx);
 617		else
 618			amd_cpuid4(index, &eax, &ebx, &ecx);
 619		amd_init_l3_cache(this_leaf, index);
 620	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
 621		cpuid_count(0x8000001d, index, &eax.full,
 622			    &ebx.full, &ecx.full, &edx);
 623		amd_init_l3_cache(this_leaf, index);
 624	} else {
 625		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 626	}
 627
 628	if (eax.split.type == CTYPE_NULL)
 629		return -EIO; /* better error ? */
 630
 631	this_leaf->eax = eax;
 632	this_leaf->ebx = ebx;
 633	this_leaf->ecx = ecx;
 634	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 635			  (ebx.split.coherency_line_size     + 1) *
 636			  (ebx.split.physical_line_partition + 1) *
 637			  (ebx.split.ways_of_associativity   + 1);
 638	return 0;
 639}
 640
 641static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 642{
 643	unsigned int		eax, ebx, ecx, edx, op;
 644	union _cpuid4_leaf_eax	cache_eax;
 645	int 			i = -1;
 646
 647	if (c->x86_vendor == X86_VENDOR_AMD ||
 648	    c->x86_vendor == X86_VENDOR_HYGON)
 649		op = 0x8000001d;
 650	else
 651		op = 4;
 652
 653	do {
 654		++i;
 655		/* Do cpuid(op) loop to find out num_cache_leaves */
 656		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 657		cache_eax.full = eax;
 658	} while (cache_eax.split.type != CTYPE_NULL);
 659	return i;
 660}
 661
 662void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
 663{
 664	/*
 665	 * We may have multiple LLCs if L3 caches exist, so check if we
 666	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 667	 */
 668	if (!cpuid_edx(0x80000006))
 669		return;
 670
 671	if (c->x86 < 0x17) {
 672		/* LLC is at the node level. */
 673		per_cpu(cpu_llc_id, cpu) = c->cpu_die_id;
 674	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
 675		/*
 676		 * LLC is at the core complex level.
 677		 * Core complex ID is ApicId[3] for these processors.
 678		 */
 679		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 680	} else {
 681		/*
 682		 * LLC ID is calculated from the number of threads sharing the
 683		 * cache.
  684		 */
 685		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
 686		u32 llc_index = find_num_cache_leaves(c) - 1;
 687
 688		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
 689		if (eax)
 690			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 691
 692		if (num_sharing_cache) {
 693			int bits = get_count_order(num_sharing_cache);
 694
 695			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
 696		}
 697	}
 698}
 699
 700void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
 701{
 702	/*
 703	 * We may have multiple LLCs if L3 caches exist, so check if we
 704	 * have an L3 cache by looking at the L3 cache CPUID leaf.
 705	 */
 706	if (!cpuid_edx(0x80000006))
 707		return;
 708
 709	/*
 710	 * LLC is at the core complex level.
 711	 * Core complex ID is ApicId[3] for these processors.
 712	 */
 713	per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 714}
 715
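/*
 * Without TOPOEXT, derive the leaf count from CPUID 0x80000006: a
 * non-zero L3 associativity field (EDX[15:12]) indicates an L3 cache,
 * giving four leaves (L1d, L1i, L2, L3), otherwise three.
 */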
 716void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 717{
 718
 719	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 720		num_cache_leaves = find_num_cache_leaves(c);
 721	} else if (c->extended_cpuid_level >= 0x80000006) {
 722		if (cpuid_edx(0x80000006) & 0xf000)
 723			num_cache_leaves = 4;
 724		else
 725			num_cache_leaves = 3;
 726	}
 727}
 728
 729void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 730{
 731	num_cache_leaves = find_num_cache_leaves(c);
 732}
 733
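/*
 * Determine L1/L2/L3 (and trace) cache sizes for the CPU. The
 * deterministic cpuid(4) leaves are preferred; the legacy cpuid(2)
 * descriptor table is only consulted when cpuid(4) is unavailable, or
 * on P4 (family 15) for the trace cache. The l2_id/l3_id values mask
 * off the APIC ID bits that vary between threads sharing the cache,
 * so all sharers end up with the same identifier.
 */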
 734void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 735{
 736	/* Cache sizes */
 737	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 738	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 739	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 740	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 741#ifdef CONFIG_SMP
 742	unsigned int cpu = c->cpu_index;
 743#endif
 744
 745	if (c->cpuid_level > 3) {
 746		static int is_initialized;
 747
 748		if (is_initialized == 0) {
 749			/* Init num_cache_leaves from boot CPU */
 750			num_cache_leaves = find_num_cache_leaves(c);
 751			is_initialized++;
 752		}
 753
 754		/*
 755		 * Whenever possible use cpuid(4), the deterministic cache
 756		 * parameters leaf, to find the cache details.
 757		 */
 758		for (i = 0; i < num_cache_leaves; i++) {
 759			struct _cpuid4_info_regs this_leaf = {};
 760			int retval;
 761
 762			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 763			if (retval < 0)
 764				continue;
 765
 766			switch (this_leaf.eax.split.level) {
 767			case 1:
 768				if (this_leaf.eax.split.type == CTYPE_DATA)
 769					new_l1d = this_leaf.size/1024;
 770				else if (this_leaf.eax.split.type == CTYPE_INST)
 771					new_l1i = this_leaf.size/1024;
 772				break;
 773			case 2:
 774				new_l2 = this_leaf.size/1024;
 775				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 776				index_msb = get_count_order(num_threads_sharing);
 777				l2_id = c->apicid & ~((1 << index_msb) - 1);
 778				break;
 779			case 3:
 780				new_l3 = this_leaf.size/1024;
 781				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 782				index_msb = get_count_order(num_threads_sharing);
 783				l3_id = c->apicid & ~((1 << index_msb) - 1);
 784				break;
 785			default:
 786				break;
 787			}
 788		}
 789	}
 790	/*
 791	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 only
 792	 * for the trace cache.
 793	 */
 794	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 795		/* supports eax=2 call */
 796		int j, n;
 797		unsigned int regs[4];
 798		unsigned char *dp = (unsigned char *)regs;
 799		int only_trace = 0;
 800
 801		if (num_cache_leaves != 0 && c->x86 == 15)
 802			only_trace = 1;
 803
 804		/* Number of times to iterate */
 805		n = cpuid_eax(2) & 0xFF;
 806
 807		for (i = 0 ; i < n ; i++) {
 808			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 809
 810			/* If bit 31 is set, this is an unknown format */
 811			for (j = 0 ; j < 3 ; j++)
 812				if (regs[j] & (1 << 31))
 813					regs[j] = 0;
 814
 815			/* Byte 0 is the iteration count, not a descriptor */
 816			for (j = 1 ; j < 16 ; j++) {
 817				unsigned char des = dp[j];
 818				unsigned char k = 0;
 819
 820				/* look up this descriptor in the table */
 821				while (cache_table[k].descriptor != 0) {
 822					if (cache_table[k].descriptor == des) {
 823						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 824							break;
 825						switch (cache_table[k].cache_type) {
 826						case LVL_1_INST:
 827							l1i += cache_table[k].size;
 828							break;
 829						case LVL_1_DATA:
 830							l1d += cache_table[k].size;
 831							break;
 832						case LVL_2:
 833							l2 += cache_table[k].size;
 834							break;
 835						case LVL_3:
 836							l3 += cache_table[k].size;
 837							break;
 838						case LVL_TRACE:
 839							trace += cache_table[k].size;
 840							break;
 841						}
 842
 843						break;
 844					}
 845
 846					k++;
 847				}
 848			}
 849		}
 850	}
 851
 852	if (new_l1d)
 853		l1d = new_l1d;
 854
 855	if (new_l1i)
 856		l1i = new_l1i;
 857
 858	if (new_l2) {
 859		l2 = new_l2;
 860#ifdef CONFIG_SMP
 861		per_cpu(cpu_llc_id, cpu) = l2_id;
 862		per_cpu(cpu_l2c_id, cpu) = l2_id;
 863#endif
 864	}
 865
 866	if (new_l3) {
 867		l3 = new_l3;
 868#ifdef CONFIG_SMP
 869		per_cpu(cpu_llc_id, cpu) = l3_id;
 870#endif
 871	}
 872
 873#ifdef CONFIG_SMP
 874	/*
 875	 * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
 876	 * turn means that the only possibility is SMT (as indicated in
 877	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
 878	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
 879	 * c->phys_proc_id.
 880	 */
 881	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
 882		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 883#endif
 884
 885	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 886
 887	if (!l2)
 888		cpu_detect_cache_sizes(c);
 889}
 890
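/*
 * AMD/Hygon construction of shared_cpu_map. For L3 (index 3) the
 * pre-computed cpu_llc_shared_mask is used directly. For other levels
 * with TOPOEXT, siblings are the online CPUs whose APIC IDs fall into
 * the same aligned window of nshared consecutive IDs, nshared being
 * the sharing count reported by the cache leaf. Returns 1 if the map
 * was set up here, 0 to fall back to the generic path.
 */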
 891static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
 892				    struct _cpuid4_info_regs *base)
 893{
 894	struct cpu_cacheinfo *this_cpu_ci;
 895	struct cacheinfo *this_leaf;
 896	int i, sibling;
 897
 898	/*
 899	 * For L3, always use the pre-calculated cpu_llc_shared_mask
 900	 * to derive shared_cpu_map.
 901	 */
 902	if (index == 3) {
 903		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 904			this_cpu_ci = get_cpu_cacheinfo(i);
 905			if (!this_cpu_ci->info_list)
 906				continue;
 907			this_leaf = this_cpu_ci->info_list + index;
 908			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 909				if (!cpu_online(sibling))
 910					continue;
 911				cpumask_set_cpu(sibling,
 912						&this_leaf->shared_cpu_map);
 913			}
 914		}
 915	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
 916		unsigned int apicid, nshared, first, last;
 917
 918		nshared = base->eax.split.num_threads_sharing + 1;
 919		apicid = cpu_data(cpu).apicid;
 920		first = apicid - (apicid % nshared);
 921		last = first + nshared - 1;
 922
 923		for_each_online_cpu(i) {
 924			this_cpu_ci = get_cpu_cacheinfo(i);
 925			if (!this_cpu_ci->info_list)
 926				continue;
 927
 928			apicid = cpu_data(i).apicid;
 929			if ((apicid < first) || (apicid > last))
 930				continue;
 931
 932			this_leaf = this_cpu_ci->info_list + index;
 933
 934			for_each_online_cpu(sibling) {
 935				apicid = cpu_data(sibling).apicid;
 936				if ((apicid < first) || (apicid > last))
 937					continue;
 938				cpumask_set_cpu(sibling,
 939						&this_leaf->shared_cpu_map);
 940			}
 941		}
 942	} else
 943		return 0;
 944
 945	return 1;
 946}
 947
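/*
 * Generic construction of shared_cpu_map: CPUs share this cache when
 * their APIC IDs match after shifting off the low bits that cover
 * num_threads_sharing, as reported by the cache leaf itself.
 */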
 948static void __cache_cpumap_setup(unsigned int cpu, int index,
 949				 struct _cpuid4_info_regs *base)
 950{
 951	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 952	struct cacheinfo *this_leaf, *sibling_leaf;
 953	unsigned long num_threads_sharing;
 954	int index_msb, i;
 955	struct cpuinfo_x86 *c = &cpu_data(cpu);
 956
 957	if (c->x86_vendor == X86_VENDOR_AMD ||
 958	    c->x86_vendor == X86_VENDOR_HYGON) {
 959		if (__cache_amd_cpumap_setup(cpu, index, base))
 960			return;
 961	}
 962
 963	this_leaf = this_cpu_ci->info_list + index;
 964	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 965
 966	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
 967	if (num_threads_sharing == 1)
 968		return;
 969
 970	index_msb = get_count_order(num_threads_sharing);
 971
 972	for_each_online_cpu(i)
 973		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
 974			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 975
 976			if (i == cpu || !sib_cpu_ci->info_list)
 977				continue; /* skip self or missing cacheinfo */
 978			sibling_leaf = sib_cpu_ci->info_list + index;
 979			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
 980			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
 981		}
 982}
 983
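/*
 * Translate the raw _cpuid4_info_regs into the generic struct
 * cacheinfo consumed by the common cacheinfo code. The CPUID fields
 * are stored minus one, hence the "+ 1" adjustments.
 */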
 984static void ci_leaf_init(struct cacheinfo *this_leaf,
 985			 struct _cpuid4_info_regs *base)
 986{
 987	this_leaf->id = base->id;
 988	this_leaf->attributes = CACHE_ID;
 989	this_leaf->level = base->eax.split.level;
 990	this_leaf->type = cache_type_map[base->eax.split.type];
 991	this_leaf->coherency_line_size =
 992				base->ebx.split.coherency_line_size + 1;
 993	this_leaf->ways_of_associativity =
 994				base->ebx.split.ways_of_associativity + 1;
 995	this_leaf->size = base->size;
 996	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
 997	this_leaf->physical_line_partition =
 998				base->ebx.split.physical_line_partition + 1;
 999	this_leaf->priv = base->nb;
1000}
1001
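/*
 * Generic cacheinfo hook: report three cache levels and
 * num_cache_leaves leaves for this CPU.
 */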
1002int init_cache_level(unsigned int cpu)
1003{
1004	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1005
1006	if (!num_cache_leaves)
1007		return -ENOENT;
1008	if (!this_cpu_ci)
1009		return -EINVAL;
1010	this_cpu_ci->num_levels = 3;
1011	this_cpu_ci->num_leaves = num_cache_leaves;
1012	return 0;
1013}
1014
1015/*
1016 * The maximum number of threads sharing the cache comes from
1017 * CPUID.4:EAX[25:14], with ECX as the cache index. Right-shift the APIC ID
1018 * by that number's order to get the cache id for this cache node.
1019 */
1020static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1021{
1022	struct cpuinfo_x86 *c = &cpu_data(cpu);
1023	unsigned long num_threads_sharing;
1024	int index_msb;
1025
1026	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1027	index_msb = get_count_order(num_threads_sharing);
1028	id4_regs->id = c->apicid >> index_msb;
1029}
1030
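/*
 * Generic cacheinfo hook: fill this CPU's info_list with one entry per
 * CPUID cache leaf, including its cache id and shared_cpu_map.
 */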
1031int populate_cache_leaves(unsigned int cpu)
1032{
1033	unsigned int idx, ret;
1034	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1035	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1036	struct _cpuid4_info_regs id4_regs = {};
1037
1038	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1039		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1040		if (ret)
1041			return ret;
1042		get_cache_id(cpu, &id4_regs);
1043		ci_leaf_init(this_leaf++, &id4_regs);
1044		__cache_cpumap_setup(cpu, idx, &id4_regs);
1045	}
1046	this_cpu_ci->cpu_map_populated = true;
1047
1048	return 0;
1049}
1050
1051/*
1052 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
1053 *
1054 * Since we are disabling the cache, don't allow any interrupts;
1055 * they would run extremely slowly and would only increase the pain.
1056 *
1057 * The caller must ensure that local interrupts are disabled and
1058 * are reenabled after cache_enable() has been called.
1059 */
1060static unsigned long saved_cr4;
1061static DEFINE_RAW_SPINLOCK(cache_disable_lock);
1062
1063void cache_disable(void) __acquires(cache_disable_lock)
1064{
1065	unsigned long cr0;
1066
1067	/*
1068	 * Note that this is not ideal, since the cache is only
1069	 * flushed/disabled for this CPU while the MTRRs are changed,
1070	 * but changing this requires more invasive changes to the way
1071	 * the kernel boots.
1072	 */
1073
1074	raw_spin_lock(&cache_disable_lock);
1075
1076	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
1077	cr0 = read_cr0() | X86_CR0_CD;
1078	write_cr0(cr0);
1079
1080	/*
1081	 * the MTRRs. Fortunately, as per the Intel Software Developer's
1082	 * Manual, we can skip it if the processor supports cache self-
1083	 * Manual, we can skip it if the processor supports cache self-
1084	 * snooping.
1085	 */
1086	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1087		wbinvd();
1088
1089	/* Save value of CR4 and clear Page Global Enable (bit 7) */
1090	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
1091		saved_cr4 = __read_cr4();
1092		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
1093	}
1094
1095	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
1096	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1097	flush_tlb_local();
1098
1099	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1100		mtrr_disable();
1101
1102	/* Again, only flush caches if we have to. */
1103	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
1104		wbinvd();
1105}
1106
1107void cache_enable(void) __releases(cache_disable_lock)
1108{
1109	/* Flush TLBs (no need to flush caches - they are disabled) */
1110	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
1111	flush_tlb_local();
1112
1113	if (cpu_feature_enabled(X86_FEATURE_MTRR))
1114		mtrr_enable();
1115
1116	/* Enable caches */
1117	write_cr0(read_cr0() & ~X86_CR0_CD);
1118
1119	/* Restore value of CR4 */
1120	if (cpu_feature_enabled(X86_FEATURE_PGE))
1121		__write_cr4(saved_cr4);
1122
1123	raw_spin_unlock(&cache_disable_lock);
1124}
1125
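/*
 * With caches disabled and interrupts off, program the MTRR and/or PAT
 * state on this CPU, depending on what memory_caching_control asks
 * for.
 */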
1126static void cache_cpu_init(void)
1127{
1128	unsigned long flags;
1129
1130	local_irq_save(flags);
1131	cache_disable();
1132
1133	if (memory_caching_control & CACHE_MTRR)
1134		mtrr_generic_set_state();
1135
1136	if (memory_caching_control & CACHE_PAT)
1137		pat_cpu_init();
1138
1139	cache_enable();
1140	local_irq_restore(flags);
1141}
1142
1143static bool cache_aps_delayed_init = true;
1144
1145void set_cache_aps_delayed_init(bool val)
1146{
1147	cache_aps_delayed_init = val;
1148}
1149
1150bool get_cache_aps_delayed_init(void)
1151{
1152	return cache_aps_delayed_init;
1153}
1154
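/*
 * stop_machine() callback: initialize cache control state either on
 * every CPU (delayed AP init) or only on a CPU that is not yet online
 * (hotplug / resume path).
 */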
1155static int cache_rendezvous_handler(void *unused)
1156{
1157	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
1158		cache_cpu_init();
1159
1160	return 0;
1161}
1162
1163void __init cache_bp_init(void)
1164{
1165	mtrr_bp_init();
1166	pat_bp_init();
1167
1168	if (memory_caching_control)
1169		cache_cpu_init();
1170}
1171
1172void cache_bp_restore(void)
1173{
1174	if (memory_caching_control)
1175		cache_cpu_init();
1176}
1177
1178static int cache_ap_init(unsigned int cpu)
1179{
1180	if (!memory_caching_control || get_cache_aps_delayed_init())
1181		return 0;
1182
1183	/*
1184	 * Ideally we should hold mtrr_mutex here to prevent MTRR entries
1185	 * from being changed, but this routine is called at CPU boot time
1186	 * and holding the lock there breaks it.
1187	 *
1188	 * This routine is called in two cases:
1189	 *
1190	 *   1. very early during software resume, when there absolutely
1191	 *      are no MTRR entry changes;
1192	 *
1193	 *   2. at CPU hot-add time. We let mtrr_add/del_page hold the
1194	 *      cpuhotplug lock to prevent MTRR entry changes.
1195	 */
1196	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
1197				       cpu_callout_mask);
1198
1199	return 0;
1200}
1201
1202/*
1203 * Delayed cache initialization for all APs.
1204 */
1205void cache_aps_init(void)
1206{
1207	if (!memory_caching_control || !get_cache_aps_delayed_init())
1208		return;
1209
1210	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
1211	set_cache_aps_delayed_init(false);
1212}
1213
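/*
 * Register the CPU hotplug callback so each AP runs cache_ap_init()
 * while coming online; cache_ap_init() itself bails out when delayed
 * initialization is in effect or nothing needs programming.
 */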
1214static int __init cache_ap_register(void)
1215{
1216	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
1217				  "x86/cachectrl:starting",
1218				  cache_ap_init, NULL);
1219	return 0;
1220}
1221core_initcall(cache_ap_register);