v3.5.6
   1/*
   2 *	Routines to identify caches on Intel CPU.
   3 *
   4 *	Changes:
   5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   8 */
   9
  10#include <linux/init.h>
  11#include <linux/slab.h>
  12#include <linux/device.h>
  13#include <linux/compiler.h>
  14#include <linux/cpu.h>
  15#include <linux/sched.h>
  16#include <linux/pci.h>
  17
  18#include <asm/processor.h>
  19#include <linux/smp.h>
  20#include <asm/amd_nb.h>
  21#include <asm/smp.h>
  22
  23#define LVL_1_INST	1
  24#define LVL_1_DATA	2
  25#define LVL_2		3
  26#define LVL_3		4
  27#define LVL_TRACE	5
  28
  29struct _cache_table {
  30	unsigned char descriptor;
  31	char cache_type;
  32	short size;
  33};
  34
  35#define MB(x)	((x) * 1024)
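/* the cache_table sizes below are kept in KB, so MB(x) converts megabytes to kilobytes */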
  36
  37/* All the cache descriptor types we care about (no TLB or
  38   trace cache entries) */
  39
  40static const struct _cache_table __cpuinitconst cache_table[] =
  41{
  42	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  43	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  44	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  45	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  46	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  47	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  48	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  49	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  50	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  51	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  52	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  53	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  54	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  55	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  56	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  57	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  58	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  59	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  60	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  61	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  62	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  63	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  64	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  65	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  66	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  67	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  68	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  69	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  70	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  71	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  72	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  73	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  74	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  75	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  76	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  77	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  79	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  80	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  81	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
  82	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
  83	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
  84	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
  85	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
  86	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  87	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  88	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  89	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  90	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
  91	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
  92	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
  93	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
  94	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
  95	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
  96	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
  97	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
  98	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
  99	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 100	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 101	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 102	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 103	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 104	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 105	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 106	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 107	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 108	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 109	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 110	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 111	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 112	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 113	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 114	{ 0x00, 0, 0}
 115};
 116
 117
 118enum _cache_type {
 119	CACHE_TYPE_NULL	= 0,
 120	CACHE_TYPE_DATA = 1,
 121	CACHE_TYPE_INST = 2,
 122	CACHE_TYPE_UNIFIED = 3
 123};
 124
 125union _cpuid4_leaf_eax {
 126	struct {
 127		enum _cache_type	type:5;
 128		unsigned int		level:3;
 129		unsigned int		is_self_initializing:1;
 130		unsigned int		is_fully_associative:1;
 131		unsigned int		reserved:4;
 132		unsigned int		num_threads_sharing:12;
 133		unsigned int		num_cores_on_die:6;
 134	} split;
 135	u32 full;
 136};
 137
 138union _cpuid4_leaf_ebx {
 139	struct {
 140		unsigned int		coherency_line_size:12;
 141		unsigned int		physical_line_partition:10;
 142		unsigned int		ways_of_associativity:10;
 143	} split;
 144	u32 full;
 145};
 146
 147union _cpuid4_leaf_ecx {
 148	struct {
 149		unsigned int		number_of_sets:32;
 150	} split;
 151	u32 full;
 152};
 153
 154struct _cpuid4_info_regs {
 155	union _cpuid4_leaf_eax eax;
 156	union _cpuid4_leaf_ebx ebx;
 157	union _cpuid4_leaf_ecx ecx;
 158	unsigned long size;
 159	struct amd_northbridge *nb;
 160};
 161
 162struct _cpuid4_info {
 163	struct _cpuid4_info_regs base;
 164	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 165};
 166
 167unsigned short			num_cache_leaves;
 168
 169/* AMD doesn't have CPUID4. Emulate it here to report the same
 170   information to the user.  This makes some assumptions about the machine:
 171   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 172
 173   In theory the TLBs could be reported as fake type (they are in "dummy").
 174   Maybe later */
 175union l1_cache {
 176	struct {
 177		unsigned line_size:8;
 178		unsigned lines_per_tag:8;
 179		unsigned assoc:8;
 180		unsigned size_in_kb:8;
 181	};
 182	unsigned val;
 183};
 184
 185union l2_cache {
 186	struct {
 187		unsigned line_size:8;
 188		unsigned lines_per_tag:4;
 189		unsigned assoc:4;
 190		unsigned size_in_kb:16;
 191	};
 192	unsigned val;
 193};
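/*
 * Illustrative example (not part of the original source): for a hypothetical
 * 512 KB, 16-way L2 with 64-byte lines and one line per tag, CPUID 0x80000006
 * would return ECX = 0x02008140, which union l2_cache decodes as
 *	line_size     = 0x40   (64 bytes)
 *	lines_per_tag = 0x1
 *	assoc         = 0x8    (assocs[0x8] == 16 ways, see table below)
 *	size_in_kb    = 0x0200 (512)
 */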
 194
 195union l3_cache {
 196	struct {
 197		unsigned line_size:8;
 198		unsigned lines_per_tag:4;
 199		unsigned assoc:4;
 200		unsigned res:2;
 201		unsigned size_encoded:14;
 202	};
 203	unsigned val;
 204};
 205
 206static const unsigned short __cpuinitconst assocs[] = {
 207	[1] = 1,
 208	[2] = 2,
 209	[4] = 4,
 210	[6] = 8,
 211	[8] = 16,
 212	[0xa] = 32,
 213	[0xb] = 48,
 214	[0xc] = 64,
 215	[0xd] = 96,
 216	[0xe] = 128,
 217	[0xf] = 0xffff /* fully associative - no way to show this currently */
 218};
 219
 220static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
 221static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
 222
 223static void __cpuinit
 224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 225		     union _cpuid4_leaf_ebx *ebx,
 226		     union _cpuid4_leaf_ecx *ecx)
 227{
 228	unsigned dummy;
 229	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 230	union l1_cache l1i, l1d;
 231	union l2_cache l2;
 232	union l3_cache l3;
 233	union l1_cache *l1 = &l1d;
 234
 235	eax->full = 0;
 236	ebx->full = 0;
 237	ecx->full = 0;
 238
 239	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 240	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 241
 242	switch (leaf) {
 243	case 1:
 244		l1 = &l1i;
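		/* fall through: leaf 1 (L1i) reuses the L1 decode below */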
 245	case 0:
 246		if (!l1->val)
 247			return;
 248		assoc = assocs[l1->assoc];
 249		line_size = l1->line_size;
 250		lines_per_tag = l1->lines_per_tag;
 251		size_in_kb = l1->size_in_kb;
 252		break;
 253	case 2:
 254		if (!l2.val)
 255			return;
 256		assoc = assocs[l2.assoc];
 257		line_size = l2.line_size;
 258		lines_per_tag = l2.lines_per_tag;
 259		/* cpu_data has errata corrections for K7 applied */
 260		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 261		break;
 262	case 3:
 263		if (!l3.val)
 264			return;
 265		assoc = assocs[l3.assoc];
 266		line_size = l3.line_size;
 267		lines_per_tag = l3.lines_per_tag;
 268		size_in_kb = l3.size_encoded * 512;
 269		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 270			size_in_kb = size_in_kb >> 1;
 271			assoc = assoc >> 1;
 272		}
 273		break;
 274	default:
 275		return;
 276	}
 277
 278	eax->split.is_self_initializing = 1;
 279	eax->split.type = types[leaf];
 280	eax->split.level = levels[leaf];
 281	eax->split.num_threads_sharing = 0;
 282	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 283
 284
 285	if (assoc == 0xffff)
 286		eax->split.is_fully_associative = 1;
 287	ebx->split.coherency_line_size = line_size - 1;
 288	ebx->split.ways_of_associativity = assoc - 1;
 289	ebx->split.physical_line_partition = lines_per_tag - 1;
 290	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 291		(ebx->split.ways_of_associativity + 1) - 1;
 292}
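/*
 * Worked example (illustrative only): for a hypothetical 512 KB, 16-way,
 * 64-byte-line L2, the emulation above encodes
 *	ways_of_associativity = 16 - 1 = 15
 *	coherency_line_size   = 64 - 1 = 63
 *	number_of_sets        = 512 * 1024 / 64 / 16 - 1 = 511
 * i.e. the same off-by-one encoding that a native CPUID(4) leaf uses.
 */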
 293
 294struct _cache_attr {
 295	struct attribute attr;
 296	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
 297	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
 298			 unsigned int);
 299};
 300
 301#ifdef CONFIG_AMD_NB
 302
 303/*
 304 * L3 cache descriptors
 305 */
 306static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
 307{
 308	struct amd_l3_cache *l3 = &nb->l3_cache;
 309	unsigned int sc0, sc1, sc2, sc3;
 310	u32 val = 0;
 311
 312	pci_read_config_dword(nb->misc, 0x1C4, &val);
 313
 314	/* calculate subcache sizes */
 315	l3->subcaches[0] = sc0 = !(val & BIT(0));
 316	l3->subcaches[1] = sc1 = !(val & BIT(4));
 317
 318	if (boot_cpu_data.x86 == 0x15) {
 319		l3->subcaches[0] = sc0 += !(val & BIT(1));
 320		l3->subcaches[1] = sc1 += !(val & BIT(5));
 321	}
 322
 323	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 324	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 325
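	/*
	 * The largest subcache determines the valid index range: each counted
	 * subcache contributes 1K indices (hence the << 10), and the result is
	 * used as the upper bound check in amd_set_l3_disable_slot() below.
	 */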
 326	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 327}
 328
 329static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 330{
 331	int node;
 332
 333	/* only for L3, and not in virtualized environments */
 334	if (index < 3)
 335		return;
 336
 337	node = amd_get_nb_id(smp_processor_id());
 338	this_leaf->nb = node_to_amd_nb(node);
 339	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 340		amd_calc_l3_indices(this_leaf->nb);
 341}
 342
 343/*
 344 * check whether a slot used for disabling an L3 index is occupied.
 345 * @l3: L3 cache descriptor
 346 * @slot: slot number (0..1)
 347 *
 348 * @returns: the disabled index if used or negative value if slot free.
 349 */
 350int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 351{
 352	unsigned int reg = 0;
 353
 354	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 355
 356	/* check whether this slot is activated already */
 357	if (reg & (3UL << 30))
 358		return reg & 0xfff;
 359
 360	return -1;
 361}
 362
 363static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 364				  unsigned int slot)
 365{
 366	int index;
 367
 368	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 369		return -EINVAL;
 370
 371	index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
 372	if (index >= 0)
 373		return sprintf(buf, "%d\n", index);
 374
 375	return sprintf(buf, "FREE\n");
 376}
 377
 378#define SHOW_CACHE_DISABLE(slot)					\
 379static ssize_t								\
 380show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
 381			  unsigned int cpu)				\
 382{									\
 383	return show_cache_disable(this_leaf, buf, slot);		\
 384}
 385SHOW_CACHE_DISABLE(0)
 386SHOW_CACHE_DISABLE(1)
 387
 388static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 389				 unsigned slot, unsigned long idx)
 390{
 391	int i;
 392
 393	idx |= BIT(30);
 394
 395	/*
 396	 *  disable index in all 4 subcaches
 397	 */
 398	for (i = 0; i < 4; i++) {
 399		u32 reg = idx | (i << 20);
 400
 401		if (!nb->l3_cache.subcaches[i])
 402			continue;
 403
 404		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 405
 406		/*
 407		 * We need to WBINVD on a core on the node containing the L3
 408		 * cache which indices we disable therefore a simple wbinvd()
 409		 * is not sufficient.
 410		 */
 411		wbinvd_on_cpu(cpu);
 412
 413		reg |= BIT(31);
 414		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 415	}
 416}
 417
 418/*
 419 * disable a L3 cache index by using a disable-slot
 420 *
 421 * @l3:    L3 cache descriptor
 422 * @cpu:   A CPU on the node containing the L3 cache
 423 * @slot:  slot number (0..1)
 424 * @index: index to disable
 425 *
 426 * @return: 0 on success, error status on failure
 427 */
 428int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
 429			    unsigned long index)
 430{
 431	int ret = 0;
 432
 433	/*  check if @slot is already used or the index is already disabled */
 434	ret = amd_get_l3_disable_slot(nb, slot);
 435	if (ret >= 0)
 436		return -EEXIST;
 437
 438	if (index > nb->l3_cache.indices)
 439		return -EINVAL;
 440
 441	/* check whether the other slot has disabled the same index already */
 442	if (index == amd_get_l3_disable_slot(nb, !slot))
 443		return -EEXIST;
 444
 445	amd_l3_disable_index(nb, cpu, slot, index);
 446
 447	return 0;
 448}
 449
 450static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 451				  const char *buf, size_t count,
 452				  unsigned int slot)
 453{
 454	unsigned long val = 0;
 455	int cpu, err = 0;
 456
 457	if (!capable(CAP_SYS_ADMIN))
 458		return -EPERM;
 459
 460	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 461		return -EINVAL;
 462
 463	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
 464
 465	if (strict_strtoul(buf, 10, &val) < 0)
 466		return -EINVAL;
 467
 468	err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
 469	if (err) {
 470		if (err == -EEXIST)
 471			pr_warning("L3 slot %d in use/index already disabled!\n",
 472				   slot);
 473		return err;
 474	}
 475	return count;
 476}
 477
 478#define STORE_CACHE_DISABLE(slot)					\
 479static ssize_t								\
 480store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
 481			   const char *buf, size_t count,		\
 482			   unsigned int cpu)				\
 483{									\
 484	return store_cache_disable(this_leaf, buf, count, slot);	\
 485}
 486STORE_CACHE_DISABLE(0)
 487STORE_CACHE_DISABLE(1)
 488
 489static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
 490		show_cache_disable_0, store_cache_disable_0);
 491static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
 492		show_cache_disable_1, store_cache_disable_1);
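/*
 * Example usage (assuming the standard sysfs layout created by
 * cache_add_dev() later in this file; the L3 leaf is index3):
 *
 *   # cat /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *   FREE
 *   # echo 42 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *
 * disables L3 index 42 via slot 0 on the node containing CPU 0.
 */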
 493
 494static ssize_t
 495show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
 496{
 497	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 498		return -EINVAL;
 499
 500	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 501}
 502
 503static ssize_t
 504store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
 505		unsigned int cpu)
 506{
 507	unsigned long val;
 508
 509	if (!capable(CAP_SYS_ADMIN))
 510		return -EPERM;
 511
 512	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 513		return -EINVAL;
 514
 515	if (strict_strtoul(buf, 16, &val) < 0)
 516		return -EINVAL;
 517
 518	if (amd_set_subcaches(cpu, val))
 519		return -EINVAL;
 520
 521	return count;
 522}
 523
 524static struct _cache_attr subcaches =
 525	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
 526
 527#else	/* CONFIG_AMD_NB */
 528#define amd_init_l3_cache(x, y)
 529#endif /* CONFIG_AMD_NB */
 530
 531static int
 532__cpuinit cpuid4_cache_lookup_regs(int index,
 533				   struct _cpuid4_info_regs *this_leaf)
 534{
 535	union _cpuid4_leaf_eax	eax;
 536	union _cpuid4_leaf_ebx	ebx;
 537	union _cpuid4_leaf_ecx	ecx;
 538	unsigned		edx;
 539
 540	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 541		amd_cpuid4(index, &eax, &ebx, &ecx);
 542		amd_init_l3_cache(this_leaf, index);
 543	} else {
 544		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 545	}
 546
 547	if (eax.split.type == CACHE_TYPE_NULL)
 548		return -EIO; /* better error ? */
 549
 550	this_leaf->eax = eax;
 551	this_leaf->ebx = ebx;
 552	this_leaf->ecx = ecx;
 553	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 554			  (ebx.split.coherency_line_size     + 1) *
 555			  (ebx.split.physical_line_partition + 1) *
 556			  (ebx.split.ways_of_associativity   + 1);
 557	return 0;
 558}
 559
 560static int __cpuinit find_num_cache_leaves(void)
 561{
 562	unsigned int		eax, ebx, ecx, edx;
 563	union _cpuid4_leaf_eax	cache_eax;
 564	int 			i = -1;
 565
 566	do {
 567		++i;
 568		/* Do cpuid(4) loop to find out num_cache_leaves */
 569		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
 570		cache_eax.full = eax;
 571	} while (cache_eax.split.type != CACHE_TYPE_NULL);
 572	return i;
 573}
 574
 575unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 576{
 577	/* Cache sizes */
 578	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 579	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 580	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 581	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 582#ifdef CONFIG_X86_HT
 583	unsigned int cpu = c->cpu_index;
 584#endif
 585
 586	if (c->cpuid_level > 3) {
 587		static int is_initialized;
 588
 589		if (is_initialized == 0) {
 590			/* Init num_cache_leaves from boot CPU */
 591			num_cache_leaves = find_num_cache_leaves();
 592			is_initialized++;
 593		}
 594
 595		/*
 596		 * Whenever possible use cpuid(4), deterministic cache
 597		 * parameters cpuid leaf to find the cache details
 598		 */
 599		for (i = 0; i < num_cache_leaves; i++) {
 600			struct _cpuid4_info_regs this_leaf;
 601			int retval;
 602
 603			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 604			if (retval >= 0) {
 605				switch (this_leaf.eax.split.level) {
 606				case 1:
 607					if (this_leaf.eax.split.type ==
 608							CACHE_TYPE_DATA)
 609						new_l1d = this_leaf.size/1024;
 610					else if (this_leaf.eax.split.type ==
 611							CACHE_TYPE_INST)
 612						new_l1i = this_leaf.size/1024;
 613					break;
 614				case 2:
 615					new_l2 = this_leaf.size/1024;
 616					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 617					index_msb = get_count_order(num_threads_sharing);
 618					l2_id = c->apicid & ~((1 << index_msb) - 1);
 619					break;
 620				case 3:
 621					new_l3 = this_leaf.size/1024;
 622					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 623					index_msb = get_count_order(
 624							num_threads_sharing);
 625					l3_id = c->apicid & ~((1 << index_msb) - 1);
 626					break;
 627				default:
 628					break;
 629				}
 630			}
 631		}
 632	}
 633	/*
 634	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 635	 * trace cache
 636	 */
 637	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 638		/* supports eax=2  call */
 639		int j, n;
 640		unsigned int regs[4];
 641		unsigned char *dp = (unsigned char *)regs;
 642		int only_trace = 0;
 643
 644		if (num_cache_leaves != 0 && c->x86 == 15)
 645			only_trace = 1;
 646
 647		/* Number of times to iterate */
 648		n = cpuid_eax(2) & 0xFF;
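		/*
		 * The low byte of CPUID(2) EAX gives the number of times the
		 * leaf must be executed to retrieve all descriptors; on most
		 * CPUs this is 1.
		 */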
 649
 650		for (i = 0 ; i < n ; i++) {
 651			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 652
 653			/* If bit 31 is set, this is an unknown format */
 654			for (j = 0 ; j < 3 ; j++)
 655				if (regs[j] & (1 << 31))
 656					regs[j] = 0;
 657
 658			/* Byte 0 is level count, not a descriptor */
 659			for (j = 1 ; j < 16 ; j++) {
 660				unsigned char des = dp[j];
 661				unsigned char k = 0;
 662
 663				/* look up this descriptor in the table */
 664				while (cache_table[k].descriptor != 0) {
 665					if (cache_table[k].descriptor == des) {
 666						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 667							break;
 668						switch (cache_table[k].cache_type) {
 669						case LVL_1_INST:
 670							l1i += cache_table[k].size;
 671							break;
 672						case LVL_1_DATA:
 673							l1d += cache_table[k].size;
 674							break;
 675						case LVL_2:
 676							l2 += cache_table[k].size;
 677							break;
 678						case LVL_3:
 679							l3 += cache_table[k].size;
 680							break;
 681						case LVL_TRACE:
 682							trace += cache_table[k].size;
 683							break;
 684						}
 685
 686						break;
 687					}
 688
 689					k++;
 690				}
 691			}
 692		}
 693	}
 694
 695	if (new_l1d)
 696		l1d = new_l1d;
 697
 698	if (new_l1i)
 699		l1i = new_l1i;
 700
 701	if (new_l2) {
 702		l2 = new_l2;
 703#ifdef CONFIG_X86_HT
 704		per_cpu(cpu_llc_id, cpu) = l2_id;
 705#endif
 706	}
 707
 708	if (new_l3) {
 709		l3 = new_l3;
 710#ifdef CONFIG_X86_HT
 711		per_cpu(cpu_llc_id, cpu) = l3_id;
 712#endif
 713	}
 714
 715	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 716
 717	return l2;
 718}
 719
 720#ifdef CONFIG_SYSFS
 721
 722/* pointer to _cpuid4_info array (for each cache leaf) */
 723static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 724#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 725
 726#ifdef CONFIG_SMP
 727
 728static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 729{
 730	struct _cpuid4_info *this_leaf;
 731	int ret, i, sibling;
 732	struct cpuinfo_x86 *c = &cpu_data(cpu);
 733
 734	ret = 0;
 735	if (index == 3) {
 736		ret = 1;
 737		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 738			if (!per_cpu(ici_cpuid4_info, i))
 739				continue;
 740			this_leaf = CPUID4_INFO_IDX(i, index);
 741			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 742				if (!cpu_online(sibling))
 743					continue;
 744				set_bit(sibling, this_leaf->shared_cpu_map);
 745			}
 746		}
 747	} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
 748		ret = 1;
 749		for_each_cpu(i, cpu_sibling_mask(cpu)) {
 750			if (!per_cpu(ici_cpuid4_info, i))
 751				continue;
 752			this_leaf = CPUID4_INFO_IDX(i, index);
 753			for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
 754				if (!cpu_online(sibling))
 755					continue;
 756				set_bit(sibling, this_leaf->shared_cpu_map);
 757			}
 758		}
 759	}
 760
 761	return ret;
 762}
 763
 764static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 765{
 766	struct _cpuid4_info *this_leaf, *sibling_leaf;
 767	unsigned long num_threads_sharing;
 768	int index_msb, i;
 769	struct cpuinfo_x86 *c = &cpu_data(cpu);
 770
 771	if (c->x86_vendor == X86_VENDOR_AMD) {
 772		if (cache_shared_amd_cpu_map_setup(cpu, index))
 773			return;
 774	}
 775
 776	this_leaf = CPUID4_INFO_IDX(cpu, index);
 777	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
 778
 779	if (num_threads_sharing == 1)
 780		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
 781	else {
 782		index_msb = get_count_order(num_threads_sharing);
 783
 784		for_each_online_cpu(i) {
 785			if (cpu_data(i).apicid >> index_msb ==
 786			    c->apicid >> index_msb) {
 787				cpumask_set_cpu(i,
 788					to_cpumask(this_leaf->shared_cpu_map));
 789				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
 790					sibling_leaf =
 791						CPUID4_INFO_IDX(i, index);
 792					cpumask_set_cpu(cpu, to_cpumask(
 793						sibling_leaf->shared_cpu_map));
 794				}
 795			}
 796		}
 797	}
 798}
 799static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 800{
 801	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 802	int sibling;
 803
 804	this_leaf = CPUID4_INFO_IDX(cpu, index);
 805	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
 806		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 807		cpumask_clear_cpu(cpu,
 808				  to_cpumask(sibling_leaf->shared_cpu_map));
 809	}
 810}
 811#else
 812static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 813{
 814}
 815
 816static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 817{
 818}
 819#endif
 820
 821static void __cpuinit free_cache_attributes(unsigned int cpu)
 822{
 823	int i;
 824
 825	for (i = 0; i < num_cache_leaves; i++)
 826		cache_remove_shared_cpu_map(cpu, i);
 827
 828	kfree(per_cpu(ici_cpuid4_info, cpu));
 829	per_cpu(ici_cpuid4_info, cpu) = NULL;
 830}
 831
 832static void __cpuinit get_cpu_leaves(void *_retval)
 833{
 834	int j, *retval = _retval, cpu = smp_processor_id();
 835
 836	/* Do cpuid and store the results */
 837	for (j = 0; j < num_cache_leaves; j++) {
 838		struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
 839
 840		*retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
 841		if (unlikely(*retval < 0)) {
 842			int i;
 843
 844			for (i = 0; i < j; i++)
 845				cache_remove_shared_cpu_map(cpu, i);
 846			break;
 847		}
 848		cache_shared_cpu_map_setup(cpu, j);
 849	}
 850}
 851
 852static int __cpuinit detect_cache_attributes(unsigned int cpu)
 853{
 854	int			retval;
 855
 856	if (num_cache_leaves == 0)
 857		return -ENOENT;
 858
 859	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
 860	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
 861	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
 862		return -ENOMEM;
 863
 864	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 865	if (retval) {
 866		kfree(per_cpu(ici_cpuid4_info, cpu));
 867		per_cpu(ici_cpuid4_info, cpu) = NULL;
 868	}
 869
 870	return retval;
 871}
 872
 873#include <linux/kobject.h>
 874#include <linux/sysfs.h>
 875#include <linux/cpu.h>
 876
 877/* pointer to kobject for cpuX/cache */
 878static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
 879
 880struct _index_kobject {
 881	struct kobject kobj;
 882	unsigned int cpu;
 883	unsigned short index;
 884};
 885
 886/* pointer to array of kobjects for cpuX/cache/indexY */
 887static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
 888#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
 889
 890#define show_one_plus(file_name, object, val)				\
 891static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
 892				unsigned int cpu)			\
 893{									\
 894	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
 895}
 896
 897show_one_plus(level, base.eax.split.level, 0);
 898show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
 899show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
 900show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
 901show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
 902
 903static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
 904			 unsigned int cpu)
 905{
 906	return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
 907}
 908
 909static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 910					int type, char *buf)
 911{
 912	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
 913	int n = 0;
 914
 915	if (len > 1) {
 916		const struct cpumask *mask;
 917
 918		mask = to_cpumask(this_leaf->shared_cpu_map);
 919		n = type ?
 920			cpulist_scnprintf(buf, len-2, mask) :
 921			cpumask_scnprintf(buf, len-2, mask);
 922		buf[n++] = '\n';
 923		buf[n] = '\0';
 924	}
 925	return n;
 926}
 927
 928static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
 929					  unsigned int cpu)
 930{
 931	return show_shared_cpu_map_func(leaf, 0, buf);
 932}
 933
 934static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
 935					   unsigned int cpu)
 936{
 937	return show_shared_cpu_map_func(leaf, 1, buf);
 938}
 939
 940static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
 941			 unsigned int cpu)
 942{
 943	switch (this_leaf->base.eax.split.type) {
 944	case CACHE_TYPE_DATA:
 945		return sprintf(buf, "Data\n");
 946	case CACHE_TYPE_INST:
 947		return sprintf(buf, "Instruction\n");
 948	case CACHE_TYPE_UNIFIED:
 949		return sprintf(buf, "Unified\n");
 950	default:
 951		return sprintf(buf, "Unknown\n");
 952	}
 953}
 954
 955#define to_object(k)	container_of(k, struct _index_kobject, kobj)
 956#define to_attr(a)	container_of(a, struct _cache_attr, attr)
 957
 958#define define_one_ro(_name) \
 959static struct _cache_attr _name = \
 960	__ATTR(_name, 0444, show_##_name, NULL)
 961
 962define_one_ro(level);
 963define_one_ro(type);
 964define_one_ro(coherency_line_size);
 965define_one_ro(physical_line_partition);
 966define_one_ro(ways_of_associativity);
 967define_one_ro(number_of_sets);
 968define_one_ro(size);
 969define_one_ro(shared_cpu_map);
 970define_one_ro(shared_cpu_list);
 971
 972static struct attribute *default_attrs[] = {
 973	&type.attr,
 974	&level.attr,
 975	&coherency_line_size.attr,
 976	&physical_line_partition.attr,
 977	&ways_of_associativity.attr,
 978	&number_of_sets.attr,
 979	&size.attr,
 980	&shared_cpu_map.attr,
 981	&shared_cpu_list.attr,
 982	NULL
 983};
 984
 985#ifdef CONFIG_AMD_NB
 986static struct attribute ** __cpuinit amd_l3_attrs(void)
 987{
 988	static struct attribute **attrs;
 989	int n;
 990
 991	if (attrs)
 992		return attrs;
 993
 994	n = sizeof (default_attrs) / sizeof (struct attribute *);
 995
 996	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 997		n += 2;
 998
 999	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1000		n += 1;
1001
1002	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1003	if (attrs == NULL)
1004		return attrs = default_attrs;
1005
1006	for (n = 0; default_attrs[n]; n++)
1007		attrs[n] = default_attrs[n];
1008
1009	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1010		attrs[n++] = &cache_disable_0.attr;
1011		attrs[n++] = &cache_disable_1.attr;
1012	}
1013
1014	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1015		attrs[n++] = &subcaches.attr;
1016
1017	return attrs;
1018}
1019#endif
1020
1021static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1022{
1023	struct _cache_attr *fattr = to_attr(attr);
1024	struct _index_kobject *this_leaf = to_object(kobj);
1025	ssize_t ret;
1026
1027	ret = fattr->show ?
1028		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1029			buf, this_leaf->cpu) :
1030		0;
1031	return ret;
1032}
1033
1034static ssize_t store(struct kobject *kobj, struct attribute *attr,
1035		     const char *buf, size_t count)
1036{
1037	struct _cache_attr *fattr = to_attr(attr);
1038	struct _index_kobject *this_leaf = to_object(kobj);
1039	ssize_t ret;
1040
1041	ret = fattr->store ?
1042		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1043			buf, count, this_leaf->cpu) :
1044		0;
1045	return ret;
1046}
1047
1048static const struct sysfs_ops sysfs_ops = {
1049	.show   = show,
1050	.store  = store,
1051};
1052
1053static struct kobj_type ktype_cache = {
1054	.sysfs_ops	= &sysfs_ops,
1055	.default_attrs	= default_attrs,
1056};
1057
1058static struct kobj_type ktype_percpu_entry = {
1059	.sysfs_ops	= &sysfs_ops,
1060};
1061
1062static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1063{
1064	kfree(per_cpu(ici_cache_kobject, cpu));
1065	kfree(per_cpu(ici_index_kobject, cpu));
1066	per_cpu(ici_cache_kobject, cpu) = NULL;
1067	per_cpu(ici_index_kobject, cpu) = NULL;
1068	free_cache_attributes(cpu);
1069}
1070
1071static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
1072{
1073	int err;
1074
1075	if (num_cache_leaves == 0)
1076		return -ENOENT;
1077
1078	err = detect_cache_attributes(cpu);
1079	if (err)
1080		return err;
1081
1082	/* Allocate all required memory */
1083	per_cpu(ici_cache_kobject, cpu) =
1084		kzalloc(sizeof(struct kobject), GFP_KERNEL);
1085	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1086		goto err_out;
1087
1088	per_cpu(ici_index_kobject, cpu) = kzalloc(
1089	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1090	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1091		goto err_out;
1092
1093	return 0;
1094
1095err_out:
1096	cpuid4_cache_sysfs_exit(cpu);
1097	return -ENOMEM;
1098}
1099
1100static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1101
1102/* Add/Remove cache interface for CPU device */
1103static int __cpuinit cache_add_dev(struct device *dev)
1104{
1105	unsigned int cpu = dev->id;
1106	unsigned long i, j;
1107	struct _index_kobject *this_object;
1108	struct _cpuid4_info   *this_leaf;
1109	int retval;
1110
1111	retval = cpuid4_cache_sysfs_init(cpu);
1112	if (unlikely(retval < 0))
1113		return retval;
1114
1115	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1116				      &ktype_percpu_entry,
1117				      &dev->kobj, "%s", "cache");
1118	if (retval < 0) {
1119		cpuid4_cache_sysfs_exit(cpu);
1120		return retval;
1121	}
1122
1123	for (i = 0; i < num_cache_leaves; i++) {
1124		this_object = INDEX_KOBJECT_PTR(cpu, i);
1125		this_object->cpu = cpu;
1126		this_object->index = i;
1127
1128		this_leaf = CPUID4_INFO_IDX(cpu, i);
1129
1130		ktype_cache.default_attrs = default_attrs;
1131#ifdef CONFIG_AMD_NB
1132		if (this_leaf->base.nb)
1133			ktype_cache.default_attrs = amd_l3_attrs();
1134#endif
1135		retval = kobject_init_and_add(&(this_object->kobj),
1136					      &ktype_cache,
1137					      per_cpu(ici_cache_kobject, cpu),
1138					      "index%1lu", i);
1139		if (unlikely(retval)) {
1140			for (j = 0; j < i; j++)
1141				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1142			kobject_put(per_cpu(ici_cache_kobject, cpu));
1143			cpuid4_cache_sysfs_exit(cpu);
1144			return retval;
1145		}
1146		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1147	}
1148	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1149
1150	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1151	return 0;
1152}
1153
1154static void __cpuinit cache_remove_dev(struct device *dev)
1155{
1156	unsigned int cpu = dev->id;
1157	unsigned long i;
1158
1159	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1160		return;
1161	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1162		return;
1163	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1164
1165	for (i = 0; i < num_cache_leaves; i++)
1166		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1167	kobject_put(per_cpu(ici_cache_kobject, cpu));
1168	cpuid4_cache_sysfs_exit(cpu);
1169}
1170
1171static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1172					unsigned long action, void *hcpu)
1173{
1174	unsigned int cpu = (unsigned long)hcpu;
1175	struct device *dev;
1176
1177	dev = get_cpu_device(cpu);
1178	switch (action) {
1179	case CPU_ONLINE:
1180	case CPU_ONLINE_FROZEN:
1181		cache_add_dev(dev);
1182		break;
1183	case CPU_DEAD:
1184	case CPU_DEAD_FROZEN:
1185		cache_remove_dev(dev);
1186		break;
1187	}
1188	return NOTIFY_OK;
1189}
1190
1191static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1192	.notifier_call = cacheinfo_cpu_callback,
1193};
1194
1195static int __cpuinit cache_sysfs_init(void)
1196{
1197	int i;
1198
1199	if (num_cache_leaves == 0)
1200		return 0;
1201
1202	for_each_online_cpu(i) {
1203		int err;
1204		struct device *dev = get_cpu_device(i);
1205
1206		err = cache_add_dev(dev);
1207		if (err)
1208			return err;
1209	}
1210	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1211	return 0;
1212}
1213
1214device_initcall(cache_sysfs_init);
1215
1216#endif
v4.6
  1/*
  2 *	Routines to identify caches on Intel CPU.
  3 *
  4 *	Changes:
  5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  8 */
  9
 10#include <linux/slab.h>
 11#include <linux/cacheinfo.h>
 12#include <linux/cpu.h>
 13#include <linux/sched.h>
 14#include <linux/sysfs.h>
 15#include <linux/pci.h>
 16
 17#include <asm/cpufeature.h>
 18#include <asm/amd_nb.h>
 19#include <asm/smp.h>
 20
 21#define LVL_1_INST	1
 22#define LVL_1_DATA	2
 23#define LVL_2		3
 24#define LVL_3		4
 25#define LVL_TRACE	5
 26
 27struct _cache_table {
 28	unsigned char descriptor;
 29	char cache_type;
 30	short size;
 31};
 32
 33#define MB(x)	((x) * 1024)
 34
 35/* All the cache descriptor types we care about (no TLB or
 36   trace cache entries) */
 37
 38static const struct _cache_table cache_table[] =
 39{
 40	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 41	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
 42	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
 43	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 44	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
 45	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
 46	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
 47	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 48	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 49	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 50	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 51	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 52	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
 53	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
 54	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 55	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 56	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
 57	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 58	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 59	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 60	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
 61	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
 62	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
 63	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
 64	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
 65	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
 66	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
 67	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
 68	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
 69	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 70	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
 71	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 72	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
 73	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
 74	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
 75	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 76	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 77	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 78	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 79	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 80	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 81	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 82	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 83	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 84	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 85	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 86	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 87	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 88	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 89	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 90	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 91	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 92	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 93	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 94	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 95	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 96	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 97	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 98	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 99	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
100	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
101	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
102	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
103	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
104	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
105	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
106	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
107	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
108	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
109	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
110	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
111	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
112	{ 0x00, 0, 0}
113};
114
115
116enum _cache_type {
117	CTYPE_NULL = 0,
118	CTYPE_DATA = 1,
119	CTYPE_INST = 2,
120	CTYPE_UNIFIED = 3
121};
122
123union _cpuid4_leaf_eax {
124	struct {
125		enum _cache_type	type:5;
126		unsigned int		level:3;
127		unsigned int		is_self_initializing:1;
128		unsigned int		is_fully_associative:1;
129		unsigned int		reserved:4;
130		unsigned int		num_threads_sharing:12;
131		unsigned int		num_cores_on_die:6;
132	} split;
133	u32 full;
134};
135
136union _cpuid4_leaf_ebx {
137	struct {
138		unsigned int		coherency_line_size:12;
139		unsigned int		physical_line_partition:10;
140		unsigned int		ways_of_associativity:10;
141	} split;
142	u32 full;
143};
144
145union _cpuid4_leaf_ecx {
146	struct {
147		unsigned int		number_of_sets:32;
148	} split;
149	u32 full;
150};
151
152struct _cpuid4_info_regs {
153	union _cpuid4_leaf_eax eax;
154	union _cpuid4_leaf_ebx ebx;
155	union _cpuid4_leaf_ecx ecx;
156	unsigned long size;
157	struct amd_northbridge *nb;
158};
159
160static unsigned short num_cache_leaves;
161
162/* AMD doesn't have CPUID4. Emulate it here to report the same
163   information to the user.  This makes some assumptions about the machine:
164   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
165
166   In theory the TLBs could be reported as fake type (they are in "dummy").
167   Maybe later */
168union l1_cache {
169	struct {
170		unsigned line_size:8;
171		unsigned lines_per_tag:8;
172		unsigned assoc:8;
173		unsigned size_in_kb:8;
174	};
175	unsigned val;
176};
177
178union l2_cache {
179	struct {
180		unsigned line_size:8;
181		unsigned lines_per_tag:4;
182		unsigned assoc:4;
183		unsigned size_in_kb:16;
184	};
185	unsigned val;
186};
187
188union l3_cache {
189	struct {
190		unsigned line_size:8;
191		unsigned lines_per_tag:4;
192		unsigned assoc:4;
193		unsigned res:2;
194		unsigned size_encoded:14;
195	};
196	unsigned val;
197};
198
199static const unsigned short assocs[] = {
200	[1] = 1,
201	[2] = 2,
202	[4] = 4,
203	[6] = 8,
204	[8] = 16,
205	[0xa] = 32,
206	[0xb] = 48,
207	[0xc] = 64,
208	[0xd] = 96,
209	[0xe] = 128,
210	[0xf] = 0xffff /* fully associative - no way to show this currently */
211};
212
213static const unsigned char levels[] = { 1, 1, 2, 3 };
214static const unsigned char types[] = { 1, 2, 3, 3 };
215
216static const enum cache_type cache_type_map[] = {
217	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
218	[CTYPE_DATA] = CACHE_TYPE_DATA,
219	[CTYPE_INST] = CACHE_TYPE_INST,
220	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
221};
222
223static void
224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
225		     union _cpuid4_leaf_ebx *ebx,
226		     union _cpuid4_leaf_ecx *ecx)
227{
228	unsigned dummy;
229	unsigned line_size, lines_per_tag, assoc, size_in_kb;
230	union l1_cache l1i, l1d;
231	union l2_cache l2;
232	union l3_cache l3;
233	union l1_cache *l1 = &l1d;
234
235	eax->full = 0;
236	ebx->full = 0;
237	ecx->full = 0;
238
239	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
240	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
241
242	switch (leaf) {
243	case 1:
244		l1 = &l1i;
245	case 0:
246		if (!l1->val)
247			return;
248		assoc = assocs[l1->assoc];
249		line_size = l1->line_size;
250		lines_per_tag = l1->lines_per_tag;
251		size_in_kb = l1->size_in_kb;
252		break;
253	case 2:
254		if (!l2.val)
255			return;
256		assoc = assocs[l2.assoc];
257		line_size = l2.line_size;
258		lines_per_tag = l2.lines_per_tag;
259		/* cpu_data has errata corrections for K7 applied */
260		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
261		break;
262	case 3:
263		if (!l3.val)
264			return;
265		assoc = assocs[l3.assoc];
266		line_size = l3.line_size;
267		lines_per_tag = l3.lines_per_tag;
268		size_in_kb = l3.size_encoded * 512;
269		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
270			size_in_kb = size_in_kb >> 1;
271			assoc = assoc >> 1;
272		}
273		break;
274	default:
275		return;
276	}
277
278	eax->split.is_self_initializing = 1;
279	eax->split.type = types[leaf];
280	eax->split.level = levels[leaf];
281	eax->split.num_threads_sharing = 0;
282	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
283
284
285	if (assoc == 0xffff)
286		eax->split.is_fully_associative = 1;
287	ebx->split.coherency_line_size = line_size - 1;
288	ebx->split.ways_of_associativity = assoc - 1;
289	ebx->split.physical_line_partition = lines_per_tag - 1;
290	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
291		(ebx->split.ways_of_associativity + 1) - 1;
292}
293
294#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
295
296/*
297 * L3 cache descriptors
298 */
299static void amd_calc_l3_indices(struct amd_northbridge *nb)
300{
301	struct amd_l3_cache *l3 = &nb->l3_cache;
302	unsigned int sc0, sc1, sc2, sc3;
303	u32 val = 0;
304
305	pci_read_config_dword(nb->misc, 0x1C4, &val);
306
307	/* calculate subcache sizes */
308	l3->subcaches[0] = sc0 = !(val & BIT(0));
309	l3->subcaches[1] = sc1 = !(val & BIT(4));
310
311	if (boot_cpu_data.x86 == 0x15) {
312		l3->subcaches[0] = sc0 += !(val & BIT(1));
313		l3->subcaches[1] = sc1 += !(val & BIT(5));
314	}
315
316	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
317	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
318
319	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
320}
321
322/*
323 * check whether a slot used for disabling an L3 index is occupied.
324 * @l3: L3 cache descriptor
325 * @slot: slot number (0..1)
326 *
327 * @returns: the disabled index if used or negative value if slot free.
328 */
329static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
330{
331	unsigned int reg = 0;
332
333	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
334
335	/* check whether this slot is activated already */
336	if (reg & (3UL << 30))
337		return reg & 0xfff;
338
339	return -1;
340}
341
342static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
343				  unsigned int slot)
344{
345	int index;
346	struct amd_northbridge *nb = this_leaf->priv;
347
348	index = amd_get_l3_disable_slot(nb, slot);
349	if (index >= 0)
350		return sprintf(buf, "%d\n", index);
351
352	return sprintf(buf, "FREE\n");
353}
354
355#define SHOW_CACHE_DISABLE(slot)					\
356static ssize_t								\
357cache_disable_##slot##_show(struct device *dev,				\
358			    struct device_attribute *attr, char *buf)	\
359{									\
360	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
361	return show_cache_disable(this_leaf, buf, slot);		\
362}
363SHOW_CACHE_DISABLE(0)
364SHOW_CACHE_DISABLE(1)
365
366static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
367				 unsigned slot, unsigned long idx)
368{
369	int i;
370
371	idx |= BIT(30);
372
373	/*
374	 *  disable index in all 4 subcaches
375	 */
376	for (i = 0; i < 4; i++) {
377		u32 reg = idx | (i << 20);
378
379		if (!nb->l3_cache.subcaches[i])
380			continue;
381
382		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
383
384		/*
385		 * We need to WBINVD on a core on the node containing the L3
386		 * cache which indices we disable therefore a simple wbinvd()
387		 * is not sufficient.
388		 */
389		wbinvd_on_cpu(cpu);
390
391		reg |= BIT(31);
392		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
393	}
394}
395
396/*
397 * disable a L3 cache index by using a disable-slot
398 *
399 * @l3:    L3 cache descriptor
400 * @cpu:   A CPU on the node containing the L3 cache
401 * @slot:  slot number (0..1)
402 * @index: index to disable
403 *
404 * @return: 0 on success, error status on failure
405 */
406static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
407			    unsigned slot, unsigned long index)
408{
409	int ret = 0;
410
411	/*  check if @slot is already used or the index is already disabled */
412	ret = amd_get_l3_disable_slot(nb, slot);
413	if (ret >= 0)
414		return -EEXIST;
415
416	if (index > nb->l3_cache.indices)
417		return -EINVAL;
418
419	/* check whether the other slot has disabled the same index already */
420	if (index == amd_get_l3_disable_slot(nb, !slot))
421		return -EEXIST;
422
423	amd_l3_disable_index(nb, cpu, slot, index);
424
425	return 0;
426}
427
428static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
429				   const char *buf, size_t count,
430				   unsigned int slot)
431{
432	unsigned long val = 0;
433	int cpu, err = 0;
434	struct amd_northbridge *nb = this_leaf->priv;
435
436	if (!capable(CAP_SYS_ADMIN))
437		return -EPERM;
438
439	cpu = cpumask_first(&this_leaf->shared_cpu_map);
440
441	if (kstrtoul(buf, 10, &val) < 0)
442		return -EINVAL;
443
444	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
445	if (err) {
446		if (err == -EEXIST)
447			pr_warn("L3 slot %d in use/index already disabled!\n",
448				   slot);
449		return err;
450	}
451	return count;
452}
453
454#define STORE_CACHE_DISABLE(slot)					\
455static ssize_t								\
456cache_disable_##slot##_store(struct device *dev,			\
457			     struct device_attribute *attr,		\
458			     const char *buf, size_t count)		\
459{									\
460	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
461	return store_cache_disable(this_leaf, buf, count, slot);	\
462}
463STORE_CACHE_DISABLE(0)
464STORE_CACHE_DISABLE(1)
465
466static ssize_t subcaches_show(struct device *dev,
467			      struct device_attribute *attr, char *buf)
468{
469	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
470	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
471
472	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
473}
474
475static ssize_t subcaches_store(struct device *dev,
476			       struct device_attribute *attr,
477			       const char *buf, size_t count)
478{
479	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
480	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
481	unsigned long val;
482
483	if (!capable(CAP_SYS_ADMIN))
484		return -EPERM;
485
486	if (kstrtoul(buf, 16, &val) < 0)
487		return -EINVAL;
488
489	if (amd_set_subcaches(cpu, val))
490		return -EINVAL;
491
492	return count;
493}
494
495static DEVICE_ATTR_RW(cache_disable_0);
496static DEVICE_ATTR_RW(cache_disable_1);
497static DEVICE_ATTR_RW(subcaches);
498
499static umode_t
500cache_private_attrs_is_visible(struct kobject *kobj,
501			       struct attribute *attr, int unused)
502{
503	struct device *dev = kobj_to_dev(kobj);
504	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
505	umode_t mode = attr->mode;
506
507	if (!this_leaf->priv)
508		return 0;
509
510	if ((attr == &dev_attr_subcaches.attr) &&
511	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
512		return mode;
513
514	if ((attr == &dev_attr_cache_disable_0.attr ||
515	     attr == &dev_attr_cache_disable_1.attr) &&
516	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
517		return mode;
518
519	return 0;
520}
521
522static struct attribute_group cache_private_group = {
523	.is_visible = cache_private_attrs_is_visible,
524};
525
526static void init_amd_l3_attrs(void)
527{
528	int n = 1;
529	static struct attribute **amd_l3_attrs;
530
531	if (amd_l3_attrs) /* already initialized */
532		return;
533
534	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
535		n += 2;
536	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
537		n += 1;
538
539	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
540	if (!amd_l3_attrs)
541		return;
542
543	n = 0;
544	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
545		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
546		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
547	}
548	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
549		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
550
551	cache_private_group.attrs = amd_l3_attrs;
552}
553
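/*
 * Presumably overrides the weak default cache_get_priv_group() in
 * drivers/base/cacheinfo.c, so that the AMD-specific L3 attributes above are
 * attached to the common per-leaf sysfs nodes.
 */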
554const struct attribute_group *
555cache_get_priv_group(struct cacheinfo *this_leaf)
556{
557	struct amd_northbridge *nb = this_leaf->priv;
558
559	if (this_leaf->level < 3 || !nb)
560		return NULL;
561
562	if (nb && nb->l3_cache.indices)
563		init_amd_l3_attrs();
564
565	return &cache_private_group;
566}
567
568static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
569{
570	int node;
571
572	/* only for L3, and not in virtualized environments */
573	if (index < 3)
574		return;
575
576	node = amd_get_nb_id(smp_processor_id());
577	this_leaf->nb = node_to_amd_nb(node);
578	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
579		amd_calc_l3_indices(this_leaf->nb);
580}
581#else
582#define amd_init_l3_cache(x, y)
583#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
584
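/*
 * Read the raw registers for one cache leaf: CPUID 0x8000001d or the
 * CPUID4 emulation on AMD, CPUID leaf 4 otherwise, and derive the cache
 * size from sets, line size, partitions and associativity.
 */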
585static int
586cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
587{
588	union _cpuid4_leaf_eax	eax;
589	union _cpuid4_leaf_ebx	ebx;
590	union _cpuid4_leaf_ecx	ecx;
591	unsigned		edx;
592
593	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
594		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
595			cpuid_count(0x8000001d, index, &eax.full,
596				    &ebx.full, &ecx.full, &edx);
597		else
598			amd_cpuid4(index, &eax, &ebx, &ecx);
599		amd_init_l3_cache(this_leaf, index);
600	} else {
601		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
602	}
603
604	if (eax.split.type == CTYPE_NULL)
605		return -EIO; /* better error ? */
606
607	this_leaf->eax = eax;
608	this_leaf->ebx = ebx;
609	this_leaf->ecx = ecx;
610	this_leaf->size = (ecx.split.number_of_sets          + 1) *
611			  (ebx.split.coherency_line_size     + 1) *
612			  (ebx.split.physical_line_partition + 1) *
613			  (ebx.split.ways_of_associativity   + 1);
614	return 0;
615}
616
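/*
 * Count the cache leaves by walking the deterministic cache parameters
 * leaf until it reports a NULL cache type.
 */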
617static int find_num_cache_leaves(struct cpuinfo_x86 *c)
618{
619	unsigned int		eax, ebx, ecx, edx, op;
620	union _cpuid4_leaf_eax	cache_eax;
621	int 			i = -1;
622
623	if (c->x86_vendor == X86_VENDOR_AMD)
624		op = 0x8000001d;
625	else
626		op = 4;
627
628	do {
629		++i;
630		/* Do cpuid(op) loop to find out num_cache_leaves */
631		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
632		cache_eax.full = eax;
633	} while (cache_eax.split.type != CTYPE_NULL);
634	return i;
635}
636
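/*
 * On AMD, use topology extensions when available; otherwise assume four
 * leaves if CPUID 0x80000006 reports an L3 cache, three if not.
 */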
637void init_amd_cacheinfo(struct cpuinfo_x86 *c)
638{
639
640	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
641		num_cache_leaves = find_num_cache_leaves(c);
642	} else if (c->extended_cpuid_level >= 0x80000006) {
643		if (cpuid_edx(0x80000006) & 0xf000)
644			num_cache_leaves = 4;
645		else
646			num_cache_leaves = 3;
647	}
648}
649
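/*
 * Determine L1/L2/L3 cache sizes, preferring the deterministic cpuid(4)
 * leaves and falling back to the legacy cpuid(2) descriptors, and record
 * the last level cache ID for this CPU. Returns the L2 size in KB.
 */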
650unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
651{
652	/* Cache sizes */
653	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
654	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
655	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
656	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
657#ifdef CONFIG_SMP
658	unsigned int cpu = c->cpu_index;
659#endif
660
661	if (c->cpuid_level > 3) {
662		static int is_initialized;
663
664		if (is_initialized == 0) {
665			/* Init num_cache_leaves from boot CPU */
666			num_cache_leaves = find_num_cache_leaves(c);
667			is_initialized++;
668		}
669
670		/*
671		 * Whenever possible use cpuid(4), the deterministic cache
672		 * parameters leaf, to find the cache details.
673		 */
674		for (i = 0; i < num_cache_leaves; i++) {
675			struct _cpuid4_info_regs this_leaf = {};
676			int retval;
677
678			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
679			if (retval < 0)
680				continue;
681
682			switch (this_leaf.eax.split.level) {
683			case 1:
684				if (this_leaf.eax.split.type == CTYPE_DATA)
685					new_l1d = this_leaf.size/1024;
686				else if (this_leaf.eax.split.type == CTYPE_INST)
687					new_l1i = this_leaf.size/1024;
688				break;
689			case 2:
690				new_l2 = this_leaf.size/1024;
691				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
692				index_msb = get_count_order(num_threads_sharing);
693				l2_id = c->apicid & ~((1 << index_msb) - 1);
694				break;
695			case 3:
696				new_l3 = this_leaf.size/1024;
697				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
698				index_msb = get_count_order(num_threads_sharing);
699				l3_id = c->apicid & ~((1 << index_msb) - 1);
700				break;
701			default:
702				break;
703			}
704		}
705	}
706	/*
707	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
708	 * trace cache
709	 */
710	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
711		/* supports eax=2 call */
712		int j, n;
713		unsigned int regs[4];
714		unsigned char *dp = (unsigned char *)regs;
715		int only_trace = 0;
716
717		if (num_cache_leaves != 0 && c->x86 == 15)
718			only_trace = 1;
719
720		/* Number of times to iterate */
721		n = cpuid_eax(2) & 0xFF;
722
723		for (i = 0 ; i < n ; i++) {
724			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
725
726			/* If bit 31 is set, this is an unknown format */
727			for (j = 0 ; j < 3 ; j++)
728				if (regs[j] & (1 << 31))
729					regs[j] = 0;
730
731			/* Byte 0 is level count, not a descriptor */
732			for (j = 1 ; j < 16 ; j++) {
733				unsigned char des = dp[j];
734				unsigned char k = 0;
735
736				/* look up this descriptor in the table */
737				while (cache_table[k].descriptor != 0) {
738					if (cache_table[k].descriptor == des) {
739						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
740							break;
741						switch (cache_table[k].cache_type) {
742						case LVL_1_INST:
743							l1i += cache_table[k].size;
744							break;
745						case LVL_1_DATA:
746							l1d += cache_table[k].size;
747							break;
748						case LVL_2:
749							l2 += cache_table[k].size;
750							break;
751						case LVL_3:
752							l3 += cache_table[k].size;
753							break;
754						case LVL_TRACE:
755							trace += cache_table[k].size;
756							break;
757						}
758
759						break;
760					}
761
762					k++;
763				}
764			}
765		}
766	}
767
768	if (new_l1d)
769		l1d = new_l1d;
770
771	if (new_l1i)
772		l1i = new_l1i;
773
774	if (new_l2) {
775		l2 = new_l2;
776#ifdef CONFIG_SMP
777		per_cpu(cpu_llc_id, cpu) = l2_id;
778#endif
779	}
780
781	if (new_l3) {
782		l3 = new_l3;
783#ifdef CONFIG_SMP
784		per_cpu(cpu_llc_id, cpu) = l3_id;
785#endif
786	}
787
788#ifdef CONFIG_SMP
789	/*
790	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
791	 * turns means that the only possibility is SMT (as indicated in
792	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
793	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
794	 * c->phys_proc_id.
795	 */
796	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
797		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
798#endif
799
800	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
801
802	return l2;
803}
804
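/*
 * AMD sharing map setup: with topology extensions, CPUs whose APIC IDs
 * fall in the same num_threads_sharing-sized window share the leaf;
 * without them, only L3 (index 3) is shared, per cpu_llc_shared_mask().
 * Returns 1 if the map was handled here, 0 for the generic fallback.
 */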
805static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
806				    struct _cpuid4_info_regs *base)
807{
808	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
809	struct cacheinfo *this_leaf;
810	int i, sibling;
811
812	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
813		unsigned int apicid, nshared, first, last;
814
815		this_leaf = this_cpu_ci->info_list + index;
816		nshared = base->eax.split.num_threads_sharing + 1;
817		apicid = cpu_data(cpu).apicid;
818		first = apicid - (apicid % nshared);
819		last = first + nshared - 1;
820
821		for_each_online_cpu(i) {
822			this_cpu_ci = get_cpu_cacheinfo(i);
823			if (!this_cpu_ci->info_list)
824				continue;
825
826			apicid = cpu_data(i).apicid;
827			if ((apicid < first) || (apicid > last))
828				continue;
829
830			this_leaf = this_cpu_ci->info_list + index;
831
832			for_each_online_cpu(sibling) {
833				apicid = cpu_data(sibling).apicid;
834				if ((apicid < first) || (apicid > last))
835					continue;
836				cpumask_set_cpu(sibling,
837						&this_leaf->shared_cpu_map);
838			}
839		}
840	} else if (index == 3) {
841		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
842			this_cpu_ci = get_cpu_cacheinfo(i);
843			if (!this_cpu_ci->info_list)
844				continue;
845			this_leaf = this_cpu_ci->info_list + index;
846			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
847				if (!cpu_online(sibling))
848					continue;
849				cpumask_set_cpu(sibling,
850						&this_leaf->shared_cpu_map);
851			}
852		}
853	} else
854		return 0;
855
856	return 1;
857}
858
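/*
 * Generic sharing map setup: try the AMD path first, otherwise mark all
 * online CPUs whose APIC IDs match above the num_threads_sharing boundary
 * as sharing this leaf.
 */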
859static void __cache_cpumap_setup(unsigned int cpu, int index,
860				 struct _cpuid4_info_regs *base)
861{
862	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
863	struct cacheinfo *this_leaf, *sibling_leaf;
864	unsigned long num_threads_sharing;
865	int index_msb, i;
866	struct cpuinfo_x86 *c = &cpu_data(cpu);
867
868	if (c->x86_vendor == X86_VENDOR_AMD) {
869		if (__cache_amd_cpumap_setup(cpu, index, base))
870			return;
871	}
872
873	this_leaf = this_cpu_ci->info_list + index;
874	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
875
876	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
877	if (num_threads_sharing == 1)
878		return;
879
880	index_msb = get_count_order(num_threads_sharing);
881
882	for_each_online_cpu(i)
883		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
884			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
885
886			if (i == cpu || !sib_cpu_ci->info_list)
887			continue; /* skip itself or CPUs with no cacheinfo */
888			sibling_leaf = sib_cpu_ci->info_list + index;
889			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
890			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
891		}
892}
893
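/* Translate the raw CPUID registers into generic cacheinfo fields. */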
894static void ci_leaf_init(struct cacheinfo *this_leaf,
895			 struct _cpuid4_info_regs *base)
896{
897	this_leaf->level = base->eax.split.level;
898	this_leaf->type = cache_type_map[base->eax.split.type];
899	this_leaf->coherency_line_size =
900				base->ebx.split.coherency_line_size + 1;
901	this_leaf->ways_of_associativity =
902				base->ebx.split.ways_of_associativity + 1;
903	this_leaf->size = base->size;
904	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
905	this_leaf->physical_line_partition =
906				base->ebx.split.physical_line_partition + 1;
907	this_leaf->priv = base->nb;
908}
909
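/* Report the number of cache levels and leaves to the cacheinfo core. */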
910static int __init_cache_level(unsigned int cpu)
911{
912	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
913
914	if (!num_cache_leaves)
915		return -ENOENT;
916	if (!this_cpu_ci)
917		return -EINVAL;
918	this_cpu_ci->num_levels = 3;
919	this_cpu_ci->num_leaves = num_cache_leaves;
920	return 0;
921}
922
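/*
 * Fill each cacheinfo leaf of this CPU from CPUID and set up its shared
 * CPU map.
 */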
923static int __populate_cache_leaves(unsigned int cpu)
924{
925	unsigned int idx, ret;
926	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
927	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
928	struct _cpuid4_info_regs id4_regs = {};
929
930	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
931		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
932		if (ret)
933			return ret;
934		ci_leaf_init(this_leaf++, &id4_regs);
935		__cache_cpumap_setup(cpu, idx, &id4_regs);
936	}
937	return 0;
938}
939
940DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
941DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)