v3.1
   1/*
   2 *	Routines to identify caches on Intel CPU.
   3 *
   4 *	Changes:
   5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   8 */
   9
  10#include <linux/init.h>
  11#include <linux/slab.h>
  12#include <linux/device.h>
  13#include <linux/compiler.h>
  14#include <linux/cpu.h>
  15#include <linux/sched.h>
  16#include <linux/pci.h>
  17
  18#include <asm/processor.h>
  19#include <linux/smp.h>
  20#include <asm/amd_nb.h>
  21#include <asm/smp.h>
  22
  23#define LVL_1_INST	1
  24#define LVL_1_DATA	2
  25#define LVL_2		3
  26#define LVL_3		4
  27#define LVL_TRACE	5
  28
  29struct _cache_table {
  30	unsigned char descriptor;
  31	char cache_type;
  32	short size;
  33};
  34
  35#define MB(x)	((x) * 1024)
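Note: the sizes stored in cache_table below are in kilobytes, so MB(x) yields a KB count; for example MB(4) expands to 4 * 1024 = 4096, i.e. a 4 MB cache expressed in KB.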
  36
  37/* All the cache descriptor types we care about (no TLB or
  38   trace cache entries) */
  39
  40static const struct _cache_table __cpuinitconst cache_table[] =
  41{
  42	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  43	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  44	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  45	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  46	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  47	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  48	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  49	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  50	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  51	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  52	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  53	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  54	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  55	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  56	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  57	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  58	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  59	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  60	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  61	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  62	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  63	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  64	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  65	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  66	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  67	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  68	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  69	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  70	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  71	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  72	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  73	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  74	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  75	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  76	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  77	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  79	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  80	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  81	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
  82	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
  83	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
  84	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
  85	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
  86	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  87	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  88	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  89	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  90	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
  91	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
  92	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
  93	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
  94	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
  95	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
  96	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
  97	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
  98	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
  99	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 100	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 101	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 102	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 103	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 104	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 105	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 106	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 107	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 108	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 109	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 110	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 111	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 112	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 113	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 114	{ 0x00, 0, 0}
 115};
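Illustrative sketch only (not part of the kernel source): a descriptor byte returned by CPUID leaf 2 is resolved against cache_table roughly like this, mirroring the lookup loop in init_intel_cacheinfo() further down:

static int lookup_descriptor(unsigned char des, char *type, short *size_kb)
{
	int k;

	/* the table above is terminated by the { 0x00, 0, 0 } sentinel */
	for (k = 0; cache_table[k].descriptor != 0; k++) {
		if (cache_table[k].descriptor != des)
			continue;
		*type    = cache_table[k].cache_type;	/* LVL_* constant */
		*size_kb = cache_table[k].size;		/* size in KB */
		return 0;
	}
	return -1;	/* unknown descriptor */
}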
 116
 117
 118enum _cache_type {
 119	CACHE_TYPE_NULL	= 0,
 120	CACHE_TYPE_DATA = 1,
 121	CACHE_TYPE_INST = 2,
 122	CACHE_TYPE_UNIFIED = 3
 123};
 124
 125union _cpuid4_leaf_eax {
 126	struct {
 127		enum _cache_type	type:5;
 128		unsigned int		level:3;
 129		unsigned int		is_self_initializing:1;
 130		unsigned int		is_fully_associative:1;
 131		unsigned int		reserved:4;
 132		unsigned int		num_threads_sharing:12;
 133		unsigned int		num_cores_on_die:6;
 134	} split;
 135	u32 full;
 136};
 137
 138union _cpuid4_leaf_ebx {
 139	struct {
 140		unsigned int		coherency_line_size:12;
 141		unsigned int		physical_line_partition:10;
 142		unsigned int		ways_of_associativity:10;
 143	} split;
 144	u32 full;
 145};
 146
 147union _cpuid4_leaf_ecx {
 148	struct {
 149		unsigned int		number_of_sets:32;
 150	} split;
 151	u32 full;
 152};
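Worked example of how these fields combine (each is stored minus one; hypothetical cache): a 32 KB, 8-way L1 data cache with 64-byte lines and one line per partition is reported by cpuid(4) as number_of_sets = 63, coherency_line_size = 63, physical_line_partition = 0 and ways_of_associativity = 7, so the size recovered by cpuid4_cache_lookup_regs() below is (63 + 1) * (63 + 1) * (0 + 1) * (7 + 1) = 64 * 64 * 1 * 8 = 32768 bytes = 32 KB.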
 153
 154struct amd_l3_cache {
 155	struct	 amd_northbridge *nb;
 156	unsigned indices;
 157	u8	 subcaches[4];
 158};
 159
 160struct _cpuid4_info {
 161	union _cpuid4_leaf_eax eax;
 162	union _cpuid4_leaf_ebx ebx;
 163	union _cpuid4_leaf_ecx ecx;
 164	unsigned long size;
 165	struct amd_l3_cache *l3;
 166	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 167};
 168
 169/* subset of above _cpuid4_info w/o shared_cpu_map */
 170struct _cpuid4_info_regs {
 171	union _cpuid4_leaf_eax eax;
 172	union _cpuid4_leaf_ebx ebx;
 173	union _cpuid4_leaf_ecx ecx;
 174	unsigned long size;
 175	struct amd_l3_cache *l3;
 176};
 177
 178unsigned short			num_cache_leaves;
 179
 180/* AMD doesn't have CPUID4. Emulate it here to report the same
 181   information to the user.  This makes some assumptions about the machine:
 182   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 183
 184   In theory the TLBs could be reported as fake type (they are in "dummy").
 185   Maybe later */
 186union l1_cache {
 187	struct {
 188		unsigned line_size:8;
 189		unsigned lines_per_tag:8;
 190		unsigned assoc:8;
 191		unsigned size_in_kb:8;
 192	};
 193	unsigned val;
 194};
 195
 196union l2_cache {
 197	struct {
 198		unsigned line_size:8;
 199		unsigned lines_per_tag:4;
 200		unsigned assoc:4;
 201		unsigned size_in_kb:16;
 202	};
 203	unsigned val;
 204};
 205
 206union l3_cache {
 207	struct {
 208		unsigned line_size:8;
 209		unsigned lines_per_tag:4;
 210		unsigned assoc:4;
 211		unsigned res:2;
 212		unsigned size_encoded:14;
 213	};
 214	unsigned val;
 215};
 216
 217static const unsigned short __cpuinitconst assocs[] = {
 218	[1] = 1,
 219	[2] = 2,
 220	[4] = 4,
 221	[6] = 8,
 222	[8] = 16,
 223	[0xa] = 32,
 224	[0xb] = 48,
 225	[0xc] = 64,
 226	[0xd] = 96,
 227	[0xe] = 128,
 228	[0xf] = 0xffff /* fully associative - no way to show this currently */
 229};
 230
 231static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
 232static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
 233
 234static void __cpuinit
 235amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 236		     union _cpuid4_leaf_ebx *ebx,
 237		     union _cpuid4_leaf_ecx *ecx)
 238{
 239	unsigned dummy;
 240	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 241	union l1_cache l1i, l1d;
 242	union l2_cache l2;
 243	union l3_cache l3;
 244	union l1_cache *l1 = &l1d;
 245
 246	eax->full = 0;
 247	ebx->full = 0;
 248	ecx->full = 0;
 249
 250	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 251	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 252
 253	switch (leaf) {
 254	case 1:
 255		l1 = &l1i;
 256	case 0:
 257		if (!l1->val)
 258			return;
 259		assoc = assocs[l1->assoc];
 260		line_size = l1->line_size;
 261		lines_per_tag = l1->lines_per_tag;
 262		size_in_kb = l1->size_in_kb;
 263		break;
 264	case 2:
 265		if (!l2.val)
 266			return;
 267		assoc = assocs[l2.assoc];
 268		line_size = l2.line_size;
 269		lines_per_tag = l2.lines_per_tag;
 270		/* cpu_data has errata corrections for K7 applied */
 271		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 272		break;
 273	case 3:
 274		if (!l3.val)
 275			return;
 276		assoc = assocs[l3.assoc];
 277		line_size = l3.line_size;
 278		lines_per_tag = l3.lines_per_tag;
 279		size_in_kb = l3.size_encoded * 512;
 280		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 281			size_in_kb = size_in_kb >> 1;
 282			assoc = assoc >> 1;
 283		}
 284		break;
 285	default:
 286		return;
 287	}
 288
 289	eax->split.is_self_initializing = 1;
 290	eax->split.type = types[leaf];
 291	eax->split.level = levels[leaf];
 292	eax->split.num_threads_sharing = 0;
 293	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 294
 295
 296	if (assoc == 0xffff)
 297		eax->split.is_fully_associative = 1;
 298	ebx->split.coherency_line_size = line_size - 1;
 299	ebx->split.ways_of_associativity = assoc - 1;
 300	ebx->split.physical_line_partition = lines_per_tag - 1;
 301	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 302		(ebx->split.ways_of_associativity + 1) - 1;
 303}
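As an illustration of the emulation above (hypothetical numbers): for a 512 KB, 16-way L2 with 64-byte lines it would report coherency_line_size = 63, ways_of_associativity = 15 and number_of_sets = (512 * 1024) / 64 / 16 - 1 = 511, i.e. 512 sets, which the cpuid(4) size formula multiplies back out to 512 * 64 * 16 bytes = 512 KB.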
 304
 305struct _cache_attr {
 306	struct attribute attr;
 307	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
 308	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
 309			 unsigned int);
 310};
 311
 312#ifdef CONFIG_AMD_NB
 313
 314/*
 315 * L3 cache descriptors
 316 */
 317static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 318{
 319	unsigned int sc0, sc1, sc2, sc3;
 320	u32 val = 0;
 321
 322	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
 323
 324	/* calculate subcache sizes */
 325	l3->subcaches[0] = sc0 = !(val & BIT(0));
 326	l3->subcaches[1] = sc1 = !(val & BIT(4));
 327	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 328	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 329
 330	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 331}
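For example, if no subcache is flagged as disabled in register 0x1C4 (every tested bit clear), the code above yields sc0 = sc1 = 1, sc2 = sc3 = 2 and l3->indices = (2 << 10) - 1 = 2047.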
 332
 333static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
 334					int index)
 335{
 336	static struct amd_l3_cache *__cpuinitdata l3_caches;
 337	int node;
 338
 339	/* only for L3, and not in virtualized environments */
 340	if (index < 3 || amd_nb_num() == 0)
 341		return;
 342
 343	/*
 344	 * Strictly speaking, the amount in @size below is leaked since it is
 345	 * never freed but this is done only on shutdown so it doesn't matter.
 346	 */
 347	if (!l3_caches) {
 348		int size = amd_nb_num() * sizeof(struct amd_l3_cache);
 349
 350		l3_caches = kzalloc(size, GFP_ATOMIC);
 351		if (!l3_caches)
 352			return;
 353	}
 354
 355	node = amd_get_nb_id(smp_processor_id());
 356
 357	if (!l3_caches[node].nb) {
 358		l3_caches[node].nb = node_to_amd_nb(node);
 359		amd_calc_l3_indices(&l3_caches[node]);
 360	}
 361
 362	this_leaf->l3 = &l3_caches[node];
 363}
 364
 365/*
 366 * check whether a slot used for disabling an L3 index is occupied.
 367 * @l3: L3 cache descriptor
 368 * @slot: slot number (0..1)
 369 *
 370 * @returns: the disabled index if used or negative value if slot free.
 371 */
 372int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
 373{
 374	unsigned int reg = 0;
 375
 376	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
 377
 378	/* check whether this slot is activated already */
 379	if (reg & (3UL << 30))
 380		return reg & 0xfff;
 381
 382	return -1;
 383}
 384
 385static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 386				  unsigned int slot)
 387{
 388	int index;
 389
 390	if (!this_leaf->l3 ||
 391	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 392		return -EINVAL;
 393
 394	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
 395	if (index >= 0)
 396		return sprintf(buf, "%d\n", index);
 397
 398	return sprintf(buf, "FREE\n");
 399}
 400
 401#define SHOW_CACHE_DISABLE(slot)					\
 402static ssize_t								\
 403show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
 404			  unsigned int cpu)				\
 405{									\
 406	return show_cache_disable(this_leaf, buf, slot);		\
 407}
 408SHOW_CACHE_DISABLE(0)
 409SHOW_CACHE_DISABLE(1)
 410
 411static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
 412				 unsigned slot, unsigned long idx)
 413{
 414	int i;
 415
 416	idx |= BIT(30);
 417
 418	/*
 419	 *  disable index in all 4 subcaches
 420	 */
 421	for (i = 0; i < 4; i++) {
 422		u32 reg = idx | (i << 20);
 423
 424		if (!l3->subcaches[i])
 425			continue;
 426
 427		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
 428
 429		/*
 430		 * We need to WBINVD on a core on the node containing the L3
  431		 * cache whose indices we are disabling; a simple wbinvd()
 432		 * is not sufficient.
 433		 */
 434		wbinvd_on_cpu(cpu);
 435
 436		reg |= BIT(31);
 437		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
 438	}
 439}
 440
 441/*
  442 * disable an L3 cache index by using a disable-slot
 443 *
 444 * @l3:    L3 cache descriptor
 445 * @cpu:   A CPU on the node containing the L3 cache
 446 * @slot:  slot number (0..1)
 447 * @index: index to disable
 448 *
 449 * @return: 0 on success, error status on failure
 450 */
 451int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
 452			    unsigned long index)
 453{
 454	int ret = 0;
 455
 456	/*  check if @slot is already used or the index is already disabled */
 457	ret = amd_get_l3_disable_slot(l3, slot);
 458	if (ret >= 0)
 459		return -EINVAL;
 460
 461	if (index > l3->indices)
 462		return -EINVAL;
 463
 464	/* check whether the other slot has disabled the same index already */
 465	if (index == amd_get_l3_disable_slot(l3, !slot))
 466		return -EINVAL;
 467
 468	amd_l3_disable_index(l3, cpu, slot, index);
 469
 470	return 0;
 471}
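In practice this is reached from user space through the per-leaf sysfs files defined below: writing a cache index to /sys/devices/system/cpu/cpuX/cache/indexY/cache_disable_0 (or _1) lands in store_cache_disable(), which calls amd_set_l3_disable_slot() with slot 0 (or 1).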
 472
 473static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 474				  const char *buf, size_t count,
 475				  unsigned int slot)
 476{
 477	unsigned long val = 0;
 478	int cpu, err = 0;
 479
 480	if (!capable(CAP_SYS_ADMIN))
 481		return -EPERM;
 482
 483	if (!this_leaf->l3 ||
 484	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 485		return -EINVAL;
 486
 487	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
 488
 489	if (strict_strtoul(buf, 10, &val) < 0)
 490		return -EINVAL;
 491
 492	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
 493	if (err) {
 494		if (err == -EEXIST)
 495			printk(KERN_WARNING "L3 disable slot %d in use!\n",
 496					    slot);
 497		return err;
 498	}
 499	return count;
 500}
 501
 502#define STORE_CACHE_DISABLE(slot)					\
 503static ssize_t								\
 504store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
 505			   const char *buf, size_t count,		\
 506			   unsigned int cpu)				\
 507{									\
 508	return store_cache_disable(this_leaf, buf, count, slot);	\
 509}
 510STORE_CACHE_DISABLE(0)
 511STORE_CACHE_DISABLE(1)
 512
 513static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
 514		show_cache_disable_0, store_cache_disable_0);
 515static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
 516		show_cache_disable_1, store_cache_disable_1);
 517
 518static ssize_t
 519show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
 520{
 521	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 522		return -EINVAL;
 523
 524	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 525}
 526
 527static ssize_t
 528store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
 529		unsigned int cpu)
 530{
 531	unsigned long val;
 532
 533	if (!capable(CAP_SYS_ADMIN))
 534		return -EPERM;
 535
 536	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 537		return -EINVAL;
 538
 539	if (strict_strtoul(buf, 16, &val) < 0)
 540		return -EINVAL;
 541
 542	if (amd_set_subcaches(cpu, val))
 543		return -EINVAL;
 544
 545	return count;
 546}
 547
 548static struct _cache_attr subcaches =
 549	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
 550
 551#else	/* CONFIG_AMD_NB */
 552#define amd_init_l3_cache(x, y)
 553#endif /* CONFIG_AMD_NB */
 554
 555static int
 556__cpuinit cpuid4_cache_lookup_regs(int index,
 557				   struct _cpuid4_info_regs *this_leaf)
 558{
 559	union _cpuid4_leaf_eax	eax;
 560	union _cpuid4_leaf_ebx	ebx;
 561	union _cpuid4_leaf_ecx	ecx;
 562	unsigned		edx;
 563
 564	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 565		amd_cpuid4(index, &eax, &ebx, &ecx);
 566		amd_init_l3_cache(this_leaf, index);
 567	} else {
 568		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 569	}
 570
 571	if (eax.split.type == CACHE_TYPE_NULL)
 572		return -EIO; /* better error ? */
 573
 574	this_leaf->eax = eax;
 575	this_leaf->ebx = ebx;
 576	this_leaf->ecx = ecx;
 577	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 578			  (ebx.split.coherency_line_size     + 1) *
 579			  (ebx.split.physical_line_partition + 1) *
 580			  (ebx.split.ways_of_associativity   + 1);
 581	return 0;
 582}
 583
 584static int __cpuinit find_num_cache_leaves(void)
 585{
 586	unsigned int		eax, ebx, ecx, edx;
 587	union _cpuid4_leaf_eax	cache_eax;
 588	int 			i = -1;
 589
 590	do {
 591		++i;
 592		/* Do cpuid(4) loop to find out num_cache_leaves */
 593		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
 594		cache_eax.full = eax;
 595	} while (cache_eax.split.type != CACHE_TYPE_NULL);
 596	return i;
 597}
 598
 599unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 600{
 601	/* Cache sizes */
 602	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 603	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 604	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 605	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 606#ifdef CONFIG_X86_HT
 607	unsigned int cpu = c->cpu_index;
 608#endif
 609
 610	if (c->cpuid_level > 3) {
 611		static int is_initialized;
 612
 613		if (is_initialized == 0) {
 614			/* Init num_cache_leaves from boot CPU */
 615			num_cache_leaves = find_num_cache_leaves();
 616			is_initialized++;
 617		}
 618
 619		/*
 620		 * Whenever possible use cpuid(4), deterministic cache
 621		 * parameters cpuid leaf to find the cache details
 622		 */
 623		for (i = 0; i < num_cache_leaves; i++) {
 624			struct _cpuid4_info_regs this_leaf;
 625			int retval;
 626
 627			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 628			if (retval >= 0) {
 629				switch (this_leaf.eax.split.level) {
 630				case 1:
 631					if (this_leaf.eax.split.type ==
 632							CACHE_TYPE_DATA)
 633						new_l1d = this_leaf.size/1024;
 634					else if (this_leaf.eax.split.type ==
 635							CACHE_TYPE_INST)
 636						new_l1i = this_leaf.size/1024;
 637					break;
 638				case 2:
 639					new_l2 = this_leaf.size/1024;
 640					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 641					index_msb = get_count_order(num_threads_sharing);
 642					l2_id = c->apicid >> index_msb;
 643					break;
 644				case 3:
 645					new_l3 = this_leaf.size/1024;
 646					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 647					index_msb = get_count_order(
 648							num_threads_sharing);
 649					l3_id = c->apicid >> index_msb;
 650					break;
 651				default:
 652					break;
 653				}
 654			}
 655		}
 656	}
 657	/*
 658	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 659	 * trace cache
 660	 */
 661	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 662		/* supports eax=2  call */
 663		int j, n;
 664		unsigned int regs[4];
 665		unsigned char *dp = (unsigned char *)regs;
 666		int only_trace = 0;
 667
 668		if (num_cache_leaves != 0 && c->x86 == 15)
 669			only_trace = 1;
 670
 671		/* Number of times to iterate */
 672		n = cpuid_eax(2) & 0xFF;
 673
 674		for (i = 0 ; i < n ; i++) {
 675			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 676
 677			/* If bit 31 is set, this is an unknown format */
 678			for (j = 0 ; j < 3 ; j++)
 679				if (regs[j] & (1 << 31))
 680					regs[j] = 0;
 681
 682			/* Byte 0 is level count, not a descriptor */
 683			for (j = 1 ; j < 16 ; j++) {
 684				unsigned char des = dp[j];
 685				unsigned char k = 0;
 686
 687				/* look up this descriptor in the table */
 688				while (cache_table[k].descriptor != 0) {
 689					if (cache_table[k].descriptor == des) {
 690						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 691							break;
 692						switch (cache_table[k].cache_type) {
 693						case LVL_1_INST:
 694							l1i += cache_table[k].size;
 695							break;
 696						case LVL_1_DATA:
 697							l1d += cache_table[k].size;
 698							break;
 699						case LVL_2:
 700							l2 += cache_table[k].size;
 701							break;
 702						case LVL_3:
 703							l3 += cache_table[k].size;
 704							break;
 705						case LVL_TRACE:
 706							trace += cache_table[k].size;
 707							break;
 708						}
 709
 710						break;
 711					}
 712
 713					k++;
 714				}
 715			}
 716		}
 717	}
 718
 719	if (new_l1d)
 720		l1d = new_l1d;
 721
 722	if (new_l1i)
 723		l1i = new_l1i;
 724
 725	if (new_l2) {
 726		l2 = new_l2;
 727#ifdef CONFIG_X86_HT
 728		per_cpu(cpu_llc_id, cpu) = l2_id;
 729#endif
 730	}
 731
 732	if (new_l3) {
 733		l3 = new_l3;
 734#ifdef CONFIG_X86_HT
 735		per_cpu(cpu_llc_id, cpu) = l3_id;
 736#endif
 737	}
 738
 739	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 740
 741	return l2;
 742}
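To make the index_msb arithmetic concrete: if cpuid(4) reports an L3 shared by 16 threads (num_threads_sharing field = 15), then index_msb = get_count_order(16) = 4 and l3_id = apicid >> 4, so all 16 sibling threads compute the same cpu_llc_id.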
 743
 744#ifdef CONFIG_SYSFS
 745
 746/* pointer to _cpuid4_info array (for each cache leaf) */
 747static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 748#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 749
 750#ifdef CONFIG_SMP
 751static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 752{
 753	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 754	unsigned long num_threads_sharing;
 755	int index_msb, i, sibling;
 756	struct cpuinfo_x86 *c = &cpu_data(cpu);
 757
 758	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
 759		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 760			if (!per_cpu(ici_cpuid4_info, i))
 761				continue;
 762			this_leaf = CPUID4_INFO_IDX(i, index);
 763			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 764				if (!cpu_online(sibling))
 765					continue;
 766				set_bit(sibling, this_leaf->shared_cpu_map);
 767			}
 768		}
 769		return;
 770	}
 771	this_leaf = CPUID4_INFO_IDX(cpu, index);
 772	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
 773
 774	if (num_threads_sharing == 1)
 775		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
 776	else {
 777		index_msb = get_count_order(num_threads_sharing);
 778
 779		for_each_online_cpu(i) {
 780			if (cpu_data(i).apicid >> index_msb ==
 781			    c->apicid >> index_msb) {
 782				cpumask_set_cpu(i,
 783					to_cpumask(this_leaf->shared_cpu_map));
 784				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
 785					sibling_leaf =
 786						CPUID4_INFO_IDX(i, index);
 787					cpumask_set_cpu(cpu, to_cpumask(
 788						sibling_leaf->shared_cpu_map));
 789				}
 790			}
 791		}
 792	}
 793}
 794static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 795{
 796	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 797	int sibling;
 798
 799	this_leaf = CPUID4_INFO_IDX(cpu, index);
 800	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
 801		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 802		cpumask_clear_cpu(cpu,
 803				  to_cpumask(sibling_leaf->shared_cpu_map));
 804	}
 805}
 806#else
 807static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 808{
 809}
 810
 811static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 812{
 813}
 814#endif
 815
 816static void __cpuinit free_cache_attributes(unsigned int cpu)
 817{
 818	int i;
 819
 820	for (i = 0; i < num_cache_leaves; i++)
 821		cache_remove_shared_cpu_map(cpu, i);
 822
 823	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
 824	kfree(per_cpu(ici_cpuid4_info, cpu));
 825	per_cpu(ici_cpuid4_info, cpu) = NULL;
 826}
 827
 828static int
 829__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 830{
 831	struct _cpuid4_info_regs *leaf_regs =
 832		(struct _cpuid4_info_regs *)this_leaf;
 833
 834	return cpuid4_cache_lookup_regs(index, leaf_regs);
 835}
 836
 837static void __cpuinit get_cpu_leaves(void *_retval)
 838{
 839	int j, *retval = _retval, cpu = smp_processor_id();
 840
 841	/* Do cpuid and store the results */
 842	for (j = 0; j < num_cache_leaves; j++) {
 843		struct _cpuid4_info *this_leaf;
 844		this_leaf = CPUID4_INFO_IDX(cpu, j);
 845		*retval = cpuid4_cache_lookup(j, this_leaf);
 846		if (unlikely(*retval < 0)) {
 847			int i;
 848
 849			for (i = 0; i < j; i++)
 850				cache_remove_shared_cpu_map(cpu, i);
 851			break;
 852		}
 853		cache_shared_cpu_map_setup(cpu, j);
 854	}
 855}
 856
 857static int __cpuinit detect_cache_attributes(unsigned int cpu)
 858{
 859	int			retval;
 860
 861	if (num_cache_leaves == 0)
 862		return -ENOENT;
 863
 864	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
 865	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
 866	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
 867		return -ENOMEM;
 868
 869	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 870	if (retval) {
 871		kfree(per_cpu(ici_cpuid4_info, cpu));
 872		per_cpu(ici_cpuid4_info, cpu) = NULL;
 873	}
 874
 875	return retval;
 876}
 877
 878#include <linux/kobject.h>
 879#include <linux/sysfs.h>
 880
 881extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
 882
 883/* pointer to kobject for cpuX/cache */
 884static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
 885
 886struct _index_kobject {
 887	struct kobject kobj;
 888	unsigned int cpu;
 889	unsigned short index;
 890};
 891
 892/* pointer to array of kobjects for cpuX/cache/indexY */
 893static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
 894#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
 895
 896#define show_one_plus(file_name, object, val)				\
 897static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
 898				unsigned int cpu)			\
 899{									\
 900	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
 901}
 902
 903show_one_plus(level, eax.split.level, 0);
 904show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
 905show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
 906show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
 907show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
 908
 909static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
 910			 unsigned int cpu)
 911{
 912	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
 913}
 914
 915static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 916					int type, char *buf)
 917{
 918	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
 919	int n = 0;
 920
 921	if (len > 1) {
 922		const struct cpumask *mask;
 923
 924		mask = to_cpumask(this_leaf->shared_cpu_map);
 925		n = type ?
 926			cpulist_scnprintf(buf, len-2, mask) :
 927			cpumask_scnprintf(buf, len-2, mask);
 928		buf[n++] = '\n';
 929		buf[n] = '\0';
 930	}
 931	return n;
 932}
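For example, for a leaf shared by CPUs 0-3, shared_cpu_list prints the range form "0-3" while shared_cpu_map prints a hex mask along the lines of "0000000f" (the exact mask width depends on the configured number of CPUs).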
 933
 934static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
 935					  unsigned int cpu)
 936{
 937	return show_shared_cpu_map_func(leaf, 0, buf);
 938}
 939
 940static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
 941					   unsigned int cpu)
 942{
 943	return show_shared_cpu_map_func(leaf, 1, buf);
 944}
 945
 946static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
 947			 unsigned int cpu)
 948{
 949	switch (this_leaf->eax.split.type) {
 950	case CACHE_TYPE_DATA:
 951		return sprintf(buf, "Data\n");
 952	case CACHE_TYPE_INST:
 953		return sprintf(buf, "Instruction\n");
 954	case CACHE_TYPE_UNIFIED:
 955		return sprintf(buf, "Unified\n");
 956	default:
 957		return sprintf(buf, "Unknown\n");
 958	}
 959}
 960
 961#define to_object(k)	container_of(k, struct _index_kobject, kobj)
 962#define to_attr(a)	container_of(a, struct _cache_attr, attr)
 963
 964#define define_one_ro(_name) \
 965static struct _cache_attr _name = \
 966	__ATTR(_name, 0444, show_##_name, NULL)
 967
 968define_one_ro(level);
 969define_one_ro(type);
 970define_one_ro(coherency_line_size);
 971define_one_ro(physical_line_partition);
 972define_one_ro(ways_of_associativity);
 973define_one_ro(number_of_sets);
 974define_one_ro(size);
 975define_one_ro(shared_cpu_map);
 976define_one_ro(shared_cpu_list);
 977
 978static struct attribute *default_attrs[] = {
 979	&type.attr,
 980	&level.attr,
 981	&coherency_line_size.attr,
 982	&physical_line_partition.attr,
 983	&ways_of_associativity.attr,
 984	&number_of_sets.attr,
 985	&size.attr,
 986	&shared_cpu_map.attr,
 987	&shared_cpu_list.attr,
 988	NULL
 989};
 990
 991#ifdef CONFIG_AMD_NB
 992static struct attribute ** __cpuinit amd_l3_attrs(void)
 993{
 994	static struct attribute **attrs;
 995	int n;
 996
 997	if (attrs)
 998		return attrs;
 999
1000	n = sizeof (default_attrs) / sizeof (struct attribute *);
1001
1002	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
1003		n += 2;
1004
1005	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1006		n += 1;
1007
1008	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1009	if (attrs == NULL)
1010		return attrs = default_attrs;
1011
1012	for (n = 0; default_attrs[n]; n++)
1013		attrs[n] = default_attrs[n];
1014
1015	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1016		attrs[n++] = &cache_disable_0.attr;
1017		attrs[n++] = &cache_disable_1.attr;
1018	}
1019
1020	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1021		attrs[n++] = &subcaches.attr;
1022
1023	return attrs;
1024}
1025#endif
1026
1027static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1028{
1029	struct _cache_attr *fattr = to_attr(attr);
1030	struct _index_kobject *this_leaf = to_object(kobj);
1031	ssize_t ret;
1032
1033	ret = fattr->show ?
1034		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1035			buf, this_leaf->cpu) :
1036		0;
1037	return ret;
1038}
1039
1040static ssize_t store(struct kobject *kobj, struct attribute *attr,
1041		     const char *buf, size_t count)
1042{
1043	struct _cache_attr *fattr = to_attr(attr);
1044	struct _index_kobject *this_leaf = to_object(kobj);
1045	ssize_t ret;
1046
1047	ret = fattr->store ?
1048		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1049			buf, count, this_leaf->cpu) :
1050		0;
1051	return ret;
1052}
1053
1054static const struct sysfs_ops sysfs_ops = {
1055	.show   = show,
1056	.store  = store,
1057};
1058
1059static struct kobj_type ktype_cache = {
1060	.sysfs_ops	= &sysfs_ops,
1061	.default_attrs	= default_attrs,
1062};
1063
1064static struct kobj_type ktype_percpu_entry = {
1065	.sysfs_ops	= &sysfs_ops,
1066};
1067
1068static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1069{
1070	kfree(per_cpu(ici_cache_kobject, cpu));
1071	kfree(per_cpu(ici_index_kobject, cpu));
1072	per_cpu(ici_cache_kobject, cpu) = NULL;
1073	per_cpu(ici_index_kobject, cpu) = NULL;
1074	free_cache_attributes(cpu);
1075}
1076
1077static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
1078{
1079	int err;
1080
1081	if (num_cache_leaves == 0)
1082		return -ENOENT;
1083
1084	err = detect_cache_attributes(cpu);
1085	if (err)
1086		return err;
1087
1088	/* Allocate all required memory */
1089	per_cpu(ici_cache_kobject, cpu) =
1090		kzalloc(sizeof(struct kobject), GFP_KERNEL);
1091	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1092		goto err_out;
1093
1094	per_cpu(ici_index_kobject, cpu) = kzalloc(
1095	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1096	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1097		goto err_out;
1098
1099	return 0;
1100
1101err_out:
1102	cpuid4_cache_sysfs_exit(cpu);
1103	return -ENOMEM;
1104}
1105
1106static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1107
1108/* Add/Remove cache interface for CPU device */
1109static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1110{
1111	unsigned int cpu = sys_dev->id;
1112	unsigned long i, j;
1113	struct _index_kobject *this_object;
1114	struct _cpuid4_info   *this_leaf;
1115	int retval;
1116
1117	retval = cpuid4_cache_sysfs_init(cpu);
1118	if (unlikely(retval < 0))
1119		return retval;
1120
1121	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1122				      &ktype_percpu_entry,
1123				      &sys_dev->kobj, "%s", "cache");
1124	if (retval < 0) {
1125		cpuid4_cache_sysfs_exit(cpu);
1126		return retval;
1127	}
1128
1129	for (i = 0; i < num_cache_leaves; i++) {
1130		this_object = INDEX_KOBJECT_PTR(cpu, i);
1131		this_object->cpu = cpu;
1132		this_object->index = i;
1133
1134		this_leaf = CPUID4_INFO_IDX(cpu, i);
1135
1136		ktype_cache.default_attrs = default_attrs;
1137#ifdef CONFIG_AMD_NB
1138		if (this_leaf->l3)
1139			ktype_cache.default_attrs = amd_l3_attrs();
1140#endif
1141		retval = kobject_init_and_add(&(this_object->kobj),
1142					      &ktype_cache,
1143					      per_cpu(ici_cache_kobject, cpu),
1144					      "index%1lu", i);
1145		if (unlikely(retval)) {
1146			for (j = 0; j < i; j++)
1147				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1148			kobject_put(per_cpu(ici_cache_kobject, cpu));
1149			cpuid4_cache_sysfs_exit(cpu);
1150			return retval;
1151		}
1152		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1153	}
1154	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1155
1156	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1157	return 0;
1158}
1159
1160static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
1161{
1162	unsigned int cpu = sys_dev->id;
1163	unsigned long i;
1164
1165	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1166		return;
1167	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1168		return;
1169	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1170
1171	for (i = 0; i < num_cache_leaves; i++)
1172		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1173	kobject_put(per_cpu(ici_cache_kobject, cpu));
1174	cpuid4_cache_sysfs_exit(cpu);
1175}
1176
1177static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1178					unsigned long action, void *hcpu)
1179{
1180	unsigned int cpu = (unsigned long)hcpu;
1181	struct sys_device *sys_dev;
1182
1183	sys_dev = get_cpu_sysdev(cpu);
1184	switch (action) {
1185	case CPU_ONLINE:
1186	case CPU_ONLINE_FROZEN:
1187		cache_add_dev(sys_dev);
1188		break;
1189	case CPU_DEAD:
1190	case CPU_DEAD_FROZEN:
1191		cache_remove_dev(sys_dev);
1192		break;
1193	}
1194	return NOTIFY_OK;
1195}
1196
1197static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1198	.notifier_call = cacheinfo_cpu_callback,
1199};
1200
1201static int __cpuinit cache_sysfs_init(void)
1202{
1203	int i;
1204
1205	if (num_cache_leaves == 0)
1206		return 0;
1207
1208	for_each_online_cpu(i) {
1209		int err;
1210		struct sys_device *sys_dev = get_cpu_sysdev(i);
1211
1212		err = cache_add_dev(sys_dev);
1213		if (err)
1214			return err;
1215	}
1216	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1217	return 0;
1218}
1219
1220device_initcall(cache_sysfs_init);
1221
1222#endif
v4.6
  1/*
  2 *	Routines to identify caches on Intel CPU.
  3 *
  4 *	Changes:
  5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  8 */
  9
 10#include <linux/slab.h>
 11#include <linux/cacheinfo.h>
 12#include <linux/cpu.h>
 13#include <linux/sched.h>
 14#include <linux/sysfs.h>
 15#include <linux/pci.h>
 16
 17#include <asm/cpufeature.h>
 18#include <asm/amd_nb.h>
 19#include <asm/smp.h>
 20
 21#define LVL_1_INST	1
 22#define LVL_1_DATA	2
 23#define LVL_2		3
 24#define LVL_3		4
 25#define LVL_TRACE	5
 26
 27struct _cache_table {
 28	unsigned char descriptor;
 29	char cache_type;
 30	short size;
 31};
 32
 33#define MB(x)	((x) * 1024)
 34
 35/* All the cache descriptor types we care about (no TLB or
 36   trace cache entries) */
 37
 38static const struct _cache_table cache_table[] =
 39{
 40	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 41	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
 42	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
 43	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 44	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
 45	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
 46	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
 47	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 48	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 49	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 50	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 51	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 52	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
 53	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
 54	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 55	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 56	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
 57	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 58	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 59	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 60	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
 61	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
 62	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
 63	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
 64	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
 65	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
 66	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
 67	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
 68	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
 69	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 70	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
 71	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 72	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
 73	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
 74	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
 75	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 76	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 77	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 78	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 79	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 80	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 81	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 82	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 83	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 84	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 85	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 86	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 87	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 88	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 89	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 90	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 91	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 92	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 93	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 94	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 95	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 96	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 97	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 98	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 99	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
100	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
101	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
102	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
103	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
104	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
105	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
106	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
107	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
108	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
109	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
110	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
111	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
112	{ 0x00, 0, 0}
113};
114
115
116enum _cache_type {
117	CTYPE_NULL = 0,
118	CTYPE_DATA = 1,
119	CTYPE_INST = 2,
120	CTYPE_UNIFIED = 3
121};
122
123union _cpuid4_leaf_eax {
124	struct {
125		enum _cache_type	type:5;
126		unsigned int		level:3;
127		unsigned int		is_self_initializing:1;
128		unsigned int		is_fully_associative:1;
129		unsigned int		reserved:4;
130		unsigned int		num_threads_sharing:12;
131		unsigned int		num_cores_on_die:6;
132	} split;
133	u32 full;
134};
135
136union _cpuid4_leaf_ebx {
137	struct {
138		unsigned int		coherency_line_size:12;
139		unsigned int		physical_line_partition:10;
140		unsigned int		ways_of_associativity:10;
141	} split;
142	u32 full;
143};
144
145union _cpuid4_leaf_ecx {
146	struct {
147		unsigned int		number_of_sets:32;
148	} split;
149	u32 full;
150};
151
152struct _cpuid4_info_regs {
153	union _cpuid4_leaf_eax eax;
154	union _cpuid4_leaf_ebx ebx;
155	union _cpuid4_leaf_ecx ecx;
156	unsigned long size;
157	struct amd_northbridge *nb;
158};
159
160static unsigned short num_cache_leaves;
161
162/* AMD doesn't have CPUID4. Emulate it here to report the same
163   information to the user.  This makes some assumptions about the machine:
164   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
165
166   In theory the TLBs could be reported as fake type (they are in "dummy").
167   Maybe later */
168union l1_cache {
169	struct {
170		unsigned line_size:8;
171		unsigned lines_per_tag:8;
172		unsigned assoc:8;
173		unsigned size_in_kb:8;
174	};
175	unsigned val;
176};
177
178union l2_cache {
179	struct {
180		unsigned line_size:8;
181		unsigned lines_per_tag:4;
182		unsigned assoc:4;
183		unsigned size_in_kb:16;
184	};
185	unsigned val;
186};
187
188union l3_cache {
189	struct {
190		unsigned line_size:8;
191		unsigned lines_per_tag:4;
192		unsigned assoc:4;
193		unsigned res:2;
194		unsigned size_encoded:14;
195	};
196	unsigned val;
197};
198
199static const unsigned short assocs[] = {
200	[1] = 1,
201	[2] = 2,
202	[4] = 4,
203	[6] = 8,
204	[8] = 16,
205	[0xa] = 32,
206	[0xb] = 48,
207	[0xc] = 64,
208	[0xd] = 96,
209	[0xe] = 128,
210	[0xf] = 0xffff /* fully associative - no way to show this currently */
211};
212
213static const unsigned char levels[] = { 1, 1, 2, 3 };
214static const unsigned char types[] = { 1, 2, 3, 3 };
215
216static const enum cache_type cache_type_map[] = {
217	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
218	[CTYPE_DATA] = CACHE_TYPE_DATA,
219	[CTYPE_INST] = CACHE_TYPE_INST,
220	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
221};
222
223static void
224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
225		     union _cpuid4_leaf_ebx *ebx,
226		     union _cpuid4_leaf_ecx *ecx)
227{
228	unsigned dummy;
229	unsigned line_size, lines_per_tag, assoc, size_in_kb;
230	union l1_cache l1i, l1d;
231	union l2_cache l2;
232	union l3_cache l3;
233	union l1_cache *l1 = &l1d;
234
235	eax->full = 0;
236	ebx->full = 0;
237	ecx->full = 0;
238
239	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
240	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
241
242	switch (leaf) {
243	case 1:
244		l1 = &l1i;
245	case 0:
246		if (!l1->val)
247			return;
248		assoc = assocs[l1->assoc];
249		line_size = l1->line_size;
250		lines_per_tag = l1->lines_per_tag;
251		size_in_kb = l1->size_in_kb;
252		break;
253	case 2:
254		if (!l2.val)
255			return;
256		assoc = assocs[l2.assoc];
257		line_size = l2.line_size;
258		lines_per_tag = l2.lines_per_tag;
259		/* cpu_data has errata corrections for K7 applied */
260		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
261		break;
262	case 3:
263		if (!l3.val)
264			return;
265		assoc = assocs[l3.assoc];
266		line_size = l3.line_size;
267		lines_per_tag = l3.lines_per_tag;
268		size_in_kb = l3.size_encoded * 512;
269		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
270			size_in_kb = size_in_kb >> 1;
271			assoc = assoc >> 1;
272		}
273		break;
274	default:
275		return;
276	}
277
278	eax->split.is_self_initializing = 1;
279	eax->split.type = types[leaf];
280	eax->split.level = levels[leaf];
281	eax->split.num_threads_sharing = 0;
282	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
283
284
285	if (assoc == 0xffff)
286		eax->split.is_fully_associative = 1;
287	ebx->split.coherency_line_size = line_size - 1;
288	ebx->split.ways_of_associativity = assoc - 1;
289	ebx->split.physical_line_partition = lines_per_tag - 1;
290	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
291		(ebx->split.ways_of_associativity + 1) - 1;
292}
293
294#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
295
296/*
297 * L3 cache descriptors
298 */
299static void amd_calc_l3_indices(struct amd_northbridge *nb)
300{
301	struct amd_l3_cache *l3 = &nb->l3_cache;
302	unsigned int sc0, sc1, sc2, sc3;
303	u32 val = 0;
304
305	pci_read_config_dword(nb->misc, 0x1C4, &val);
306
307	/* calculate subcache sizes */
308	l3->subcaches[0] = sc0 = !(val & BIT(0));
309	l3->subcaches[1] = sc1 = !(val & BIT(4));
310
311	if (boot_cpu_data.x86 == 0x15) {
312		l3->subcaches[0] = sc0 += !(val & BIT(1));
313		l3->subcaches[1] = sc1 += !(val & BIT(5));
314	}
315
316	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
317	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
318
319	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
320}
321
322/*
323 * check whether a slot used for disabling an L3 index is occupied.
324 * @l3: L3 cache descriptor
325 * @slot: slot number (0..1)
326 *
327 * @returns: the disabled index if used or negative value if slot free.
328 */
329static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
330{
331	unsigned int reg = 0;
332
333	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
334
335	/* check whether this slot is activated already */
336	if (reg & (3UL << 30))
337		return reg & 0xfff;
338
339	return -1;
340}
341
342static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
343				  unsigned int slot)
344{
345	int index;
346	struct amd_northbridge *nb = this_leaf->priv;
347
348	index = amd_get_l3_disable_slot(nb, slot);
349	if (index >= 0)
350		return sprintf(buf, "%d\n", index);
351
352	return sprintf(buf, "FREE\n");
353}
354
355#define SHOW_CACHE_DISABLE(slot)					\
356static ssize_t								\
357cache_disable_##slot##_show(struct device *dev,				\
358			    struct device_attribute *attr, char *buf)	\
359{									\
360	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
361	return show_cache_disable(this_leaf, buf, slot);		\
362}
363SHOW_CACHE_DISABLE(0)
364SHOW_CACHE_DISABLE(1)
365
366static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
367				 unsigned slot, unsigned long idx)
368{
369	int i;
370
371	idx |= BIT(30);
372
373	/*
374	 *  disable index in all 4 subcaches
375	 */
376	for (i = 0; i < 4; i++) {
377		u32 reg = idx | (i << 20);
378
379		if (!nb->l3_cache.subcaches[i])
380			continue;
381
382		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
383
384		/*
385		 * We need to WBINVD on a core on the node containing the L3
386		 * cache whose indices we are disabling; a simple wbinvd()
387		 * is not sufficient.
388		 */
389		wbinvd_on_cpu(cpu);
390
391		reg |= BIT(31);
392		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
393	}
394}
395
396/*
397 * disable an L3 cache index by using a disable-slot
398 *
399 * @l3:    L3 cache descriptor
400 * @cpu:   A CPU on the node containing the L3 cache
401 * @slot:  slot number (0..1)
402 * @index: index to disable
403 *
404 * @return: 0 on success, error status on failure
405 */
406static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
407			    unsigned slot, unsigned long index)
408{
409	int ret = 0;
410
411	/*  check if @slot is already used or the index is already disabled */
412	ret = amd_get_l3_disable_slot(nb, slot);
413	if (ret >= 0)
414		return -EEXIST;
415
416	if (index > nb->l3_cache.indices)
417		return -EINVAL;
418
419	/* check whether the other slot has disabled the same index already */
420	if (index == amd_get_l3_disable_slot(nb, !slot))
421		return -EEXIST;
422
423	amd_l3_disable_index(nb, cpu, slot, index);
424
425	return 0;
426}
427
428static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
429				   const char *buf, size_t count,
430				   unsigned int slot)
431{
432	unsigned long val = 0;
433	int cpu, err = 0;
434	struct amd_northbridge *nb = this_leaf->priv;
435
436	if (!capable(CAP_SYS_ADMIN))
437		return -EPERM;
438
439	cpu = cpumask_first(&this_leaf->shared_cpu_map);
440
441	if (kstrtoul(buf, 10, &val) < 0)
442		return -EINVAL;
443
444	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
445	if (err) {
446		if (err == -EEXIST)
447			pr_warn("L3 slot %d in use/index already disabled!\n",
448				   slot);
449		return err;
450	}
451	return count;
452}
453
454#define STORE_CACHE_DISABLE(slot)					\
455static ssize_t								\
456cache_disable_##slot##_store(struct device *dev,			\
457			     struct device_attribute *attr,		\
458			     const char *buf, size_t count)		\
459{									\
460	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
461	return store_cache_disable(this_leaf, buf, count, slot);	\
462}
463STORE_CACHE_DISABLE(0)
464STORE_CACHE_DISABLE(1)
465
466static ssize_t subcaches_show(struct device *dev,
467			      struct device_attribute *attr, char *buf)
468{
469	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
470	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
471
472	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
473}
474
475static ssize_t subcaches_store(struct device *dev,
476			       struct device_attribute *attr,
477			       const char *buf, size_t count)
478{
479	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
480	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
481	unsigned long val;
482
483	if (!capable(CAP_SYS_ADMIN))
484		return -EPERM;
485
486	if (kstrtoul(buf, 16, &val) < 0)
487		return -EINVAL;
488
489	if (amd_set_subcaches(cpu, val))
490		return -EINVAL;
491
492	return count;
493}
494
495static DEVICE_ATTR_RW(cache_disable_0);
496static DEVICE_ATTR_RW(cache_disable_1);
497static DEVICE_ATTR_RW(subcaches);
498
499static umode_t
500cache_private_attrs_is_visible(struct kobject *kobj,
501			       struct attribute *attr, int unused)
502{
503	struct device *dev = kobj_to_dev(kobj);
504	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
505	umode_t mode = attr->mode;
506
507	if (!this_leaf->priv)
508		return 0;
509
510	if ((attr == &dev_attr_subcaches.attr) &&
511	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
512		return mode;
513
514	if ((attr == &dev_attr_cache_disable_0.attr ||
515	     attr == &dev_attr_cache_disable_1.attr) &&
516	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
517		return mode;
518
519	return 0;
520}
521
522static struct attribute_group cache_private_group = {
523	.is_visible = cache_private_attrs_is_visible,
524};
525
526static void init_amd_l3_attrs(void)
527{
528	int n = 1;
529	static struct attribute **amd_l3_attrs;
530
531	if (amd_l3_attrs) /* already initialized */
532		return;
533
534	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
535		n += 2;
536	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
537		n += 1;
538
539	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
540	if (!amd_l3_attrs)
541		return;
542
543	n = 0;
544	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
545		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
546		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
547	}
548	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
549		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
550
551	cache_private_group.attrs = amd_l3_attrs;
552}
553
554const struct attribute_group *
555cache_get_priv_group(struct cacheinfo *this_leaf)
556{
557	struct amd_northbridge *nb = this_leaf->priv;
558
559	if (this_leaf->level < 3 || !nb)
560		return NULL;
561
562	if (nb && nb->l3_cache.indices)
563		init_amd_l3_attrs();
564
565	return &cache_private_group;
566}
567
568static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
569{
570	int node;
571
572	/* only for L3, and not in virtualized environments */
573	if (index < 3)
574		return;
575
576	node = amd_get_nb_id(smp_processor_id());
577	this_leaf->nb = node_to_amd_nb(node);
578	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
579		amd_calc_l3_indices(this_leaf->nb);
580}
581#else
582#define amd_init_l3_cache(x, y)
583#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
584
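/*
 * Decode one cache leaf: AMD uses CPUID 0x8000001d when TOPOEXT is
 * available and the amd_cpuid4() emulation otherwise, Intel uses
 * CPUID leaf 4. The size is sets * line size * partitions * ways,
 * each CPUID field being stored as "value - 1".
 */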
585static int
586cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
587{
588	union _cpuid4_leaf_eax	eax;
589	union _cpuid4_leaf_ebx	ebx;
590	union _cpuid4_leaf_ecx	ecx;
591	unsigned		edx;
592
593	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
594		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
595			cpuid_count(0x8000001d, index, &eax.full,
596				    &ebx.full, &ecx.full, &edx);
597		else
598			amd_cpuid4(index, &eax, &ebx, &ecx);
599		amd_init_l3_cache(this_leaf, index);
600	} else {
601		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
602	}
603
604	if (eax.split.type == CTYPE_NULL)
605		return -EIO; /* better error ? */
606
607	this_leaf->eax = eax;
608	this_leaf->ebx = ebx;
609	this_leaf->ecx = ecx;
610	this_leaf->size = (ecx.split.number_of_sets          + 1) *
611			  (ebx.split.coherency_line_size     + 1) *
612			  (ebx.split.physical_line_partition + 1) *
613			  (ebx.split.ways_of_associativity   + 1);
614	return 0;
615}
616
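/*
 * Count cache leaves by iterating the deterministic cache leaf
 * (4 on Intel, 0x8000001d on AMD) until a CTYPE_NULL entry shows up.
 */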
617static int find_num_cache_leaves(struct cpuinfo_x86 *c)
618{
619	unsigned int		eax, ebx, ecx, edx, op;
620	union _cpuid4_leaf_eax	cache_eax;
621	int 			i = -1;
622
623	if (c->x86_vendor == X86_VENDOR_AMD)
624		op = 0x8000001d;
625	else
626		op = 4;
627
628	do {
629		++i;
630		/* Do cpuid(op) loop to find out num_cache_leaves */
631		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
632		cache_eax.full = eax;
633	} while (cache_eax.split.type != CTYPE_NULL);
634	return i;
635}
636
637void init_amd_cacheinfo(struct cpuinfo_x86 *c)
638{
639
640	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
641		num_cache_leaves = find_num_cache_leaves(c);
642	} else if (c->extended_cpuid_level >= 0x80000006) {
643		if (cpuid_edx(0x80000006) & 0xf000)
644			num_cache_leaves = 4;
645		else
646			num_cache_leaves = 3;
647	}
648}
649
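/*
 * Gather cache sizes from cpuid(4) when available and fall back to
 * the cpuid(2) descriptor table otherwise (or, on family 15, for the
 * trace cache only). Also derives l2_id/l3_id, which seed cpu_llc_id.
 * Returns the L2 size in KB.
 */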
650unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
651{
652	/* Cache sizes */
653	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
654	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
655	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
656	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
657#ifdef CONFIG_SMP
658	unsigned int cpu = c->cpu_index;
659#endif
660
661	if (c->cpuid_level > 3) {
662		static int is_initialized;
663
664		if (is_initialized == 0) {
665			/* Init num_cache_leaves from boot CPU */
666			num_cache_leaves = find_num_cache_leaves(c);
667			is_initialized++;
668		}
669
670		/*
671		 * Whenever possible use cpuid(4), deterministic cache
672		 * parameters cpuid leaf to find the cache details
673		 */
674		for (i = 0; i < num_cache_leaves; i++) {
675			struct _cpuid4_info_regs this_leaf = {};
676			int retval;
677
678			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
679			if (retval < 0)
680				continue;
681
682			switch (this_leaf.eax.split.level) {
683			case 1:
684				if (this_leaf.eax.split.type == CTYPE_DATA)
685					new_l1d = this_leaf.size/1024;
686				else if (this_leaf.eax.split.type == CTYPE_INST)
687					new_l1i = this_leaf.size/1024;
688				break;
689			case 2:
690				new_l2 = this_leaf.size/1024;
691				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
692				index_msb = get_count_order(num_threads_sharing);
693				l2_id = c->apicid & ~((1 << index_msb) - 1);
694				break;
695			case 3:
696				new_l3 = this_leaf.size/1024;
697				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
698				index_msb = get_count_order(num_threads_sharing);
699				l3_id = c->apicid & ~((1 << index_msb) - 1);
700				break;
701			default:
702				break;
703			}
704		}
705	}
706	/*
707	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
708	 * trace cache
709	 */
710	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
711		/* supports eax=2  call */
712		int j, n;
713		unsigned int regs[4];
714		unsigned char *dp = (unsigned char *)regs;
715		int only_trace = 0;
716
717		if (num_cache_leaves != 0 && c->x86 == 15)
718			only_trace = 1;
719
720		/* Number of times to iterate */
721		n = cpuid_eax(2) & 0xFF;
722
723		for (i = 0 ; i < n ; i++) {
724			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
725
726			/* If bit 31 is set, this is an unknown format */
727			for (j = 0 ; j < 3 ; j++)
728				if (regs[j] & (1 << 31))
729					regs[j] = 0;
730
731			/* Byte 0 is level count, not a descriptor */
732			for (j = 1 ; j < 16 ; j++) {
733				unsigned char des = dp[j];
734				unsigned char k = 0;
735
736				/* look up this descriptor in the table */
737				while (cache_table[k].descriptor != 0) {
738					if (cache_table[k].descriptor == des) {
739						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
740							break;
741						switch (cache_table[k].cache_type) {
742						case LVL_1_INST:
743							l1i += cache_table[k].size;
744							break;
745						case LVL_1_DATA:
746							l1d += cache_table[k].size;
747							break;
748						case LVL_2:
749							l2 += cache_table[k].size;
750							break;
751						case LVL_3:
752							l3 += cache_table[k].size;
753							break;
754						case LVL_TRACE:
755							trace += cache_table[k].size;
756							break;
757						}
758
759						break;
760					}
761
762					k++;
763				}
764			}
765		}
766	}
767
768	if (new_l1d)
769		l1d = new_l1d;
770
771	if (new_l1i)
772		l1i = new_l1i;
773
774	if (new_l2) {
775		l2 = new_l2;
776#ifdef CONFIG_SMP
777		per_cpu(cpu_llc_id, cpu) = l2_id;
778#endif
779	}
780
781	if (new_l3) {
782		l3 = new_l3;
783#ifdef CONFIG_SMP
784		per_cpu(cpu_llc_id, cpu) = l3_id;
785#endif
786	}
787
788#ifdef CONFIG_SMP
789	/*
790	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
791	 * turn means that the only possibility is SMT (as indicated in
792	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
793	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
794	 * c->phys_proc_id.
795	 */
796	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
797		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
798#endif
799
800	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
801
802	return l2;
803}
804
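/*
 * AMD-specific shared_cpu_map setup: with TOPOEXT the sharing domain
 * is the contiguous APIC ID window given by num_threads_sharing,
 * otherwise only the L3 (index 3) map is filled from
 * cpu_llc_shared_mask(). Returns 1 if the leaf was handled here,
 * 0 to fall back to the generic path.
 */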
805static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
806				    struct _cpuid4_info_regs *base)
807{
808	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
809	struct cacheinfo *this_leaf;
810	int i, sibling;
811
812	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
813		unsigned int apicid, nshared, first, last;
814
815		this_leaf = this_cpu_ci->info_list + index;
816		nshared = base->eax.split.num_threads_sharing + 1;
817		apicid = cpu_data(cpu).apicid;
818		first = apicid - (apicid % nshared);
819		last = first + nshared - 1;
820
821		for_each_online_cpu(i) {
822			this_cpu_ci = get_cpu_cacheinfo(i);
823			if (!this_cpu_ci->info_list)
824				continue;
825
826			apicid = cpu_data(i).apicid;
827			if ((apicid < first) || (apicid > last))
828				continue;
829
830			this_leaf = this_cpu_ci->info_list + index;
831
832			for_each_online_cpu(sibling) {
833				apicid = cpu_data(sibling).apicid;
834				if ((apicid < first) || (apicid > last))
835					continue;
836				cpumask_set_cpu(sibling,
837						&this_leaf->shared_cpu_map);
838			}
839		}
840	} else if (index == 3) {
841		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
842			this_cpu_ci = get_cpu_cacheinfo(i);
843			if (!this_cpu_ci->info_list)
844				continue;
845			this_leaf = this_cpu_ci->info_list + index;
846			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
847				if (!cpu_online(sibling))
848					continue;
849				cpumask_set_cpu(sibling,
850						&this_leaf->shared_cpu_map);
851			}
852		}
853	} else
854		return 0;
855
856	return 1;
857}
858
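/*
 * Generic shared_cpu_map setup: try the AMD path first, otherwise two
 * CPUs share this leaf when their APIC IDs match in all bits above
 * index_msb, i.e. outside the bits covering the sharing threads.
 */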
859static void __cache_cpumap_setup(unsigned int cpu, int index,
860				 struct _cpuid4_info_regs *base)
861{
862	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
863	struct cacheinfo *this_leaf, *sibling_leaf;
864	unsigned long num_threads_sharing;
865	int index_msb, i;
866	struct cpuinfo_x86 *c = &cpu_data(cpu);
867
868	if (c->x86_vendor == X86_VENDOR_AMD) {
869		if (__cache_amd_cpumap_setup(cpu, index, base))
870			return;
871	}
872
873	this_leaf = this_cpu_ci->info_list + index;
874	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
875
876	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
877	if (num_threads_sharing == 1)
878		return;
879
880	index_msb = get_count_order(num_threads_sharing);
881
882	for_each_online_cpu(i)
883		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
884			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
885
886			if (i == cpu || !sib_cpu_ci->info_list)
887				continue;/* skip if itself or no cacheinfo */
888			sibling_leaf = sib_cpu_ci->info_list + index;
889			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
890			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
891		}
892}
893
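/*
 * Translate the raw CPUID registers into the generic struct cacheinfo
 * fields; the "+ 1" terms undo the "value - 1" encoding of leaf 4.
 */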
894static void ci_leaf_init(struct cacheinfo *this_leaf,
895			 struct _cpuid4_info_regs *base)
896{
897	this_leaf->level = base->eax.split.level;
898	this_leaf->type = cache_type_map[base->eax.split.type];
899	this_leaf->coherency_line_size =
900				base->ebx.split.coherency_line_size + 1;
901	this_leaf->ways_of_associativity =
902				base->ebx.split.ways_of_associativity + 1;
903	this_leaf->size = base->size;
904	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
905	this_leaf->physical_line_partition =
906				base->ebx.split.physical_line_partition + 1;
907	this_leaf->priv = base->nb;
908}
909
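/*
 * Generic cacheinfo callback: report three cache levels and the
 * number of leaves found by the CPUID scan, or -ENOENT if none.
 */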
910static int __init_cache_level(unsigned int cpu)
911{
912	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
913
914	if (!num_cache_leaves)
915		return -ENOENT;
916	if (!this_cpu_ci)
917		return -EINVAL;
918	this_cpu_ci->num_levels = 3;
919	this_cpu_ci->num_leaves = num_cache_leaves;
920	return 0;
921}
922
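/*
 * Generic cacheinfo callback: decode every leaf via
 * cpuid4_cache_lookup_regs() and build its shared_cpu_map.
 */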
923static int __populate_cache_leaves(unsigned int cpu)
924{
925	unsigned int idx, ret;
926	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
927	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
928	struct _cpuid4_info_regs id4_regs = {};
929
930	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
931		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
932		if (ret)
933			return ret;
934		ci_leaf_init(this_leaf++, &id4_regs);
935		__cache_cpumap_setup(cpu, idx, &id4_regs);
936	}
937	return 0;
938}
939
940DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
941DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)