v3.1
   1/*
   2 *	Routines to identify caches on Intel CPU.
   3 *
   4 *	Changes:
   5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   8 */
   9
  10#include <linux/init.h>
  11#include <linux/slab.h>
  12#include <linux/device.h>
  13#include <linux/compiler.h>
  14#include <linux/cpu.h>
  15#include <linux/sched.h>
  16#include <linux/pci.h>
  17
  18#include <asm/processor.h>
  19#include <linux/smp.h>
  20#include <asm/amd_nb.h>
  21#include <asm/smp.h>
  22
  23#define LVL_1_INST	1
  24#define LVL_1_DATA	2
  25#define LVL_2		3
  26#define LVL_3		4
  27#define LVL_TRACE	5
  28
  29struct _cache_table {
  30	unsigned char descriptor;
  31	char cache_type;
  32	short size;
  33};
  34
  35#define MB(x)	((x) * 1024)
  36
  37/* All the cache descriptor types we care about (no TLB or
  38   trace cache entries) */
  39
  40static const struct _cache_table __cpuinitconst cache_table[] =
  41{
  42	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  43	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  44	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  45	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  46	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  47	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  48	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  49	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  50	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  51	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  52	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  53	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  54	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  55	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  56	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  57	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  58	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  59	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  60	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  61	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  62	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  63	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  64	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  65	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  66	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  67	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  68	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  69	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  70	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  71	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  72	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  73	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  74	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  75	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  76	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  77	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  79	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  80	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  81	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
  82	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
  83	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
  84	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
  85	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
  86	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  87	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  88	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  89	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  90	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
  91	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
  92	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
  93	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
  94	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
  95	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
  96	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
  97	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
  98	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
  99	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 100	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 101	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 102	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 103	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 104	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 105	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 106	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 107	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 108	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 109	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 110	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 111	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 112	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 113	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 114	{ 0x00, 0, 0}
 115};
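Editorial note, not part of the file: every size in cache_table[] is in kilobytes, and MB() only rescales megabyte-sized entries into that same unit.

	/* illustrative only: descriptor 0x49 is listed as { 0x49, LVL_3, MB(4) },
	 * and MB(4) == 4 * 1024 == 4096 KB, which is the amount later added to
	 * the running l3 total in init_intel_cacheinfo(). */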
 116
 117
 118enum _cache_type {
 119	CACHE_TYPE_NULL	= 0,
 120	CACHE_TYPE_DATA = 1,
 121	CACHE_TYPE_INST = 2,
 122	CACHE_TYPE_UNIFIED = 3
 123};
 124
 125union _cpuid4_leaf_eax {
 126	struct {
 127		enum _cache_type	type:5;
 128		unsigned int		level:3;
 129		unsigned int		is_self_initializing:1;
 130		unsigned int		is_fully_associative:1;
 131		unsigned int		reserved:4;
 132		unsigned int		num_threads_sharing:12;
 133		unsigned int		num_cores_on_die:6;
 134	} split;
 135	u32 full;
 136};
 137
 138union _cpuid4_leaf_ebx {
 139	struct {
 140		unsigned int		coherency_line_size:12;
 141		unsigned int		physical_line_partition:10;
 142		unsigned int		ways_of_associativity:10;
 143	} split;
 144	u32 full;
 145};
 146
 147union _cpuid4_leaf_ecx {
 148	struct {
 149		unsigned int		number_of_sets:32;
 150	} split;
 151	u32 full;
 152};
 153
 154struct amd_l3_cache {
 155	struct	 amd_northbridge *nb;
 156	unsigned indices;
 157	u8	 subcaches[4];
 158};
 159
 160struct _cpuid4_info {
 161	union _cpuid4_leaf_eax eax;
 162	union _cpuid4_leaf_ebx ebx;
 163	union _cpuid4_leaf_ecx ecx;
 164	unsigned long size;
 165	struct amd_l3_cache *l3;
 166	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 167};
 168
 169/* subset of above _cpuid4_info w/o shared_cpu_map */
 170struct _cpuid4_info_regs {
 171	union _cpuid4_leaf_eax eax;
 172	union _cpuid4_leaf_ebx ebx;
 173	union _cpuid4_leaf_ecx ecx;
 174	unsigned long size;
 175	struct amd_l3_cache *l3;
 176};
 177
 178unsigned short			num_cache_leaves;
 179
 180/* AMD doesn't have CPUID4. Emulate it here to report the same
 181   information to the user.  This makes some assumptions about the machine:
 182   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 183
 184   In theory the TLBs could be reported as fake type (they are in "dummy").
 185   Maybe later */
 186union l1_cache {
 187	struct {
 188		unsigned line_size:8;
 189		unsigned lines_per_tag:8;
 190		unsigned assoc:8;
 191		unsigned size_in_kb:8;
 192	};
 193	unsigned val;
 194};
 195
 196union l2_cache {
 197	struct {
 198		unsigned line_size:8;
 199		unsigned lines_per_tag:4;
 200		unsigned assoc:4;
 201		unsigned size_in_kb:16;
 202	};
 203	unsigned val;
 204};
 205
 206union l3_cache {
 207	struct {
 208		unsigned line_size:8;
 209		unsigned lines_per_tag:4;
 210		unsigned assoc:4;
 211		unsigned res:2;
 212		unsigned size_encoded:14;
 213	};
 214	unsigned val;
 215};
 216
 217static const unsigned short __cpuinitconst assocs[] = {
 218	[1] = 1,
 219	[2] = 2,
 220	[4] = 4,
 221	[6] = 8,
 222	[8] = 16,
 223	[0xa] = 32,
 224	[0xb] = 48,
 225	[0xc] = 64,
 226	[0xd] = 96,
 227	[0xe] = 128,
 228	[0xf] = 0xffff /* fully associative - no way to show this currently */
 229};
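The assocs[] table decodes the 4-bit associativity encoding of CPUID leaves 0x80000005/0x80000006 into a way count, with 0xf ("fully associative") represented by the 0xffff sentinel. A minimal sketch of how amd_cpuid4() below uses it, with an assumed encoded value of 0x6:

	/* sketch only: decoding an assumed 4-bit assoc field of 0x6 */
	unsigned int encoded = 0x6;
	unsigned int ways = assocs[encoded];	/* assocs[0x6] == 8 -> 8-way */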
 230
 231static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
 232static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
 233
 234static void __cpuinit
 235amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 236		     union _cpuid4_leaf_ebx *ebx,
 237		     union _cpuid4_leaf_ecx *ecx)
 238{
 239	unsigned dummy;
 240	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 241	union l1_cache l1i, l1d;
 242	union l2_cache l2;
 243	union l3_cache l3;
 244	union l1_cache *l1 = &l1d;
 245
 246	eax->full = 0;
 247	ebx->full = 0;
 248	ecx->full = 0;
 249
 250	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 251	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 252
 253	switch (leaf) {
 254	case 1:
 255		l1 = &l1i;
 256	case 0:
 257		if (!l1->val)
 258			return;
 259		assoc = assocs[l1->assoc];
 260		line_size = l1->line_size;
 261		lines_per_tag = l1->lines_per_tag;
 262		size_in_kb = l1->size_in_kb;
 263		break;
 264	case 2:
 265		if (!l2.val)
 266			return;
 267		assoc = assocs[l2.assoc];
 268		line_size = l2.line_size;
 269		lines_per_tag = l2.lines_per_tag;
 270		/* cpu_data has errata corrections for K7 applied */
 271		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 272		break;
 273	case 3:
 274		if (!l3.val)
 275			return;
 276		assoc = assocs[l3.assoc];
 277		line_size = l3.line_size;
 278		lines_per_tag = l3.lines_per_tag;
 279		size_in_kb = l3.size_encoded * 512;
 280		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 281			size_in_kb = size_in_kb >> 1;
 282			assoc = assoc >> 1;
 283		}
 284		break;
 285	default:
 286		return;
 287	}
 288
 289	eax->split.is_self_initializing = 1;
 290	eax->split.type = types[leaf];
 291	eax->split.level = levels[leaf];
 292	eax->split.num_threads_sharing = 0;
 293	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 294
 295
 296	if (assoc == 0xffff)
 297		eax->split.is_fully_associative = 1;
 298	ebx->split.coherency_line_size = line_size - 1;
 299	ebx->split.ways_of_associativity = assoc - 1;
 300	ebx->split.physical_line_partition = lines_per_tag - 1;
 301	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 302		(ebx->split.ways_of_associativity + 1) - 1;
 303}
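Like the real CPUID(4) leaf, the emulation stores each quantity minus one. A minimal sketch with assumed (not measured) values for a 512 KB, 16-way L2 with 64-byte lines and one line per tag:

	/* sketch only: plugging assumed values into the formulas above */
	unsigned int size_in_kb = 512, assoc = 16, line_size = 64, lines_per_tag = 1;
	unsigned int ways_m1  = assoc - 1;		/* 15 -> ebx.split.ways_of_associativity */
	unsigned int line_m1  = line_size - 1;		/* 63 -> ebx.split.coherency_line_size */
	unsigned int parts_m1 = lines_per_tag - 1;	/*  0 -> ebx.split.physical_line_partition */
	unsigned int sets_m1  = (size_in_kb * 1024) / line_size / (ways_m1 + 1) - 1;
							/* 524288 / 64 / 16 - 1 = 511 -> ecx */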
 304
 305struct _cache_attr {
 306	struct attribute attr;
 307	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
 308	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
 309			 unsigned int);
 310};
 311
 312#ifdef CONFIG_AMD_NB
 313
 314/*
 315 * L3 cache descriptors
 316 */
 317static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 318{
 319	unsigned int sc0, sc1, sc2, sc3;
 320	u32 val = 0;
 321
 322	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
 323
 324	/* calculate subcache sizes */
 325	l3->subcaches[0] = sc0 = !(val & BIT(0));
 326	l3->subcaches[1] = sc1 = !(val & BIT(4));
 327	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 328	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 329
 330	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 331}
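Each bit tested in register 0x1C4 flags a disabled subcache, so the !(val & BIT(n)) terms count the enabled ones and l3->indices is derived from the largest subcache. A worked example under the assumption that nothing is disabled (val == 0):

	/* sketch only: val == 0  =>  sc0 = sc1 = 1, sc2 = sc3 = 1 + 1 = 2, so
	 * indices = (max(max3(1, 1, 2), 2) << 10) - 1 = (2 << 10) - 1 = 2047,
	 * i.e. index values 0..2047 are valid arguments for the disable slots. */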
 332
 333static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
 334					int index)
 335{
 336	static struct amd_l3_cache *__cpuinitdata l3_caches;
 337	int node;
 338
 339	/* only for L3, and not in virtualized environments */
 340	if (index < 3 || amd_nb_num() == 0)
 341		return;
 342
 343	/*
 344	 * Strictly speaking, the amount in @size below is leaked since it is
 345	 * never freed but this is done only on shutdown so it doesn't matter.
 346	 */
 347	if (!l3_caches) {
 348		int size = amd_nb_num() * sizeof(struct amd_l3_cache);
 349
 350		l3_caches = kzalloc(size, GFP_ATOMIC);
 351		if (!l3_caches)
 352			return;
 353	}
 354
 355	node = amd_get_nb_id(smp_processor_id());
 356
 357	if (!l3_caches[node].nb) {
 358		l3_caches[node].nb = node_to_amd_nb(node);
 359		amd_calc_l3_indices(&l3_caches[node]);
 360	}
 361
 362	this_leaf->l3 = &l3_caches[node];
 363}
 364
 365/*
 366 * check whether a slot used for disabling an L3 index is occupied.
 367 * @l3: L3 cache descriptor
 368 * @slot: slot number (0..1)
 369 *
 370 * @returns: the disabled index if used or negative value if slot free.
 371 */
 372int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
 373{
 374	unsigned int reg = 0;
 375
 376	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
 377
 378	/* check whether this slot is activated already */
 379	if (reg & (3UL << 30))
 380		return reg & 0xfff;
 381
 382	return -1;
 383}
 384
 385static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 386				  unsigned int slot)
 387{
 388	int index;
 389
 390	if (!this_leaf->l3 ||
 391	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 392		return -EINVAL;
 393
 394	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
 395	if (index >= 0)
 396		return sprintf(buf, "%d\n", index);
 397
 398	return sprintf(buf, "FREE\n");
 399}
 400
 401#define SHOW_CACHE_DISABLE(slot)					\
 402static ssize_t								\
 403show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
 404			  unsigned int cpu)				\
 405{									\
 406	return show_cache_disable(this_leaf, buf, slot);		\
 407}
 408SHOW_CACHE_DISABLE(0)
 409SHOW_CACHE_DISABLE(1)
 410
 411static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
 412				 unsigned slot, unsigned long idx)
 413{
 414	int i;
 415
 416	idx |= BIT(30);
 417
 418	/*
 419	 *  disable index in all 4 subcaches
 420	 */
 421	for (i = 0; i < 4; i++) {
 422		u32 reg = idx | (i << 20);
 423
 424		if (!l3->subcaches[i])
 425			continue;
 426
 427		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
 428
 429		/*
 430		 * We need to WBINVD on a core on the node containing the L3
 431		 * cache which indices we disable therefore a simple wbinvd()
 432		 * is not sufficient.
 433		 */
 434		wbinvd_on_cpu(cpu);
 435
 436		reg |= BIT(31);
 437		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
 438	}
 439}
 440
 441/*
 442 * disable a L3 cache index by using a disable-slot
 443 *
 444 * @l3:    L3 cache descriptor
 445 * @cpu:   A CPU on the node containing the L3 cache
 446 * @slot:  slot number (0..1)
 447 * @index: index to disable
 448 *
 449 * @return: 0 on success, error status on failure
 450 */
 451int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
 452			    unsigned long index)
 453{
 454	int ret = 0;
 455
 456	/*  check if @slot is already used or the index is already disabled */
 457	ret = amd_get_l3_disable_slot(l3, slot);
 458	if (ret >= 0)
 459		return -EINVAL;
 460
 461	if (index > l3->indices)
 462		return -EINVAL;
 463
 464	/* check whether the other slot has disabled the same index already */
 465	if (index == amd_get_l3_disable_slot(l3, !slot))
 466		return -EINVAL;
 467
 468	amd_l3_disable_index(l3, cpu, slot, index);
 469
 470	return 0;
 471}
 472
 473static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 474				  const char *buf, size_t count,
 475				  unsigned int slot)
 476{
 477	unsigned long val = 0;
 478	int cpu, err = 0;
 479
 480	if (!capable(CAP_SYS_ADMIN))
 481		return -EPERM;
 482
 483	if (!this_leaf->l3 ||
 484	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 485		return -EINVAL;
 486
 487	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
 488
 489	if (strict_strtoul(buf, 10, &val) < 0)
 490		return -EINVAL;
 491
 492	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
 493	if (err) {
 494		if (err == -EEXIST)
 495			printk(KERN_WARNING "L3 disable slot %d in use!\n",
 496					    slot);
 497		return err;
 498	}
 499	return count;
 500}
 501
 502#define STORE_CACHE_DISABLE(slot)					\
 503static ssize_t								\
 504store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
 505			   const char *buf, size_t count,		\
 506			   unsigned int cpu)				\
 507{									\
 508	return store_cache_disable(this_leaf, buf, count, slot);	\
 509}
 510STORE_CACHE_DISABLE(0)
 511STORE_CACHE_DISABLE(1)
 512
 513static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
 514		show_cache_disable_0, store_cache_disable_0);
 515static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
 516		show_cache_disable_1, store_cache_disable_1);
 517
 518static ssize_t
 519show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
 520{
 521	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 522		return -EINVAL;
 523
 524	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 525}
 526
 527static ssize_t
 528store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
 529		unsigned int cpu)
 530{
 531	unsigned long val;
 532
 533	if (!capable(CAP_SYS_ADMIN))
 534		return -EPERM;
 535
 536	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 537		return -EINVAL;
 538
 539	if (strict_strtoul(buf, 16, &val) < 0)
 540		return -EINVAL;
 541
 542	if (amd_set_subcaches(cpu, val))
 543		return -EINVAL;
 544
 545	return count;
 546}
 547
 548static struct _cache_attr subcaches =
 549	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
 550
 551#else	/* CONFIG_AMD_NB */
 552#define amd_init_l3_cache(x, y)
 553#endif /* CONFIG_AMD_NB */
 554
 555static int
 556__cpuinit cpuid4_cache_lookup_regs(int index,
 557				   struct _cpuid4_info_regs *this_leaf)
 558{
 559	union _cpuid4_leaf_eax	eax;
 560	union _cpuid4_leaf_ebx	ebx;
 561	union _cpuid4_leaf_ecx	ecx;
 562	unsigned		edx;
 563
 564	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 565		amd_cpuid4(index, &eax, &ebx, &ecx);
 566		amd_init_l3_cache(this_leaf, index);
 567	} else {
 568		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 569	}
 570
 571	if (eax.split.type == CACHE_TYPE_NULL)
 572		return -EIO; /* better error ? */
 573
 574	this_leaf->eax = eax;
 575	this_leaf->ebx = ebx;
 576	this_leaf->ecx = ecx;
 577	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 578			  (ebx.split.coherency_line_size     + 1) *
 579			  (ebx.split.physical_line_partition + 1) *
 580			  (ebx.split.ways_of_associativity   + 1);
 581	return 0;
 582}
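The size computation is just the product of the four "plus one" fields reported by the leaf. For instance, a typical 32 KB, 8-way L1d with 64-byte lines and one partition would come back as (values illustrative):

	/* sketch only:
	 *   (number_of_sets + 1) * (coherency_line_size + 1) *
	 *   (physical_line_partition + 1) * (ways_of_associativity + 1)
	 * = (63 + 1) * (63 + 1) * (0 + 1) * (7 + 1)
	 * = 64 * 64 * 1 * 8 = 32768 bytes, i.e. this_leaf->size / 1024 == 32 KB */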
 583
 584static int __cpuinit find_num_cache_leaves(void)
 585{
 586	unsigned int		eax, ebx, ecx, edx;
 587	union _cpuid4_leaf_eax	cache_eax;
 588	int 			i = -1;
 589
 590	do {
 591		++i;
 592		/* Do cpuid(4) loop to find out num_cache_leaves */
 593		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
 594		cache_eax.full = eax;
 595	} while (cache_eax.split.type != CACHE_TYPE_NULL);
 596	return i;
 597}
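The loop above simply walks the subleaves of CPUID leaf 4 until the type field comes back as NULL. A roughly equivalent user-space probe, assuming a GCC/Clang toolchain whose <cpuid.h> provides __cpuid_count() (illustrative sketch, not kernel code):

	#include <cpuid.h>

	static int count_cache_leaves(void)
	{
		unsigned int eax, ebx, ecx, edx;
		int i = 0;

		for (;;) {
			__cpuid_count(4, i, eax, ebx, ecx, edx);
			if ((eax & 0x1f) == 0)	/* type == CACHE_TYPE_NULL */
				return i;
			i++;
		}
	}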
 598
 599unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 600{
 601	/* Cache sizes */
 602	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 603	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 604	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 605	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 606#ifdef CONFIG_X86_HT
 607	unsigned int cpu = c->cpu_index;
 608#endif
 609
 610	if (c->cpuid_level > 3) {
 611		static int is_initialized;
 612
 613		if (is_initialized == 0) {
 614			/* Init num_cache_leaves from boot CPU */
 615			num_cache_leaves = find_num_cache_leaves();
 616			is_initialized++;
 617		}
 618
 619		/*
 620		 * Whenever possible use cpuid(4), deterministic cache
 621		 * parameters cpuid leaf to find the cache details
 622		 */
 623		for (i = 0; i < num_cache_leaves; i++) {
 624			struct _cpuid4_info_regs this_leaf;
 625			int retval;
 626
 627			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 628			if (retval >= 0) {
 629				switch (this_leaf.eax.split.level) {
 630				case 1:
 631					if (this_leaf.eax.split.type ==
 632							CACHE_TYPE_DATA)
 633						new_l1d = this_leaf.size/1024;
 634					else if (this_leaf.eax.split.type ==
 635							CACHE_TYPE_INST)
 636						new_l1i = this_leaf.size/1024;
 637					break;
 638				case 2:
 639					new_l2 = this_leaf.size/1024;
 640					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 641					index_msb = get_count_order(num_threads_sharing);
 642					l2_id = c->apicid >> index_msb;
 643					break;
 644				case 3:
 645					new_l3 = this_leaf.size/1024;
 646					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 647					index_msb = get_count_order(
 648							num_threads_sharing);
 649					l3_id = c->apicid >> index_msb;
 650					break;
 651				default:
 652					break;
 653				}
 654			}
 655		}
 656	}
 657	/*
 658	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 659	 * trace cache
 660	 */
 661	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 662		/* supports eax=2  call */
 663		int j, n;
 664		unsigned int regs[4];
 665		unsigned char *dp = (unsigned char *)regs;
 666		int only_trace = 0;
 667
 668		if (num_cache_leaves != 0 && c->x86 == 15)
 669			only_trace = 1;
 670
 671		/* Number of times to iterate */
 672		n = cpuid_eax(2) & 0xFF;
 673
 674		for (i = 0 ; i < n ; i++) {
 675			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 676
 677			/* If bit 31 is set, this is an unknown format */
 678			for (j = 0 ; j < 3 ; j++)
 679				if (regs[j] & (1 << 31))
 680					regs[j] = 0;
 681
 682			/* Byte 0 is level count, not a descriptor */
 683			for (j = 1 ; j < 16 ; j++) {
 684				unsigned char des = dp[j];
 685				unsigned char k = 0;
 686
 687				/* look up this descriptor in the table */
 688				while (cache_table[k].descriptor != 0) {
 689					if (cache_table[k].descriptor == des) {
 690						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 691							break;
 692						switch (cache_table[k].cache_type) {
 693						case LVL_1_INST:
 694							l1i += cache_table[k].size;
 695							break;
 696						case LVL_1_DATA:
 697							l1d += cache_table[k].size;
 698							break;
 699						case LVL_2:
 700							l2 += cache_table[k].size;
 701							break;
 702						case LVL_3:
 703							l3 += cache_table[k].size;
 704							break;
 705						case LVL_TRACE:
 706							trace += cache_table[k].size;
 707							break;
 708						}
 709
 710						break;
 711					}
 712
 713					k++;
 714				}
 715			}
 716		}
 717	}
 718
 719	if (new_l1d)
 720		l1d = new_l1d;
 721
 722	if (new_l1i)
 723		l1i = new_l1i;
 724
 725	if (new_l2) {
 726		l2 = new_l2;
 727#ifdef CONFIG_X86_HT
 728		per_cpu(cpu_llc_id, cpu) = l2_id;
 729#endif
 730	}
 731
 732	if (new_l3) {
 733		l3 = new_l3;
 734#ifdef CONFIG_X86_HT
 735		per_cpu(cpu_llc_id, cpu) = l3_id;
 736#endif
 737	}
 738
 739	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 740
 741	return l2;
 742}
 743
 744#ifdef CONFIG_SYSFS
 745
 746/* pointer to _cpuid4_info array (for each cache leaf) */
 747static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 748#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 749
 750#ifdef CONFIG_SMP
 751static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 752{
 753	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 754	unsigned long num_threads_sharing;
 755	int index_msb, i, sibling;
 756	struct cpuinfo_x86 *c = &cpu_data(cpu);
 757
 758	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
 759		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 760			if (!per_cpu(ici_cpuid4_info, i))
 761				continue;
 762			this_leaf = CPUID4_INFO_IDX(i, index);
 763			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 764				if (!cpu_online(sibling))
 765					continue;
 766				set_bit(sibling, this_leaf->shared_cpu_map);
 767			}
 768		}
 769		return;
 770	}
 771	this_leaf = CPUID4_INFO_IDX(cpu, index);
 772	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
 773
 774	if (num_threads_sharing == 1)
 775		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
 776	else {
 777		index_msb = get_count_order(num_threads_sharing);
 778
 779		for_each_online_cpu(i) {
 780			if (cpu_data(i).apicid >> index_msb ==
 781			    c->apicid >> index_msb) {
 782				cpumask_set_cpu(i,
 783					to_cpumask(this_leaf->shared_cpu_map));
 784				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
 785					sibling_leaf =
 786						CPUID4_INFO_IDX(i, index);
 787					cpumask_set_cpu(cpu, to_cpumask(
 788						sibling_leaf->shared_cpu_map));
 789				}
 790			}
 791		}
 792	}
 793}
 794static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 795{
 796	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 797	int sibling;
 798
 799	this_leaf = CPUID4_INFO_IDX(cpu, index);
 800	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
 801		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 802		cpumask_clear_cpu(cpu,
 803				  to_cpumask(sibling_leaf->shared_cpu_map));
 804	}
 805}
 806#else
 807static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 808{
 809}
 810
 811static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 812{
 813}
 814#endif
 815
 816static void __cpuinit free_cache_attributes(unsigned int cpu)
 817{
 818	int i;
 819
 820	for (i = 0; i < num_cache_leaves; i++)
 821		cache_remove_shared_cpu_map(cpu, i);
 822
 823	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
 824	kfree(per_cpu(ici_cpuid4_info, cpu));
 825	per_cpu(ici_cpuid4_info, cpu) = NULL;
 826}
 827
 828static int
 829__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 830{
 831	struct _cpuid4_info_regs *leaf_regs =
 832		(struct _cpuid4_info_regs *)this_leaf;
 833
 834	return cpuid4_cache_lookup_regs(index, leaf_regs);
 835}
 836
 837static void __cpuinit get_cpu_leaves(void *_retval)
 838{
 839	int j, *retval = _retval, cpu = smp_processor_id();
 840
 841	/* Do cpuid and store the results */
 842	for (j = 0; j < num_cache_leaves; j++) {
 843		struct _cpuid4_info *this_leaf;
 844		this_leaf = CPUID4_INFO_IDX(cpu, j);
 845		*retval = cpuid4_cache_lookup(j, this_leaf);
 846		if (unlikely(*retval < 0)) {
 847			int i;
 848
 849			for (i = 0; i < j; i++)
 850				cache_remove_shared_cpu_map(cpu, i);
 851			break;
 852		}
 853		cache_shared_cpu_map_setup(cpu, j);
 854	}
 855}
 856
 857static int __cpuinit detect_cache_attributes(unsigned int cpu)
 858{
 859	int			retval;
 860
 861	if (num_cache_leaves == 0)
 862		return -ENOENT;
 863
 864	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
 865	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
 866	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
 867		return -ENOMEM;
 868
 869	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 870	if (retval) {
 871		kfree(per_cpu(ici_cpuid4_info, cpu));
 872		per_cpu(ici_cpuid4_info, cpu) = NULL;
 873	}
 874
 875	return retval;
 876}
 877
 878#include <linux/kobject.h>
 879#include <linux/sysfs.h>
 880
 881extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
 882
 883/* pointer to kobject for cpuX/cache */
 884static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
 885
 886struct _index_kobject {
 887	struct kobject kobj;
 888	unsigned int cpu;
 889	unsigned short index;
 890};
 891
 892/* pointer to array of kobjects for cpuX/cache/indexY */
 893static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
 894#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
 895
 896#define show_one_plus(file_name, object, val)				\
 897static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
 898				unsigned int cpu)			\
 899{									\
 900	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
 901}
 902
 903show_one_plus(level, eax.split.level, 0);
 904show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
 905show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
 906show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
 907show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
 908
 909static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
 910			 unsigned int cpu)
 911{
 912	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
 913}
 914
 915static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 916					int type, char *buf)
 917{
 918	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
 919	int n = 0;
 920
 921	if (len > 1) {
 922		const struct cpumask *mask;
 923
 924		mask = to_cpumask(this_leaf->shared_cpu_map);
 925		n = type ?
 926			cpulist_scnprintf(buf, len-2, mask) :
 927			cpumask_scnprintf(buf, len-2, mask);
 928		buf[n++] = '\n';
 929		buf[n] = '\0';
 930	}
 931	return n;
 932}
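The two wrappers below differ only in the formatter: cpumask_scnprintf() renders the mask as hex words while cpulist_scnprintf() renders it as a range list. For a leaf shared by CPUs 0-3 the resulting sysfs files would read roughly as follows (paths and mask width are illustrative and depend on the leaf index and NR_CPUS):

	/* sketch of expected output, assumed 4 sharing CPUs:
	 *   /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_map:  0000000f
	 *   /sys/devices/system/cpu/cpu0/cache/index2/shared_cpu_list: 0-3
	 */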
 933
 934static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
 935					  unsigned int cpu)
 936{
 937	return show_shared_cpu_map_func(leaf, 0, buf);
 938}
 939
 940static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
 941					   unsigned int cpu)
 942{
 943	return show_shared_cpu_map_func(leaf, 1, buf);
 944}
 945
 946static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
 947			 unsigned int cpu)
 948{
 949	switch (this_leaf->eax.split.type) {
 950	case CACHE_TYPE_DATA:
 951		return sprintf(buf, "Data\n");
 952	case CACHE_TYPE_INST:
 953		return sprintf(buf, "Instruction\n");
 954	case CACHE_TYPE_UNIFIED:
 955		return sprintf(buf, "Unified\n");
 956	default:
 957		return sprintf(buf, "Unknown\n");
 958	}
 959}
 960
 961#define to_object(k)	container_of(k, struct _index_kobject, kobj)
 962#define to_attr(a)	container_of(a, struct _cache_attr, attr)
 963
 964#define define_one_ro(_name) \
 965static struct _cache_attr _name = \
 966	__ATTR(_name, 0444, show_##_name, NULL)
 967
 968define_one_ro(level);
 969define_one_ro(type);
 970define_one_ro(coherency_line_size);
 971define_one_ro(physical_line_partition);
 972define_one_ro(ways_of_associativity);
 973define_one_ro(number_of_sets);
 974define_one_ro(size);
 975define_one_ro(shared_cpu_map);
 976define_one_ro(shared_cpu_list);
 977
 978static struct attribute *default_attrs[] = {
 979	&type.attr,
 980	&level.attr,
 981	&coherency_line_size.attr,
 982	&physical_line_partition.attr,
 983	&ways_of_associativity.attr,
 984	&number_of_sets.attr,
 985	&size.attr,
 986	&shared_cpu_map.attr,
 987	&shared_cpu_list.attr,
 988	NULL
 989};
 990
 991#ifdef CONFIG_AMD_NB
 992static struct attribute ** __cpuinit amd_l3_attrs(void)
 993{
 994	static struct attribute **attrs;
 995	int n;
 996
 997	if (attrs)
 998		return attrs;
 999
1000	n = sizeof (default_attrs) / sizeof (struct attribute *);
1001
1002	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
1003		n += 2;
1004
1005	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1006		n += 1;
1007
1008	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1009	if (attrs == NULL)
1010		return attrs = default_attrs;
1011
1012	for (n = 0; default_attrs[n]; n++)
1013		attrs[n] = default_attrs[n];
1014
1015	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1016		attrs[n++] = &cache_disable_0.attr;
1017		attrs[n++] = &cache_disable_1.attr;
1018	}
1019
1020	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1021		attrs[n++] = &subcaches.attr;
1022
1023	return attrs;
1024}
1025#endif
1026
1027static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1028{
1029	struct _cache_attr *fattr = to_attr(attr);
1030	struct _index_kobject *this_leaf = to_object(kobj);
1031	ssize_t ret;
1032
1033	ret = fattr->show ?
1034		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1035			buf, this_leaf->cpu) :
1036		0;
1037	return ret;
1038}
1039
1040static ssize_t store(struct kobject *kobj, struct attribute *attr,
1041		     const char *buf, size_t count)
1042{
1043	struct _cache_attr *fattr = to_attr(attr);
1044	struct _index_kobject *this_leaf = to_object(kobj);
1045	ssize_t ret;
1046
1047	ret = fattr->store ?
1048		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1049			buf, count, this_leaf->cpu) :
1050		0;
1051	return ret;
1052}
1053
1054static const struct sysfs_ops sysfs_ops = {
1055	.show   = show,
1056	.store  = store,
1057};
1058
1059static struct kobj_type ktype_cache = {
1060	.sysfs_ops	= &sysfs_ops,
1061	.default_attrs	= default_attrs,
1062};
1063
1064static struct kobj_type ktype_percpu_entry = {
1065	.sysfs_ops	= &sysfs_ops,
1066};
1067
1068static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1069{
1070	kfree(per_cpu(ici_cache_kobject, cpu));
1071	kfree(per_cpu(ici_index_kobject, cpu));
1072	per_cpu(ici_cache_kobject, cpu) = NULL;
1073	per_cpu(ici_index_kobject, cpu) = NULL;
1074	free_cache_attributes(cpu);
1075}
1076
1077static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
1078{
1079	int err;
1080
1081	if (num_cache_leaves == 0)
1082		return -ENOENT;
1083
1084	err = detect_cache_attributes(cpu);
1085	if (err)
1086		return err;
1087
1088	/* Allocate all required memory */
1089	per_cpu(ici_cache_kobject, cpu) =
1090		kzalloc(sizeof(struct kobject), GFP_KERNEL);
1091	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1092		goto err_out;
1093
1094	per_cpu(ici_index_kobject, cpu) = kzalloc(
1095	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1096	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1097		goto err_out;
1098
1099	return 0;
1100
1101err_out:
1102	cpuid4_cache_sysfs_exit(cpu);
1103	return -ENOMEM;
1104}
1105
1106static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1107
1108/* Add/Remove cache interface for CPU device */
1109static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1110{
1111	unsigned int cpu = sys_dev->id;
1112	unsigned long i, j;
1113	struct _index_kobject *this_object;
1114	struct _cpuid4_info   *this_leaf;
1115	int retval;
1116
1117	retval = cpuid4_cache_sysfs_init(cpu);
1118	if (unlikely(retval < 0))
1119		return retval;
1120
1121	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1122				      &ktype_percpu_entry,
1123				      &sys_dev->kobj, "%s", "cache");
1124	if (retval < 0) {
1125		cpuid4_cache_sysfs_exit(cpu);
1126		return retval;
1127	}
1128
1129	for (i = 0; i < num_cache_leaves; i++) {
1130		this_object = INDEX_KOBJECT_PTR(cpu, i);
1131		this_object->cpu = cpu;
1132		this_object->index = i;
1133
1134		this_leaf = CPUID4_INFO_IDX(cpu, i);
1135
1136		ktype_cache.default_attrs = default_attrs;
1137#ifdef CONFIG_AMD_NB
1138		if (this_leaf->l3)
1139			ktype_cache.default_attrs = amd_l3_attrs();
1140#endif
1141		retval = kobject_init_and_add(&(this_object->kobj),
1142					      &ktype_cache,
1143					      per_cpu(ici_cache_kobject, cpu),
1144					      "index%1lu", i);
1145		if (unlikely(retval)) {
1146			for (j = 0; j < i; j++)
1147				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1148			kobject_put(per_cpu(ici_cache_kobject, cpu));
1149			cpuid4_cache_sysfs_exit(cpu);
1150			return retval;
1151		}
1152		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1153	}
1154	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1155
1156	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1157	return 0;
1158}
1159
1160static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
1161{
1162	unsigned int cpu = sys_dev->id;
1163	unsigned long i;
1164
1165	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1166		return;
1167	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1168		return;
1169	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1170
1171	for (i = 0; i < num_cache_leaves; i++)
1172		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1173	kobject_put(per_cpu(ici_cache_kobject, cpu));
1174	cpuid4_cache_sysfs_exit(cpu);
1175}
1176
1177static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1178					unsigned long action, void *hcpu)
1179{
1180	unsigned int cpu = (unsigned long)hcpu;
1181	struct sys_device *sys_dev;
1182
1183	sys_dev = get_cpu_sysdev(cpu);
1184	switch (action) {
1185	case CPU_ONLINE:
1186	case CPU_ONLINE_FROZEN:
1187		cache_add_dev(sys_dev);
1188		break;
1189	case CPU_DEAD:
1190	case CPU_DEAD_FROZEN:
1191		cache_remove_dev(sys_dev);
1192		break;
1193	}
1194	return NOTIFY_OK;
1195}
1196
1197static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1198	.notifier_call = cacheinfo_cpu_callback,
1199};
1200
1201static int __cpuinit cache_sysfs_init(void)
1202{
1203	int i;
1204
1205	if (num_cache_leaves == 0)
1206		return 0;
1207
1208	for_each_online_cpu(i) {
1209		int err;
1210		struct sys_device *sys_dev = get_cpu_sysdev(i);
1211
1212		err = cache_add_dev(sys_dev);
1213		if (err)
1214			return err;
1215	}
1216	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1217	return 0;
1218}
1219
1220device_initcall(cache_sysfs_init);
1221
1222#endif
v4.10.11
  1/*
  2 *	Routines to identify caches on Intel CPU.
  3 *
  4 *	Changes:
  5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  8 */
  9
 10#include <linux/slab.h>
 11#include <linux/cacheinfo.h>
 12#include <linux/cpu.h>
 13#include <linux/sched.h>
 14#include <linux/sysfs.h>
 15#include <linux/pci.h>
 16
 17#include <asm/cpufeature.h>
 18#include <asm/amd_nb.h>
 19#include <asm/smp.h>
 20
 21#define LVL_1_INST	1
 22#define LVL_1_DATA	2
 23#define LVL_2		3
 24#define LVL_3		4
 25#define LVL_TRACE	5
 26
 27struct _cache_table {
 28	unsigned char descriptor;
 29	char cache_type;
 30	short size;
 31};
 32
 33#define MB(x)	((x) * 1024)
 34
 35/* All the cache descriptor types we care about (no TLB or
 36   trace cache entries) */
 37
 38static const struct _cache_table cache_table[] =
 39{
 40	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 41	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
 42	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
 43	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 44	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
 45	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
 46	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
 47	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 48	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 49	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 50	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 51	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 52	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
 53	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
 54	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 55	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 56	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
 57	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 58	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 59	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 60	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
 61	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
 62	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
 63	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
 64	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
 65	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
 66	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
 67	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
 68	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
 69	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 70	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
 71	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 72	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
 73	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
 74	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
 75	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 76	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 77	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 78	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 79	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 80	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 81	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 82	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 83	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 84	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 85	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 86	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 87	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 88	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 89	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 90	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 91	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 92	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 93	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 94	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 95	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 96	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 97	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 98	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 99	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
100	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
101	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
102	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
103	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
104	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
105	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
106	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
107	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
108	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
109	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
110	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
111	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
112	{ 0x00, 0, 0}
113};
114
115
116enum _cache_type {
117	CTYPE_NULL = 0,
118	CTYPE_DATA = 1,
119	CTYPE_INST = 2,
120	CTYPE_UNIFIED = 3
121};
122
123union _cpuid4_leaf_eax {
124	struct {
125		enum _cache_type	type:5;
126		unsigned int		level:3;
127		unsigned int		is_self_initializing:1;
128		unsigned int		is_fully_associative:1;
129		unsigned int		reserved:4;
130		unsigned int		num_threads_sharing:12;
131		unsigned int		num_cores_on_die:6;
132	} split;
133	u32 full;
134};
135
136union _cpuid4_leaf_ebx {
137	struct {
138		unsigned int		coherency_line_size:12;
139		unsigned int		physical_line_partition:10;
140		unsigned int		ways_of_associativity:10;
141	} split;
142	u32 full;
143};
144
145union _cpuid4_leaf_ecx {
146	struct {
147		unsigned int		number_of_sets:32;
148	} split;
149	u32 full;
150};
151
152struct _cpuid4_info_regs {
153	union _cpuid4_leaf_eax eax;
154	union _cpuid4_leaf_ebx ebx;
155	union _cpuid4_leaf_ecx ecx;
156	unsigned int id;
157	unsigned long size;
158	struct amd_northbridge *nb;
159};
160
161static unsigned short num_cache_leaves;
162
163/* AMD doesn't have CPUID4. Emulate it here to report the same
164   information to the user.  This makes some assumptions about the machine:
165   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
166
167   In theory the TLBs could be reported as fake type (they are in "dummy").
168   Maybe later */
169union l1_cache {
170	struct {
171		unsigned line_size:8;
172		unsigned lines_per_tag:8;
173		unsigned assoc:8;
174		unsigned size_in_kb:8;
175	};
176	unsigned val;
177};
178
179union l2_cache {
180	struct {
181		unsigned line_size:8;
182		unsigned lines_per_tag:4;
183		unsigned assoc:4;
184		unsigned size_in_kb:16;
185	};
186	unsigned val;
187};
188
189union l3_cache {
190	struct {
191		unsigned line_size:8;
192		unsigned lines_per_tag:4;
193		unsigned assoc:4;
194		unsigned res:2;
195		unsigned size_encoded:14;
196	};
197	unsigned val;
198};
199
200static const unsigned short assocs[] = {
201	[1] = 1,
202	[2] = 2,
203	[4] = 4,
204	[6] = 8,
205	[8] = 16,
206	[0xa] = 32,
207	[0xb] = 48,
208	[0xc] = 64,
209	[0xd] = 96,
210	[0xe] = 128,
211	[0xf] = 0xffff /* fully associative - no way to show this currently */
212};
213
214static const unsigned char levels[] = { 1, 1, 2, 3 };
215static const unsigned char types[] = { 1, 2, 3, 3 };
216
217static const enum cache_type cache_type_map[] = {
218	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
219	[CTYPE_DATA] = CACHE_TYPE_DATA,
220	[CTYPE_INST] = CACHE_TYPE_INST,
221	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
222};
223
224static void
225amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
226		     union _cpuid4_leaf_ebx *ebx,
227		     union _cpuid4_leaf_ecx *ecx)
228{
229	unsigned dummy;
230	unsigned line_size, lines_per_tag, assoc, size_in_kb;
231	union l1_cache l1i, l1d;
232	union l2_cache l2;
233	union l3_cache l3;
234	union l1_cache *l1 = &l1d;
235
236	eax->full = 0;
237	ebx->full = 0;
238	ecx->full = 0;
239
240	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
241	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
242
243	switch (leaf) {
244	case 1:
245		l1 = &l1i;
246	case 0:
247		if (!l1->val)
248			return;
249		assoc = assocs[l1->assoc];
250		line_size = l1->line_size;
251		lines_per_tag = l1->lines_per_tag;
252		size_in_kb = l1->size_in_kb;
253		break;
254	case 2:
255		if (!l2.val)
256			return;
257		assoc = assocs[l2.assoc];
258		line_size = l2.line_size;
259		lines_per_tag = l2.lines_per_tag;
260		/* cpu_data has errata corrections for K7 applied */
261		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
262		break;
263	case 3:
264		if (!l3.val)
265			return;
266		assoc = assocs[l3.assoc];
267		line_size = l3.line_size;
268		lines_per_tag = l3.lines_per_tag;
269		size_in_kb = l3.size_encoded * 512;
270		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
271			size_in_kb = size_in_kb >> 1;
272			assoc = assoc >> 1;
273		}
274		break;
275	default:
276		return;
277	}
278
279	eax->split.is_self_initializing = 1;
280	eax->split.type = types[leaf];
281	eax->split.level = levels[leaf];
282	eax->split.num_threads_sharing = 0;
283	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
284
285
286	if (assoc == 0xffff)
287		eax->split.is_fully_associative = 1;
288	ebx->split.coherency_line_size = line_size - 1;
289	ebx->split.ways_of_associativity = assoc - 1;
290	ebx->split.physical_line_partition = lines_per_tag - 1;
291	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
292		(ebx->split.ways_of_associativity + 1) - 1;
293}
294
295#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
296
297/*
298 * L3 cache descriptors
299 */
300static void amd_calc_l3_indices(struct amd_northbridge *nb)
301{
302	struct amd_l3_cache *l3 = &nb->l3_cache;
303	unsigned int sc0, sc1, sc2, sc3;
304	u32 val = 0;
305
306	pci_read_config_dword(nb->misc, 0x1C4, &val);
307
308	/* calculate subcache sizes */
309	l3->subcaches[0] = sc0 = !(val & BIT(0));
310	l3->subcaches[1] = sc1 = !(val & BIT(4));
311
312	if (boot_cpu_data.x86 == 0x15) {
313		l3->subcaches[0] = sc0 += !(val & BIT(1));
314		l3->subcaches[1] = sc1 += !(val & BIT(5));
315	}
316
317	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
318	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
319
320	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
321}
322
323/*
324 * check whether a slot used for disabling an L3 index is occupied.
325 * @l3: L3 cache descriptor
326 * @slot: slot number (0..1)
327 *
328 * @returns: the disabled index if used or negative value if slot free.
329 */
330static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
331{
332	unsigned int reg = 0;
333
334	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
335
336	/* check whether this slot is activated already */
337	if (reg & (3UL << 30))
338		return reg & 0xfff;
339
340	return -1;
341}
342
343static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
344				  unsigned int slot)
345{
346	int index;
347	struct amd_northbridge *nb = this_leaf->priv;
348
349	index = amd_get_l3_disable_slot(nb, slot);
350	if (index >= 0)
351		return sprintf(buf, "%d\n", index);
352
353	return sprintf(buf, "FREE\n");
354}
355
356#define SHOW_CACHE_DISABLE(slot)					\
357static ssize_t								\
358cache_disable_##slot##_show(struct device *dev,				\
359			    struct device_attribute *attr, char *buf)	\
360{									\
361	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
362	return show_cache_disable(this_leaf, buf, slot);		\
363}
364SHOW_CACHE_DISABLE(0)
365SHOW_CACHE_DISABLE(1)
366
367static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
368				 unsigned slot, unsigned long idx)
369{
370	int i;
371
372	idx |= BIT(30);
373
374	/*
375	 *  disable index in all 4 subcaches
376	 */
377	for (i = 0; i < 4; i++) {
378		u32 reg = idx | (i << 20);
379
380		if (!nb->l3_cache.subcaches[i])
381			continue;
382
383		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
384
385		/*
386		 * We need to WBINVD on a core on the node containing the L3
387		 * cache which indices we disable therefore a simple wbinvd()
388		 * is not sufficient.
389		 */
390		wbinvd_on_cpu(cpu);
391
392		reg |= BIT(31);
393		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
394	}
395}
396
397/*
398 * disable a L3 cache index by using a disable-slot
399 *
400 * @l3:    L3 cache descriptor
401 * @cpu:   A CPU on the node containing the L3 cache
402 * @slot:  slot number (0..1)
403 * @index: index to disable
404 *
405 * @return: 0 on success, error status on failure
406 */
407static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
408			    unsigned slot, unsigned long index)
409{
410	int ret = 0;
411
412	/*  check if @slot is already used or the index is already disabled */
413	ret = amd_get_l3_disable_slot(nb, slot);
414	if (ret >= 0)
415		return -EEXIST;
416
417	if (index > nb->l3_cache.indices)
418		return -EINVAL;
419
420	/* check whether the other slot has disabled the same index already */
421	if (index == amd_get_l3_disable_slot(nb, !slot))
422		return -EEXIST;
423
424	amd_l3_disable_index(nb, cpu, slot, index);
425
426	return 0;
427}
428
429static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
430				   const char *buf, size_t count,
431				   unsigned int slot)
432{
433	unsigned long val = 0;
434	int cpu, err = 0;
435	struct amd_northbridge *nb = this_leaf->priv;
436
437	if (!capable(CAP_SYS_ADMIN))
438		return -EPERM;
439
440	cpu = cpumask_first(&this_leaf->shared_cpu_map);
441
442	if (kstrtoul(buf, 10, &val) < 0)
443		return -EINVAL;
444
445	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
446	if (err) {
447		if (err == -EEXIST)
448			pr_warn("L3 slot %d in use/index already disabled!\n",
449				   slot);
450		return err;
451	}
452	return count;
453}
454
455#define STORE_CACHE_DISABLE(slot)					\
456static ssize_t								\
457cache_disable_##slot##_store(struct device *dev,			\
458			     struct device_attribute *attr,		\
459			     const char *buf, size_t count)		\
460{									\
461	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
462	return store_cache_disable(this_leaf, buf, count, slot);	\
463}
464STORE_CACHE_DISABLE(0)
465STORE_CACHE_DISABLE(1)
466
467static ssize_t subcaches_show(struct device *dev,
468			      struct device_attribute *attr, char *buf)
469{
470	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
471	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
472
473	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
474}
475
476static ssize_t subcaches_store(struct device *dev,
477			       struct device_attribute *attr,
478			       const char *buf, size_t count)
479{
480	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
481	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
482	unsigned long val;
483
484	if (!capable(CAP_SYS_ADMIN))
485		return -EPERM;
486
487	if (kstrtoul(buf, 16, &val) < 0)
488		return -EINVAL;
489
490	if (amd_set_subcaches(cpu, val))
491		return -EINVAL;
492
493	return count;
494}
495
496static DEVICE_ATTR_RW(cache_disable_0);
497static DEVICE_ATTR_RW(cache_disable_1);
498static DEVICE_ATTR_RW(subcaches);
499
500static umode_t
501cache_private_attrs_is_visible(struct kobject *kobj,
502			       struct attribute *attr, int unused)
503{
504	struct device *dev = kobj_to_dev(kobj);
505	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
506	umode_t mode = attr->mode;
507
508	if (!this_leaf->priv)
509		return 0;
510
511	if ((attr == &dev_attr_subcaches.attr) &&
512	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
513		return mode;
514
515	if ((attr == &dev_attr_cache_disable_0.attr ||
516	     attr == &dev_attr_cache_disable_1.attr) &&
517	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
518		return mode;
519
520	return 0;
521}
522
523static struct attribute_group cache_private_group = {
524	.is_visible = cache_private_attrs_is_visible,
525};
526
527static void init_amd_l3_attrs(void)
528{
529	int n = 1;
530	static struct attribute **amd_l3_attrs;
531
532	if (amd_l3_attrs) /* already initialized */
533		return;
534
535	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
536		n += 2;
537	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
538		n += 1;
539
540	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
541	if (!amd_l3_attrs)
542		return;
543
544	n = 0;
545	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
546		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
547		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
548	}
549	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
550		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
551
552	cache_private_group.attrs = amd_l3_attrs;
553}
554
555const struct attribute_group *
556cache_get_priv_group(struct cacheinfo *this_leaf)
557{
558	struct amd_northbridge *nb = this_leaf->priv;
559
560	if (this_leaf->level < 3 || !nb)
561		return NULL;
562
563	if (nb && nb->l3_cache.indices)
564		init_amd_l3_attrs();
565
566	return &cache_private_group;
567}
568
569static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
570{
571	int node;
572
573	/* only for L3, and not in virtualized environments */
574	if (index < 3)
575		return;
576
577	node = amd_get_nb_id(smp_processor_id());
578	this_leaf->nb = node_to_amd_nb(node);
579	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
580		amd_calc_l3_indices(this_leaf->nb);
581}
582#else
583#define amd_init_l3_cache(x, y)
584#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
585
586static int
587cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
588{
589	union _cpuid4_leaf_eax	eax;
590	union _cpuid4_leaf_ebx	ebx;
591	union _cpuid4_leaf_ecx	ecx;
592	unsigned		edx;
593
594	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
595		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
596			cpuid_count(0x8000001d, index, &eax.full,
597				    &ebx.full, &ecx.full, &edx);
598		else
599			amd_cpuid4(index, &eax, &ebx, &ecx);
600		amd_init_l3_cache(this_leaf, index);
601	} else {
602		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
603	}
604
605	if (eax.split.type == CTYPE_NULL)
606		return -EIO; /* better error ? */
607
608	this_leaf->eax = eax;
609	this_leaf->ebx = ebx;
610	this_leaf->ecx = ecx;
611	this_leaf->size = (ecx.split.number_of_sets          + 1) *
612			  (ebx.split.coherency_line_size     + 1) *
613			  (ebx.split.physical_line_partition + 1) *
614			  (ebx.split.ways_of_associativity   + 1);
615	return 0;
616}
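/*
 * Worked example for the size computation above (illustrative numbers): a
 * 16-way cache with 64-byte lines, one physical line partition and 4096 sets
 * reports the CPUID fields as 15, 63, 0 and 4095 respectively, giving
 * size = 4096 * 64 * 1 * 16 = 4194304 bytes (4 MB).
 */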
617
618static int find_num_cache_leaves(struct cpuinfo_x86 *c)
619{
620	unsigned int		eax, ebx, ecx, edx, op;
621	union _cpuid4_leaf_eax	cache_eax;
622	int			i = -1;
623
624	if (c->x86_vendor == X86_VENDOR_AMD)
625		op = 0x8000001d;
626	else
627		op = 4;
628
629	do {
630		++i;
631		/* Do cpuid(op) loop to find out num_cache_leaves */
632		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
633		cache_eax.full = eax;
634	} while (cache_eax.split.type != CTYPE_NULL);
635	return i;
636}
637
638void init_amd_cacheinfo(struct cpuinfo_x86 *c)
639{
640
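	/*
	 * Without TOPOEXT, fall back to the legacy L2/L3 leaf below: CPUID
	 * 0x80000006 EDX[15:12] holds the L3 associativity, so a non-zero
	 * nibble means an L3 is present (four cache leaves), otherwise three.
	 */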
641	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
642		num_cache_leaves = find_num_cache_leaves(c);
643	} else if (c->extended_cpuid_level >= 0x80000006) {
644		if (cpuid_edx(0x80000006) & 0xf000)
645			num_cache_leaves = 4;
646		else
647			num_cache_leaves = 3;
648	}
649}
650
651unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
652{
653	/* Cache sizes */
654	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
655	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
656	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
657	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
658#ifdef CONFIG_SMP
659	unsigned int cpu = c->cpu_index;
660#endif
661
662	if (c->cpuid_level > 3) {
663		static int is_initialized;
664
665		if (is_initialized == 0) {
666			/* Init num_cache_leaves from boot CPU */
667			num_cache_leaves = find_num_cache_leaves(c);
668			is_initialized++;
669		}
670
671		/*
672		 * Whenever possible use cpuid(4), deterministic cache
673		 * parameters cpuid leaf to find the cache details
674		 */
675		for (i = 0; i < num_cache_leaves; i++) {
676			struct _cpuid4_info_regs this_leaf = {};
677			int retval;
678
679			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
680			if (retval < 0)
681				continue;
682
683			switch (this_leaf.eax.split.level) {
684			case 1:
685				if (this_leaf.eax.split.type == CTYPE_DATA)
686					new_l1d = this_leaf.size/1024;
687				else if (this_leaf.eax.split.type == CTYPE_INST)
688					new_l1i = this_leaf.size/1024;
689				break;
690			case 2:
691				new_l2 = this_leaf.size/1024;
692				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
693				index_msb = get_count_order(num_threads_sharing);
694				l2_id = c->apicid & ~((1 << index_msb) - 1);
695				break;
696			case 3:
697				new_l3 = this_leaf.size/1024;
698				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
699				index_msb = get_count_order(num_threads_sharing);
700				l3_id = c->apicid & ~((1 << index_msb) - 1);
701				break;
702			default:
703				break;
704			}
705		}
706	}
707	/*
708	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
709	 * trace cache
710	 */
711	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
712		/* supports eax=2 call */
713		int j, n;
714		unsigned int regs[4];
715		unsigned char *dp = (unsigned char *)regs;
716		int only_trace = 0;
717
718		if (num_cache_leaves != 0 && c->x86 == 15)
719			only_trace = 1;
720
721		/* Number of times to iterate */
722		n = cpuid_eax(2) & 0xFF;
723
724		for (i = 0 ; i < n ; i++) {
725			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
726
727			/* If bit 31 is set, this is an unknown format */
728			for (j = 0 ; j < 3 ; j++)
729				if (regs[j] & (1 << 31))
730					regs[j] = 0;
731
732			/* Byte 0 of EAX is the iteration count, not a descriptor */
733			for (j = 1 ; j < 16 ; j++) {
734				unsigned char des = dp[j];
735				unsigned char k = 0;
736
737				/* look up this descriptor in the table */
738				while (cache_table[k].descriptor != 0) {
739					if (cache_table[k].descriptor == des) {
740						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
741							break;
742						switch (cache_table[k].cache_type) {
743						case LVL_1_INST:
744							l1i += cache_table[k].size;
745							break;
746						case LVL_1_DATA:
747							l1d += cache_table[k].size;
748							break;
749						case LVL_2:
750							l2 += cache_table[k].size;
751							break;
752						case LVL_3:
753							l3 += cache_table[k].size;
754							break;
755						case LVL_TRACE:
756							trace += cache_table[k].size;
757							break;
758						}
759
760						break;
761					}
762
763					k++;
764				}
765			}
766		}
767	}
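	/*
	 * At this point l1i/l1d/l2/l3/trace hold the totals accumulated from
	 * the cpuid(2) descriptor bytes: each non-zero byte (other than the
	 * low byte of EAX, the iteration count) was matched against
	 * cache_table and its size added in KB.
	 */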
768
769	if (new_l1d)
770		l1d = new_l1d;
771
772	if (new_l1i)
773		l1i = new_l1i;
774
775	if (new_l2) {
776		l2 = new_l2;
777#ifdef CONFIG_SMP
778		per_cpu(cpu_llc_id, cpu) = l2_id;
779#endif
780	}
781
782	if (new_l3) {
783		l3 = new_l3;
784#ifdef CONFIG_SMP
785		per_cpu(cpu_llc_id, cpu) = l3_id;
786#endif
787	}
788
789#ifdef CONFIG_SMP
790	/*
791	 * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
792	 * turn means that the only possibility is SMT (as indicated by
793	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
794	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
795	 * c->phys_proc_id.
796	 */
797	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
798		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
799#endif
800
801	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
802
803	return l2;
804}
805
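/*
 * Returns 1 when the AMD-specific path below populated shared_cpu_map for
 * this leaf (TOPOEXT topology, or the legacy index == 3 L3 case), and 0 to
 * make the caller fall back to the generic APIC-ID based setup.
 */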
806static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
807				    struct _cpuid4_info_regs *base)
808{
809	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
810	struct cacheinfo *this_leaf;
811	int i, sibling;
812
813	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
814		unsigned int apicid, nshared, first, last;
815
816		this_leaf = this_cpu_ci->info_list + index;
817		nshared = base->eax.split.num_threads_sharing + 1;
818		apicid = cpu_data(cpu).apicid;
819		first = apicid - (apicid % nshared);
820		last = first + nshared - 1;
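		/*
		 * Illustrative example: nshared = 8 and apicid = 0x13 give
		 * first = 0x13 - (0x13 % 8) = 0x10 and last = 0x17, i.e. the
		 * eight APIC IDs 0x10..0x17 share this cache instance.
		 */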
821
822		for_each_online_cpu(i) {
823			this_cpu_ci = get_cpu_cacheinfo(i);
824			if (!this_cpu_ci->info_list)
825				continue;
826
827			apicid = cpu_data(i).apicid;
828			if ((apicid < first) || (apicid > last))
829				continue;
830
831			this_leaf = this_cpu_ci->info_list + index;
832
833			for_each_online_cpu(sibling) {
834				apicid = cpu_data(sibling).apicid;
835				if ((apicid < first) || (apicid > last))
836					continue;
837				cpumask_set_cpu(sibling,
838						&this_leaf->shared_cpu_map);
839			}
840		}
841	} else if (index == 3) {
842		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
843			this_cpu_ci = get_cpu_cacheinfo(i);
844			if (!this_cpu_ci->info_list)
845				continue;
846			this_leaf = this_cpu_ci->info_list + index;
847			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
848				if (!cpu_online(sibling))
849					continue;
850				cpumask_set_cpu(sibling,
851						&this_leaf->shared_cpu_map);
852			}
853		}
854	} else
855		return 0;
856
857	return 1;
858}
859
860static void __cache_cpumap_setup(unsigned int cpu, int index,
861				 struct _cpuid4_info_regs *base)
862{
863	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
864	struct cacheinfo *this_leaf, *sibling_leaf;
865	unsigned long num_threads_sharing;
866	int index_msb, i;
867	struct cpuinfo_x86 *c = &cpu_data(cpu);
868
869	if (c->x86_vendor == X86_VENDOR_AMD) {
870		if (__cache_amd_cpumap_setup(cpu, index, base))
871			return;
872	}
873
874	this_leaf = this_cpu_ci->info_list + index;
875	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
876
877	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
878	if (num_threads_sharing == 1)
879		return;
880
881	index_msb = get_count_order(num_threads_sharing);
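	/*
	 * Example: num_threads_sharing = 2 gives index_msb = 1, so two SMT
	 * siblings whose APIC IDs differ only in bit 0 compare equal after
	 * the shift below and are marked as sharing this cache.
	 */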
882
883	for_each_online_cpu(i)
884		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
885			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
886
887			if (i == cpu || !sib_cpu_ci->info_list)
888			continue; /* skip itself or CPUs without cacheinfo */
889			sibling_leaf = sib_cpu_ci->info_list + index;
890			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
891			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
892		}
893}
894
895static void ci_leaf_init(struct cacheinfo *this_leaf,
896			 struct _cpuid4_info_regs *base)
897{
898	this_leaf->id = base->id;
899	this_leaf->attributes = CACHE_ID;
900	this_leaf->level = base->eax.split.level;
901	this_leaf->type = cache_type_map[base->eax.split.type];
902	this_leaf->coherency_line_size =
903				base->ebx.split.coherency_line_size + 1;
904	this_leaf->ways_of_associativity =
905				base->ebx.split.ways_of_associativity + 1;
906	this_leaf->size = base->size;
907	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
908	this_leaf->physical_line_partition =
909				base->ebx.split.physical_line_partition + 1;
910	this_leaf->priv = base->nb;
911}
912
913static int __init_cache_level(unsigned int cpu)
914{
915	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
916
917	if (!num_cache_leaves)
918		return -ENOENT;
919	if (!this_cpu_ci)
920		return -EINVAL;
921	this_cpu_ci->num_levels = 3;
922	this_cpu_ci->num_leaves = num_cache_leaves;
923	return 0;
924}
925
926/*
927 * The maximum number of threads sharing a cache comes from CPUID.4:EAX[25:14],
928 * with ECX selecting the cache index. Right-shifting the APIC ID by the order
929 * of that number yields the cache id for this cache node.
930 */
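/*
 * Example: a leaf whose EAX sharing field reads 15 (i.e. 16 threads) yields
 * index_msb = get_count_order(16) = 4, so APIC IDs 0x20-0x2f all map to
 * cache id 0x2 for that leaf.
 */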
931static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
932{
933	struct cpuinfo_x86 *c = &cpu_data(cpu);
934	unsigned long num_threads_sharing;
935	int index_msb;
936
937	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
938	index_msb = get_count_order(num_threads_sharing);
939	id4_regs->id = c->apicid >> index_msb;
940}
941
942static int __populate_cache_leaves(unsigned int cpu)
943{
944	unsigned int idx, ret;
945	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
946	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
947	struct _cpuid4_info_regs id4_regs = {};
948
949	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
950		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
951		if (ret)
952			return ret;
953		get_cache_id(cpu, &id4_regs);
954		ci_leaf_init(this_leaf++, &id4_regs);
955		__cache_cpumap_setup(cpu, idx, &id4_regs);
956	}
957	this_cpu_ci->cpu_map_populated = true;
958
959	return 0;
960}
961
962DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
963DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)