Linux Audio

Check our new training course

Linux debugging, profiling, tracing and performance analysis training

Mar 24-27, 2025, special US time zones
Register
Loading...
v4.17
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 *	Routines to identify caches on Intel CPU.
  4 *
  5 *	Changes:
  6 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  7 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  8 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  9 */
 10
 
 11#include <linux/slab.h>
 12#include <linux/cacheinfo.h>
 
 13#include <linux/cpu.h>
 14#include <linux/sched.h>
 15#include <linux/capability.h>
 16#include <linux/sysfs.h>
 17#include <linux/pci.h>
 18
 19#include <asm/cpufeature.h>
 
 20#include <asm/amd_nb.h>
 21#include <asm/smp.h>
 22
 /* Cache kinds used in the cache_type field of the descriptor table. */
#define LVL_1_INST	1	/* L1 instruction cache */
#define LVL_1_DATA	2	/* L1 data cache */
#define LVL_2		3	/* L2 cache */
#define LVL_3		4	/* L3 cache */
#define LVL_TRACE	5	/* trace cache */
 28
 /* One entry of the cache descriptor lookup table. */
struct _cache_table {
	unsigned char descriptor;	/* descriptor byte value to match */
	char cache_type;		/* one of the LVL_* constants */
	short size;			/* cache size as listed in the table */
};
 34
 /* Express a size given in megabytes in the table's unit (x * 1024). */
#define MB(x)	((x) * 1024)
 36
 37/* All the cache descriptor types we care about (no TLB or
 38   trace cache entries) */
 39
 40static const struct _cache_table cache_table[] =
 41{
 42	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 43	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
 44	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
 45	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 46	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
 47	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
 48	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
 49	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 50	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 51	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 52	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 53	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 54	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
 55	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
 56	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 57	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 58	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
 59	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 60	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 61	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 62	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
 63	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
 64	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
 65	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
 66	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
 67	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
 68	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
 69	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
 70	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
 71	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 72	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
 73	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 74	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
 75	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
 76	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
 77	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 78	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 79	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 80	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 81	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 82	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 83	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 84	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 85	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 86	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 87	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 88	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 89	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 90	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 91	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 92	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 93	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 94	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 95	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 96	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 97	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 98	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 99	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
100	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
101	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
102	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
103	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
104	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
105	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
106	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
107	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
108	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
109	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
110	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
111	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
112	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
113	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
114	{ 0x00, 0, 0}
115};
116
117
/* Cache type values as stored in the "type" field of the CPUID(4) EAX leaf. */
enum _cache_type {
	CTYPE_NULL    = 0,	/* no (more) caches */
	CTYPE_DATA    = 1,	/* data cache */
	CTYPE_INST    = 2,	/* instruction cache */
	CTYPE_UNIFIED = 3	/* unified cache */
};
124
125union _cpuid4_leaf_eax {
126	struct {
127		enum _cache_type	type:5;
128		unsigned int		level:3;
129		unsigned int		is_self_initializing:1;
130		unsigned int		is_fully_associative:1;
131		unsigned int		reserved:4;
132		unsigned int		num_threads_sharing:12;
133		unsigned int		num_cores_on_die:6;
134	} split;
135	u32 full;
136};
137
138union _cpuid4_leaf_ebx {
139	struct {
140		unsigned int		coherency_line_size:12;
141		unsigned int		physical_line_partition:10;
142		unsigned int		ways_of_associativity:10;
143	} split;
144	u32 full;
145};
146
147union _cpuid4_leaf_ecx {
148	struct {
149		unsigned int		number_of_sets:32;
150	} split;
151	u32 full;
152};
153
154struct _cpuid4_info_regs {
155	union _cpuid4_leaf_eax eax;
156	union _cpuid4_leaf_ebx ebx;
157	union _cpuid4_leaf_ecx ecx;
158	unsigned int id;
159	unsigned long size;
160	struct amd_northbridge *nb;
161};
162
/* Number of cache leaves found at init time; 0 means none were detected. */
static unsigned short num_cache_leaves;
 
 
 
 
 
164
165/* AMD doesn't have CPUID4. Emulate it here to report the same
166   information to the user.  This makes some assumptions about the machine:
167   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
168
169   In theory the TLBs could be reported as fake type (they are in "dummy").
170   Maybe later */
/* L1 cache register as read from CPUID 0x80000005 (ECX = L1d, EDX = L1i). */
union l1_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:8;
		unsigned int assoc:8;
		unsigned int size_in_kb:8;
	};
	unsigned int val;	/* raw register value */
};
180
/* L2 cache register as read from CPUID 0x80000006 ECX. */
union l2_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, see assocs[] */
		unsigned int size_in_kb:16;
	};
	unsigned int val;	/* raw register value */
};
190
/* L3 cache register as read from CPUID 0x80000006 EDX. */
union l3_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, see assocs[] */
		unsigned int res:2;
		unsigned int size_encoded:14;	/* in units of 512 KB */
	};
	unsigned int val;	/* raw register value */
};
201
/*
 * Maps the 4-bit encoded associativity field to a number of ways.
 * Encodings that are not listed map to 0.
 */
static const unsigned short assocs[] = {
	[0x1] = 1,
	[0x2] = 2,
	[0x4] = 4,
	[0x6] = 8,
	[0x8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff	/* fully associative - no way to show this currently */
};
215
/*
 * Per emulated AMD leaf (0 = L1d, 1 = L1i, 2 = L2, 3 = L3): the cache level
 * and the cache type (enum _cache_type value) reported for it.
 */
static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[]  = { 1, 2, 3, 3 };
218
219static const enum cache_type cache_type_map[] = {
220	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
221	[CTYPE_DATA] = CACHE_TYPE_DATA,
222	[CTYPE_INST] = CACHE_TYPE_INST,
223	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
224};
225
226static void
227amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
228		     union _cpuid4_leaf_ebx *ebx,
229		     union _cpuid4_leaf_ecx *ecx)
230{
231	unsigned dummy;
232	unsigned line_size, lines_per_tag, assoc, size_in_kb;
233	union l1_cache l1i, l1d;
234	union l2_cache l2;
235	union l3_cache l3;
236	union l1_cache *l1 = &l1d;
237
238	eax->full = 0;
239	ebx->full = 0;
240	ecx->full = 0;
241
242	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
243	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
244
245	switch (leaf) {
246	case 1:
247		l1 = &l1i;
248	case 0:
249		if (!l1->val)
250			return;
251		assoc = assocs[l1->assoc];
252		line_size = l1->line_size;
253		lines_per_tag = l1->lines_per_tag;
254		size_in_kb = l1->size_in_kb;
255		break;
256	case 2:
257		if (!l2.val)
258			return;
259		assoc = assocs[l2.assoc];
260		line_size = l2.line_size;
261		lines_per_tag = l2.lines_per_tag;
262		/* cpu_data has errata corrections for K7 applied */
263		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
264		break;
265	case 3:
266		if (!l3.val)
267			return;
268		assoc = assocs[l3.assoc];
269		line_size = l3.line_size;
270		lines_per_tag = l3.lines_per_tag;
271		size_in_kb = l3.size_encoded * 512;
272		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
273			size_in_kb = size_in_kb >> 1;
274			assoc = assoc >> 1;
275		}
276		break;
277	default:
278		return;
279	}
280
281	eax->split.is_self_initializing = 1;
282	eax->split.type = types[leaf];
283	eax->split.level = levels[leaf];
284	eax->split.num_threads_sharing = 0;
285	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
286
287
288	if (assoc == 0xffff)
289		eax->split.is_fully_associative = 1;
290	ebx->split.coherency_line_size = line_size - 1;
291	ebx->split.ways_of_associativity = assoc - 1;
292	ebx->split.physical_line_partition = lines_per_tag - 1;
293	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
294		(ebx->split.ways_of_associativity + 1) - 1;
295}
296
297#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 
 
 
 
 
298
 
299/*
300 * L3 cache descriptors
301 */
302static void amd_calc_l3_indices(struct amd_northbridge *nb)
303{
304	struct amd_l3_cache *l3 = &nb->l3_cache;
305	unsigned int sc0, sc1, sc2, sc3;
306	u32 val = 0;
307
308	pci_read_config_dword(nb->misc, 0x1C4, &val);
309
310	/* calculate subcache sizes */
311	l3->subcaches[0] = sc0 = !(val & BIT(0));
312	l3->subcaches[1] = sc1 = !(val & BIT(4));
313
314	if (boot_cpu_data.x86 == 0x15) {
315		l3->subcaches[0] = sc0 += !(val & BIT(1));
316		l3->subcaches[1] = sc1 += !(val & BIT(5));
317	}
318
319	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
320	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
321
322	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
323}
324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325/*
326 * check whether a slot used for disabling an L3 index is occupied.
327 * @l3: L3 cache descriptor
328 * @slot: slot number (0..1)
329 *
330 * @returns: the disabled index if used or negative value if slot free.
331 */
332static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
333{
334	unsigned int reg = 0;
335
336	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
337
338	/* check whether this slot is activated already */
339	if (reg & (3UL << 30))
340		return reg & 0xfff;
341
342	return -1;
343}
344
345static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
346				  unsigned int slot)
347{
348	int index;
349	struct amd_northbridge *nb = this_leaf->priv;
350
351	index = amd_get_l3_disable_slot(nb, slot);
 
 
 
352	if (index >= 0)
353		return sprintf(buf, "%d\n", index);
354
355	return sprintf(buf, "FREE\n");
356}
357
358#define SHOW_CACHE_DISABLE(slot)					\
359static ssize_t								\
360cache_disable_##slot##_show(struct device *dev,				\
361			    struct device_attribute *attr, char *buf)	\
362{									\
363	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
364	return show_cache_disable(this_leaf, buf, slot);		\
365}
366SHOW_CACHE_DISABLE(0)
367SHOW_CACHE_DISABLE(1)
368
369static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
370				 unsigned slot, unsigned long idx)
371{
372	int i;
373
374	idx |= BIT(30);
375
376	/*
377	 *  disable index in all 4 subcaches
378	 */
379	for (i = 0; i < 4; i++) {
380		u32 reg = idx | (i << 20);
381
382		if (!nb->l3_cache.subcaches[i])
383			continue;
384
385		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
386
387		/*
388		 * We need to WBINVD on a core on the node containing the L3
389		 * cache which indices we disable therefore a simple wbinvd()
390		 * is not sufficient.
391		 */
392		wbinvd_on_cpu(cpu);
393
394		reg |= BIT(31);
395		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
396	}
397}
398
399/*
400 * disable a L3 cache index by using a disable-slot
401 *
402 * @l3:    L3 cache descriptor
403 * @cpu:   A CPU on the node containing the L3 cache
404 * @slot:  slot number (0..1)
405 * @index: index to disable
406 *
407 * @return: 0 on success, error status on failure
408 */
409static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
410			    unsigned slot, unsigned long index)
411{
412	int ret = 0;
413
414	/*  check if @slot is already used or the index is already disabled */
415	ret = amd_get_l3_disable_slot(nb, slot);
416	if (ret >= 0)
417		return -EEXIST;
418
419	if (index > nb->l3_cache.indices)
420		return -EINVAL;
421
422	/* check whether the other slot has disabled the same index already */
423	if (index == amd_get_l3_disable_slot(nb, !slot))
424		return -EEXIST;
425
426	amd_l3_disable_index(nb, cpu, slot, index);
427
428	return 0;
429}
430
431static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
432				   const char *buf, size_t count,
433				   unsigned int slot)
434{
435	unsigned long val = 0;
436	int cpu, err = 0;
437	struct amd_northbridge *nb = this_leaf->priv;
438
439	if (!capable(CAP_SYS_ADMIN))
440		return -EPERM;
441
442	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
 
 
443
444	if (kstrtoul(buf, 10, &val) < 0)
445		return -EINVAL;
446
447	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
448	if (err) {
449		if (err == -EEXIST)
450			pr_warn("L3 slot %d in use/index already disabled!\n",
451				   slot);
452		return err;
453	}
454	return count;
455}
456
457#define STORE_CACHE_DISABLE(slot)					\
458static ssize_t								\
459cache_disable_##slot##_store(struct device *dev,			\
460			     struct device_attribute *attr,		\
461			     const char *buf, size_t count)		\
462{									\
463	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
464	return store_cache_disable(this_leaf, buf, count, slot);	\
465}
466STORE_CACHE_DISABLE(0)
467STORE_CACHE_DISABLE(1)
468
469static ssize_t subcaches_show(struct device *dev,
470			      struct device_attribute *attr, char *buf)
 
 
 
 
 
471{
472	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
473	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
474
475	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
476}
477
478static ssize_t subcaches_store(struct device *dev,
479			       struct device_attribute *attr,
480			       const char *buf, size_t count)
481{
482	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
483	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
484	unsigned long val;
485
486	if (!capable(CAP_SYS_ADMIN))
487		return -EPERM;
488
489	if (kstrtoul(buf, 16, &val) < 0)
 
 
 
490		return -EINVAL;
491
492	if (amd_set_subcaches(cpu, val))
493		return -EINVAL;
494
495	return count;
496}
497
/*
 * Read/write device attributes backed by the <name>_show()/<name>_store()
 * handlers defined above.
 */
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
501
502static umode_t
503cache_private_attrs_is_visible(struct kobject *kobj,
504			       struct attribute *attr, int unused)
505{
506	struct device *dev = kobj_to_dev(kobj);
507	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
508	umode_t mode = attr->mode;
509
510	if (!this_leaf->priv)
511		return 0;
512
513	if ((attr == &dev_attr_subcaches.attr) &&
514	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
515		return mode;
516
517	if ((attr == &dev_attr_cache_disable_0.attr ||
518	     attr == &dev_attr_cache_disable_1.attr) &&
519	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
520		return mode;
521
522	return 0;
523}
524
525static struct attribute_group cache_private_group = {
526	.is_visible = cache_private_attrs_is_visible,
527};
528
529static void init_amd_l3_attrs(void)
530{
531	int n = 1;
532	static struct attribute **amd_l3_attrs;
533
534	if (amd_l3_attrs) /* already initialized */
535		return;
536
537	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
538		n += 2;
539	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
540		n += 1;
541
542	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
543	if (!amd_l3_attrs)
544		return;
545
546	n = 0;
547	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
548		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
549		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
550	}
551	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
552		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
553
554	cache_private_group.attrs = amd_l3_attrs;
555}
556
557const struct attribute_group *
558cache_get_priv_group(struct cacheinfo *this_leaf)
559{
560	struct amd_northbridge *nb = this_leaf->priv;
561
562	if (this_leaf->level < 3 || !nb)
563		return NULL;
564
565	if (nb && nb->l3_cache.indices)
566		init_amd_l3_attrs();
567
568	return &cache_private_group;
569}
570
571static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
572{
573	int node;
574
575	/* only for L3, and not in virtualized environments */
576	if (index < 3)
577		return;
578
579	node = amd_get_nb_id(smp_processor_id());
580	this_leaf->nb = node_to_amd_nb(node);
581	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
582		amd_calc_l3_indices(this_leaf->nb);
583}
584#else
585#define amd_init_l3_cache(x, y)
586#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
587
588static int
589cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
590{
591	union _cpuid4_leaf_eax	eax;
592	union _cpuid4_leaf_ebx	ebx;
593	union _cpuid4_leaf_ecx	ecx;
594	unsigned		edx;
595
596	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
597		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
598			cpuid_count(0x8000001d, index, &eax.full,
599				    &ebx.full, &ecx.full, &edx);
600		else
601			amd_cpuid4(index, &eax, &ebx, &ecx);
602		amd_init_l3_cache(this_leaf, index);
603	} else {
604		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
605	}
606
607	if (eax.split.type == CTYPE_NULL)
608		return -EIO; /* better error ? */
609
610	this_leaf->eax = eax;
611	this_leaf->ebx = ebx;
612	this_leaf->ecx = ecx;
613	this_leaf->size = (ecx.split.number_of_sets          + 1) *
614			  (ebx.split.coherency_line_size     + 1) *
615			  (ebx.split.physical_line_partition + 1) *
616			  (ebx.split.ways_of_associativity   + 1);
617	return 0;
618}
619
620static int find_num_cache_leaves(struct cpuinfo_x86 *c)
621{
622	unsigned int		eax, ebx, ecx, edx, op;
623	union _cpuid4_leaf_eax	cache_eax;
624	int 			i = -1;
625
626	if (c->x86_vendor == X86_VENDOR_AMD)
627		op = 0x8000001d;
628	else
629		op = 4;
630
631	do {
632		++i;
633		/* Do cpuid(op) loop to find out num_cache_leaves */
634		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
635		cache_eax.full = eax;
636	} while (cache_eax.split.type != CTYPE_NULL);
637	return i;
638}
639
640void init_amd_cacheinfo(struct cpuinfo_x86 *c)
641{
642
643	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
644		num_cache_leaves = find_num_cache_leaves(c);
645	} else if (c->extended_cpuid_level >= 0x80000006) {
646		if (cpuid_edx(0x80000006) & 0xf000)
647			num_cache_leaves = 4;
648		else
649			num_cache_leaves = 3;
650	}
651}
652
653unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
654{
655	/* Cache sizes */
656	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
657	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
658	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
659	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
660#ifdef CONFIG_SMP
661	unsigned int cpu = c->cpu_index;
662#endif
663
664	if (c->cpuid_level > 3) {
665		static int is_initialized;
666
667		if (is_initialized == 0) {
668			/* Init num_cache_leaves from boot CPU */
669			num_cache_leaves = find_num_cache_leaves(c);
670			is_initialized++;
671		}
672
673		/*
674		 * Whenever possible use cpuid(4), deterministic cache
675		 * parameters cpuid leaf to find the cache details
676		 */
677		for (i = 0; i < num_cache_leaves; i++) {
678			struct _cpuid4_info_regs this_leaf = {};
679			int retval;
680
681			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
682			if (retval < 0)
683				continue;
684
685			switch (this_leaf.eax.split.level) {
686			case 1:
687				if (this_leaf.eax.split.type == CTYPE_DATA)
688					new_l1d = this_leaf.size/1024;
689				else if (this_leaf.eax.split.type == CTYPE_INST)
690					new_l1i = this_leaf.size/1024;
691				break;
692			case 2:
693				new_l2 = this_leaf.size/1024;
694				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
695				index_msb = get_count_order(num_threads_sharing);
696				l2_id = c->apicid & ~((1 << index_msb) - 1);
697				break;
698			case 3:
699				new_l3 = this_leaf.size/1024;
700				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
701				index_msb = get_count_order(num_threads_sharing);
702				l3_id = c->apicid & ~((1 << index_msb) - 1);
703				break;
704			default:
705				break;
706			}
707		}
708	}
709	/*
710	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
711	 * trace cache
712	 */
713	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
714		/* supports eax=2  call */
715		int j, n;
716		unsigned int regs[4];
717		unsigned char *dp = (unsigned char *)regs;
718		int only_trace = 0;
719
720		if (num_cache_leaves != 0 && c->x86 == 15)
721			only_trace = 1;
722
723		/* Number of times to iterate */
724		n = cpuid_eax(2) & 0xFF;
725
726		for (i = 0 ; i < n ; i++) {
727			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
728
729			/* If bit 31 is set, this is an unknown format */
730			for (j = 0 ; j < 3 ; j++)
731				if (regs[j] & (1 << 31))
732					regs[j] = 0;
733
734			/* Byte 0 is level count, not a descriptor */
735			for (j = 1 ; j < 16 ; j++) {
736				unsigned char des = dp[j];
737				unsigned char k = 0;
738
739				/* look up this descriptor in the table */
740				while (cache_table[k].descriptor != 0) {
741					if (cache_table[k].descriptor == des) {
742						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
743							break;
744						switch (cache_table[k].cache_type) {
745						case LVL_1_INST:
746							l1i += cache_table[k].size;
747							break;
748						case LVL_1_DATA:
749							l1d += cache_table[k].size;
750							break;
751						case LVL_2:
752							l2 += cache_table[k].size;
753							break;
754						case LVL_3:
755							l3 += cache_table[k].size;
756							break;
757						case LVL_TRACE:
758							trace += cache_table[k].size;
759							break;
760						}
761
762						break;
763					}
764
765					k++;
766				}
767			}
768		}
769	}
770
771	if (new_l1d)
772		l1d = new_l1d;
773
774	if (new_l1i)
775		l1i = new_l1i;
776
777	if (new_l2) {
778		l2 = new_l2;
779#ifdef CONFIG_SMP
780		per_cpu(cpu_llc_id, cpu) = l2_id;
781#endif
782	}
783
784	if (new_l3) {
785		l3 = new_l3;
786#ifdef CONFIG_SMP
787		per_cpu(cpu_llc_id, cpu) = l3_id;
788#endif
789	}
790
791#ifdef CONFIG_SMP
792	/*
793	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
794	 * turns means that the only possibility is SMT (as indicated in
795	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
796	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
797	 * c->phys_proc_id.
798	 */
799	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
800		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
801#endif
802
803	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
804
805	return l2;
806}
807
808static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
809				    struct _cpuid4_info_regs *base)
 
 
 
 
 
 
 
810{
811	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
812	struct cacheinfo *this_leaf;
813	int i, sibling;
814
815	/*
816	 * For L3, always use the pre-calculated cpu_llc_shared_mask
817	 * to derive shared_cpu_map.
818	 */
819	if (index == 3) {
820		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
821			this_cpu_ci = get_cpu_cacheinfo(i);
822			if (!this_cpu_ci->info_list)
823				continue;
824			this_leaf = this_cpu_ci->info_list + index;
825			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
826				if (!cpu_online(sibling))
827					continue;
828				cpumask_set_cpu(sibling,
829						&this_leaf->shared_cpu_map);
830			}
831		}
832	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
833		unsigned int apicid, nshared, first, last;
834
835		nshared = base->eax.split.num_threads_sharing + 1;
 
 
 
 
836		apicid = cpu_data(cpu).apicid;
837		first = apicid - (apicid % nshared);
838		last = first + nshared - 1;
839
840		for_each_online_cpu(i) {
841			this_cpu_ci = get_cpu_cacheinfo(i);
842			if (!this_cpu_ci->info_list)
843				continue;
844
845			apicid = cpu_data(i).apicid;
846			if ((apicid < first) || (apicid > last))
847				continue;
848
849			this_leaf = this_cpu_ci->info_list + index;
 
850
851			for_each_online_cpu(sibling) {
852				apicid = cpu_data(sibling).apicid;
853				if ((apicid < first) || (apicid > last))
854					continue;
855				cpumask_set_cpu(sibling,
856						&this_leaf->shared_cpu_map);
 
 
 
 
 
 
 
 
 
 
857			}
858		}
859	} else
860		return 0;
861
862	return 1;
863}
864
865static void __cache_cpumap_setup(unsigned int cpu, int index,
866				 struct _cpuid4_info_regs *base)
867{
868	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
869	struct cacheinfo *this_leaf, *sibling_leaf;
870	unsigned long num_threads_sharing;
871	int index_msb, i;
872	struct cpuinfo_x86 *c = &cpu_data(cpu);
873
874	if (c->x86_vendor == X86_VENDOR_AMD) {
875		if (__cache_amd_cpumap_setup(cpu, index, base))
876			return;
877	}
878
879	this_leaf = this_cpu_ci->info_list + index;
880	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
881
882	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
883	if (num_threads_sharing == 1)
884		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
886	index_msb = get_count_order(num_threads_sharing);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
887
888	for_each_online_cpu(i)
889		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
890			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
891
892			if (i == cpu || !sib_cpu_ci->info_list)
893				continue;/* skip if itself or no cacheinfo */
894			sibling_leaf = sib_cpu_ci->info_list + index;
895			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
896			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
897		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898}
899
900static void ci_leaf_init(struct cacheinfo *this_leaf,
901			 struct _cpuid4_info_regs *base)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
902{
903	this_leaf->id = base->id;
904	this_leaf->attributes = CACHE_ID;
905	this_leaf->level = base->eax.split.level;
906	this_leaf->type = cache_type_map[base->eax.split.type];
907	this_leaf->coherency_line_size =
908				base->ebx.split.coherency_line_size + 1;
909	this_leaf->ways_of_associativity =
910				base->ebx.split.ways_of_associativity + 1;
911	this_leaf->size = base->size;
912	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
913	this_leaf->physical_line_partition =
914				base->ebx.split.physical_line_partition + 1;
915	this_leaf->priv = base->nb;
916}
917
918static int __init_cache_level(unsigned int cpu)
 
919{
920	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
921
922	if (!num_cache_leaves)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
923		return -ENOENT;
924	if (!this_cpu_ci)
925		return -EINVAL;
926	this_cpu_ci->num_levels = 3;
927	this_cpu_ci->num_leaves = num_cache_leaves;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928	return 0;
929}
930
931/*
932 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
933 * ECX as cache index. Then right shift apicid by the number's order to get
934 * cache id for this cache node.
935 */
936static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
937{
938	struct cpuinfo_x86 *c = &cpu_data(cpu);
939	unsigned long num_threads_sharing;
940	int index_msb;
 
 
 
 
 
941
942	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
943	index_msb = get_count_order(num_threads_sharing);
944	id4_regs->id = c->apicid >> index_msb;
 
945}
946
947static int __populate_cache_leaves(unsigned int cpu)
 
948{
949	unsigned int idx, ret;
950	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
951	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
952	struct _cpuid4_info_regs id4_regs = {};
953
954	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
955		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
956		if (ret)
957			return ret;
958		get_cache_id(cpu, &id4_regs);
959		ci_leaf_init(this_leaf++, &id4_regs);
960		__cache_cpumap_setup(cpu, idx, &id4_regs);
 
 
 
961	}
962	this_cpu_ci->cpu_map_populated = true;
 
963
964	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
965}
966
967DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
968DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
 
v3.15
 
   1/*
   2 *	Routines to identify caches on Intel CPU.
   3 *
   4 *	Changes:
   5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   8 */
   9
  10#include <linux/init.h>
  11#include <linux/slab.h>
  12#include <linux/device.h>
  13#include <linux/compiler.h>
  14#include <linux/cpu.h>
  15#include <linux/sched.h>
 
 
  16#include <linux/pci.h>
  17
  18#include <asm/processor.h>
  19#include <linux/smp.h>
  20#include <asm/amd_nb.h>
  21#include <asm/smp.h>
  22
  /* Cache kinds used in the cache_type field of the descriptor table. */
#define LVL_1_INST	1	/* L1 instruction cache */
#define LVL_1_DATA	2	/* L1 data cache */
#define LVL_2		3	/* L2 cache */
#define LVL_3		4	/* L3 cache */
#define LVL_TRACE	5	/* trace cache */
  28
  /* One entry of the cache descriptor lookup table. */
struct _cache_table {
	unsigned char descriptor;	/* descriptor byte value to match */
	char cache_type;		/* one of the LVL_* constants */
	short size;			/* cache size as listed in the table */
};
  34
/* Scale a table size by 1024 (used for the megabyte-sized entries below). */
  35#define MB(x)	((x) * 1024)
  36
  37/* All the cache descriptor types we care about (no TLB or
  38   trace cache entries) */
  39
/*
 * Lookup table walked linearly by the CPUID(2) parser; the entry with
 * descriptor 0x00 terminates the walk.
 */
  40static const struct _cache_table cache_table[] =
  41{
  42	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  43	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  44	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  45	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  46	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  47	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  48	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  49	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  50	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  51	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  52	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  53	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  54	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  55	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  56	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  57	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  58	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  59	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  60	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  61	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  62	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  63	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  64	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  65	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  66	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  67	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  68	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  69	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  70	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  71	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  72	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  73	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  74	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  75	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  76	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  77	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  79	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  80	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  81	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
  82	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
  83	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
  84	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
  85	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
  86	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  87	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  88	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  89	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  90	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
  91	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
  92	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
  93	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
  94	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
  95	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
  96	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
  97	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
  98	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
  99	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 100	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 101	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 102	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 103	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 104	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 105	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 106	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 107	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 108	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 109	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 110	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 111	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 112	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 113	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 114	{ 0x00, 0, 0}
 115};
 116
 117
/*
 * Values of the "type" bit-field of a cache leaf's EAX.  A leaf whose type
 * is CACHE_TYPE_NULL ends the enumeration of leaves and makes the lookup
 * fail.
 */
 118enum _cache_type {
 119	CACHE_TYPE_NULL	= 0,
 120	CACHE_TYPE_DATA = 1,
 121	CACHE_TYPE_INST = 2,
 122	CACHE_TYPE_UNIFIED = 3
 123};
 124
/*
 * EAX of a cache leaf, either as the raw 32-bit register value (full) or
 * split into bit-fields.  The value comes from cpuid_count() or is
 * synthesized by amd_cpuid4().
 */
 125union _cpuid4_leaf_eax {
 126	struct {
 127		enum _cache_type	type:5;
 128		unsigned int		level:3;
 129		unsigned int		is_self_initializing:1;
 130		unsigned int		is_fully_associative:1;
 131		unsigned int		reserved:4;
		/* Stored biased by one: users in this file add 1 before use. */
 132		unsigned int		num_threads_sharing:12;
 133		unsigned int		num_cores_on_die:6;
 134	} split;
 135	u32 full;
 136};
 137
/*
 * EBX of a cache leaf.  Each field is stored biased by one: the size
 * computation and the sysfs show helpers add 1 to every one of them.
 */
 138union _cpuid4_leaf_ebx {
 139	struct {
 140		unsigned int		coherency_line_size:12;
 141		unsigned int		physical_line_partition:10;
 142		unsigned int		ways_of_associativity:10;
 143	} split;
 144	u32 full;
 145};
 146
/* ECX of a cache leaf: the number of sets, stored biased by one. */
 147union _cpuid4_leaf_ecx {
 148	struct {
 149		unsigned int		number_of_sets:32;
 150	} split;
 151	u32 full;
 152};
 153
/* Decoded register set of one cache leaf plus derived data. */
 154struct _cpuid4_info_regs {
 155	union _cpuid4_leaf_eax eax;
 156	union _cpuid4_leaf_ebx ebx;
 157	union _cpuid4_leaf_ecx ecx;

	/* Product of sets, line size, partitions and ways (each + 1). */
 158	unsigned long size;
	/* Set by amd_init_l3_cache() for leaf index >= 3; otherwise left as is. */
 159	struct amd_northbridge *nb;
 160};
 161
/* Per-CPU, per-leaf cache description exported through sysfs. */
 162struct _cpuid4_info {
 163	struct _cpuid4_info_regs base;
	/* One bit per CPU recorded as sharing this cache leaf. */
 164	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 165};
 166
/*
 * Number of cache leaves; set by init_amd_cacheinfo() or
 * init_intel_cacheinfo() and used as the bound of every per-leaf loop.
 */
 167unsigned short			num_cache_leaves;
 168
 169/* AMD doesn't have CPUID4. Emulate it here to report the same
 170   information to the user.  This makes some assumptions about the machine:
 171   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 172
 173   In theory the TLBs could be reported as fake type (they are in "dummy").
 174   Maybe later */
/* One L1 register value as filled from cpuid(0x80000005) in amd_cpuid4(). */
 175union l1_cache {
 176	struct {
 177		unsigned line_size:8;
 178		unsigned lines_per_tag:8;
 179		unsigned assoc:8;
 180		unsigned size_in_kb:8;
 181	};
 182	unsigned val;
 183};
 184
/* The L2 register value as filled from cpuid(0x80000006) in amd_cpuid4(). */
 185union l2_cache {
 186	struct {
 187		unsigned line_size:8;
 188		unsigned lines_per_tag:4;
		/* 4-bit encoded associativity, decoded through assocs[]. */
 189		unsigned assoc:4;
 190		unsigned size_in_kb:16;
 191	};
 192	unsigned val;
 193};
 194
/* The L3 register value as filled from cpuid(0x80000006) in amd_cpuid4(). */
 195union l3_cache {
 196	struct {
 197		unsigned line_size:8;
 198		unsigned lines_per_tag:4;
		/* 4-bit encoded associativity, decoded through assocs[]. */
 199		unsigned assoc:4;
 200		unsigned res:2;
		/* amd_cpuid4() multiplies this by 512 to obtain the size in KB. */
 201		unsigned size_encoded:14;
 202	};
 203	unsigned val;
 204};
 205
/*
 * Maps an encoded associativity value to a number of ways.  The table has
 * 16 entries (indices 0..0xf); indices without an initializer are 0, and
 * it must not be indexed with a wider value.  0xffff marks a fully
 * associative cache.
 */
 206static const unsigned short assocs[] = {
 207	[1] = 1,
 208	[2] = 2,
 209	[4] = 4,
 210	[6] = 8,
 211	[8] = 16,
 212	[0xa] = 32,
 213	[0xb] = 48,
 214	[0xc] = 64,
 215	[0xd] = 96,
 216	[0xe] = 128,
 217	[0xf] = 0xffff /* fully associative - no way to show this currently */
 218};
 219
/*
 * Indexed by the emulated leaf number in amd_cpuid4(): the cache level and
 * the enum _cache_type value reported for leaves 0..3.
 */
 220static const unsigned char levels[] = { 1, 1, 2, 3 };
 221static const unsigned char types[] = { 1, 2, 3, 3 };
 222
 
 
 
 
 
 
 
 223static void
 224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 225		     union _cpuid4_leaf_ebx *ebx,
 226		     union _cpuid4_leaf_ecx *ecx)
 227{
 228	unsigned dummy;
 229	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 230	union l1_cache l1i, l1d;
 231	union l2_cache l2;
 232	union l3_cache l3;
 233	union l1_cache *l1 = &l1d;
 234
 235	eax->full = 0;
 236	ebx->full = 0;
 237	ecx->full = 0;
 238
 239	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 240	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 241
 242	switch (leaf) {
 243	case 1:
 244		l1 = &l1i;
 245	case 0:
 246		if (!l1->val)
 247			return;
 248		assoc = assocs[l1->assoc];
 249		line_size = l1->line_size;
 250		lines_per_tag = l1->lines_per_tag;
 251		size_in_kb = l1->size_in_kb;
 252		break;
 253	case 2:
 254		if (!l2.val)
 255			return;
 256		assoc = assocs[l2.assoc];
 257		line_size = l2.line_size;
 258		lines_per_tag = l2.lines_per_tag;
 259		/* cpu_data has errata corrections for K7 applied */
 260		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 261		break;
 262	case 3:
 263		if (!l3.val)
 264			return;
 265		assoc = assocs[l3.assoc];
 266		line_size = l3.line_size;
 267		lines_per_tag = l3.lines_per_tag;
 268		size_in_kb = l3.size_encoded * 512;
 269		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 270			size_in_kb = size_in_kb >> 1;
 271			assoc = assoc >> 1;
 272		}
 273		break;
 274	default:
 275		return;
 276	}
 277
 278	eax->split.is_self_initializing = 1;
 279	eax->split.type = types[leaf];
 280	eax->split.level = levels[leaf];
 281	eax->split.num_threads_sharing = 0;
 282	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 283
 284
 285	if (assoc == 0xffff)
 286		eax->split.is_fully_associative = 1;
 287	ebx->split.coherency_line_size = line_size - 1;
 288	ebx->split.ways_of_associativity = assoc - 1;
 289	ebx->split.physical_line_partition = lines_per_tag - 1;
 290	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 291		(ebx->split.ways_of_associativity + 1) - 1;
 292}
 293
/*
 * A sysfs attribute of a cache leaf together with its accessors.  The
 * trailing unsigned int argument of both callbacks is a CPU number for the
 * attributes routed through show()/store() and a slot number for the
 * cache_disable helpers.
 */
 294struct _cache_attr {
 295	struct attribute attr;
 296	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
 297	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
 298			 unsigned int);
 299};
 300
 301#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 302/*
 303 * L3 cache descriptors
 304 */
 305static void amd_calc_l3_indices(struct amd_northbridge *nb)
 306{
 307	struct amd_l3_cache *l3 = &nb->l3_cache;
 308	unsigned int sc0, sc1, sc2, sc3;
 309	u32 val = 0;
 310
 311	pci_read_config_dword(nb->misc, 0x1C4, &val);
 312
 313	/* calculate subcache sizes */
 314	l3->subcaches[0] = sc0 = !(val & BIT(0));
 315	l3->subcaches[1] = sc1 = !(val & BIT(4));
 316
 317	if (boot_cpu_data.x86 == 0x15) {
 318		l3->subcaches[0] = sc0 += !(val & BIT(1));
 319		l3->subcaches[1] = sc1 += !(val & BIT(5));
 320	}
 321
 322	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 323	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 324
 325	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 326}
 327
 328static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 329{
 330	int node;
 331
 332	/* only for L3, and not in virtualized environments */
 333	if (index < 3)
 334		return;
 335
 336	node = amd_get_nb_id(smp_processor_id());
 337	this_leaf->nb = node_to_amd_nb(node);
 338	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 339		amd_calc_l3_indices(this_leaf->nb);
 340}
 341
 342/*
 343 * check whether a slot used for disabling an L3 index is occupied.
 344 * @l3: L3 cache descriptor
 345 * @slot: slot number (0..1)
 346 *
 347 * @returns: the disabled index if used or negative value if slot free.
 348 */
 349int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 350{
 351	unsigned int reg = 0;
 352
 353	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 354
 355	/* check whether this slot is activated already */
 356	if (reg & (3UL << 30))
 357		return reg & 0xfff;
 358
 359	return -1;
 360}
 361
 362static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 363				  unsigned int slot)
 364{
 365	int index;
 
 366
 367	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 368		return -EINVAL;
 369
 370	index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
 371	if (index >= 0)
 372		return sprintf(buf, "%d\n", index);
 373
 374	return sprintf(buf, "FREE\n");
 375}
 376
/*
 * Generate show_cache_disable_<slot>(): a _cache_attr show callback that
 * ignores its cpu argument and reports the given disable slot.
 */
 377#define SHOW_CACHE_DISABLE(slot)					\
 378static ssize_t								\
 379show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
 380			  unsigned int cpu)				\
 381{									\

 382	return show_cache_disable(this_leaf, buf, slot);		\
 383}
 384SHOW_CACHE_DISABLE(0)
 385SHOW_CACHE_DISABLE(1)
 386
 387static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 388				 unsigned slot, unsigned long idx)
 389{
 390	int i;
 391
 392	idx |= BIT(30);
 393
 394	/*
 395	 *  disable index in all 4 subcaches
 396	 */
 397	for (i = 0; i < 4; i++) {
 398		u32 reg = idx | (i << 20);
 399
 400		if (!nb->l3_cache.subcaches[i])
 401			continue;
 402
 403		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 404
 405		/*
 406		 * We need to WBINVD on a core on the node containing the L3
 407		 * cache which indices we disable therefore a simple wbinvd()
 408		 * is not sufficient.
 409		 */
 410		wbinvd_on_cpu(cpu);
 411
 412		reg |= BIT(31);
 413		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 414	}
 415}
 416
 417/*
 418 * disable a L3 cache index by using a disable-slot
 419 *
 420 * @l3:    L3 cache descriptor
 421 * @cpu:   A CPU on the node containing the L3 cache
 422 * @slot:  slot number (0..1)
 423 * @index: index to disable
 424 *
 425 * @return: 0 on success, error status on failure
 426 */
 427int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
 428			    unsigned long index)
 429{
 430	int ret = 0;
 431
 432	/*  check if @slot is already used or the index is already disabled */
 433	ret = amd_get_l3_disable_slot(nb, slot);
 434	if (ret >= 0)
 435		return -EEXIST;
 436
 437	if (index > nb->l3_cache.indices)
 438		return -EINVAL;
 439
 440	/* check whether the other slot has disabled the same index already */
 441	if (index == amd_get_l3_disable_slot(nb, !slot))
 442		return -EEXIST;
 443
 444	amd_l3_disable_index(nb, cpu, slot, index);
 445
 446	return 0;
 447}
 448
 449static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 450				  const char *buf, size_t count,
 451				  unsigned int slot)
 452{
 453	unsigned long val = 0;
 454	int cpu, err = 0;
 
 455
 456	if (!capable(CAP_SYS_ADMIN))
 457		return -EPERM;
 458
 459	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 460		return -EINVAL;
 461
 462	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
 463
 464	if (strict_strtoul(buf, 10, &val) < 0)
 465		return -EINVAL;
 466
 467	err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
 468	if (err) {
 469		if (err == -EEXIST)
 470			pr_warning("L3 slot %d in use/index already disabled!\n",
 471				   slot);
 472		return err;
 473	}
 474	return count;
 475}
 476
/*
 * Generate store_cache_disable_<slot>(): a _cache_attr store callback that
 * ignores its cpu argument and programs the given disable slot.
 */
 477#define STORE_CACHE_DISABLE(slot)					\
 478static ssize_t								\
 479store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
 480			   const char *buf, size_t count,		\
 481			   unsigned int cpu)				\
 482{									\

 483	return store_cache_disable(this_leaf, buf, count, slot);	\
 484}
 485STORE_CACHE_DISABLE(0)
 486STORE_CACHE_DISABLE(1)
 487
/* sysfs attributes (mode 0644) for the two L3 index disable slots. */
 488static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
 489		show_cache_disable_0, store_cache_disable_0);
 490static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
 491		show_cache_disable_1, store_cache_disable_1);
 492
 493static ssize_t
 494show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
 495{
 496	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 497		return -EINVAL;
 498
 499	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 500}
 501
 502static ssize_t
 503store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
 504		unsigned int cpu)
 505{
 
 
 506	unsigned long val;
 507
 508	if (!capable(CAP_SYS_ADMIN))
 509		return -EPERM;
 510
 511	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 512		return -EINVAL;
 513
 514	if (strict_strtoul(buf, 16, &val) < 0)
 515		return -EINVAL;
 516
 517	if (amd_set_subcaches(cpu, val))
 518		return -EINVAL;
 519
 520	return count;
 521}
 522
/* sysfs attribute (mode 0644) exposing the L3 subcache setting. */
 523static struct _cache_attr subcaches =
 524	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 525
 
 
 
 
 
 
 
 
 
 
 
 
 
 526#else
/* Without CONFIG_AMD_NB && CONFIG_SYSFS the L3 setup expands to nothing. */
 527#define amd_init_l3_cache(x, y)
 528#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
 529
 530static int
 531cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 532{
 533	union _cpuid4_leaf_eax	eax;
 534	union _cpuid4_leaf_ebx	ebx;
 535	union _cpuid4_leaf_ecx	ecx;
 536	unsigned		edx;
 537
 538	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 539		if (cpu_has_topoext)
 540			cpuid_count(0x8000001d, index, &eax.full,
 541				    &ebx.full, &ecx.full, &edx);
 542		else
 543			amd_cpuid4(index, &eax, &ebx, &ecx);
 544		amd_init_l3_cache(this_leaf, index);
 545	} else {
 546		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 547	}
 548
 549	if (eax.split.type == CACHE_TYPE_NULL)
 550		return -EIO; /* better error ? */
 551
 552	this_leaf->eax = eax;
 553	this_leaf->ebx = ebx;
 554	this_leaf->ecx = ecx;
 555	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 556			  (ebx.split.coherency_line_size     + 1) *
 557			  (ebx.split.physical_line_partition + 1) *
 558			  (ebx.split.ways_of_associativity   + 1);
 559	return 0;
 560}
 561
 562static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 563{
 564	unsigned int		eax, ebx, ecx, edx, op;
 565	union _cpuid4_leaf_eax	cache_eax;
 566	int 			i = -1;
 567
 568	if (c->x86_vendor == X86_VENDOR_AMD)
 569		op = 0x8000001d;
 570	else
 571		op = 4;
 572
 573	do {
 574		++i;
 575		/* Do cpuid(op) loop to find out num_cache_leaves */
 576		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
 577		cache_eax.full = eax;
 578	} while (cache_eax.split.type != CACHE_TYPE_NULL);
 579	return i;
 580}
 581
 582void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 583{
 584
 585	if (cpu_has_topoext) {
 586		num_cache_leaves = find_num_cache_leaves(c);
 587	} else if (c->extended_cpuid_level >= 0x80000006) {
 588		if (cpuid_edx(0x80000006) & 0xf000)
 589			num_cache_leaves = 4;
 590		else
 591			num_cache_leaves = 3;
 592	}
 593}
 594
 595unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 596{
 597	/* Cache sizes */
 598	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 599	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 600	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 601	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 602#ifdef CONFIG_X86_HT
 603	unsigned int cpu = c->cpu_index;
 604#endif
 605
 606	if (c->cpuid_level > 3) {
 607		static int is_initialized;
 608
 609		if (is_initialized == 0) {
 610			/* Init num_cache_leaves from boot CPU */
 611			num_cache_leaves = find_num_cache_leaves(c);
 612			is_initialized++;
 613		}
 614
 615		/*
 616		 * Whenever possible use cpuid(4), deterministic cache
 617		 * parameters cpuid leaf to find the cache details
 618		 */
 619		for (i = 0; i < num_cache_leaves; i++) {
 620			struct _cpuid4_info_regs this_leaf = {};
 621			int retval;
 622
 623			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 624			if (retval < 0)
 625				continue;
 626
 627			switch (this_leaf.eax.split.level) {
 628			case 1:
 629				if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
 630					new_l1d = this_leaf.size/1024;
 631				else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
 632					new_l1i = this_leaf.size/1024;
 633				break;
 634			case 2:
 635				new_l2 = this_leaf.size/1024;
 636				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 637				index_msb = get_count_order(num_threads_sharing);
 638				l2_id = c->apicid & ~((1 << index_msb) - 1);
 639				break;
 640			case 3:
 641				new_l3 = this_leaf.size/1024;
 642				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 643				index_msb = get_count_order(num_threads_sharing);
 644				l3_id = c->apicid & ~((1 << index_msb) - 1);
 645				break;
 646			default:
 647				break;
 648			}
 649		}
 650	}
 651	/*
 652	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 653	 * trace cache
 654	 */
 655	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 656		/* supports eax=2  call */
 657		int j, n;
 658		unsigned int regs[4];
 659		unsigned char *dp = (unsigned char *)regs;
 660		int only_trace = 0;
 661
 662		if (num_cache_leaves != 0 && c->x86 == 15)
 663			only_trace = 1;
 664
 665		/* Number of times to iterate */
 666		n = cpuid_eax(2) & 0xFF;
 667
 668		for (i = 0 ; i < n ; i++) {
 669			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 670
 671			/* If bit 31 is set, this is an unknown format */
 672			for (j = 0 ; j < 3 ; j++)
 673				if (regs[j] & (1 << 31))
 674					regs[j] = 0;
 675
 676			/* Byte 0 is level count, not a descriptor */
 677			for (j = 1 ; j < 16 ; j++) {
 678				unsigned char des = dp[j];
 679				unsigned char k = 0;
 680
 681				/* look up this descriptor in the table */
 682				while (cache_table[k].descriptor != 0) {
 683					if (cache_table[k].descriptor == des) {
 684						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 685							break;
 686						switch (cache_table[k].cache_type) {
 687						case LVL_1_INST:
 688							l1i += cache_table[k].size;
 689							break;
 690						case LVL_1_DATA:
 691							l1d += cache_table[k].size;
 692							break;
 693						case LVL_2:
 694							l2 += cache_table[k].size;
 695							break;
 696						case LVL_3:
 697							l3 += cache_table[k].size;
 698							break;
 699						case LVL_TRACE:
 700							trace += cache_table[k].size;
 701							break;
 702						}
 703
 704						break;
 705					}
 706
 707					k++;
 708				}
 709			}
 710		}
 711	}
 712
 713	if (new_l1d)
 714		l1d = new_l1d;
 715
 716	if (new_l1i)
 717		l1i = new_l1i;
 718
 719	if (new_l2) {
 720		l2 = new_l2;
 721#ifdef CONFIG_X86_HT
 722		per_cpu(cpu_llc_id, cpu) = l2_id;
 723#endif
 724	}
 725
 726	if (new_l3) {
 727		l3 = new_l3;
 728#ifdef CONFIG_X86_HT
 729		per_cpu(cpu_llc_id, cpu) = l3_id;
 730#endif
 731	}
 732
 
 
 
 
 
 
 
 
 
 
 
 
 733	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 734
 735	return l2;
 736}
 737
 738#ifdef CONFIG_SYSFS
 739
 740/* pointer to _cpuid4_info array (for each cache leaf) */
 741static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
/*
 * Address of leaf y in the array of CPU x.  The array pointer is not
 * checked here; callers must make sure it is not NULL.
 */
 742#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 743
 744#ifdef CONFIG_SMP
 745
 746static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 747{
 748	struct _cpuid4_info *this_leaf;
 
 749	int i, sibling;
 750
 751	if (cpu_has_topoext) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 752		unsigned int apicid, nshared, first, last;
 753
 754		if (!per_cpu(ici_cpuid4_info, cpu))
 755			return 0;
 756
 757		this_leaf = CPUID4_INFO_IDX(cpu, index);
 758		nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
 759		apicid = cpu_data(cpu).apicid;
 760		first = apicid - (apicid % nshared);
 761		last = first + nshared - 1;
 762
 763		for_each_online_cpu(i) {
 
 
 
 
 764			apicid = cpu_data(i).apicid;
 765			if ((apicid < first) || (apicid > last))
 766				continue;
 767			if (!per_cpu(ici_cpuid4_info, i))
 768				continue;
 769			this_leaf = CPUID4_INFO_IDX(i, index);
 770
 771			for_each_online_cpu(sibling) {
 772				apicid = cpu_data(sibling).apicid;
 773				if ((apicid < first) || (apicid > last))
 774					continue;
 775				set_bit(sibling, this_leaf->shared_cpu_map);
 776			}
 777		}
 778	} else if (index == 3) {
 779		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 780			if (!per_cpu(ici_cpuid4_info, i))
 781				continue;
 782			this_leaf = CPUID4_INFO_IDX(i, index);
 783			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 784				if (!cpu_online(sibling))
 785					continue;
 786				set_bit(sibling, this_leaf->shared_cpu_map);
 787			}
 788		}
 789	} else
 790		return 0;
 791
 792	return 1;
 793}
 794
 795static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
 
 796{
 797	struct _cpuid4_info *this_leaf, *sibling_leaf;
 
 798	unsigned long num_threads_sharing;
 799	int index_msb, i;
 800	struct cpuinfo_x86 *c = &cpu_data(cpu);
 801
 802	if (c->x86_vendor == X86_VENDOR_AMD) {
 803		if (cache_shared_amd_cpu_map_setup(cpu, index))
 804			return;
 805	}
 806
 807	this_leaf = CPUID4_INFO_IDX(cpu, index);
 808	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
 809
 
 810	if (num_threads_sharing == 1)
 811		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
 812	else {
 813		index_msb = get_count_order(num_threads_sharing);
 814
 815		for_each_online_cpu(i) {
 816			if (cpu_data(i).apicid >> index_msb ==
 817			    c->apicid >> index_msb) {
 818				cpumask_set_cpu(i,
 819					to_cpumask(this_leaf->shared_cpu_map));
 820				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
 821					sibling_leaf =
 822						CPUID4_INFO_IDX(i, index);
 823					cpumask_set_cpu(cpu, to_cpumask(
 824						sibling_leaf->shared_cpu_map));
 825				}
 826			}
 827		}
 828	}
 829}
 830static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
 831{
 832	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 833	int sibling;
 834
 835	this_leaf = CPUID4_INFO_IDX(cpu, index);
 836	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
 837		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 838		cpumask_clear_cpu(cpu,
 839				  to_cpumask(sibling_leaf->shared_cpu_map));
 840	}
 841}
 842#else
/* !CONFIG_SMP: there are no sharing siblings, so there is nothing to set up. */
 843static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
 844{
 845}
 846
/* !CONFIG_SMP: nothing to remove either. */
 847static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
 848{
 849}
 850#endif
 851
 852static void free_cache_attributes(unsigned int cpu)
 853{
 854	int i;
 855
 856	for (i = 0; i < num_cache_leaves; i++)
 857		cache_remove_shared_cpu_map(cpu, i);
 858
 859	kfree(per_cpu(ici_cpuid4_info, cpu));
 860	per_cpu(ici_cpuid4_info, cpu) = NULL;
 861}
 862
 863static void get_cpu_leaves(void *_retval)
 864{
 865	int j, *retval = _retval, cpu = smp_processor_id();
 866
 867	/* Do cpuid and store the results */
 868	for (j = 0; j < num_cache_leaves; j++) {
 869		struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
 870
 871		*retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
 872		if (unlikely(*retval < 0)) {
 873			int i;
 874
 875			for (i = 0; i < j; i++)
 876				cache_remove_shared_cpu_map(cpu, i);
 877			break;
 
 
 878		}
 879		cache_shared_cpu_map_setup(cpu, j);
 880	}
 881}
 882
 883static int detect_cache_attributes(unsigned int cpu)
 884{
 885	int			retval;
 886
 887	if (num_cache_leaves == 0)
 888		return -ENOENT;
 889
 890	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
 891	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
 892	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
 893		return -ENOMEM;
 894
 895	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 896	if (retval) {
 897		kfree(per_cpu(ici_cpuid4_info, cpu));
 898		per_cpu(ici_cpuid4_info, cpu) = NULL;
 899	}
 900
 901	return retval;
 902}
 903
 904#include <linux/kobject.h>
 905#include <linux/sysfs.h>
 906#include <linux/cpu.h>
 907
 908/* pointer to kobject for cpuX/cache */
 909static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
 910
/*
 * kobject of one cache leaf; cpu and index are what show()/store() use to
 * find the matching _cpuid4_info.
 */
 911struct _index_kobject {
 912	struct kobject kobj;
 913	unsigned int cpu;
 914	unsigned short index;
 915};
 916
 917/* pointer to array of kobjects for cpuX/cache/indexY */
 918static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
/* Address of the kobject of leaf y of CPU x; the array pointer is not checked. */
 919#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
 920
/*
 * Generate show_<file_name>(): prints the leaf member @object plus @val as
 * an unsigned long.  @val undoes the bias of the register fields (1), or
 * is 0 for fields stored as is.
 */
 921#define show_one_plus(file_name, object, val)				\
 922static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
 923				unsigned int cpu)			\
 924{									\
 925	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
 926}
 927
 928show_one_plus(level, base.eax.split.level, 0);
 929show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
 930show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
 931show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
 932show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
 933
 934static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
 935			 unsigned int cpu)
 936{
 937	return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
 938}
 939
 940static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 941					int type, char *buf)
 942{
 943	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
 944	int n = 0;
 945
 946	if (len > 1) {
 947		const struct cpumask *mask;
 948
 949		mask = to_cpumask(this_leaf->shared_cpu_map);
 950		n = type ?
 951			cpulist_scnprintf(buf, len-2, mask) :
 952			cpumask_scnprintf(buf, len-2, mask);
 953		buf[n++] = '\n';
 954		buf[n] = '\0';
 955	}
 956	return n;
 957}
 958
 959static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
 960					  unsigned int cpu)
 961{
 962	return show_shared_cpu_map_func(leaf, 0, buf);
 963}
 964
 965static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
 966					   unsigned int cpu)
 967{
 968	return show_shared_cpu_map_func(leaf, 1, buf);
 
 
 
 
 
 
 
 
 
 
 
 
 969}
 970
 971static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
 972			 unsigned int cpu)
 973{
 974	switch (this_leaf->base.eax.split.type) {
 975	case CACHE_TYPE_DATA:
 976		return sprintf(buf, "Data\n");
 977	case CACHE_TYPE_INST:
 978		return sprintf(buf, "Instruction\n");
 979	case CACHE_TYPE_UNIFIED:
 980		return sprintf(buf, "Unified\n");
 981	default:
 982		return sprintf(buf, "Unknown\n");
 983	}
 984}
 985
/* Map an embedded kobject / attribute back to its containing structure. */
 986#define to_object(k)	container_of(k, struct _index_kobject, kobj)
 987#define to_attr(a)	container_of(a, struct _cache_attr, attr)
 988
/* Define a read-only (0444) _cache_attr named _name backed by show_<_name>(). */
 989#define define_one_ro(_name) \
 990static struct _cache_attr _name = \
 991	__ATTR(_name, 0444, show_##_name, NULL)
 992
 993define_one_ro(level);
 994define_one_ro(type);
 995define_one_ro(coherency_line_size);
 996define_one_ro(physical_line_partition);
 997define_one_ro(ways_of_associativity);
 998define_one_ro(number_of_sets);
 999define_one_ro(size);
1000define_one_ro(shared_cpu_map);
1001define_one_ro(shared_cpu_list);
1002
/*
 * NULL-terminated list of the attributes every cache leaf gets; also the
 * base that amd_l3_attrs() extends.
 */
1003static struct attribute *default_attrs[] = {
1004	&type.attr,
1005	&level.attr,
1006	&coherency_line_size.attr,
1007	&physical_line_partition.attr,
1008	&ways_of_associativity.attr,
1009	&number_of_sets.attr,
1010	&size.attr,
1011	&shared_cpu_map.attr,
1012	&shared_cpu_list.attr,
1013	NULL
1014};
1015
1016#ifdef CONFIG_AMD_NB
1017static struct attribute **amd_l3_attrs(void)
1018{
1019	static struct attribute **attrs;
1020	int n;
1021
1022	if (attrs)
1023		return attrs;
1024
1025	n = ARRAY_SIZE(default_attrs);
1026
1027	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
1028		n += 2;
1029
1030	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1031		n += 1;
1032
1033	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1034	if (attrs == NULL)
1035		return attrs = default_attrs;
1036
1037	for (n = 0; default_attrs[n]; n++)
1038		attrs[n] = default_attrs[n];
1039
1040	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1041		attrs[n++] = &cache_disable_0.attr;
1042		attrs[n++] = &cache_disable_1.attr;
1043	}
1044
1045	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1046		attrs[n++] = &subcaches.attr;
1047
1048	return attrs;
1049}
1050#endif
1051
1052static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1053{
1054	struct _cache_attr *fattr = to_attr(attr);
1055	struct _index_kobject *this_leaf = to_object(kobj);
1056	ssize_t ret;
1057
1058	ret = fattr->show ?
1059		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1060			buf, this_leaf->cpu) :
1061		0;
1062	return ret;
1063}
1064
1065static ssize_t store(struct kobject *kobj, struct attribute *attr,
1066		     const char *buf, size_t count)
1067{
1068	struct _cache_attr *fattr = to_attr(attr);
1069	struct _index_kobject *this_leaf = to_object(kobj);
1070	ssize_t ret;
1071
1072	ret = fattr->store ?
1073		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1074			buf, count, this_leaf->cpu) :
1075		0;
1076	return ret;
1077}
1078
1079static const struct sysfs_ops sysfs_ops = {
1080	.show   = show,
1081	.store  = store,
1082};
1083
1084static struct kobj_type ktype_cache = {
1085	.sysfs_ops	= &sysfs_ops,
1086	.default_attrs	= default_attrs,
1087};
1088
1089static struct kobj_type ktype_percpu_entry = {
1090	.sysfs_ops	= &sysfs_ops,
1091};
1092
1093static void cpuid4_cache_sysfs_exit(unsigned int cpu)
1094{
1095	kfree(per_cpu(ici_cache_kobject, cpu));
1096	kfree(per_cpu(ici_index_kobject, cpu));
1097	per_cpu(ici_cache_kobject, cpu) = NULL;
1098	per_cpu(ici_index_kobject, cpu) = NULL;
1099	free_cache_attributes(cpu);
1100}
1101
1102static int cpuid4_cache_sysfs_init(unsigned int cpu)
1103{
1104	int err;
1105
1106	if (num_cache_leaves == 0)
1107		return -ENOENT;
1108
1109	err = detect_cache_attributes(cpu);
1110	if (err)
1111		return err;
1112
1113	/* Allocate all required memory */
1114	per_cpu(ici_cache_kobject, cpu) =
1115		kzalloc(sizeof(struct kobject), GFP_KERNEL);
1116	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1117		goto err_out;
1118
1119	per_cpu(ici_index_kobject, cpu) = kzalloc(
1120	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1121	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1122		goto err_out;
1123
1124	return 0;
1125
1126err_out:
1127	cpuid4_cache_sysfs_exit(cpu);
1128	return -ENOMEM;
1129}
1130
1131static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1132
1133/* Add/Remove cache interface for CPU device */
1134static int cache_add_dev(struct device *dev)
1135{
1136	unsigned int cpu = dev->id;
1137	unsigned long i, j;
1138	struct _index_kobject *this_object;
1139	struct _cpuid4_info   *this_leaf;
1140	int retval;
1141
1142	retval = cpuid4_cache_sysfs_init(cpu);
1143	if (unlikely(retval < 0))
1144		return retval;
1145
1146	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1147				      &ktype_percpu_entry,
1148				      &dev->kobj, "%s", "cache");
1149	if (retval < 0) {
1150		cpuid4_cache_sysfs_exit(cpu);
1151		return retval;
1152	}
1153
1154	for (i = 0; i < num_cache_leaves; i++) {
1155		this_object = INDEX_KOBJECT_PTR(cpu, i);
1156		this_object->cpu = cpu;
1157		this_object->index = i;
1158
1159		this_leaf = CPUID4_INFO_IDX(cpu, i);
1160
1161		ktype_cache.default_attrs = default_attrs;
1162#ifdef CONFIG_AMD_NB
1163		if (this_leaf->base.nb)
1164			ktype_cache.default_attrs = amd_l3_attrs();
1165#endif
1166		retval = kobject_init_and_add(&(this_object->kobj),
1167					      &ktype_cache,
1168					      per_cpu(ici_cache_kobject, cpu),
1169					      "index%1lu", i);
1170		if (unlikely(retval)) {
1171			for (j = 0; j < i; j++)
1172				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1173			kobject_put(per_cpu(ici_cache_kobject, cpu));
1174			cpuid4_cache_sysfs_exit(cpu);
1175			return retval;
1176		}
1177		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1178	}
1179	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1180
1181	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1182	return 0;
1183}
1184
1185static void cache_remove_dev(struct device *dev)
 
 
 
 
 
1186{
1187	unsigned int cpu = dev->id;
1188	unsigned long i;
1189
1190	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1191		return;
1192	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1193		return;
1194	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1195
1196	for (i = 0; i < num_cache_leaves; i++)
1197		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1198	kobject_put(per_cpu(ici_cache_kobject, cpu));
1199	cpuid4_cache_sysfs_exit(cpu);
1200}
1201
1202static int cacheinfo_cpu_callback(struct notifier_block *nfb,
1203				  unsigned long action, void *hcpu)
1204{
1205	unsigned int cpu = (unsigned long)hcpu;
1206	struct device *dev;
 
 
1207
1208	dev = get_cpu_device(cpu);
1209	switch (action) {
1210	case CPU_ONLINE:
1211	case CPU_ONLINE_FROZEN:
1212		cache_add_dev(dev);
1213		break;
1214	case CPU_DEAD:
1215	case CPU_DEAD_FROZEN:
1216		cache_remove_dev(dev);
1217		break;
1218	}
1219	return NOTIFY_OK;
1220}
1221
1222static struct notifier_block cacheinfo_cpu_notifier = {
1223	.notifier_call = cacheinfo_cpu_callback,
1224};
1225
1226static int __init cache_sysfs_init(void)
1227{
1228	int i, err = 0;
1229
1230	if (num_cache_leaves == 0)
1231		return 0;
1232
1233	cpu_notifier_register_begin();
1234	for_each_online_cpu(i) {
1235		struct device *dev = get_cpu_device(i);
1236
1237		err = cache_add_dev(dev);
1238		if (err)
1239			goto out;
1240	}
1241	__register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1242
1243out:
1244	cpu_notifier_register_done();
1245	return err;
1246}
1247
1248device_initcall(cache_sysfs_init);
1249
1250#endif