Linux Audio

Check our new training course

Yocto distribution development and maintenance

Need a Yocto distribution for your embedded project?
Loading...
v4.17
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 *	Routines to identify caches on Intel CPU.
  4 *
  5 *	Changes:
  6 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  7 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  8 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  9 */
 10
 
 11#include <linux/slab.h>
 12#include <linux/cacheinfo.h>
 
 13#include <linux/cpu.h>
 14#include <linux/sched.h>
 15#include <linux/capability.h>
 16#include <linux/sysfs.h>
 17#include <linux/pci.h>
 18
 19#include <asm/cpufeature.h>
 
 20#include <asm/amd_nb.h>
 21#include <asm/smp.h>
 22
 /* Cache categories used to classify CPUID(2) descriptor bytes. */
 #define LVL_1_INST	1	/* level 1 instruction cache */
 #define LVL_1_DATA	2	/* level 1 data cache */
 #define LVL_2		3	/* level 2 cache */
 #define LVL_3		4	/* level 3 cache */
 #define LVL_TRACE	5	/* trace cache */
 28
 /* One row of the CPUID(2) descriptor lookup table. */
 struct _cache_table {
 	unsigned char	descriptor;	/* descriptor byte; 0 ends the table */
 	char		cache_type;	/* one of the LVL_* categories */
 	short		size;		/* cache size, in KB */
 };

 /* Convert a size given in megabytes into the table's KB unit. */
 #define MB(x)	((x) * 1024)
 36
 37/* All the cache descriptor types we care about (no TLB or
 38   trace cache entries) */
 39
 40static const struct _cache_table cache_table[] =
 41{
 42	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 43	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
 44	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
 45	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 46	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
 47	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
 48	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
 49	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 50	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 51	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 52	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 53	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 54	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
 55	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
 56	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 57	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 58	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
 59	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 60	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 61	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 62	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
 63	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
 64	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
 65	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
 66	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
 67	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
 68	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
 69	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
 70	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
 71	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 72	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
 73	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 74	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
 75	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
 76	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
 77	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 78	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 79	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 80	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 81	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 82	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 83	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 84	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 85	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 86	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 87	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 88	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 89	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 90	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 91	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 92	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 93	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 94	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 95	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 96	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 97	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 98	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 99	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
100	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
101	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
102	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
103	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
104	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
105	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
106	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
107	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
108	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
109	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
110	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
111	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
112	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
113	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
114	{ 0x00, 0, 0}
115};
116
117
/*
 * Cache type values stored in the `type` field of union _cpuid4_leaf_eax.
 * CTYPE_NULL marks the end of the leaf enumeration.
 */
enum _cache_type {
	CTYPE_NULL	= 0,
	CTYPE_DATA	= 1,
	CTYPE_INST	= 2,
	CTYPE_UNIFIED	= 3,
};
124
125union _cpuid4_leaf_eax {
126	struct {
127		enum _cache_type	type:5;
128		unsigned int		level:3;
129		unsigned int		is_self_initializing:1;
130		unsigned int		is_fully_associative:1;
131		unsigned int		reserved:4;
132		unsigned int		num_threads_sharing:12;
133		unsigned int		num_cores_on_die:6;
134	} split;
135	u32 full;
136};
137
138union _cpuid4_leaf_ebx {
139	struct {
140		unsigned int		coherency_line_size:12;
141		unsigned int		physical_line_partition:10;
142		unsigned int		ways_of_associativity:10;
143	} split;
144	u32 full;
145};
146
147union _cpuid4_leaf_ecx {
148	struct {
149		unsigned int		number_of_sets:32;
150	} split;
151	u32 full;
152};
153
154struct _cpuid4_info_regs {
155	union _cpuid4_leaf_eax eax;
156	union _cpuid4_leaf_ebx ebx;
157	union _cpuid4_leaf_ecx ecx;
158	unsigned int id;
159	unsigned long size;
160	struct amd_northbridge *nb;
161};
162
163static unsigned short num_cache_leaves;
 
 
 
 
 
164
165/* AMD doesn't have CPUID4. Emulate it here to report the same
166   information to the user.  This makes some assumptions about the machine:
167   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
168
169   In theory the TLBs could be reported as fake type (they are in "dummy").
170   Maybe later */
/* One L1 descriptor register as returned by CPUID 0x80000005 (ECX or EDX). */
union l1_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:8;
		unsigned int assoc:8;
		unsigned int size_in_kb:8;
	};
	unsigned int val;	/* raw register value */
};
180
/* L2 descriptor register as returned in ECX by CPUID 0x80000006. */
union l2_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, decode via assocs[] */
		unsigned int size_in_kb:16;
	};
	unsigned int val;	/* raw register value */
};
190
/* L3 descriptor register as returned in EDX by CPUID 0x80000006. */
union l3_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded, decode via assocs[] */
		unsigned int res:2;
		unsigned int size_encoded:14;	/* in units of 512 KB */
	};
	unsigned int val;	/* raw register value */
};
201
/*
 * Decode the 4-bit L2/L3 associativity field of CPUID 0x80000006 into a
 * number of ways.  Encodings without an entry (0, 7, 9) decode to 0.
 * Encodings 3 (3-way) and 5 (6-way) are defined by the AMD manual and
 * were previously missing, which made such caches decode as 0 ways.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
215
/*
 * Both tables are indexed by the emulated CPUID(4) leaf number used in
 * amd_cpuid4(): 0 = L1 data, 1 = L1 instruction, 2 = L2, 3 = L3.
 */
static const unsigned char levels[] = {
	[0] = 1, [1] = 1, [2] = 2, [3] = 3	/* cache level */
};
static const unsigned char types[] = {
	[0] = 1, [1] = 2, [2] = 3, [3] = 3	/* enum _cache_type value */
};
218
219static const enum cache_type cache_type_map[] = {
220	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
221	[CTYPE_DATA] = CACHE_TYPE_DATA,
222	[CTYPE_INST] = CACHE_TYPE_INST,
223	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
224};
225
226static void
227amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
228		     union _cpuid4_leaf_ebx *ebx,
229		     union _cpuid4_leaf_ecx *ecx)
230{
231	unsigned dummy;
232	unsigned line_size, lines_per_tag, assoc, size_in_kb;
233	union l1_cache l1i, l1d;
234	union l2_cache l2;
235	union l3_cache l3;
236	union l1_cache *l1 = &l1d;
237
238	eax->full = 0;
239	ebx->full = 0;
240	ecx->full = 0;
241
242	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
243	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
244
245	switch (leaf) {
246	case 1:
247		l1 = &l1i;
248	case 0:
249		if (!l1->val)
250			return;
251		assoc = assocs[l1->assoc];
252		line_size = l1->line_size;
253		lines_per_tag = l1->lines_per_tag;
254		size_in_kb = l1->size_in_kb;
255		break;
256	case 2:
257		if (!l2.val)
258			return;
259		assoc = assocs[l2.assoc];
260		line_size = l2.line_size;
261		lines_per_tag = l2.lines_per_tag;
262		/* cpu_data has errata corrections for K7 applied */
263		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
264		break;
265	case 3:
266		if (!l3.val)
267			return;
268		assoc = assocs[l3.assoc];
269		line_size = l3.line_size;
270		lines_per_tag = l3.lines_per_tag;
271		size_in_kb = l3.size_encoded * 512;
272		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
273			size_in_kb = size_in_kb >> 1;
274			assoc = assoc >> 1;
275		}
276		break;
277	default:
278		return;
279	}
280
281	eax->split.is_self_initializing = 1;
282	eax->split.type = types[leaf];
283	eax->split.level = levels[leaf];
284	eax->split.num_threads_sharing = 0;
285	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
286
287
288	if (assoc == 0xffff)
289		eax->split.is_fully_associative = 1;
290	ebx->split.coherency_line_size = line_size - 1;
291	ebx->split.ways_of_associativity = assoc - 1;
292	ebx->split.physical_line_partition = lines_per_tag - 1;
293	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
294		(ebx->split.ways_of_associativity + 1) - 1;
295}
296
297#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
 
 
 
 
 
 
 
298
299/*
300 * L3 cache descriptors
301 */
302static void amd_calc_l3_indices(struct amd_northbridge *nb)
303{
304	struct amd_l3_cache *l3 = &nb->l3_cache;
305	unsigned int sc0, sc1, sc2, sc3;
306	u32 val = 0;
307
308	pci_read_config_dword(nb->misc, 0x1C4, &val);
309
310	/* calculate subcache sizes */
311	l3->subcaches[0] = sc0 = !(val & BIT(0));
312	l3->subcaches[1] = sc1 = !(val & BIT(4));
313
314	if (boot_cpu_data.x86 == 0x15) {
315		l3->subcaches[0] = sc0 += !(val & BIT(1));
316		l3->subcaches[1] = sc1 += !(val & BIT(5));
317	}
318
319	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
320	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
321
322	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
323}
324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325/*
326 * check whether a slot used for disabling an L3 index is occupied.
327 * @l3: L3 cache descriptor
328 * @slot: slot number (0..1)
329 *
330 * @returns: the disabled index if used or negative value if slot free.
331 */
332static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
333{
334	unsigned int reg = 0;
335
336	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
337
338	/* check whether this slot is activated already */
339	if (reg & (3UL << 30))
340		return reg & 0xfff;
341
342	return -1;
343}
344
345static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
346				  unsigned int slot)
347{
348	int index;
349	struct amd_northbridge *nb = this_leaf->priv;
350
351	index = amd_get_l3_disable_slot(nb, slot);
 
 
 
352	if (index >= 0)
353		return sprintf(buf, "%d\n", index);
354
355	return sprintf(buf, "FREE\n");
356}
357
358#define SHOW_CACHE_DISABLE(slot)					\
359static ssize_t								\
360cache_disable_##slot##_show(struct device *dev,				\
361			    struct device_attribute *attr, char *buf)	\
362{									\
363	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
364	return show_cache_disable(this_leaf, buf, slot);		\
365}
366SHOW_CACHE_DISABLE(0)
367SHOW_CACHE_DISABLE(1)
368
369static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
370				 unsigned slot, unsigned long idx)
371{
372	int i;
373
374	idx |= BIT(30);
375
376	/*
377	 *  disable index in all 4 subcaches
378	 */
379	for (i = 0; i < 4; i++) {
380		u32 reg = idx | (i << 20);
381
382		if (!nb->l3_cache.subcaches[i])
383			continue;
384
385		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
386
387		/*
388		 * We need to WBINVD on a core on the node containing the L3
389		 * cache which indices we disable therefore a simple wbinvd()
390		 * is not sufficient.
391		 */
392		wbinvd_on_cpu(cpu);
393
394		reg |= BIT(31);
395		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
396	}
397}
398
399/*
400 * disable a L3 cache index by using a disable-slot
401 *
402 * @l3:    L3 cache descriptor
403 * @cpu:   A CPU on the node containing the L3 cache
404 * @slot:  slot number (0..1)
405 * @index: index to disable
406 *
407 * @return: 0 on success, error status on failure
408 */
409static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
410			    unsigned slot, unsigned long index)
411{
412	int ret = 0;
413
414	/*  check if @slot is already used or the index is already disabled */
415	ret = amd_get_l3_disable_slot(nb, slot);
416	if (ret >= 0)
417		return -EEXIST;
418
419	if (index > nb->l3_cache.indices)
420		return -EINVAL;
421
422	/* check whether the other slot has disabled the same index already */
423	if (index == amd_get_l3_disable_slot(nb, !slot))
424		return -EEXIST;
425
426	amd_l3_disable_index(nb, cpu, slot, index);
427
428	return 0;
429}
430
431static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
432				   const char *buf, size_t count,
433				   unsigned int slot)
434{
435	unsigned long val = 0;
436	int cpu, err = 0;
437	struct amd_northbridge *nb = this_leaf->priv;
438
439	if (!capable(CAP_SYS_ADMIN))
440		return -EPERM;
441
442	cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
443
444	if (kstrtoul(buf, 10, &val) < 0)
 
 
445		return -EINVAL;
446
447	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
448	if (err) {
449		if (err == -EEXIST)
450			pr_warn("L3 slot %d in use/index already disabled!\n",
451				   slot);
452		return err;
453	}
454	return count;
455}
456
457#define STORE_CACHE_DISABLE(slot)					\
458static ssize_t								\
459cache_disable_##slot##_store(struct device *dev,			\
460			     struct device_attribute *attr,		\
461			     const char *buf, size_t count)		\
462{									\
463	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
464	return store_cache_disable(this_leaf, buf, count, slot);	\
465}
466STORE_CACHE_DISABLE(0)
467STORE_CACHE_DISABLE(1)
468
469static ssize_t subcaches_show(struct device *dev,
470			      struct device_attribute *attr, char *buf)
 
 
 
 
 
471{
472	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
473	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
474
475	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
476}
477
478static ssize_t subcaches_store(struct device *dev,
479			       struct device_attribute *attr,
480			       const char *buf, size_t count)
481{
482	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
483	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
484	unsigned long val;
485
486	if (!capable(CAP_SYS_ADMIN))
487		return -EPERM;
488
489	if (kstrtoul(buf, 16, &val) < 0)
 
 
 
490		return -EINVAL;
491
492	if (amd_set_subcaches(cpu, val))
493		return -EINVAL;
494
495	return count;
496}
497
/* Read/write sysfs attributes backed by the _show/_store handlers above. */
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
501
502static umode_t
503cache_private_attrs_is_visible(struct kobject *kobj,
504			       struct attribute *attr, int unused)
505{
506	struct device *dev = kobj_to_dev(kobj);
507	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
508	umode_t mode = attr->mode;
509
510	if (!this_leaf->priv)
511		return 0;
512
513	if ((attr == &dev_attr_subcaches.attr) &&
514	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
515		return mode;
516
517	if ((attr == &dev_attr_cache_disable_0.attr ||
518	     attr == &dev_attr_cache_disable_1.attr) &&
519	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
520		return mode;
521
522	return 0;
523}
524
525static struct attribute_group cache_private_group = {
526	.is_visible = cache_private_attrs_is_visible,
527};
528
529static void init_amd_l3_attrs(void)
530{
531	int n = 1;
532	static struct attribute **amd_l3_attrs;
533
534	if (amd_l3_attrs) /* already initialized */
535		return;
536
537	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
538		n += 2;
539	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
540		n += 1;
541
542	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
543	if (!amd_l3_attrs)
544		return;
545
546	n = 0;
547	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
548		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
549		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
550	}
551	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
552		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
553
554	cache_private_group.attrs = amd_l3_attrs;
555}
556
557const struct attribute_group *
558cache_get_priv_group(struct cacheinfo *this_leaf)
559{
560	struct amd_northbridge *nb = this_leaf->priv;
561
562	if (this_leaf->level < 3 || !nb)
563		return NULL;
564
565	if (nb && nb->l3_cache.indices)
566		init_amd_l3_attrs();
567
568	return &cache_private_group;
569}
570
571static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
572{
573	int node;
574
575	/* only for L3, and not in virtualized environments */
576	if (index < 3)
577		return;
578
579	node = amd_get_nb_id(smp_processor_id());
580	this_leaf->nb = node_to_amd_nb(node);
581	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
582		amd_calc_l3_indices(this_leaf->nb);
583}
584#else
585#define amd_init_l3_cache(x, y)
586#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
587
588static int
589cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
 
590{
591	union _cpuid4_leaf_eax	eax;
592	union _cpuid4_leaf_ebx	ebx;
593	union _cpuid4_leaf_ecx	ecx;
594	unsigned		edx;
595
596	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
597		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
598			cpuid_count(0x8000001d, index, &eax.full,
599				    &ebx.full, &ecx.full, &edx);
600		else
601			amd_cpuid4(index, &eax, &ebx, &ecx);
602		amd_init_l3_cache(this_leaf, index);
603	} else {
604		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
605	}
606
607	if (eax.split.type == CTYPE_NULL)
608		return -EIO; /* better error ? */
609
610	this_leaf->eax = eax;
611	this_leaf->ebx = ebx;
612	this_leaf->ecx = ecx;
613	this_leaf->size = (ecx.split.number_of_sets          + 1) *
614			  (ebx.split.coherency_line_size     + 1) *
615			  (ebx.split.physical_line_partition + 1) *
616			  (ebx.split.ways_of_associativity   + 1);
617	return 0;
618}
619
620static int find_num_cache_leaves(struct cpuinfo_x86 *c)
621{
622	unsigned int		eax, ebx, ecx, edx, op;
623	union _cpuid4_leaf_eax	cache_eax;
624	int 			i = -1;
625
626	if (c->x86_vendor == X86_VENDOR_AMD)
627		op = 0x8000001d;
628	else
629		op = 4;
630
631	do {
632		++i;
633		/* Do cpuid(op) loop to find out num_cache_leaves */
634		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
635		cache_eax.full = eax;
636	} while (cache_eax.split.type != CTYPE_NULL);
637	return i;
638}
639
640void init_amd_cacheinfo(struct cpuinfo_x86 *c)
641{
642
643	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
644		num_cache_leaves = find_num_cache_leaves(c);
645	} else if (c->extended_cpuid_level >= 0x80000006) {
646		if (cpuid_edx(0x80000006) & 0xf000)
647			num_cache_leaves = 4;
648		else
649			num_cache_leaves = 3;
650	}
651}
652
653unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
654{
655	/* Cache sizes */
656	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
657	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
658	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
659	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
660#ifdef CONFIG_SMP
661	unsigned int cpu = c->cpu_index;
662#endif
663
664	if (c->cpuid_level > 3) {
665		static int is_initialized;
666
667		if (is_initialized == 0) {
668			/* Init num_cache_leaves from boot CPU */
669			num_cache_leaves = find_num_cache_leaves(c);
670			is_initialized++;
671		}
672
673		/*
674		 * Whenever possible use cpuid(4), deterministic cache
675		 * parameters cpuid leaf to find the cache details
676		 */
677		for (i = 0; i < num_cache_leaves; i++) {
678			struct _cpuid4_info_regs this_leaf = {};
679			int retval;
680
681			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
682			if (retval < 0)
683				continue;
684
685			switch (this_leaf.eax.split.level) {
686			case 1:
687				if (this_leaf.eax.split.type == CTYPE_DATA)
688					new_l1d = this_leaf.size/1024;
689				else if (this_leaf.eax.split.type == CTYPE_INST)
690					new_l1i = this_leaf.size/1024;
691				break;
692			case 2:
693				new_l2 = this_leaf.size/1024;
694				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
695				index_msb = get_count_order(num_threads_sharing);
696				l2_id = c->apicid & ~((1 << index_msb) - 1);
697				break;
698			case 3:
699				new_l3 = this_leaf.size/1024;
700				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
701				index_msb = get_count_order(num_threads_sharing);
702				l3_id = c->apicid & ~((1 << index_msb) - 1);
703				break;
704			default:
705				break;
 
 
706			}
707		}
708	}
709	/*
710	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
711	 * trace cache
712	 */
713	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
714		/* supports eax=2  call */
715		int j, n;
716		unsigned int regs[4];
717		unsigned char *dp = (unsigned char *)regs;
718		int only_trace = 0;
719
720		if (num_cache_leaves != 0 && c->x86 == 15)
721			only_trace = 1;
722
723		/* Number of times to iterate */
724		n = cpuid_eax(2) & 0xFF;
725
726		for (i = 0 ; i < n ; i++) {
727			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
728
729			/* If bit 31 is set, this is an unknown format */
730			for (j = 0 ; j < 3 ; j++)
731				if (regs[j] & (1 << 31))
732					regs[j] = 0;
733
734			/* Byte 0 is level count, not a descriptor */
735			for (j = 1 ; j < 16 ; j++) {
736				unsigned char des = dp[j];
737				unsigned char k = 0;
738
739				/* look up this descriptor in the table */
740				while (cache_table[k].descriptor != 0) {
741					if (cache_table[k].descriptor == des) {
742						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
743							break;
744						switch (cache_table[k].cache_type) {
745						case LVL_1_INST:
746							l1i += cache_table[k].size;
747							break;
748						case LVL_1_DATA:
749							l1d += cache_table[k].size;
750							break;
751						case LVL_2:
752							l2 += cache_table[k].size;
753							break;
754						case LVL_3:
755							l3 += cache_table[k].size;
756							break;
757						case LVL_TRACE:
758							trace += cache_table[k].size;
759							break;
760						}
761
762						break;
763					}
764
765					k++;
766				}
767			}
768		}
769	}
770
771	if (new_l1d)
772		l1d = new_l1d;
773
774	if (new_l1i)
775		l1i = new_l1i;
776
777	if (new_l2) {
778		l2 = new_l2;
779#ifdef CONFIG_SMP
780		per_cpu(cpu_llc_id, cpu) = l2_id;
781#endif
782	}
783
784	if (new_l3) {
785		l3 = new_l3;
786#ifdef CONFIG_SMP
787		per_cpu(cpu_llc_id, cpu) = l3_id;
788#endif
789	}
790
791#ifdef CONFIG_SMP
792	/*
793	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
794	 * turns means that the only possibility is SMT (as indicated in
795	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
796	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
797	 * c->phys_proc_id.
798	 */
799	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
800		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
801#endif
802
803	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
804
805	return l2;
806}
807
808static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
809				    struct _cpuid4_info_regs *base)
 
 
 
 
 
 
 
810{
811	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
812	struct cacheinfo *this_leaf;
813	int i, sibling;
814
815	/*
816	 * For L3, always use the pre-calculated cpu_llc_shared_mask
817	 * to derive shared_cpu_map.
818	 */
819	if (index == 3) {
 
820		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
821			this_cpu_ci = get_cpu_cacheinfo(i);
822			if (!this_cpu_ci->info_list)
823				continue;
824			this_leaf = this_cpu_ci->info_list + index;
825			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
826				if (!cpu_online(sibling))
827					continue;
828				cpumask_set_cpu(sibling,
829						&this_leaf->shared_cpu_map);
830			}
831		}
832	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
833		unsigned int apicid, nshared, first, last;
834
835		nshared = base->eax.split.num_threads_sharing + 1;
836		apicid = cpu_data(cpu).apicid;
837		first = apicid - (apicid % nshared);
838		last = first + nshared - 1;
839
840		for_each_online_cpu(i) {
841			this_cpu_ci = get_cpu_cacheinfo(i);
842			if (!this_cpu_ci->info_list)
843				continue;
844
845			apicid = cpu_data(i).apicid;
846			if ((apicid < first) || (apicid > last))
847				continue;
848
849			this_leaf = this_cpu_ci->info_list + index;
850
851			for_each_online_cpu(sibling) {
852				apicid = cpu_data(sibling).apicid;
853				if ((apicid < first) || (apicid > last))
854					continue;
855				cpumask_set_cpu(sibling,
856						&this_leaf->shared_cpu_map);
857			}
858		}
859	} else
860		return 0;
861
862	return 1;
863}
864
865static void __cache_cpumap_setup(unsigned int cpu, int index,
866				 struct _cpuid4_info_regs *base)
867{
868	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
869	struct cacheinfo *this_leaf, *sibling_leaf;
870	unsigned long num_threads_sharing;
871	int index_msb, i;
872	struct cpuinfo_x86 *c = &cpu_data(cpu);
873
874	if (c->x86_vendor == X86_VENDOR_AMD) {
875		if (__cache_amd_cpumap_setup(cpu, index, base))
876			return;
877	}
878
879	this_leaf = this_cpu_ci->info_list + index;
880	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
881
882	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
883	if (num_threads_sharing == 1)
884		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
886	index_msb = get_count_order(num_threads_sharing);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
887
888	for_each_online_cpu(i)
889		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
890			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
891
892			if (i == cpu || !sib_cpu_ci->info_list)
893				continue;/* skip if itself or no cacheinfo */
894			sibling_leaf = sib_cpu_ci->info_list + index;
895			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
896			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
897		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898}
 
899
900static void ci_leaf_init(struct cacheinfo *this_leaf,
901			 struct _cpuid4_info_regs *base)
902{
903	this_leaf->id = base->id;
904	this_leaf->attributes = CACHE_ID;
905	this_leaf->level = base->eax.split.level;
906	this_leaf->type = cache_type_map[base->eax.split.type];
907	this_leaf->coherency_line_size =
908				base->ebx.split.coherency_line_size + 1;
909	this_leaf->ways_of_associativity =
910				base->ebx.split.ways_of_associativity + 1;
911	this_leaf->size = base->size;
912	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
913	this_leaf->physical_line_partition =
914				base->ebx.split.physical_line_partition + 1;
915	this_leaf->priv = base->nb;
916}
917
918static int __init_cache_level(unsigned int cpu)
 
919{
920	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
 
921
922	if (!num_cache_leaves)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
923		return -ENOENT;
924	if (!this_cpu_ci)
925		return -EINVAL;
926	this_cpu_ci->num_levels = 3;
927	this_cpu_ci->num_leaves = num_cache_leaves;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928	return 0;
929}
930
931/*
932 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
933 * ECX as cache index. Then right shift apicid by the number's order to get
934 * cache id for this cache node.
935 */
936static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
937{
938	struct cpuinfo_x86 *c = &cpu_data(cpu);
939	unsigned long num_threads_sharing;
940	int index_msb;
 
 
 
 
 
941
942	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
943	index_msb = get_count_order(num_threads_sharing);
944	id4_regs->id = c->apicid >> index_msb;
 
945}
946
947static int __populate_cache_leaves(unsigned int cpu)
 
948{
949	unsigned int idx, ret;
950	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
951	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
952	struct _cpuid4_info_regs id4_regs = {};
953
954	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
955		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
956		if (ret)
957			return ret;
958		get_cache_id(cpu, &id4_regs);
959		ci_leaf_init(this_leaf++, &id4_regs);
960		__cache_cpumap_setup(cpu, idx, &id4_regs);
 
 
 
961	}
962	this_cpu_ci->cpu_map_populated = true;
 
 
 
 
 
963
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
964	return 0;
965}
966
967DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
968DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
 
v3.5.6
 
   1/*
   2 *	Routines to identify caches on Intel CPU.
   3 *
   4 *	Changes:
   5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
   6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
   7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
   8 */
   9
  10#include <linux/init.h>
  11#include <linux/slab.h>
  12#include <linux/device.h>
  13#include <linux/compiler.h>
  14#include <linux/cpu.h>
  15#include <linux/sched.h>
 
 
  16#include <linux/pci.h>
  17
  18#include <asm/processor.h>
  19#include <linux/smp.h>
  20#include <asm/amd_nb.h>
  21#include <asm/smp.h>
  22
  /* Cache categories used to classify CPUID(2) descriptor bytes. */
  #define LVL_1_INST	1	/* level 1 instruction cache */
  #define LVL_1_DATA	2	/* level 1 data cache */
  #define LVL_2		3	/* level 2 cache */
  #define LVL_3		4	/* level 3 cache */
  #define LVL_TRACE	5	/* trace cache */
  28
  29struct _cache_table {
  30	unsigned char descriptor;
  31	char cache_type;
  32	short size;
  33};
  34
  /* Table sizes are kept in units of 1/1024 MB (i.e. KB); MB(x) scales x MB. */
  35#define MB(x)	((x) * 1024)
  36
  37/* All the cache descriptor types we care about (no TLB entries;
  38   trace cache entries are listed as LVL_TRACE).  The list ends with
       an all-zero entry, which the lookup loop uses as its terminator. */
  39
  40static const struct _cache_table __cpuinitconst cache_table[] =
  41{
  42	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
  43	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
  44	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
  45	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
  46	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
  47	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
  48	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
  49	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
  50	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  51	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  52	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  53	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  54	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
  55	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
  56	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  57	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  58	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
  59	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  60	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
  61	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  62	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
  63	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
  64	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
  65	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
  66	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
  67	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
  68	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
  69	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
  70	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
  71	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
  72	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
  73	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
  74	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
  75	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
  76	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
  77	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  78	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  79	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  80	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
  81	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
  82	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
  83	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
  84	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
  85	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
  86	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  87	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  88	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
  89	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
  90	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
  91	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
  92	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
  93	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
  94	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
  95	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
  96	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
  97	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
  98	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
  99	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 100	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 101	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
 102	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 103	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 104	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 105	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
 106	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
 107	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
 108	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
 109	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 110	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 111	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
 112	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
 113	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
 114	{ 0x00, 0, 0}
 115};
 116
 117
 /* Values of the "type" field in EAX of a CPUID(4) cache leaf. */
 118enum _cache_type {
 119	CACHE_TYPE_NULL	= 0,	/* treated as "no such leaf" by the lookup code */
 120	CACHE_TYPE_DATA = 1,
 121	CACHE_TYPE_INST = 2,
 122	CACHE_TYPE_UNIFIED = 3
 123};
 124
 /* EAX of a CPUID(4) cache leaf: decoded bit-fields (split) or raw (full). */
 125union _cpuid4_leaf_eax {
 126	struct {
 127		enum _cache_type	type:5;
 128		unsigned int		level:3;
 129		unsigned int		is_self_initializing:1;
 130		unsigned int		is_fully_associative:1;
 131		unsigned int		reserved:4;
 132		unsigned int		num_threads_sharing:12;	/* stored minus one */
 133		unsigned int		num_cores_on_die:6;	/* stored minus one */
 134	} split;
 135	u32 full;
 136};
 137
 /*
  * EBX of a CPUID(4) cache leaf.  Every field is stored minus one; users
  * in this file add 1 before using or displaying the value.
  */
 138union _cpuid4_leaf_ebx {
 139	struct {
 140		unsigned int		coherency_line_size:12;
 141		unsigned int		physical_line_partition:10;
 142		unsigned int		ways_of_associativity:10;
 143	} split;
 144	u32 full;
 145};
 146
 /* ECX of a CPUID(4) cache leaf: the number of sets, stored minus one. */
 147union _cpuid4_leaf_ecx {
 148	struct {
 149		unsigned int		number_of_sets:32;
 150	} split;
 151	u32 full;
 152};
 153
 /* Raw description of one cache leaf plus values derived from it. */
 154struct _cpuid4_info_regs {
 155	union _cpuid4_leaf_eax eax;
 156	union _cpuid4_leaf_ebx ebx;
 157	union _cpuid4_leaf_ecx ecx;

 158	unsigned long size;		/* sets * line size * partitions * ways */
 159	struct amd_northbridge *nb;	/* set by amd_init_l3_cache() for index >= 3 */
 160};
 161
 /* Per-CPU, per-leaf record: register data plus the CPUs sharing the cache. */
 162struct _cpuid4_info {
 163	struct _cpuid4_info_regs base;
 164	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 165};
 166
 /* Number of cache leaves; set once in init_intel_cacheinfo() via find_num_cache_leaves(). */
 167unsigned short			num_cache_leaves;
 168
 169/* AMD doesn't have CPUID4. Emulate it here to report the same
 170   information to the user.  This makes some assumptions about the machine:
 171   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
 172
 173   In theory the TLBs could be reported as fake type (they are in "dummy").
 174   Maybe later */
 /* One L1 register from CPUID 0x80000005 (ECX = L1D, EDX = L1I in amd_cpuid4()). */
 175union l1_cache {
 176	struct {
 177		unsigned line_size:8;
 178		unsigned lines_per_tag:8;
 179		unsigned assoc:8;
 180		unsigned size_in_kb:8;
 181	};
 182	unsigned val;
 183};
 184
 /* L2 register (ECX) from CPUID 0x80000006 as read in amd_cpuid4(). */
 185union l2_cache {
 186	struct {
 187		unsigned line_size:8;
 188		unsigned lines_per_tag:4;
 189		unsigned assoc:4;	/* encoded; decoded through assocs[] */
 190		unsigned size_in_kb:16;
 191	};
 192	unsigned val;
 193};
 194
 /* L3 register (EDX) from CPUID 0x80000006 as read in amd_cpuid4(). */
 195union l3_cache {
 196	struct {
 197		unsigned line_size:8;
 198		unsigned lines_per_tag:4;
 199		unsigned assoc:4;	/* encoded; decoded through assocs[] */
 200		unsigned res:2;
 201		unsigned size_encoded:14;	/* amd_cpuid4() multiplies by 512 to get KB */
 202	};
 203	unsigned val;
 204};
 205
 /*
  * Associativity decode table, indexed by the raw "assoc" field of the AMD
  * cache registers; the value is the number of ways.  Indices that are not
  * listed are zero-initialised.  The table has 16 entries (0x0..0xf).
  */
 206static const unsigned short __cpuinitconst assocs[] = {
 207	[1] = 1,
 208	[2] = 2,
 209	[4] = 4,
 210	[6] = 8,
 211	[8] = 16,
 212	[0xa] = 32,
 213	[0xb] = 48,
 214	[0xc] = 64,
 215	[0xd] = 96,
 216	[0xe] = 128,
 217	[0xf] = 0xffff /* fully associative - no way to show this currently */
 218};
 219
 /*
  * Cache level and cache type (enum _cache_type value) reported for the
  * emulated leaves 0..3 in amd_cpuid4(): L1 data, L1 instruction,
  * L2 unified, L3 unified.
  */
 220static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
 221static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
 
 
 
 
 
 
 
 222
 223static void __cpuinit
 224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 225		     union _cpuid4_leaf_ebx *ebx,
 226		     union _cpuid4_leaf_ecx *ecx)
 227{
 228	unsigned dummy;
 229	unsigned line_size, lines_per_tag, assoc, size_in_kb;
 230	union l1_cache l1i, l1d;
 231	union l2_cache l2;
 232	union l3_cache l3;
 233	union l1_cache *l1 = &l1d;
 234
 235	eax->full = 0;
 236	ebx->full = 0;
 237	ecx->full = 0;
 238
 239	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
 240	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
 241
 242	switch (leaf) {
 243	case 1:
 244		l1 = &l1i;
 245	case 0:
 246		if (!l1->val)
 247			return;
 248		assoc = assocs[l1->assoc];
 249		line_size = l1->line_size;
 250		lines_per_tag = l1->lines_per_tag;
 251		size_in_kb = l1->size_in_kb;
 252		break;
 253	case 2:
 254		if (!l2.val)
 255			return;
 256		assoc = assocs[l2.assoc];
 257		line_size = l2.line_size;
 258		lines_per_tag = l2.lines_per_tag;
 259		/* cpu_data has errata corrections for K7 applied */
 260		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 261		break;
 262	case 3:
 263		if (!l3.val)
 264			return;
 265		assoc = assocs[l3.assoc];
 266		line_size = l3.line_size;
 267		lines_per_tag = l3.lines_per_tag;
 268		size_in_kb = l3.size_encoded * 512;
 269		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 270			size_in_kb = size_in_kb >> 1;
 271			assoc = assoc >> 1;
 272		}
 273		break;
 274	default:
 275		return;
 276	}
 277
 278	eax->split.is_self_initializing = 1;
 279	eax->split.type = types[leaf];
 280	eax->split.level = levels[leaf];
 281	eax->split.num_threads_sharing = 0;
 282	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 283
 284
 285	if (assoc == 0xffff)
 286		eax->split.is_fully_associative = 1;
 287	ebx->split.coherency_line_size = line_size - 1;
 288	ebx->split.ways_of_associativity = assoc - 1;
 289	ebx->split.physical_line_partition = lines_per_tag - 1;
 290	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
 291		(ebx->split.ways_of_associativity + 1) - 1;
 292}
 293
 /*
  * sysfs attribute of a cache leaf.  The trailing unsigned int argument of
  * both callbacks is the CPU number passed in by show()/store().
  */
 294struct _cache_attr {
 295	struct attribute attr;
 296	ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
 297	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
 298			 unsigned int);
 299};
 300
 301#ifdef CONFIG_AMD_NB
 302
 303/*
 304 * L3 cache descriptors
 305 */
 306static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
 307{
 308	struct amd_l3_cache *l3 = &nb->l3_cache;
 309	unsigned int sc0, sc1, sc2, sc3;
 310	u32 val = 0;
 311
 312	pci_read_config_dword(nb->misc, 0x1C4, &val);
 313
 314	/* calculate subcache sizes */
 315	l3->subcaches[0] = sc0 = !(val & BIT(0));
 316	l3->subcaches[1] = sc1 = !(val & BIT(4));
 317
 318	if (boot_cpu_data.x86 == 0x15) {
 319		l3->subcaches[0] = sc0 += !(val & BIT(1));
 320		l3->subcaches[1] = sc1 += !(val & BIT(5));
 321	}
 322
 323	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 324	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 325
 326	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 327}
 328
 329static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 330{
 331	int node;
 332
 333	/* only for L3, and not in virtualized environments */
 334	if (index < 3)
 335		return;
 336
 337	node = amd_get_nb_id(smp_processor_id());
 338	this_leaf->nb = node_to_amd_nb(node);
 339	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
 340		amd_calc_l3_indices(this_leaf->nb);
 341}
 342
 343/*
 344 * check whether a slot used for disabling an L3 index is occupied.
 345 * @l3: L3 cache descriptor
 346 * @slot: slot number (0..1)
 347 *
 348 * @returns: the disabled index if used or negative value if slot free.
 349 */
 350int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 351{
 352	unsigned int reg = 0;
 353
 354	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 355
 356	/* check whether this slot is activated already */
 357	if (reg & (3UL << 30))
 358		return reg & 0xfff;
 359
 360	return -1;
 361}
 362
 363static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 364				  unsigned int slot)
 365{
 366	int index;
 
 367
 368	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 369		return -EINVAL;
 370
 371	index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
 372	if (index >= 0)
 373		return sprintf(buf, "%d\n", index);
 374
 375	return sprintf(buf, "FREE\n");
 376}
 377
 378#define SHOW_CACHE_DISABLE(slot)					\
 379static ssize_t								\
 380show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
 381			  unsigned int cpu)				\
 382{									\
 
 383	return show_cache_disable(this_leaf, buf, slot);		\
 384}
 385SHOW_CACHE_DISABLE(0)
 386SHOW_CACHE_DISABLE(1)
 387
 388static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 389				 unsigned slot, unsigned long idx)
 390{
 391	int i;
 392
 393	idx |= BIT(30);
 394
 395	/*
 396	 *  disable index in all 4 subcaches
 397	 */
 398	for (i = 0; i < 4; i++) {
 399		u32 reg = idx | (i << 20);
 400
 401		if (!nb->l3_cache.subcaches[i])
 402			continue;
 403
 404		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 405
 406		/*
 407		 * We need to WBINVD on a core on the node containing the L3
 408		 * cache which indices we disable therefore a simple wbinvd()
 409		 * is not sufficient.
 410		 */
 411		wbinvd_on_cpu(cpu);
 412
 413		reg |= BIT(31);
 414		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 415	}
 416}
 417
 418/*
 419 * disable a L3 cache index by using a disable-slot
 420 *
 421 * @l3:    L3 cache descriptor
 422 * @cpu:   A CPU on the node containing the L3 cache
 423 * @slot:  slot number (0..1)
 424 * @index: index to disable
 425 *
 426 * @return: 0 on success, error status on failure
 427 */
 428int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
 429			    unsigned long index)
 430{
 431	int ret = 0;
 432
 433	/*  check if @slot is already used or the index is already disabled */
 434	ret = amd_get_l3_disable_slot(nb, slot);
 435	if (ret >= 0)
 436		return -EEXIST;
 437
 438	if (index > nb->l3_cache.indices)
 439		return -EINVAL;
 440
 441	/* check whether the other slot has disabled the same index already */
 442	if (index == amd_get_l3_disable_slot(nb, !slot))
 443		return -EEXIST;
 444
 445	amd_l3_disable_index(nb, cpu, slot, index);
 446
 447	return 0;
 448}
 449
 450static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 451				  const char *buf, size_t count,
 452				  unsigned int slot)
 453{
 454	unsigned long val = 0;
 455	int cpu, err = 0;
 
 456
 457	if (!capable(CAP_SYS_ADMIN))
 458		return -EPERM;
 459
 460	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 461		return -EINVAL;
 462
 463	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
 464
 465	if (strict_strtoul(buf, 10, &val) < 0)
 466		return -EINVAL;
 467
 468	err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
 469	if (err) {
 470		if (err == -EEXIST)
 471			pr_warning("L3 slot %d in use/index already disabled!\n",
 472				   slot);
 473		return err;
 474	}
 475	return count;
 476}
 477
 478#define STORE_CACHE_DISABLE(slot)					\
 479static ssize_t								\
 480store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
 481			   const char *buf, size_t count,		\
 482			   unsigned int cpu)				\
 483{									\
 
 484	return store_cache_disable(this_leaf, buf, count, slot);	\
 485}
 486STORE_CACHE_DISABLE(0)
 487STORE_CACHE_DISABLE(1)
 488
 /* Read/write (0644) sysfs attributes for the two L3 index-disable slots. */
 489static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
 490		show_cache_disable_0, store_cache_disable_0);
 491static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
 492		show_cache_disable_1, store_cache_disable_1);
 493
 494static ssize_t
 495show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
 496{
 497	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 498		return -EINVAL;
 499
 500	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 501}
 502
 503static ssize_t
 504store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
 505		unsigned int cpu)
 506{
 
 
 507	unsigned long val;
 508
 509	if (!capable(CAP_SYS_ADMIN))
 510		return -EPERM;
 511
 512	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 513		return -EINVAL;
 514
 515	if (strict_strtoul(buf, 16, &val) < 0)
 516		return -EINVAL;
 517
 518	if (amd_set_subcaches(cpu, val))
 519		return -EINVAL;
 520
 521	return count;
 522}
 523
 /* Read/write (0644) sysfs attribute backed by show_subcaches()/store_subcaches(). */
 524static struct _cache_attr subcaches =
 525	__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 526
 527#else	/* CONFIG_AMD_NB */
 
 
 
 
 
 /* Without CONFIG_AMD_NB the L3 northbridge setup expands to nothing. */
 528#define amd_init_l3_cache(x, y)
 529#endif /* CONFIG_AMD_NB */
 530
 531static int
 532__cpuinit cpuid4_cache_lookup_regs(int index,
 533				   struct _cpuid4_info_regs *this_leaf)
 534{
 535	union _cpuid4_leaf_eax	eax;
 536	union _cpuid4_leaf_ebx	ebx;
 537	union _cpuid4_leaf_ecx	ecx;
 538	unsigned		edx;
 539
 540	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 541		amd_cpuid4(index, &eax, &ebx, &ecx);
 
 
 
 
 542		amd_init_l3_cache(this_leaf, index);
 543	} else {
 544		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 545	}
 546
 547	if (eax.split.type == CACHE_TYPE_NULL)
 548		return -EIO; /* better error ? */
 549
 550	this_leaf->eax = eax;
 551	this_leaf->ebx = ebx;
 552	this_leaf->ecx = ecx;
 553	this_leaf->size = (ecx.split.number_of_sets          + 1) *
 554			  (ebx.split.coherency_line_size     + 1) *
 555			  (ebx.split.physical_line_partition + 1) *
 556			  (ebx.split.ways_of_associativity   + 1);
 557	return 0;
 558}
 559
 560static int __cpuinit find_num_cache_leaves(void)
 561{
 562	unsigned int		eax, ebx, ecx, edx;
 563	union _cpuid4_leaf_eax	cache_eax;
 564	int 			i = -1;
 565
 
 
 
 
 
 566	do {
 567		++i;
 568		/* Do cpuid(4) loop to find out num_cache_leaves */
 569		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
 570		cache_eax.full = eax;
 571	} while (cache_eax.split.type != CACHE_TYPE_NULL);
 572	return i;
 573}
 574
 575unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 
 
 
 
 
 
 
 
 
 
 
 
 
 576{
 577	/* Cache sizes */
 578	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
 579	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 580	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 581	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 582#ifdef CONFIG_X86_HT
 583	unsigned int cpu = c->cpu_index;
 584#endif
 585
 586	if (c->cpuid_level > 3) {
 587		static int is_initialized;
 588
 589		if (is_initialized == 0) {
 590			/* Init num_cache_leaves from boot CPU */
 591			num_cache_leaves = find_num_cache_leaves();
 592			is_initialized++;
 593		}
 594
 595		/*
 596		 * Whenever possible use cpuid(4), deterministic cache
 597		 * parameters cpuid leaf to find the cache details
 598		 */
 599		for (i = 0; i < num_cache_leaves; i++) {
 600			struct _cpuid4_info_regs this_leaf;
 601			int retval;
 602
 603			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
 604			if (retval >= 0) {
 605				switch (this_leaf.eax.split.level) {
 606				case 1:
 607					if (this_leaf.eax.split.type ==
 608							CACHE_TYPE_DATA)
 609						new_l1d = this_leaf.size/1024;
 610					else if (this_leaf.eax.split.type ==
 611							CACHE_TYPE_INST)
 612						new_l1i = this_leaf.size/1024;
 613					break;
 614				case 2:
 615					new_l2 = this_leaf.size/1024;
 616					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 617					index_msb = get_count_order(num_threads_sharing);
 618					l2_id = c->apicid & ~((1 << index_msb) - 1);
 619					break;
 620				case 3:
 621					new_l3 = this_leaf.size/1024;
 622					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 623					index_msb = get_count_order(
 624							num_threads_sharing);
 625					l3_id = c->apicid & ~((1 << index_msb) - 1);
 626					break;
 627				default:
 628					break;
 629				}
 630			}
 631		}
 632	}
 633	/*
 634	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 635	 * trace cache
 636	 */
 637	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
 638		/* supports eax=2  call */
 639		int j, n;
 640		unsigned int regs[4];
 641		unsigned char *dp = (unsigned char *)regs;
 642		int only_trace = 0;
 643
 644		if (num_cache_leaves != 0 && c->x86 == 15)
 645			only_trace = 1;
 646
 647		/* Number of times to iterate */
 648		n = cpuid_eax(2) & 0xFF;
 649
 650		for (i = 0 ; i < n ; i++) {
 651			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
 652
 653			/* If bit 31 is set, this is an unknown format */
 654			for (j = 0 ; j < 3 ; j++)
 655				if (regs[j] & (1 << 31))
 656					regs[j] = 0;
 657
 658			/* Byte 0 is level count, not a descriptor */
 659			for (j = 1 ; j < 16 ; j++) {
 660				unsigned char des = dp[j];
 661				unsigned char k = 0;
 662
 663				/* look up this descriptor in the table */
 664				while (cache_table[k].descriptor != 0) {
 665					if (cache_table[k].descriptor == des) {
 666						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
 667							break;
 668						switch (cache_table[k].cache_type) {
 669						case LVL_1_INST:
 670							l1i += cache_table[k].size;
 671							break;
 672						case LVL_1_DATA:
 673							l1d += cache_table[k].size;
 674							break;
 675						case LVL_2:
 676							l2 += cache_table[k].size;
 677							break;
 678						case LVL_3:
 679							l3 += cache_table[k].size;
 680							break;
 681						case LVL_TRACE:
 682							trace += cache_table[k].size;
 683							break;
 684						}
 685
 686						break;
 687					}
 688
 689					k++;
 690				}
 691			}
 692		}
 693	}
 694
 695	if (new_l1d)
 696		l1d = new_l1d;
 697
 698	if (new_l1i)
 699		l1i = new_l1i;
 700
 701	if (new_l2) {
 702		l2 = new_l2;
 703#ifdef CONFIG_X86_HT
 704		per_cpu(cpu_llc_id, cpu) = l2_id;
 705#endif
 706	}
 707
 708	if (new_l3) {
 709		l3 = new_l3;
 710#ifdef CONFIG_X86_HT
 711		per_cpu(cpu_llc_id, cpu) = l3_id;
 712#endif
 713	}
 714
 
 
 
 
 
 
 
 
 
 
 
 
 715	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 716
 717	return l2;
 718}
 719
 720#ifdef CONFIG_SYSFS
 721
 722/* pointer to _cpuid4_info array (for each cache leaf); NULL while not allocated */
 723static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 /* address of leaf y of CPU x; does not check the per-CPU pointer for NULL */
 724#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 725
 726#ifdef CONFIG_SMP
 727
 728static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 729{
 730	struct _cpuid4_info *this_leaf;
 731	int ret, i, sibling;
 732	struct cpuinfo_x86 *c = &cpu_data(cpu);
 733
 734	ret = 0;
 
 
 
 735	if (index == 3) {
 736		ret = 1;
 737		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 738			if (!per_cpu(ici_cpuid4_info, i))
 
 739				continue;
 740			this_leaf = CPUID4_INFO_IDX(i, index);
 741			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
 742				if (!cpu_online(sibling))
 743					continue;
 744				set_bit(sibling, this_leaf->shared_cpu_map);
 
 745			}
 746		}
 747	} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
 748		ret = 1;
 749		for_each_cpu(i, cpu_sibling_mask(cpu)) {
 750			if (!per_cpu(ici_cpuid4_info, i))
 
 
 
 
 
 
 
 751				continue;
 752			this_leaf = CPUID4_INFO_IDX(i, index);
 753			for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
 754				if (!cpu_online(sibling))
 
 
 
 
 
 
 
 755					continue;
 756				set_bit(sibling, this_leaf->shared_cpu_map);
 
 757			}
 758		}
 759	}
 
 760
 761	return ret;
 762}
 763
 764static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 
 765{
 766	struct _cpuid4_info *this_leaf, *sibling_leaf;
 
 767	unsigned long num_threads_sharing;
 768	int index_msb, i;
 769	struct cpuinfo_x86 *c = &cpu_data(cpu);
 770
 771	if (c->x86_vendor == X86_VENDOR_AMD) {
 772		if (cache_shared_amd_cpu_map_setup(cpu, index))
 773			return;
 774	}
 775
 776	this_leaf = CPUID4_INFO_IDX(cpu, index);
 777	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
 778
 
 779	if (num_threads_sharing == 1)
 780		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
 781	else {
 782		index_msb = get_count_order(num_threads_sharing);
 783
 784		for_each_online_cpu(i) {
 785			if (cpu_data(i).apicid >> index_msb ==
 786			    c->apicid >> index_msb) {
 787				cpumask_set_cpu(i,
 788					to_cpumask(this_leaf->shared_cpu_map));
 789				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
 790					sibling_leaf =
 791						CPUID4_INFO_IDX(i, index);
 792					cpumask_set_cpu(cpu, to_cpumask(
 793						sibling_leaf->shared_cpu_map));
 794				}
 795			}
 796		}
 797	}
 798}
 799static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 800{
 801	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 802	int sibling;
 803
 804	this_leaf = CPUID4_INFO_IDX(cpu, index);
 805	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
 806		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 807		cpumask_clear_cpu(cpu,
 808				  to_cpumask(sibling_leaf->shared_cpu_map));
 809	}
 810}
 811#else
 /* !CONFIG_SMP variants: both shared-map hooks are empty. */
 812static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 813{
 814}
 815
 816static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 817{
 818}
 819#endif
 820
 821static void __cpuinit free_cache_attributes(unsigned int cpu)
 822{
 823	int i;
 824
 825	for (i = 0; i < num_cache_leaves; i++)
 826		cache_remove_shared_cpu_map(cpu, i);
 827
 828	kfree(per_cpu(ici_cpuid4_info, cpu));
 829	per_cpu(ici_cpuid4_info, cpu) = NULL;
 830}
 831
 832static void __cpuinit get_cpu_leaves(void *_retval)
 833{
 834	int j, *retval = _retval, cpu = smp_processor_id();
 835
 836	/* Do cpuid and store the results */
 837	for (j = 0; j < num_cache_leaves; j++) {
 838		struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
 839
 840		*retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
 841		if (unlikely(*retval < 0)) {
 842			int i;
 843
 844			for (i = 0; i < j; i++)
 845				cache_remove_shared_cpu_map(cpu, i);
 846			break;
 
 
 847		}
 848		cache_shared_cpu_map_setup(cpu, j);
 849	}
 850}
 851
 852static int __cpuinit detect_cache_attributes(unsigned int cpu)
 853{
 854	int			retval;
 855
 856	if (num_cache_leaves == 0)
 857		return -ENOENT;
 858
 859	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
 860	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
 861	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
 862		return -ENOMEM;
 863
 864	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 865	if (retval) {
 866		kfree(per_cpu(ici_cpuid4_info, cpu));
 867		per_cpu(ici_cpuid4_info, cpu) = NULL;
 868	}
 869
 870	return retval;
 871}
 872
 873#include <linux/kobject.h>
 874#include <linux/sysfs.h>
 875#include <linux/cpu.h>
 876
 877/* pointer to kobject for cpuX/cache */
 878static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
 879
 /* kobject of one cpuX/cache/indexY directory, tagged with its CPU and leaf index */
 880struct _index_kobject {
 881	struct kobject kobj;
 882	unsigned int cpu;
 883	unsigned short index;
 884};
 885
 886/* pointer to array of kobjects for cpuX/cache/indexY */
 887static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
 /* address of index kobject y of CPU x; no NULL check on the per-CPU pointer */
 888#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))
 889
 890#define show_one_plus(file_name, object, val)				\
 891static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
 892				unsigned int cpu)			\
 893{									\
 894	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
 895}
 896
 897show_one_plus(level, base.eax.split.level, 0);
 898show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
 899show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
 900show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
 901show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
 902
 903static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
 904			 unsigned int cpu)
 905{
 906	return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
 907}
 908
 909static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 910					int type, char *buf)
 911{
 912	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
 913	int n = 0;
 914
 915	if (len > 1) {
 916		const struct cpumask *mask;
 917
 918		mask = to_cpumask(this_leaf->shared_cpu_map);
 919		n = type ?
 920			cpulist_scnprintf(buf, len-2, mask) :
 921			cpumask_scnprintf(buf, len-2, mask);
 922		buf[n++] = '\n';
 923		buf[n] = '\0';
 924	}
 925	return n;
 926}
 927
 928static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
 929					  unsigned int cpu)
 930{
 931	return show_shared_cpu_map_func(leaf, 0, buf);
 932}
 933
 934static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
 935					   unsigned int cpu)
 936{
 937	return show_shared_cpu_map_func(leaf, 1, buf);
 938}
 939
 940static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
 941			 unsigned int cpu)
 942{
 943	switch (this_leaf->base.eax.split.type) {
 944	case CACHE_TYPE_DATA:
 945		return sprintf(buf, "Data\n");
 946	case CACHE_TYPE_INST:
 947		return sprintf(buf, "Instruction\n");
 948	case CACHE_TYPE_UNIFIED:
 949		return sprintf(buf, "Unified\n");
 950	default:
 951		return sprintf(buf, "Unknown\n");
 952	}
 953}
 954
 /* Recover the containing _index_kobject / _cache_attr from its embedded member. */
 955#define to_object(k)	container_of(k, struct _index_kobject, kobj)
 956#define to_attr(a)	container_of(a, struct _cache_attr, attr)
 957
 958#define define_one_ro(_name) \
 959static struct _cache_attr _name = \
 960	__ATTR(_name, 0444, show_##_name, NULL)
 961
 962define_one_ro(level);
 963define_one_ro(type);
 964define_one_ro(coherency_line_size);
 965define_one_ro(physical_line_partition);
 966define_one_ro(ways_of_associativity);
 967define_one_ro(number_of_sets);
 968define_one_ro(size);
 969define_one_ro(shared_cpu_map);
 970define_one_ro(shared_cpu_list);
 971
 /*
  * NULL-terminated list of the attributes every cache leaf gets.
  * amd_l3_attrs() copies it up to the terminator when it builds the
  * extended AMD L3 list.
  */
 972static struct attribute *default_attrs[] = {
 973	&type.attr,
 974	&level.attr,
 975	&coherency_line_size.attr,
 976	&physical_line_partition.attr,
 977	&ways_of_associativity.attr,
 978	&number_of_sets.attr,
 979	&size.attr,
 980	&shared_cpu_map.attr,
 981	&shared_cpu_list.attr,
 982	NULL
 983};
 984
 985#ifdef CONFIG_AMD_NB
 986static struct attribute ** __cpuinit amd_l3_attrs(void)
 987{
 988	static struct attribute **attrs;
 989	int n;
 990
 991	if (attrs)
 992		return attrs;
 993
 994	n = sizeof (default_attrs) / sizeof (struct attribute *);
 995
 996	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 997		n += 2;
 998
 999	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1000		n += 1;
1001
1002	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1003	if (attrs == NULL)
1004		return attrs = default_attrs;
1005
1006	for (n = 0; default_attrs[n]; n++)
1007		attrs[n] = default_attrs[n];
1008
1009	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1010		attrs[n++] = &cache_disable_0.attr;
1011		attrs[n++] = &cache_disable_1.attr;
1012	}
1013
1014	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1015		attrs[n++] = &subcaches.attr;
1016
1017	return attrs;
1018}
1019#endif
1020
1021static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 
1022{
1023	struct _cache_attr *fattr = to_attr(attr);
1024	struct _index_kobject *this_leaf = to_object(kobj);
1025	ssize_t ret;
1026
1027	ret = fattr->show ?
1028		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1029			buf, this_leaf->cpu) :
1030		0;
1031	return ret;
 
 
 
 
1032}
1033
1034static ssize_t store(struct kobject *kobj, struct attribute *attr,
1035		     const char *buf, size_t count)
1036{
1037	struct _cache_attr *fattr = to_attr(attr);
1038	struct _index_kobject *this_leaf = to_object(kobj);
1039	ssize_t ret;
1040
1041	ret = fattr->store ?
1042		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1043			buf, count, this_leaf->cpu) :
1044		0;
1045	return ret;
1046}
1047
1048static const struct sysfs_ops sysfs_ops = {
1049	.show   = show,
1050	.store  = store,
1051};
1052
1053static struct kobj_type ktype_cache = {
1054	.sysfs_ops	= &sysfs_ops,
1055	.default_attrs	= default_attrs,
1056};
1057
1058static struct kobj_type ktype_percpu_entry = {
1059	.sysfs_ops	= &sysfs_ops,
1060};
1061
1062static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1063{
1064	kfree(per_cpu(ici_cache_kobject, cpu));
1065	kfree(per_cpu(ici_index_kobject, cpu));
1066	per_cpu(ici_cache_kobject, cpu) = NULL;
1067	per_cpu(ici_index_kobject, cpu) = NULL;
1068	free_cache_attributes(cpu);
1069}
1070
1071static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
1072{
1073	int err;
1074
1075	if (num_cache_leaves == 0)
1076		return -ENOENT;
1077
1078	err = detect_cache_attributes(cpu);
1079	if (err)
1080		return err;
1081
1082	/* Allocate all required memory */
1083	per_cpu(ici_cache_kobject, cpu) =
1084		kzalloc(sizeof(struct kobject), GFP_KERNEL);
1085	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1086		goto err_out;
1087
1088	per_cpu(ici_index_kobject, cpu) = kzalloc(
1089	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1090	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1091		goto err_out;
1092
1093	return 0;
1094
1095err_out:
1096	cpuid4_cache_sysfs_exit(cpu);
1097	return -ENOMEM;
1098}
1099
1100static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1101
1102/* Add/Remove cache interface for CPU device */
1103static int __cpuinit cache_add_dev(struct device *dev)
1104{
1105	unsigned int cpu = dev->id;
1106	unsigned long i, j;
1107	struct _index_kobject *this_object;
1108	struct _cpuid4_info   *this_leaf;
1109	int retval;
1110
1111	retval = cpuid4_cache_sysfs_init(cpu);
1112	if (unlikely(retval < 0))
1113		return retval;
1114
1115	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1116				      &ktype_percpu_entry,
1117				      &dev->kobj, "%s", "cache");
1118	if (retval < 0) {
1119		cpuid4_cache_sysfs_exit(cpu);
1120		return retval;
1121	}
1122
1123	for (i = 0; i < num_cache_leaves; i++) {
1124		this_object = INDEX_KOBJECT_PTR(cpu, i);
1125		this_object->cpu = cpu;
1126		this_object->index = i;
1127
1128		this_leaf = CPUID4_INFO_IDX(cpu, i);
1129
1130		ktype_cache.default_attrs = default_attrs;
1131#ifdef CONFIG_AMD_NB
1132		if (this_leaf->base.nb)
1133			ktype_cache.default_attrs = amd_l3_attrs();
1134#endif
1135		retval = kobject_init_and_add(&(this_object->kobj),
1136					      &ktype_cache,
1137					      per_cpu(ici_cache_kobject, cpu),
1138					      "index%1lu", i);
1139		if (unlikely(retval)) {
1140			for (j = 0; j < i; j++)
1141				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1142			kobject_put(per_cpu(ici_cache_kobject, cpu));
1143			cpuid4_cache_sysfs_exit(cpu);
1144			return retval;
1145		}
1146		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1147	}
1148	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1149
1150	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1151	return 0;
1152}
1153
1154static void __cpuinit cache_remove_dev(struct device *dev)
 
 
 
 
 
1155{
1156	unsigned int cpu = dev->id;
1157	unsigned long i;
1158
1159	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1160		return;
1161	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1162		return;
1163	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1164
1165	for (i = 0; i < num_cache_leaves; i++)
1166		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1167	kobject_put(per_cpu(ici_cache_kobject, cpu));
1168	cpuid4_cache_sysfs_exit(cpu);
1169}
1170
1171static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1172					unsigned long action, void *hcpu)
1173{
1174	unsigned int cpu = (unsigned long)hcpu;
1175	struct device *dev;
 
 
1176
1177	dev = get_cpu_device(cpu);
1178	switch (action) {
1179	case CPU_ONLINE:
1180	case CPU_ONLINE_FROZEN:
1181		cache_add_dev(dev);
1182		break;
1183	case CPU_DEAD:
1184	case CPU_DEAD_FROZEN:
1185		cache_remove_dev(dev);
1186		break;
1187	}
1188	return NOTIFY_OK;
1189}
1190
1191static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1192	.notifier_call = cacheinfo_cpu_callback,
1193};
1194
1195static int __cpuinit cache_sysfs_init(void)
1196{
1197	int i;
1198
1199	if (num_cache_leaves == 0)
1200		return 0;
1201
1202	for_each_online_cpu(i) {
1203		int err;
1204		struct device *dev = get_cpu_device(i);
1205
1206		err = cache_add_dev(dev);
1207		if (err)
1208			return err;
1209	}
1210	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1211	return 0;
1212}
1213
1214device_initcall(cache_sysfs_init);
1215
1216#endif