// SPDX-License-Identifier: GPL-2.0
/*
 *	Routines to identify caches on Intel CPU.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/slab.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/sysfs.h>
#include <linux/pci.h>

#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)

/* All the cache descriptor types we care about (no TLB entries) */

static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
	struct amd_northbridge *nb;
};

static unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user.  This makes some assumptions about the machine:
   L2 not shared, no SMT, etc., which is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

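/*
 * Synthesize a CPUID(4)-style leaf for AMD CPUs that lack CPUID leaf 4,
 * using the legacy 0x80000005/0x80000006 cache information (see the
 * emulation note above).
 */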
static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
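		/* fall through */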
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)

/*
 * L3 cache descriptors
 */
static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
	struct amd_l3_cache *l3 = &nb->l3_cache;
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));

	if (boot_cpu_data.x86 == 0x15) {
		l3->subcaches[0] = sc0 += !(val & BIT(1));
		l3->subcaches[1] = sc1 += !(val & BIT(5));
	}

	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @nb: northbridge descriptor for the node containing the L3 cache
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if used or negative value if slot free.
 */
static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

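/* sysfs show helper: print the index disabled via @slot, or "FREE" if unused */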
static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;
	struct amd_northbridge *nb = this_leaf->priv;

	index = amd_get_l3_disable_slot(nb, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

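/*
 * Disable index @idx in every populated subcache through the disable-slot
 * register: write the request, flush the caches on a core of the affected
 * node, then commit the disable by setting bit 31.
 */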
static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 *  disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!nb->l3_cache.subcaches[i])
			continue;

		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable, therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable an L3 cache index by using a disable-slot
 *
 * @nb:    northbridge descriptor for the node containing the L3 cache
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
			    unsigned slot, unsigned long index)
{
	int ret = 0;

	/*  check if @slot is already used or the index is already disabled */
	ret = amd_get_l3_disable_slot(nb, slot);
	if (ret >= 0)
		return -EEXIST;

	if (index > nb->l3_cache.indices)
		return -EINVAL;

	/* check whether the other slot has disabled the same index already */
	if (index == amd_get_l3_disable_slot(nb, !slot))
		return -EEXIST;

	amd_l3_disable_index(nb, cpu, slot, index);

	return 0;
}

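/* sysfs store helper: parse an index and disable it via @slot (CAP_SYS_ADMIN only) */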
static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
				   const char *buf, size_t count,
				   unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;
	struct amd_northbridge *nb = this_leaf->priv;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	cpu = cpumask_first(&this_leaf->shared_cpu_map);

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			pr_warn("L3 slot %d in use/index already disabled!\n",
				   slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
cache_disable_##slot##_store(struct device *dev,			\
			     struct device_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static ssize_t subcaches_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);

	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
}

static ssize_t subcaches_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
	unsigned long val;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (kstrtoul(buf, 16, &val) < 0)
		return -EINVAL;

	if (amd_set_subcaches(cpu, val))
		return -EINVAL;

	return count;
}

static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);

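/* Expose cache_disable_[01] and subcaches only when the NB supports them */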
static umode_t
cache_private_attrs_is_visible(struct kobject *kobj,
			       struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
	umode_t mode = attr->mode;

	if (!this_leaf->priv)
		return 0;

	if ((attr == &dev_attr_subcaches.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		return mode;

	if ((attr == &dev_attr_cache_disable_0.attr ||
	     attr == &dev_attr_cache_disable_1.attr) &&
	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return mode;

	return 0;
}

static struct attribute_group cache_private_group = {
	.is_visible = cache_private_attrs_is_visible,
};

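/* Allocate and fill the optional AMD L3 attribute list once, per NB features */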
static void init_amd_l3_attrs(void)
{
	int n = 1;
	static struct attribute **amd_l3_attrs;

	if (amd_l3_attrs) /* already initialized */
		return;

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		n += 1;

	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
	if (!amd_l3_attrs)
		return;

	n = 0;
	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
	}
	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;

	cache_private_group.attrs = amd_l3_attrs;
}

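/* Return the AMD L3 private attribute group for an L3 leaf backed by a NB */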
const struct attribute_group *
cache_get_priv_group(struct cacheinfo *this_leaf)
{
	struct amd_northbridge *nb = this_leaf->priv;

	if (this_leaf->level < 3 || !nb)
		return NULL;

	if (nb && nb->l3_cache.indices)
		init_amd_l3_attrs();

	return &cache_private_group;
}

static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
	int node;

	/* only for L3, and not in virtualized environments */
	if (index < 3)
		return;

	node = amd_get_nb_id(smp_processor_id());
	this_leaf->nb = node_to_amd_nb(node);
	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
		amd_calc_l3_indices(this_leaf->nb);
}
#else
#define amd_init_l3_cache(x, y)
#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */

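/*
 * Read the parameters of cache @index: CPUID leaf 4 on Intel, leaf
 * 0x8000001d or the amd_cpuid4() emulation on AMD, and compute the total
 * cache size from the reported geometry.
 */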
static int
cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
			cpuid_count(0x8000001d, index, &eax.full,
				    &ebx.full, &ecx.full, &edx);
		else
			amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CTYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

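/* Count cache leaves by iterating CPUID until a NULL cache type is returned */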
static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int		eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	if (c->x86_vendor == X86_VENDOR_AMD)
		op = 0x8000001d;
	else
		op = 4;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}

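/*
 * Set num_cache_leaves on AMD: enumerate the leaves when TOPOEXT is
 * available, otherwise assume 3 or 4 leaves depending on whether CPUID
 * 0x80000006 reports an L3.
 */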
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		num_cache_leaves = find_num_cache_leaves(c);
	} else if (c->extended_cpuid_level >= 0x80000006) {
		if (cpuid_edx(0x80000006) & 0xf000)
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}
}

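/*
 * Determine L1/L2/L3 sizes from CPUID(4) where available, falling back to
 * the CPUID(2) descriptor table, set cpu_llc_id and x86_cache_size, and
 * return the L2 size in KB.
 */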
unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves(c);
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters cpuid leaf, to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf = {};
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval < 0)
				continue;

			switch (this_leaf.eax.split.level) {
			case 1:
				if (this_leaf.eax.split.type == CTYPE_DATA)
					new_l1d = this_leaf.size/1024;
				else if (this_leaf.eax.split.type == CTYPE_INST)
					new_l1i = this_leaf.size/1024;
				break;
			case 2:
				new_l2 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l2_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			case 3:
				new_l3 = this_leaf.size/1024;
				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
				index_msb = get_count_order(num_threads_sharing);
				l3_id = c->apicid & ~((1 << index_msb) - 1);
				break;
			default:
				break;
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_SMP
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

#ifdef CONFIG_SMP
	/*
	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
	 * turn means that the only possibility is SMT (as indicated in
	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
	 * c->phys_proc_id.
	 */
	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
#endif

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

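/*
 * AMD-specific shared_cpu_map setup: use cpu_llc_shared_mask for L3 and the
 * TOPOEXT thread-sharing info for the other levels. Returns 1 if the map was
 * set up here, 0 to fall back to the generic path.
 */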
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;
			this_leaf = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = base->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			this_leaf = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling,
						&this_leaf->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

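/* Build shared_cpu_map for cache @index from the CPUID thread-sharing info */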
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 struct _cpuid4_info_regs *base)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if (c->x86_vendor == X86_VENDOR_AMD) {
		if (__cache_amd_cpumap_setup(cpu, index, base))
			return;
	}

	this_leaf = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

	for_each_online_cpu(i)
		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			if (i == cpu || !sib_cpu_ci->info_list)
				continue; /* skip if itself or no cacheinfo */
			sibling_leaf = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
		}
}

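/* Translate the raw CPUID(4) registers into the generic struct cacheinfo */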
static void ci_leaf_init(struct cacheinfo *this_leaf,
			 struct _cpuid4_info_regs *base)
{
	this_leaf->id = base->id;
	this_leaf->attributes = CACHE_ID;
	this_leaf->level = base->eax.split.level;
	this_leaf->type = cache_type_map[base->eax.split.type];
	this_leaf->coherency_line_size =
				base->ebx.split.coherency_line_size + 1;
	this_leaf->ways_of_associativity =
				base->ebx.split.ways_of_associativity + 1;
	this_leaf->size = base->size;
	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
	this_leaf->physical_line_partition =
				base->ebx.split.physical_line_partition + 1;
	this_leaf->priv = base->nb;
}

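/* cacheinfo callback: set the number of cache levels and leaves for @cpu */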
static int __init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);

	if (!num_cache_leaves)
		return -ENOENT;
	if (!this_cpu_ci)
		return -EINVAL;
	this_cpu_ci->num_levels = 3;
	this_cpu_ci->num_leaves = num_cache_leaves;
	return 0;
}

/*
 * The maximum number of sharing threads comes from CPUID.4:EAX[25:14], with
 * ECX as the cache index. Right-shift the APIC ID by that number's order to
 * get the cache id for this cache node.
 */
static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
{
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	id4_regs->id = c->apicid >> index_msb;
}

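/* cacheinfo callback: fill each leaf of @cpu's info_list and its shared CPU map */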
static int __populate_cache_leaves(unsigned int cpu)
{
	unsigned int idx, ret;
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
	struct _cpuid4_info_regs id4_regs = {};

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
		if (ret)
			return ret;
		get_cache_id(cpu, &id4_regs);
		ci_leaf_init(this_leaf++, &id4_regs);
		__cache_cpumap_setup(cpu, idx, &id4_regs);
	}
	this_cpu_ci->cpu_map_populated = true;

	return 0;
}

DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)