Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.4.
  1/*
  2 *	Routines to identify caches on Intel CPU.
  3 *
  4 *	Changes:
  5 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  6 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  7 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  8 */
  9
 10#include <linux/slab.h>
 11#include <linux/cacheinfo.h>
 12#include <linux/cpu.h>
 13#include <linux/sched.h>
 14#include <linux/sysfs.h>
 15#include <linux/pci.h>
 16
 17#include <asm/cpufeature.h>
 18#include <asm/amd_nb.h>
 19#include <asm/smp.h>
 20
 21#define LVL_1_INST	1
 22#define LVL_1_DATA	2
 23#define LVL_2		3
 24#define LVL_3		4
 25#define LVL_TRACE	5
 26
 27struct _cache_table {
 28	unsigned char descriptor;
 29	char cache_type;
 30	short size;
 31};
 32
 33#define MB(x)	((x) * 1024)
 34
 35/* All the cache descriptor types we care about (no TLB or
 36   trace cache entries) */
 37
 38static const struct _cache_table cache_table[] =
 39{
 40	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
 41	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
 42	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
 43	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 44	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
 45	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
 46	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
 47	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
 48	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 49	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 50	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 51	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 52	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
 53	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
 54	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 55	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 56	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
 57	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 58	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
 59	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 60	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
 61	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
 62	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
 63	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
 64	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
 65	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
 66	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
 67	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
 68	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
 69	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
 70	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
 71	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
 72	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
 73	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
 74	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
 75	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 76	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 77	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 78	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 79	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
 80	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
 81	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
 82	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
 83	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 84	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 85	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 86	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
 87	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 88	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
 89	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
 90	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
 91	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
 92	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
 93	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
 94	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
 95	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
 96	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
 97	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
 98	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
 99	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
100	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
101	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
102	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
103	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
104	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
105	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
106	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
107	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
108	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
109	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
110	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
111	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
112	{ 0x00, 0, 0}
113};
114
115
116enum _cache_type {
117	CTYPE_NULL = 0,
118	CTYPE_DATA = 1,
119	CTYPE_INST = 2,
120	CTYPE_UNIFIED = 3
121};
122
123union _cpuid4_leaf_eax {
124	struct {
125		enum _cache_type	type:5;
126		unsigned int		level:3;
127		unsigned int		is_self_initializing:1;
128		unsigned int		is_fully_associative:1;
129		unsigned int		reserved:4;
130		unsigned int		num_threads_sharing:12;
131		unsigned int		num_cores_on_die:6;
132	} split;
133	u32 full;
134};
135
136union _cpuid4_leaf_ebx {
137	struct {
138		unsigned int		coherency_line_size:12;
139		unsigned int		physical_line_partition:10;
140		unsigned int		ways_of_associativity:10;
141	} split;
142	u32 full;
143};
144
145union _cpuid4_leaf_ecx {
146	struct {
147		unsigned int		number_of_sets:32;
148	} split;
149	u32 full;
150};
151
152struct _cpuid4_info_regs {
153	union _cpuid4_leaf_eax eax;
154	union _cpuid4_leaf_ebx ebx;
155	union _cpuid4_leaf_ecx ecx;
156	unsigned int id;
157	unsigned long size;
158	struct amd_northbridge *nb;
159};
160
161static unsigned short num_cache_leaves;
162
/*
 * AMD CPUs without CPUID(4) get it emulated from the registers below so
 * that userspace sees the same information. The emulation assumes that
 * L2 is not shared and that there is no SMT, which holds for the AMD CPUs
 * it is used on.
 *
 * In theory the TLBs could be reported as a fake cache type as well (they
 * are returned in the registers read into "dummy"). Maybe later.
 */

/* L1 data/instruction cache register from CPUID(0x80000005) ECX/EDX. */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

/* L2 cache register from CPUID(0x80000006) ECX. */
union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;	/* encoded, decode through assocs[] */
		unsigned size_in_kb:16;
	};
	unsigned val;
};

/* L3 cache register from CPUID(0x80000006) EDX. */
union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;	/* encoded, decode through assocs[] */
		unsigned res:2;
		unsigned size_encoded:14;	/* size in units of 512 KB */
	};
	unsigned val;
};
199
/*
 * Decode the 4-bit associativity encoding used by the AMD L2/L3 cache
 * registers (CPUID 0x80000006) into a number of ways.
 *
 * Encodings without an entry (0, 7, 9) decode to 0. Encodings 3 (3-way)
 * and 5 (6-way) are valid per the AMD CPUID specification and must be
 * present, otherwise such caches would be reported with zero ways.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
213
/*
 * Cache level and CPUID(4) cache type reported for each emulated AMD leaf:
 * leaf 0 is L1 data, leaf 1 is L1 instruction, leaves 2 and 3 are the
 * unified L2 and L3.
 */
static const unsigned char levels[] = {
	[0] = 1,
	[1] = 1,
	[2] = 2,
	[3] = 3,
};
static const unsigned char types[] = {
	[0] = 1,
	[1] = 2,
	[2] = 3,
	[3] = 3,
};
216
217static const enum cache_type cache_type_map[] = {
218	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
219	[CTYPE_DATA] = CACHE_TYPE_DATA,
220	[CTYPE_INST] = CACHE_TYPE_INST,
221	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
222};
223
224static void
225amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
226		     union _cpuid4_leaf_ebx *ebx,
227		     union _cpuid4_leaf_ecx *ecx)
228{
229	unsigned dummy;
230	unsigned line_size, lines_per_tag, assoc, size_in_kb;
231	union l1_cache l1i, l1d;
232	union l2_cache l2;
233	union l3_cache l3;
234	union l1_cache *l1 = &l1d;
235
236	eax->full = 0;
237	ebx->full = 0;
238	ecx->full = 0;
239
240	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
241	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
242
243	switch (leaf) {
244	case 1:
245		l1 = &l1i;
246	case 0:
247		if (!l1->val)
248			return;
249		assoc = assocs[l1->assoc];
250		line_size = l1->line_size;
251		lines_per_tag = l1->lines_per_tag;
252		size_in_kb = l1->size_in_kb;
253		break;
254	case 2:
255		if (!l2.val)
256			return;
257		assoc = assocs[l2.assoc];
258		line_size = l2.line_size;
259		lines_per_tag = l2.lines_per_tag;
260		/* cpu_data has errata corrections for K7 applied */
261		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
262		break;
263	case 3:
264		if (!l3.val)
265			return;
266		assoc = assocs[l3.assoc];
267		line_size = l3.line_size;
268		lines_per_tag = l3.lines_per_tag;
269		size_in_kb = l3.size_encoded * 512;
270		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
271			size_in_kb = size_in_kb >> 1;
272			assoc = assoc >> 1;
273		}
274		break;
275	default:
276		return;
277	}
278
279	eax->split.is_self_initializing = 1;
280	eax->split.type = types[leaf];
281	eax->split.level = levels[leaf];
282	eax->split.num_threads_sharing = 0;
283	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
284
285
286	if (assoc == 0xffff)
287		eax->split.is_fully_associative = 1;
288	ebx->split.coherency_line_size = line_size - 1;
289	ebx->split.ways_of_associativity = assoc - 1;
290	ebx->split.physical_line_partition = lines_per_tag - 1;
291	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
292		(ebx->split.ways_of_associativity + 1) - 1;
293}
294
295#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
296
297/*
298 * L3 cache descriptors
299 */
300static void amd_calc_l3_indices(struct amd_northbridge *nb)
301{
302	struct amd_l3_cache *l3 = &nb->l3_cache;
303	unsigned int sc0, sc1, sc2, sc3;
304	u32 val = 0;
305
306	pci_read_config_dword(nb->misc, 0x1C4, &val);
307
308	/* calculate subcache sizes */
309	l3->subcaches[0] = sc0 = !(val & BIT(0));
310	l3->subcaches[1] = sc1 = !(val & BIT(4));
311
312	if (boot_cpu_data.x86 == 0x15) {
313		l3->subcaches[0] = sc0 += !(val & BIT(1));
314		l3->subcaches[1] = sc1 += !(val & BIT(5));
315	}
316
317	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
318	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
319
320	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
321}
322
323/*
324 * check whether a slot used for disabling an L3 index is occupied.
325 * @l3: L3 cache descriptor
326 * @slot: slot number (0..1)
327 *
328 * @returns: the disabled index if used or negative value if slot free.
329 */
330static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
331{
332	unsigned int reg = 0;
333
334	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
335
336	/* check whether this slot is activated already */
337	if (reg & (3UL << 30))
338		return reg & 0xfff;
339
340	return -1;
341}
342
343static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
344				  unsigned int slot)
345{
346	int index;
347	struct amd_northbridge *nb = this_leaf->priv;
348
349	index = amd_get_l3_disable_slot(nb, slot);
350	if (index >= 0)
351		return sprintf(buf, "%d\n", index);
352
353	return sprintf(buf, "FREE\n");
354}
355
356#define SHOW_CACHE_DISABLE(slot)					\
357static ssize_t								\
358cache_disable_##slot##_show(struct device *dev,				\
359			    struct device_attribute *attr, char *buf)	\
360{									\
361	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
362	return show_cache_disable(this_leaf, buf, slot);		\
363}
364SHOW_CACHE_DISABLE(0)
365SHOW_CACHE_DISABLE(1)
366
367static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
368				 unsigned slot, unsigned long idx)
369{
370	int i;
371
372	idx |= BIT(30);
373
374	/*
375	 *  disable index in all 4 subcaches
376	 */
377	for (i = 0; i < 4; i++) {
378		u32 reg = idx | (i << 20);
379
380		if (!nb->l3_cache.subcaches[i])
381			continue;
382
383		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
384
385		/*
386		 * We need to WBINVD on a core on the node containing the L3
387		 * cache which indices we disable therefore a simple wbinvd()
388		 * is not sufficient.
389		 */
390		wbinvd_on_cpu(cpu);
391
392		reg |= BIT(31);
393		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
394	}
395}
396
397/*
398 * disable a L3 cache index by using a disable-slot
399 *
400 * @l3:    L3 cache descriptor
401 * @cpu:   A CPU on the node containing the L3 cache
402 * @slot:  slot number (0..1)
403 * @index: index to disable
404 *
405 * @return: 0 on success, error status on failure
406 */
407static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
408			    unsigned slot, unsigned long index)
409{
410	int ret = 0;
411
412	/*  check if @slot is already used or the index is already disabled */
413	ret = amd_get_l3_disable_slot(nb, slot);
414	if (ret >= 0)
415		return -EEXIST;
416
417	if (index > nb->l3_cache.indices)
418		return -EINVAL;
419
420	/* check whether the other slot has disabled the same index already */
421	if (index == amd_get_l3_disable_slot(nb, !slot))
422		return -EEXIST;
423
424	amd_l3_disable_index(nb, cpu, slot, index);
425
426	return 0;
427}
428
429static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
430				   const char *buf, size_t count,
431				   unsigned int slot)
432{
433	unsigned long val = 0;
434	int cpu, err = 0;
435	struct amd_northbridge *nb = this_leaf->priv;
436
437	if (!capable(CAP_SYS_ADMIN))
438		return -EPERM;
439
440	cpu = cpumask_first(&this_leaf->shared_cpu_map);
441
442	if (kstrtoul(buf, 10, &val) < 0)
443		return -EINVAL;
444
445	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
446	if (err) {
447		if (err == -EEXIST)
448			pr_warn("L3 slot %d in use/index already disabled!\n",
449				   slot);
450		return err;
451	}
452	return count;
453}
454
455#define STORE_CACHE_DISABLE(slot)					\
456static ssize_t								\
457cache_disable_##slot##_store(struct device *dev,			\
458			     struct device_attribute *attr,		\
459			     const char *buf, size_t count)		\
460{									\
461	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
462	return store_cache_disable(this_leaf, buf, count, slot);	\
463}
464STORE_CACHE_DISABLE(0)
465STORE_CACHE_DISABLE(1)
466
467static ssize_t subcaches_show(struct device *dev,
468			      struct device_attribute *attr, char *buf)
469{
470	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
471	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
472
473	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
474}
475
476static ssize_t subcaches_store(struct device *dev,
477			       struct device_attribute *attr,
478			       const char *buf, size_t count)
479{
480	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
481	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
482	unsigned long val;
483
484	if (!capable(CAP_SYS_ADMIN))
485		return -EPERM;
486
487	if (kstrtoul(buf, 16, &val) < 0)
488		return -EINVAL;
489
490	if (amd_set_subcaches(cpu, val))
491		return -EINVAL;
492
493	return count;
494}
495
496static DEVICE_ATTR_RW(cache_disable_0);
497static DEVICE_ATTR_RW(cache_disable_1);
498static DEVICE_ATTR_RW(subcaches);
499
500static umode_t
501cache_private_attrs_is_visible(struct kobject *kobj,
502			       struct attribute *attr, int unused)
503{
504	struct device *dev = kobj_to_dev(kobj);
505	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
506	umode_t mode = attr->mode;
507
508	if (!this_leaf->priv)
509		return 0;
510
511	if ((attr == &dev_attr_subcaches.attr) &&
512	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
513		return mode;
514
515	if ((attr == &dev_attr_cache_disable_0.attr ||
516	     attr == &dev_attr_cache_disable_1.attr) &&
517	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
518		return mode;
519
520	return 0;
521}
522
523static struct attribute_group cache_private_group = {
524	.is_visible = cache_private_attrs_is_visible,
525};
526
527static void init_amd_l3_attrs(void)
528{
529	int n = 1;
530	static struct attribute **amd_l3_attrs;
531
532	if (amd_l3_attrs) /* already initialized */
533		return;
534
535	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
536		n += 2;
537	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
538		n += 1;
539
540	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
541	if (!amd_l3_attrs)
542		return;
543
544	n = 0;
545	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
546		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
547		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
548	}
549	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
550		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
551
552	cache_private_group.attrs = amd_l3_attrs;
553}
554
555const struct attribute_group *
556cache_get_priv_group(struct cacheinfo *this_leaf)
557{
558	struct amd_northbridge *nb = this_leaf->priv;
559
560	if (this_leaf->level < 3 || !nb)
561		return NULL;
562
563	if (nb && nb->l3_cache.indices)
564		init_amd_l3_attrs();
565
566	return &cache_private_group;
567}
568
569static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
570{
571	int node;
572
573	/* only for L3, and not in virtualized environments */
574	if (index < 3)
575		return;
576
577	node = amd_get_nb_id(smp_processor_id());
578	this_leaf->nb = node_to_amd_nb(node);
579	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
580		amd_calc_l3_indices(this_leaf->nb);
581}
582#else
583#define amd_init_l3_cache(x, y)
584#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
585
586static int
587cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
588{
589	union _cpuid4_leaf_eax	eax;
590	union _cpuid4_leaf_ebx	ebx;
591	union _cpuid4_leaf_ecx	ecx;
592	unsigned		edx;
593
594	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
595		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
596			cpuid_count(0x8000001d, index, &eax.full,
597				    &ebx.full, &ecx.full, &edx);
598		else
599			amd_cpuid4(index, &eax, &ebx, &ecx);
600		amd_init_l3_cache(this_leaf, index);
601	} else {
602		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
603	}
604
605	if (eax.split.type == CTYPE_NULL)
606		return -EIO; /* better error ? */
607
608	this_leaf->eax = eax;
609	this_leaf->ebx = ebx;
610	this_leaf->ecx = ecx;
611	this_leaf->size = (ecx.split.number_of_sets          + 1) *
612			  (ebx.split.coherency_line_size     + 1) *
613			  (ebx.split.physical_line_partition + 1) *
614			  (ebx.split.ways_of_associativity   + 1);
615	return 0;
616}
617
618static int find_num_cache_leaves(struct cpuinfo_x86 *c)
619{
620	unsigned int		eax, ebx, ecx, edx, op;
621	union _cpuid4_leaf_eax	cache_eax;
622	int 			i = -1;
623
624	if (c->x86_vendor == X86_VENDOR_AMD)
625		op = 0x8000001d;
626	else
627		op = 4;
628
629	do {
630		++i;
631		/* Do cpuid(op) loop to find out num_cache_leaves */
632		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
633		cache_eax.full = eax;
634	} while (cache_eax.split.type != CTYPE_NULL);
635	return i;
636}
637
638void init_amd_cacheinfo(struct cpuinfo_x86 *c)
639{
640
641	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
642		num_cache_leaves = find_num_cache_leaves(c);
643	} else if (c->extended_cpuid_level >= 0x80000006) {
644		if (cpuid_edx(0x80000006) & 0xf000)
645			num_cache_leaves = 4;
646		else
647			num_cache_leaves = 3;
648	}
649}
650
651unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
652{
653	/* Cache sizes */
654	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
655	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
656	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
657	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
658#ifdef CONFIG_SMP
659	unsigned int cpu = c->cpu_index;
660#endif
661
662	if (c->cpuid_level > 3) {
663		static int is_initialized;
664
665		if (is_initialized == 0) {
666			/* Init num_cache_leaves from boot CPU */
667			num_cache_leaves = find_num_cache_leaves(c);
668			is_initialized++;
669		}
670
671		/*
672		 * Whenever possible use cpuid(4), deterministic cache
673		 * parameters cpuid leaf to find the cache details
674		 */
675		for (i = 0; i < num_cache_leaves; i++) {
676			struct _cpuid4_info_regs this_leaf = {};
677			int retval;
678
679			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
680			if (retval < 0)
681				continue;
682
683			switch (this_leaf.eax.split.level) {
684			case 1:
685				if (this_leaf.eax.split.type == CTYPE_DATA)
686					new_l1d = this_leaf.size/1024;
687				else if (this_leaf.eax.split.type == CTYPE_INST)
688					new_l1i = this_leaf.size/1024;
689				break;
690			case 2:
691				new_l2 = this_leaf.size/1024;
692				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
693				index_msb = get_count_order(num_threads_sharing);
694				l2_id = c->apicid & ~((1 << index_msb) - 1);
695				break;
696			case 3:
697				new_l3 = this_leaf.size/1024;
698				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
699				index_msb = get_count_order(num_threads_sharing);
700				l3_id = c->apicid & ~((1 << index_msb) - 1);
701				break;
702			default:
703				break;
704			}
705		}
706	}
707	/*
708	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
709	 * trace cache
710	 */
711	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
712		/* supports eax=2  call */
713		int j, n;
714		unsigned int regs[4];
715		unsigned char *dp = (unsigned char *)regs;
716		int only_trace = 0;
717
718		if (num_cache_leaves != 0 && c->x86 == 15)
719			only_trace = 1;
720
721		/* Number of times to iterate */
722		n = cpuid_eax(2) & 0xFF;
723
724		for (i = 0 ; i < n ; i++) {
725			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
726
727			/* If bit 31 is set, this is an unknown format */
728			for (j = 0 ; j < 3 ; j++)
729				if (regs[j] & (1 << 31))
730					regs[j] = 0;
731
732			/* Byte 0 is level count, not a descriptor */
733			for (j = 1 ; j < 16 ; j++) {
734				unsigned char des = dp[j];
735				unsigned char k = 0;
736
737				/* look up this descriptor in the table */
738				while (cache_table[k].descriptor != 0) {
739					if (cache_table[k].descriptor == des) {
740						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
741							break;
742						switch (cache_table[k].cache_type) {
743						case LVL_1_INST:
744							l1i += cache_table[k].size;
745							break;
746						case LVL_1_DATA:
747							l1d += cache_table[k].size;
748							break;
749						case LVL_2:
750							l2 += cache_table[k].size;
751							break;
752						case LVL_3:
753							l3 += cache_table[k].size;
754							break;
755						case LVL_TRACE:
756							trace += cache_table[k].size;
757							break;
758						}
759
760						break;
761					}
762
763					k++;
764				}
765			}
766		}
767	}
768
769	if (new_l1d)
770		l1d = new_l1d;
771
772	if (new_l1i)
773		l1i = new_l1i;
774
775	if (new_l2) {
776		l2 = new_l2;
777#ifdef CONFIG_SMP
778		per_cpu(cpu_llc_id, cpu) = l2_id;
779#endif
780	}
781
782	if (new_l3) {
783		l3 = new_l3;
784#ifdef CONFIG_SMP
785		per_cpu(cpu_llc_id, cpu) = l3_id;
786#endif
787	}
788
789#ifdef CONFIG_SMP
790	/*
791	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
792	 * turns means that the only possibility is SMT (as indicated in
793	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
794	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
795	 * c->phys_proc_id.
796	 */
797	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
798		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
799#endif
800
801	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
802
803	return l2;
804}
805
806static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
807				    struct _cpuid4_info_regs *base)
808{
809	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
810	struct cacheinfo *this_leaf;
811	int i, sibling;
812
813	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
814		unsigned int apicid, nshared, first, last;
815
816		this_leaf = this_cpu_ci->info_list + index;
817		nshared = base->eax.split.num_threads_sharing + 1;
818		apicid = cpu_data(cpu).apicid;
819		first = apicid - (apicid % nshared);
820		last = first + nshared - 1;
821
822		for_each_online_cpu(i) {
823			this_cpu_ci = get_cpu_cacheinfo(i);
824			if (!this_cpu_ci->info_list)
825				continue;
826
827			apicid = cpu_data(i).apicid;
828			if ((apicid < first) || (apicid > last))
829				continue;
830
831			this_leaf = this_cpu_ci->info_list + index;
832
833			for_each_online_cpu(sibling) {
834				apicid = cpu_data(sibling).apicid;
835				if ((apicid < first) || (apicid > last))
836					continue;
837				cpumask_set_cpu(sibling,
838						&this_leaf->shared_cpu_map);
839			}
840		}
841	} else if (index == 3) {
842		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
843			this_cpu_ci = get_cpu_cacheinfo(i);
844			if (!this_cpu_ci->info_list)
845				continue;
846			this_leaf = this_cpu_ci->info_list + index;
847			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
848				if (!cpu_online(sibling))
849					continue;
850				cpumask_set_cpu(sibling,
851						&this_leaf->shared_cpu_map);
852			}
853		}
854	} else
855		return 0;
856
857	return 1;
858}
859
860static void __cache_cpumap_setup(unsigned int cpu, int index,
861				 struct _cpuid4_info_regs *base)
862{
863	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
864	struct cacheinfo *this_leaf, *sibling_leaf;
865	unsigned long num_threads_sharing;
866	int index_msb, i;
867	struct cpuinfo_x86 *c = &cpu_data(cpu);
868
869	if (c->x86_vendor == X86_VENDOR_AMD) {
870		if (__cache_amd_cpumap_setup(cpu, index, base))
871			return;
872	}
873
874	this_leaf = this_cpu_ci->info_list + index;
875	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
876
877	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
878	if (num_threads_sharing == 1)
879		return;
880
881	index_msb = get_count_order(num_threads_sharing);
882
883	for_each_online_cpu(i)
884		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
885			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
886
887			if (i == cpu || !sib_cpu_ci->info_list)
888				continue;/* skip if itself or no cacheinfo */
889			sibling_leaf = sib_cpu_ci->info_list + index;
890			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
891			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
892		}
893}
894
895static void ci_leaf_init(struct cacheinfo *this_leaf,
896			 struct _cpuid4_info_regs *base)
897{
898	this_leaf->id = base->id;
899	this_leaf->attributes = CACHE_ID;
900	this_leaf->level = base->eax.split.level;
901	this_leaf->type = cache_type_map[base->eax.split.type];
902	this_leaf->coherency_line_size =
903				base->ebx.split.coherency_line_size + 1;
904	this_leaf->ways_of_associativity =
905				base->ebx.split.ways_of_associativity + 1;
906	this_leaf->size = base->size;
907	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
908	this_leaf->physical_line_partition =
909				base->ebx.split.physical_line_partition + 1;
910	this_leaf->priv = base->nb;
911}
912
913static int __init_cache_level(unsigned int cpu)
914{
915	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
916
917	if (!num_cache_leaves)
918		return -ENOENT;
919	if (!this_cpu_ci)
920		return -EINVAL;
921	this_cpu_ci->num_levels = 3;
922	this_cpu_ci->num_leaves = num_cache_leaves;
923	return 0;
924}
925
926/*
927 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
928 * ECX as cache index. Then right shift apicid by the number's order to get
929 * cache id for this cache node.
930 */
931static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
932{
933	struct cpuinfo_x86 *c = &cpu_data(cpu);
934	unsigned long num_threads_sharing;
935	int index_msb;
936
937	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
938	index_msb = get_count_order(num_threads_sharing);
939	id4_regs->id = c->apicid >> index_msb;
940}
941
942static int __populate_cache_leaves(unsigned int cpu)
943{
944	unsigned int idx, ret;
945	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
946	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
947	struct _cpuid4_info_regs id4_regs = {};
948
949	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
950		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
951		if (ret)
952			return ret;
953		get_cache_id(cpu, &id4_regs);
954		ci_leaf_init(this_leaf++, &id4_regs);
955		__cache_cpumap_setup(cpu, idx, &id4_regs);
956	}
957	this_cpu_ci->cpu_map_populated = true;
958
959	return 0;
960}
961
962DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
963DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)