1/*
2 * Routines to identify caches on Intel CPU.
3 *
4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */
9
10#include <linux/init.h>
11#include <linux/slab.h>
12#include <linux/device.h>
13#include <linux/compiler.h>
14#include <linux/cpu.h>
15#include <linux/sched.h>
16#include <linux/pci.h>
17
18#include <asm/processor.h>
19#include <linux/smp.h>
20#include <asm/amd_nb.h>
21#include <asm/smp.h>
22
23#define LVL_1_INST 1
24#define LVL_1_DATA 2
25#define LVL_2 3
26#define LVL_3 4
27#define LVL_TRACE 5
28
29struct _cache_table {
30 unsigned char descriptor;
31 char cache_type;
32 short size;
33};
34
35#define MB(x) ((x) * 1024)
36
37/* All the cache descriptor types we care about (no TLB
38   entries) */
39
40static const struct _cache_table __cpuinitconst cache_table[] =
41{
42 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
43 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
44 { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */
45 { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
46 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
48 { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */
49 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
50 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
51 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
52 { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
53 { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
54 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
55 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
56 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
57 { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
58 { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
59 { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
60 { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
61 { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
62 { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */
63 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
64 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
65 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
66 { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
67 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
68 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
69 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
70 { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */
71 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
72 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
73 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
74 { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
75 { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
76 { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
77 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
78 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
79 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
80 { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
81 { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
82 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
83 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
84 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
85 { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
86 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
87 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
88 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
89 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
90 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
91 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
92 { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */
93 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
94 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
95 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
96 { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
97 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
98 { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
99 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
100 { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
101 { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
102 { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
103 { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
104 { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
105 { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
106 { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
107 { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
108 { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
109 { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
110 { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
111 { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
112 { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
113 { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
114 { 0x00, 0, 0}
115};
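/*
 * Illustrative only: how a CPUID leaf 2 descriptor byte is consumed via
 * the table above.  Descriptor 0x2c, for example, matches the
 * { 0x2c, LVL_1_DATA, 32 } entry, so 32 KB is added to the running L1D
 * total in init_intel_cacheinfo().  Sizes are in KB (see MB()); unknown
 * descriptors simply fall off the end of the lookup loop and are ignored.
 */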
116
117
118enum _cache_type {
119 CACHE_TYPE_NULL = 0,
120 CACHE_TYPE_DATA = 1,
121 CACHE_TYPE_INST = 2,
122 CACHE_TYPE_UNIFIED = 3
123};
124
125union _cpuid4_leaf_eax {
126 struct {
127 enum _cache_type type:5;
128 unsigned int level:3;
129 unsigned int is_self_initializing:1;
130 unsigned int is_fully_associative:1;
131 unsigned int reserved:4;
132 unsigned int num_threads_sharing:12;
133 unsigned int num_cores_on_die:6;
134 } split;
135 u32 full;
136};
137
138union _cpuid4_leaf_ebx {
139 struct {
140 unsigned int coherency_line_size:12;
141 unsigned int physical_line_partition:10;
142 unsigned int ways_of_associativity:10;
143 } split;
144 u32 full;
145};
146
147union _cpuid4_leaf_ecx {
148 struct {
149 unsigned int number_of_sets:32;
150 } split;
151 u32 full;
152};
153
154struct amd_l3_cache {
155 struct amd_northbridge *nb;
156 unsigned indices;
157 u8 subcaches[4];
158};
159
160struct _cpuid4_info {
161 union _cpuid4_leaf_eax eax;
162 union _cpuid4_leaf_ebx ebx;
163 union _cpuid4_leaf_ecx ecx;
164 unsigned long size;
165 struct amd_l3_cache *l3;
166 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
167};
168
169/* subset of above _cpuid4_info w/o shared_cpu_map */
170struct _cpuid4_info_regs {
171 union _cpuid4_leaf_eax eax;
172 union _cpuid4_leaf_ebx ebx;
173 union _cpuid4_leaf_ecx ecx;
174 unsigned long size;
175 struct amd_l3_cache *l3;
176};
177
178unsigned short num_cache_leaves;
179
180/* AMD doesn't have CPUID4. Emulate it here to report the same
181 information to the user. This makes some assumptions about the machine:
182   L2 not shared, no SMT etc., which is currently true on AMD CPUs.
183
184 In theory the TLBs could be reported as fake type (they are in "dummy").
185 Maybe later */
186union l1_cache {
187 struct {
188 unsigned line_size:8;
189 unsigned lines_per_tag:8;
190 unsigned assoc:8;
191 unsigned size_in_kb:8;
192 };
193 unsigned val;
194};
195
196union l2_cache {
197 struct {
198 unsigned line_size:8;
199 unsigned lines_per_tag:4;
200 unsigned assoc:4;
201 unsigned size_in_kb:16;
202 };
203 unsigned val;
204};
205
206union l3_cache {
207 struct {
208 unsigned line_size:8;
209 unsigned lines_per_tag:4;
210 unsigned assoc:4;
211 unsigned res:2;
212 unsigned size_encoded:14;
213 };
214 unsigned val;
215};
216
217static const unsigned short __cpuinitconst assocs[] = {
218 [1] = 1,
219 [2] = 2,
220 [4] = 4,
221 [6] = 8,
222 [8] = 16,
223 [0xa] = 32,
224 [0xb] = 48,
225 [0xc] = 64,
226 [0xd] = 96,
227 [0xe] = 128,
228 [0xf] = 0xffff /* fully associative - no way to show this currently */
229};
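/*
 * A sketch of the decode above (based on the L2/L3 associativity encoding
 * in CPUID 0x80000006): a raw 4-bit code of 0x6 maps to an 8-way cache,
 * while 0xf means "fully associative" and is flagged with the 0xffff
 * sentinel so that amd_cpuid4() can set eax->split.is_fully_associative.
 */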
230
231static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
232static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
233
234static void __cpuinit
235amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
236 union _cpuid4_leaf_ebx *ebx,
237 union _cpuid4_leaf_ecx *ecx)
238{
239 unsigned dummy;
240 unsigned line_size, lines_per_tag, assoc, size_in_kb;
241 union l1_cache l1i, l1d;
242 union l2_cache l2;
243 union l3_cache l3;
244 union l1_cache *l1 = &l1d;
245
246 eax->full = 0;
247 ebx->full = 0;
248 ecx->full = 0;
249
250 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
251 cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
252
253 switch (leaf) {
254 case 1:
255		l1 = &l1i;	/* fall through */
256 case 0:
257 if (!l1->val)
258 return;
259 assoc = assocs[l1->assoc];
260 line_size = l1->line_size;
261 lines_per_tag = l1->lines_per_tag;
262 size_in_kb = l1->size_in_kb;
263 break;
264 case 2:
265 if (!l2.val)
266 return;
267 assoc = assocs[l2.assoc];
268 line_size = l2.line_size;
269 lines_per_tag = l2.lines_per_tag;
270 /* cpu_data has errata corrections for K7 applied */
271 size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
272 break;
273 case 3:
274 if (!l3.val)
275 return;
276 assoc = assocs[l3.assoc];
277 line_size = l3.line_size;
278 lines_per_tag = l3.lines_per_tag;
279 size_in_kb = l3.size_encoded * 512;
280 if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
281 size_in_kb = size_in_kb >> 1;
282 assoc = assoc >> 1;
283 }
284 break;
285 default:
286 return;
287 }
288
289 eax->split.is_self_initializing = 1;
290 eax->split.type = types[leaf];
291 eax->split.level = levels[leaf];
292 eax->split.num_threads_sharing = 0;
293 eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
294
295
296 if (assoc == 0xffff)
297 eax->split.is_fully_associative = 1;
298 ebx->split.coherency_line_size = line_size - 1;
299 ebx->split.ways_of_associativity = assoc - 1;
300 ebx->split.physical_line_partition = lines_per_tag - 1;
301 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
302 (ebx->split.ways_of_associativity + 1) - 1;
303}
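/*
 * Worked example (illustrative, not tied to a particular CPU): for a
 * 512 KB, 16-way L2 with 64 byte lines, amd_cpuid4() encodes
 *
 *	ebx->split.coherency_line_size   = 64 - 1  = 63
 *	ebx->split.ways_of_associativity = 16 - 1  = 15
 *	ecx->split.number_of_sets        = 512 * 1024 / 64 / 16 - 1 = 511
 *
 * i.e. every field follows the CPUID(4) "value minus one" convention, so
 * the generic lookup below reconstructs the size identically for Intel
 * and AMD leaves.
 */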
304
305struct _cache_attr {
306 struct attribute attr;
307 ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
308 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
309 unsigned int);
310};
311
312#ifdef CONFIG_AMD_NB
313
314/*
315 * L3 cache descriptors
316 */
317static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
318{
319 unsigned int sc0, sc1, sc2, sc3;
320 u32 val = 0;
321
322 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
323
324 /* calculate subcache sizes */
325 l3->subcaches[0] = sc0 = !(val & BIT(0));
326 l3->subcaches[1] = sc1 = !(val & BIT(4));
327 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
328 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
329
330 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
331}
332
333static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
334 int index)
335{
336 static struct amd_l3_cache *__cpuinitdata l3_caches;
337 int node;
338
339 /* only for L3, and not in virtualized environments */
340 if (index < 3 || amd_nb_num() == 0)
341 return;
342
343 /*
344 * Strictly speaking, the amount in @size below is leaked since it is
345 * never freed but this is done only on shutdown so it doesn't matter.
346 */
347 if (!l3_caches) {
348 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
349
350 l3_caches = kzalloc(size, GFP_ATOMIC);
351 if (!l3_caches)
352 return;
353 }
354
355 node = amd_get_nb_id(smp_processor_id());
356
357 if (!l3_caches[node].nb) {
358 l3_caches[node].nb = node_to_amd_nb(node);
359 amd_calc_l3_indices(&l3_caches[node]);
360 }
361
362 this_leaf->l3 = &l3_caches[node];
363}
364
365/*
366 * check whether a slot used for disabling an L3 index is occupied.
367 * @l3: L3 cache descriptor
368 * @slot: slot number (0..1)
369 *
370 * @returns: the disabled index if used or a negative value if the slot is free.
371 */
372int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
373{
374 unsigned int reg = 0;
375
376	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
377
378 /* check whether this slot is activated already */
379 if (reg & (3UL << 30))
380 return reg & 0xfff;
381
382 return -1;
383}
384
385static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
386 unsigned int slot)
387{
388 int index;
389
390 if (!this_leaf->l3 ||
391 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
392 return -EINVAL;
393
394 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
395 if (index >= 0)
396 return sprintf(buf, "%d\n", index);
397
398 return sprintf(buf, "FREE\n");
399}
400
401#define SHOW_CACHE_DISABLE(slot) \
402static ssize_t \
403show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
404 unsigned int cpu) \
405{ \
406 return show_cache_disable(this_leaf, buf, slot); \
407}
408SHOW_CACHE_DISABLE(0)
409SHOW_CACHE_DISABLE(1)
410
411static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
412 unsigned slot, unsigned long idx)
413{
414 int i;
415
416 idx |= BIT(30);
417
418 /*
419 * disable index in all 4 subcaches
420 */
421 for (i = 0; i < 4; i++) {
422 u32 reg = idx | (i << 20);
423
424 if (!l3->subcaches[i])
425 continue;
426
427 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
428
429 /*
430		 * We need to WBINVD on a core on the node containing the L3
431		 * cache whose indices we disable; a simple wbinvd()
432		 * is therefore not sufficient.
433 */
434 wbinvd_on_cpu(cpu);
435
436 reg |= BIT(31);
437 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
438 }
439}
440
441/*
442 * disable an L3 cache index by using a disable-slot
443 *
444 * @l3: L3 cache descriptor
445 * @cpu: A CPU on the node containing the L3 cache
446 * @slot: slot number (0..1)
447 * @index: index to disable
448 *
449 * @return: 0 on success, error status on failure
450 */
451int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
452 unsigned long index)
453{
454 int ret = 0;
455
456 /* check if @slot is already used or the index is already disabled */
457 ret = amd_get_l3_disable_slot(l3, slot);
458 if (ret >= 0)
459		return -EEXIST;
460
461 if (index > l3->indices)
462 return -EINVAL;
463
464 /* check whether the other slot has disabled the same index already */
465 if (index == amd_get_l3_disable_slot(l3, !slot))
466		return -EEXIST;
467
468 amd_l3_disable_index(l3, cpu, slot, index);
469
470 return 0;
471}
472
473static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
474 const char *buf, size_t count,
475 unsigned int slot)
476{
477 unsigned long val = 0;
478 int cpu, err = 0;
479
480 if (!capable(CAP_SYS_ADMIN))
481 return -EPERM;
482
483 if (!this_leaf->l3 ||
484 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
485 return -EINVAL;
486
487 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
488
489 if (strict_strtoul(buf, 10, &val) < 0)
490 return -EINVAL;
491
492 err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
493 if (err) {
494 if (err == -EEXIST)
495			printk(KERN_WARNING "L3 slot %d in use/index already disabled!\n",
496 slot);
497 return err;
498 }
499 return count;
500}
501
502#define STORE_CACHE_DISABLE(slot) \
503static ssize_t \
504store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
505 const char *buf, size_t count, \
506 unsigned int cpu) \
507{ \
508 return store_cache_disable(this_leaf, buf, count, slot); \
509}
510STORE_CACHE_DISABLE(0)
511STORE_CACHE_DISABLE(1)
512
513static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
514 show_cache_disable_0, store_cache_disable_0);
515static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
516 show_cache_disable_1, store_cache_disable_1);
517
518static ssize_t
519show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
520{
521 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
522 return -EINVAL;
523
524 return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
525}
526
527static ssize_t
528store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
529 unsigned int cpu)
530{
531 unsigned long val;
532
533 if (!capable(CAP_SYS_ADMIN))
534 return -EPERM;
535
536 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
537 return -EINVAL;
538
539 if (strict_strtoul(buf, 16, &val) < 0)
540 return -EINVAL;
541
542 if (amd_set_subcaches(cpu, val))
543 return -EINVAL;
544
545 return count;
546}
547
548static struct _cache_attr subcaches =
549 __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
550
551#else /* CONFIG_AMD_NB */
552#define amd_init_l3_cache(x, y)
553#endif /* CONFIG_AMD_NB */
554
555static int
556__cpuinit cpuid4_cache_lookup_regs(int index,
557 struct _cpuid4_info_regs *this_leaf)
558{
559 union _cpuid4_leaf_eax eax;
560 union _cpuid4_leaf_ebx ebx;
561 union _cpuid4_leaf_ecx ecx;
562 unsigned edx;
563
564 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
565 amd_cpuid4(index, &eax, &ebx, &ecx);
566 amd_init_l3_cache(this_leaf, index);
567 } else {
568 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
569 }
570
571 if (eax.split.type == CACHE_TYPE_NULL)
572 return -EIO; /* better error ? */
573
574 this_leaf->eax = eax;
575 this_leaf->ebx = ebx;
576 this_leaf->ecx = ecx;
577 this_leaf->size = (ecx.split.number_of_sets + 1) *
578 (ebx.split.coherency_line_size + 1) *
579 (ebx.split.physical_line_partition + 1) *
580 (ebx.split.ways_of_associativity + 1);
581 return 0;
582}
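/*
 * Sanity check (illustrative): plugging the example leaf from the
 * amd_cpuid4() comment above back into the size formula gives
 *
 *	(511 + 1) * (63 + 1) * (0 + 1) * (15 + 1) = 524288 bytes = 512 KB
 *
 * which is the value later reported (divided by 1024) through sysfs and
 * used by init_intel_cacheinfo().
 */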
583
584static int __cpuinit find_num_cache_leaves(void)
585{
586 unsigned int eax, ebx, ecx, edx;
587 union _cpuid4_leaf_eax cache_eax;
588 int i = -1;
589
590 do {
591 ++i;
592 /* Do cpuid(4) loop to find out num_cache_leaves */
593 cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
594 cache_eax.full = eax;
595 } while (cache_eax.split.type != CACHE_TYPE_NULL);
596 return i;
597}
598
599unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
600{
601 /* Cache sizes */
602 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
603 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
604 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
605 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
606#ifdef CONFIG_X86_HT
607 unsigned int cpu = c->cpu_index;
608#endif
609
610 if (c->cpuid_level > 3) {
611 static int is_initialized;
612
613 if (is_initialized == 0) {
614 /* Init num_cache_leaves from boot CPU */
615 num_cache_leaves = find_num_cache_leaves();
616 is_initialized++;
617 }
618
619 /*
620 * Whenever possible use cpuid(4), deterministic cache
621 * parameters cpuid leaf to find the cache details
622 */
623 for (i = 0; i < num_cache_leaves; i++) {
624 struct _cpuid4_info_regs this_leaf;
625 int retval;
626
627 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
628 if (retval >= 0) {
629 switch (this_leaf.eax.split.level) {
630 case 1:
631 if (this_leaf.eax.split.type ==
632 CACHE_TYPE_DATA)
633 new_l1d = this_leaf.size/1024;
634 else if (this_leaf.eax.split.type ==
635 CACHE_TYPE_INST)
636 new_l1i = this_leaf.size/1024;
637 break;
638 case 2:
639 new_l2 = this_leaf.size/1024;
640 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
641 index_msb = get_count_order(num_threads_sharing);
642 l2_id = c->apicid >> index_msb;
643 break;
644 case 3:
645 new_l3 = this_leaf.size/1024;
646 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
647 index_msb = get_count_order(
648 num_threads_sharing);
649 l3_id = c->apicid >> index_msb;
650 break;
651 default:
652 break;
653 }
654 }
655 }
656 }
657 /*
658 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
659 * trace cache
660 */
661 if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
662 /* supports eax=2 call */
663 int j, n;
664 unsigned int regs[4];
665 unsigned char *dp = (unsigned char *)regs;
666 int only_trace = 0;
667
668 if (num_cache_leaves != 0 && c->x86 == 15)
669 only_trace = 1;
670
671 /* Number of times to iterate */
672 n = cpuid_eax(2) & 0xFF;
673
674 for (i = 0 ; i < n ; i++) {
675		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
676
677 /* If bit 31 is set, this is an unknown format */
678 for (j = 0 ; j < 3 ; j++)
679 if (regs[j] & (1 << 31))
680 regs[j] = 0;
681
682 /* Byte 0 is level count, not a descriptor */
683 for (j = 1 ; j < 16 ; j++) {
684 unsigned char des = dp[j];
685 unsigned char k = 0;
686
687 /* look up this descriptor in the table */
688 while (cache_table[k].descriptor != 0) {
689 if (cache_table[k].descriptor == des) {
690 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
691 break;
692 switch (cache_table[k].cache_type) {
693 case LVL_1_INST:
694 l1i += cache_table[k].size;
695 break;
696 case LVL_1_DATA:
697 l1d += cache_table[k].size;
698 break;
699 case LVL_2:
700 l2 += cache_table[k].size;
701 break;
702 case LVL_3:
703 l3 += cache_table[k].size;
704 break;
705 case LVL_TRACE:
706 trace += cache_table[k].size;
707 break;
708 }
709
710 break;
711 }
712
713 k++;
714 }
715 }
716 }
717 }
718
719 if (new_l1d)
720 l1d = new_l1d;
721
722 if (new_l1i)
723 l1i = new_l1i;
724
725 if (new_l2) {
726 l2 = new_l2;
727#ifdef CONFIG_X86_HT
728 per_cpu(cpu_llc_id, cpu) = l2_id;
729#endif
730 }
731
732 if (new_l3) {
733 l3 = new_l3;
734#ifdef CONFIG_X86_HT
735 per_cpu(cpu_llc_id, cpu) = l3_id;
736#endif
737 }
738
739 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
740
741 return l2;
742}
743
744#ifdef CONFIG_SYSFS
745
746/* pointer to _cpuid4_info array (for each cache leaf) */
747static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
748#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
749
750#ifdef CONFIG_SMP
751static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
752{
753 struct _cpuid4_info *this_leaf, *sibling_leaf;
754 unsigned long num_threads_sharing;
755 int index_msb, i, sibling;
756 struct cpuinfo_x86 *c = &cpu_data(cpu);
757
758 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
759 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
760 if (!per_cpu(ici_cpuid4_info, i))
761 continue;
762 this_leaf = CPUID4_INFO_IDX(i, index);
763 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
764 if (!cpu_online(sibling))
765 continue;
766 set_bit(sibling, this_leaf->shared_cpu_map);
767 }
768 }
769 return;
770 }
771 this_leaf = CPUID4_INFO_IDX(cpu, index);
772 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
773
774 if (num_threads_sharing == 1)
775 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
776 else {
777 index_msb = get_count_order(num_threads_sharing);
778
779 for_each_online_cpu(i) {
780 if (cpu_data(i).apicid >> index_msb ==
781 c->apicid >> index_msb) {
782 cpumask_set_cpu(i,
783 to_cpumask(this_leaf->shared_cpu_map));
784 if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
785 sibling_leaf =
786 CPUID4_INFO_IDX(i, index);
787 cpumask_set_cpu(cpu, to_cpumask(
788 sibling_leaf->shared_cpu_map));
789 }
790 }
791 }
792 }
793}
794static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
795{
796 struct _cpuid4_info *this_leaf, *sibling_leaf;
797 int sibling;
798
799 this_leaf = CPUID4_INFO_IDX(cpu, index);
800 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
801 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
802 cpumask_clear_cpu(cpu,
803 to_cpumask(sibling_leaf->shared_cpu_map));
804 }
805}
806#else
807static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
808{
809}
810
811static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
812{
813}
814#endif
815
816static void __cpuinit free_cache_attributes(unsigned int cpu)
817{
818 int i;
819
820 for (i = 0; i < num_cache_leaves; i++)
821 cache_remove_shared_cpu_map(cpu, i);
822
823 kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
824 kfree(per_cpu(ici_cpuid4_info, cpu));
825 per_cpu(ici_cpuid4_info, cpu) = NULL;
826}
827
828static int
829__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
830{
831 struct _cpuid4_info_regs *leaf_regs =
832 (struct _cpuid4_info_regs *)this_leaf;
833
834 return cpuid4_cache_lookup_regs(index, leaf_regs);
835}
836
837static void __cpuinit get_cpu_leaves(void *_retval)
838{
839 int j, *retval = _retval, cpu = smp_processor_id();
840
841 /* Do cpuid and store the results */
842 for (j = 0; j < num_cache_leaves; j++) {
843 struct _cpuid4_info *this_leaf;
844 this_leaf = CPUID4_INFO_IDX(cpu, j);
845 *retval = cpuid4_cache_lookup(j, this_leaf);
846 if (unlikely(*retval < 0)) {
847 int i;
848
849 for (i = 0; i < j; i++)
850 cache_remove_shared_cpu_map(cpu, i);
851 break;
852 }
853 cache_shared_cpu_map_setup(cpu, j);
854 }
855}
856
857static int __cpuinit detect_cache_attributes(unsigned int cpu)
858{
859 int retval;
860
861 if (num_cache_leaves == 0)
862 return -ENOENT;
863
864 per_cpu(ici_cpuid4_info, cpu) = kzalloc(
865 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
866 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
867 return -ENOMEM;
868
869 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
870 if (retval) {
871 kfree(per_cpu(ici_cpuid4_info, cpu));
872 per_cpu(ici_cpuid4_info, cpu) = NULL;
873 }
874
875 return retval;
876}
877
878#include <linux/kobject.h>
879#include <linux/sysfs.h>
880
881extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
882
883/* pointer to kobject for cpuX/cache */
884static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
885
886struct _index_kobject {
887 struct kobject kobj;
888 unsigned int cpu;
889 unsigned short index;
890};
891
892/* pointer to array of kobjects for cpuX/cache/indexY */
893static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
894#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
895
896#define show_one_plus(file_name, object, val) \
897static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
898 unsigned int cpu) \
899{ \
900 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
901}
902
903show_one_plus(level, eax.split.level, 0);
904show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
905show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
906show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
907show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
908
909static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
910 unsigned int cpu)
911{
912 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
913}
914
915static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
916 int type, char *buf)
917{
918 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
919 int n = 0;
920
921 if (len > 1) {
922 const struct cpumask *mask;
923
924 mask = to_cpumask(this_leaf->shared_cpu_map);
925 n = type ?
926 cpulist_scnprintf(buf, len-2, mask) :
927 cpumask_scnprintf(buf, len-2, mask);
928 buf[n++] = '\n';
929 buf[n] = '\0';
930 }
931 return n;
932}
933
934static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
935 unsigned int cpu)
936{
937 return show_shared_cpu_map_func(leaf, 0, buf);
938}
939
940static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
941 unsigned int cpu)
942{
943 return show_shared_cpu_map_func(leaf, 1, buf);
944}
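/*
 * Illustrative output only (exact padding depends on NR_CPUS): if CPUs 0
 * and 1 share this cache leaf,
 *
 *	shared_cpu_map:  00000003	(hex bitmask)
 *	shared_cpu_list: 0-1		(human-readable range)
 */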
945
946static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
947 unsigned int cpu)
948{
949 switch (this_leaf->eax.split.type) {
950 case CACHE_TYPE_DATA:
951 return sprintf(buf, "Data\n");
952 case CACHE_TYPE_INST:
953 return sprintf(buf, "Instruction\n");
954 case CACHE_TYPE_UNIFIED:
955 return sprintf(buf, "Unified\n");
956 default:
957 return sprintf(buf, "Unknown\n");
958 }
959}
960
961#define to_object(k) container_of(k, struct _index_kobject, kobj)
962#define to_attr(a) container_of(a, struct _cache_attr, attr)
963
964#define define_one_ro(_name) \
965static struct _cache_attr _name = \
966 __ATTR(_name, 0444, show_##_name, NULL)
967
968define_one_ro(level);
969define_one_ro(type);
970define_one_ro(coherency_line_size);
971define_one_ro(physical_line_partition);
972define_one_ro(ways_of_associativity);
973define_one_ro(number_of_sets);
974define_one_ro(size);
975define_one_ro(shared_cpu_map);
976define_one_ro(shared_cpu_list);
977
978static struct attribute *default_attrs[] = {
979 &type.attr,
980 &level.attr,
981 &coherency_line_size.attr,
982 &physical_line_partition.attr,
983 &ways_of_associativity.attr,
984 &number_of_sets.attr,
985 &size.attr,
986 &shared_cpu_map.attr,
987 &shared_cpu_list.attr,
988 NULL
989};
990
991#ifdef CONFIG_AMD_NB
992static struct attribute ** __cpuinit amd_l3_attrs(void)
993{
994 static struct attribute **attrs;
995 int n;
996
997 if (attrs)
998 return attrs;
999
1000 n = sizeof (default_attrs) / sizeof (struct attribute *);
1001
1002 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
1003 n += 2;
1004
1005 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1006 n += 1;
1007
1008 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1009 if (attrs == NULL)
1010 return attrs = default_attrs;
1011
1012 for (n = 0; default_attrs[n]; n++)
1013 attrs[n] = default_attrs[n];
1014
1015 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1016 attrs[n++] = &cache_disable_0.attr;
1017 attrs[n++] = &cache_disable_1.attr;
1018 }
1019
1020 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1021 attrs[n++] = &subcaches.attr;
1022
1023 return attrs;
1024}
1025#endif
1026
1027static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1028{
1029 struct _cache_attr *fattr = to_attr(attr);
1030 struct _index_kobject *this_leaf = to_object(kobj);
1031 ssize_t ret;
1032
1033 ret = fattr->show ?
1034 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1035 buf, this_leaf->cpu) :
1036 0;
1037 return ret;
1038}
1039
1040static ssize_t store(struct kobject *kobj, struct attribute *attr,
1041 const char *buf, size_t count)
1042{
1043 struct _cache_attr *fattr = to_attr(attr);
1044 struct _index_kobject *this_leaf = to_object(kobj);
1045 ssize_t ret;
1046
1047 ret = fattr->store ?
1048 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1049 buf, count, this_leaf->cpu) :
1050 0;
1051 return ret;
1052}
1053
1054static const struct sysfs_ops sysfs_ops = {
1055 .show = show,
1056 .store = store,
1057};
1058
1059static struct kobj_type ktype_cache = {
1060 .sysfs_ops = &sysfs_ops,
1061 .default_attrs = default_attrs,
1062};
1063
1064static struct kobj_type ktype_percpu_entry = {
1065 .sysfs_ops = &sysfs_ops,
1066};
1067
1068static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1069{
1070 kfree(per_cpu(ici_cache_kobject, cpu));
1071 kfree(per_cpu(ici_index_kobject, cpu));
1072 per_cpu(ici_cache_kobject, cpu) = NULL;
1073 per_cpu(ici_index_kobject, cpu) = NULL;
1074 free_cache_attributes(cpu);
1075}
1076
1077static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
1078{
1079 int err;
1080
1081 if (num_cache_leaves == 0)
1082 return -ENOENT;
1083
1084 err = detect_cache_attributes(cpu);
1085 if (err)
1086 return err;
1087
1088 /* Allocate all required memory */
1089 per_cpu(ici_cache_kobject, cpu) =
1090 kzalloc(sizeof(struct kobject), GFP_KERNEL);
1091 if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1092 goto err_out;
1093
1094 per_cpu(ici_index_kobject, cpu) = kzalloc(
1095 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1096 if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1097 goto err_out;
1098
1099 return 0;
1100
1101err_out:
1102 cpuid4_cache_sysfs_exit(cpu);
1103 return -ENOMEM;
1104}
1105
1106static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1107
1108/* Add/Remove cache interface for CPU device */
1109static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1110{
1111 unsigned int cpu = sys_dev->id;
1112 unsigned long i, j;
1113 struct _index_kobject *this_object;
1114 struct _cpuid4_info *this_leaf;
1115 int retval;
1116
1117 retval = cpuid4_cache_sysfs_init(cpu);
1118 if (unlikely(retval < 0))
1119 return retval;
1120
1121 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1122 &ktype_percpu_entry,
1123 &sys_dev->kobj, "%s", "cache");
1124 if (retval < 0) {
1125 cpuid4_cache_sysfs_exit(cpu);
1126 return retval;
1127 }
1128
1129 for (i = 0; i < num_cache_leaves; i++) {
1130 this_object = INDEX_KOBJECT_PTR(cpu, i);
1131 this_object->cpu = cpu;
1132 this_object->index = i;
1133
1134 this_leaf = CPUID4_INFO_IDX(cpu, i);
1135
1136 ktype_cache.default_attrs = default_attrs;
1137#ifdef CONFIG_AMD_NB
1138 if (this_leaf->l3)
1139 ktype_cache.default_attrs = amd_l3_attrs();
1140#endif
1141 retval = kobject_init_and_add(&(this_object->kobj),
1142 &ktype_cache,
1143 per_cpu(ici_cache_kobject, cpu),
1144 "index%1lu", i);
1145 if (unlikely(retval)) {
1146 for (j = 0; j < i; j++)
1147 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1148 kobject_put(per_cpu(ici_cache_kobject, cpu));
1149 cpuid4_cache_sysfs_exit(cpu);
1150 return retval;
1151 }
1152 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1153 }
1154 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1155
1156 kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1157 return 0;
1158}
1159
1160static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
1161{
1162 unsigned int cpu = sys_dev->id;
1163 unsigned long i;
1164
1165 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1166 return;
1167 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1168 return;
1169 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1170
1171 for (i = 0; i < num_cache_leaves; i++)
1172 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1173 kobject_put(per_cpu(ici_cache_kobject, cpu));
1174 cpuid4_cache_sysfs_exit(cpu);
1175}
1176
1177static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1178 unsigned long action, void *hcpu)
1179{
1180 unsigned int cpu = (unsigned long)hcpu;
1181 struct sys_device *sys_dev;
1182
1183 sys_dev = get_cpu_sysdev(cpu);
1184 switch (action) {
1185 case CPU_ONLINE:
1186 case CPU_ONLINE_FROZEN:
1187 cache_add_dev(sys_dev);
1188 break;
1189 case CPU_DEAD:
1190 case CPU_DEAD_FROZEN:
1191 cache_remove_dev(sys_dev);
1192 break;
1193 }
1194 return NOTIFY_OK;
1195}
1196
1197static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1198 .notifier_call = cacheinfo_cpu_callback,
1199};
1200
1201static int __cpuinit cache_sysfs_init(void)
1202{
1203 int i;
1204
1205 if (num_cache_leaves == 0)
1206 return 0;
1207
1208 for_each_online_cpu(i) {
1209 int err;
1210 struct sys_device *sys_dev = get_cpu_sysdev(i);
1211
1212 err = cache_add_dev(sys_dev);
1213 if (err)
1214 return err;
1215 }
1216 register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1217 return 0;
1218}
1219
1220device_initcall(cache_sysfs_init);
1221
1222#endif
1/*
2 * Routines to identify caches on Intel CPU.
3 *
4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */
9
10#include <linux/slab.h>
11#include <linux/cacheinfo.h>
12#include <linux/cpu.h>
13#include <linux/sched.h>
14#include <linux/sysfs.h>
15#include <linux/pci.h>
16
17#include <asm/cpufeature.h>
18#include <asm/amd_nb.h>
19#include <asm/smp.h>
20
21#define LVL_1_INST 1
22#define LVL_1_DATA 2
23#define LVL_2 3
24#define LVL_3 4
25#define LVL_TRACE 5
26
27struct _cache_table {
28 unsigned char descriptor;
29 char cache_type;
30 short size;
31};
32
33#define MB(x) ((x) * 1024)
34
35/* All the cache descriptor types we care about (no TLB
36   entries) */
37
38static const struct _cache_table cache_table[] =
39{
40 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
41 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
42 { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */
43 { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
44 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
45 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
46 { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */
47 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
48 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
49 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
50 { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
51 { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
52 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
53 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
54 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
55 { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
56 { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
57 { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
58 { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
59 { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
60 { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */
61 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
62 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
63 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
64 { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
65 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
66 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
67 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
68 { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */
69 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
70 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
71 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
72 { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
73 { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
74 { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
75 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
76 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
77 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
78 { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
79 { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
80 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
81 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
82 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
83 { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
84 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
85 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
86 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
87 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
88 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
89 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
90 { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */
91 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
92 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
93 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
94 { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
95 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
96 { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
97 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
98 { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
99 { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
100 { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
101 { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
102 { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
103 { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
104 { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
105 { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
106 { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
107 { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
108 { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
109 { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
110 { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
111 { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
112 { 0x00, 0, 0}
113};
114
115
116enum _cache_type {
117 CTYPE_NULL = 0,
118 CTYPE_DATA = 1,
119 CTYPE_INST = 2,
120 CTYPE_UNIFIED = 3
121};
122
123union _cpuid4_leaf_eax {
124 struct {
125 enum _cache_type type:5;
126 unsigned int level:3;
127 unsigned int is_self_initializing:1;
128 unsigned int is_fully_associative:1;
129 unsigned int reserved:4;
130 unsigned int num_threads_sharing:12;
131 unsigned int num_cores_on_die:6;
132 } split;
133 u32 full;
134};
135
136union _cpuid4_leaf_ebx {
137 struct {
138 unsigned int coherency_line_size:12;
139 unsigned int physical_line_partition:10;
140 unsigned int ways_of_associativity:10;
141 } split;
142 u32 full;
143};
144
145union _cpuid4_leaf_ecx {
146 struct {
147 unsigned int number_of_sets:32;
148 } split;
149 u32 full;
150};
151
152struct _cpuid4_info_regs {
153 union _cpuid4_leaf_eax eax;
154 union _cpuid4_leaf_ebx ebx;
155 union _cpuid4_leaf_ecx ecx;
156 unsigned long size;
157 struct amd_northbridge *nb;
158};
159
160static unsigned short num_cache_leaves;
161
162/* AMD doesn't have CPUID4. Emulate it here to report the same
163 information to the user. This makes some assumptions about the machine:
164   L2 not shared, no SMT etc., which is currently true on AMD CPUs.
165
166 In theory the TLBs could be reported as fake type (they are in "dummy").
167 Maybe later */
168union l1_cache {
169 struct {
170 unsigned line_size:8;
171 unsigned lines_per_tag:8;
172 unsigned assoc:8;
173 unsigned size_in_kb:8;
174 };
175 unsigned val;
176};
177
178union l2_cache {
179 struct {
180 unsigned line_size:8;
181 unsigned lines_per_tag:4;
182 unsigned assoc:4;
183 unsigned size_in_kb:16;
184 };
185 unsigned val;
186};
187
188union l3_cache {
189 struct {
190 unsigned line_size:8;
191 unsigned lines_per_tag:4;
192 unsigned assoc:4;
193 unsigned res:2;
194 unsigned size_encoded:14;
195 };
196 unsigned val;
197};
198
199static const unsigned short assocs[] = {
200 [1] = 1,
201 [2] = 2,
202 [4] = 4,
203 [6] = 8,
204 [8] = 16,
205 [0xa] = 32,
206 [0xb] = 48,
207 [0xc] = 64,
208 [0xd] = 96,
209 [0xe] = 128,
210 [0xf] = 0xffff /* fully associative - no way to show this currently */
211};
212
213static const unsigned char levels[] = { 1, 1, 2, 3 };
214static const unsigned char types[] = { 1, 2, 3, 3 };
215
216static const enum cache_type cache_type_map[] = {
217 [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
218 [CTYPE_DATA] = CACHE_TYPE_DATA,
219 [CTYPE_INST] = CACHE_TYPE_INST,
220 [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
221};
222
223static void
224amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
225 union _cpuid4_leaf_ebx *ebx,
226 union _cpuid4_leaf_ecx *ecx)
227{
228 unsigned dummy;
229 unsigned line_size, lines_per_tag, assoc, size_in_kb;
230 union l1_cache l1i, l1d;
231 union l2_cache l2;
232 union l3_cache l3;
233 union l1_cache *l1 = &l1d;
234
235 eax->full = 0;
236 ebx->full = 0;
237 ecx->full = 0;
238
239 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
240 cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
241
242 switch (leaf) {
243 case 1:
244		l1 = &l1i;	/* fall through */
245 case 0:
246 if (!l1->val)
247 return;
248 assoc = assocs[l1->assoc];
249 line_size = l1->line_size;
250 lines_per_tag = l1->lines_per_tag;
251 size_in_kb = l1->size_in_kb;
252 break;
253 case 2:
254 if (!l2.val)
255 return;
256 assoc = assocs[l2.assoc];
257 line_size = l2.line_size;
258 lines_per_tag = l2.lines_per_tag;
259 /* cpu_data has errata corrections for K7 applied */
260 size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
261 break;
262 case 3:
263 if (!l3.val)
264 return;
265 assoc = assocs[l3.assoc];
266 line_size = l3.line_size;
267 lines_per_tag = l3.lines_per_tag;
268 size_in_kb = l3.size_encoded * 512;
269 if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
270 size_in_kb = size_in_kb >> 1;
271 assoc = assoc >> 1;
272 }
273 break;
274 default:
275 return;
276 }
277
278 eax->split.is_self_initializing = 1;
279 eax->split.type = types[leaf];
280 eax->split.level = levels[leaf];
281 eax->split.num_threads_sharing = 0;
282 eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
283
284
285 if (assoc == 0xffff)
286 eax->split.is_fully_associative = 1;
287 ebx->split.coherency_line_size = line_size - 1;
288 ebx->split.ways_of_associativity = assoc - 1;
289 ebx->split.physical_line_partition = lines_per_tag - 1;
290 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
291 (ebx->split.ways_of_associativity + 1) - 1;
292}
293
294#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
295
296/*
297 * L3 cache descriptors
298 */
299static void amd_calc_l3_indices(struct amd_northbridge *nb)
300{
301 struct amd_l3_cache *l3 = &nb->l3_cache;
302 unsigned int sc0, sc1, sc2, sc3;
303 u32 val = 0;
304
305 pci_read_config_dword(nb->misc, 0x1C4, &val);
306
307 /* calculate subcache sizes */
308 l3->subcaches[0] = sc0 = !(val & BIT(0));
309 l3->subcaches[1] = sc1 = !(val & BIT(4));
310
311 if (boot_cpu_data.x86 == 0x15) {
312 l3->subcaches[0] = sc0 += !(val & BIT(1));
313 l3->subcaches[1] = sc1 += !(val & BIT(5));
314 }
315
316 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
317 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
318
319 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
320}
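/*
 * Worked example (illustrative): on a family 0x15 part with none of the
 * subcache-disable bits set in the 0x1C4 register read above, each of
 * sc0..sc3 evaluates to 2, so
 *
 *	l3->indices = (2 << 10) - 1 = 2047
 *
 * which is the upper bound later enforced by amd_set_l3_disable_slot().
 */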
321
322/*
323 * check whether a slot used for disabling an L3 index is occupied.
324 * @nb: northbridge descriptor for the node containing the L3 cache
325 * @slot: slot number (0..1)
326 *
327 * @returns: the disabled index if used or a negative value if the slot is free.
328 */
329static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
330{
331 unsigned int reg = 0;
332
333	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
334
335 /* check whether this slot is activated already */
336 if (reg & (3UL << 30))
337 return reg & 0xfff;
338
339 return -1;
340}
341
342static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
343 unsigned int slot)
344{
345 int index;
346 struct amd_northbridge *nb = this_leaf->priv;
347
348 index = amd_get_l3_disable_slot(nb, slot);
349 if (index >= 0)
350 return sprintf(buf, "%d\n", index);
351
352 return sprintf(buf, "FREE\n");
353}
354
355#define SHOW_CACHE_DISABLE(slot) \
356static ssize_t \
357cache_disable_##slot##_show(struct device *dev, \
358 struct device_attribute *attr, char *buf) \
359{ \
360 struct cacheinfo *this_leaf = dev_get_drvdata(dev); \
361 return show_cache_disable(this_leaf, buf, slot); \
362}
363SHOW_CACHE_DISABLE(0)
364SHOW_CACHE_DISABLE(1)
365
366static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
367 unsigned slot, unsigned long idx)
368{
369 int i;
370
371 idx |= BIT(30);
372
373 /*
374 * disable index in all 4 subcaches
375 */
376 for (i = 0; i < 4; i++) {
377 u32 reg = idx | (i << 20);
378
379 if (!nb->l3_cache.subcaches[i])
380 continue;
381
382 pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
383
384 /*
385		 * We need to WBINVD on a core on the node containing the L3
386		 * cache whose indices we disable; a simple wbinvd()
387		 * is therefore not sufficient.
388 */
389 wbinvd_on_cpu(cpu);
390
391 reg |= BIT(31);
392 pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
393 }
394}
395
396/*
397 * disable an L3 cache index by using a disable-slot
398 *
399 * @nb: northbridge descriptor for the node containing the L3 cache
400 * @cpu: A CPU on the node containing the L3 cache
401 * @slot: slot number (0..1)
402 * @index: index to disable
403 *
404 * @return: 0 on success, error status on failure
405 */
406static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
407 unsigned slot, unsigned long index)
408{
409 int ret = 0;
410
411 /* check if @slot is already used or the index is already disabled */
412 ret = amd_get_l3_disable_slot(nb, slot);
413 if (ret >= 0)
414 return -EEXIST;
415
416 if (index > nb->l3_cache.indices)
417 return -EINVAL;
418
419 /* check whether the other slot has disabled the same index already */
420 if (index == amd_get_l3_disable_slot(nb, !slot))
421 return -EEXIST;
422
423 amd_l3_disable_index(nb, cpu, slot, index);
424
425 return 0;
426}
427
428static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
429 const char *buf, size_t count,
430 unsigned int slot)
431{
432 unsigned long val = 0;
433 int cpu, err = 0;
434 struct amd_northbridge *nb = this_leaf->priv;
435
436 if (!capable(CAP_SYS_ADMIN))
437 return -EPERM;
438
439 cpu = cpumask_first(&this_leaf->shared_cpu_map);
440
441 if (kstrtoul(buf, 10, &val) < 0)
442 return -EINVAL;
443
444 err = amd_set_l3_disable_slot(nb, cpu, slot, val);
445 if (err) {
446 if (err == -EEXIST)
447 pr_warn("L3 slot %d in use/index already disabled!\n",
448 slot);
449 return err;
450 }
451 return count;
452}
453
454#define STORE_CACHE_DISABLE(slot) \
455static ssize_t \
456cache_disable_##slot##_store(struct device *dev, \
457 struct device_attribute *attr, \
458 const char *buf, size_t count) \
459{ \
460 struct cacheinfo *this_leaf = dev_get_drvdata(dev); \
461 return store_cache_disable(this_leaf, buf, count, slot); \
462}
463STORE_CACHE_DISABLE(0)
464STORE_CACHE_DISABLE(1)
465
466static ssize_t subcaches_show(struct device *dev,
467 struct device_attribute *attr, char *buf)
468{
469 struct cacheinfo *this_leaf = dev_get_drvdata(dev);
470 int cpu = cpumask_first(&this_leaf->shared_cpu_map);
471
472 return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
473}
474
475static ssize_t subcaches_store(struct device *dev,
476 struct device_attribute *attr,
477 const char *buf, size_t count)
478{
479 struct cacheinfo *this_leaf = dev_get_drvdata(dev);
480 int cpu = cpumask_first(&this_leaf->shared_cpu_map);
481 unsigned long val;
482
483 if (!capable(CAP_SYS_ADMIN))
484 return -EPERM;
485
486 if (kstrtoul(buf, 16, &val) < 0)
487 return -EINVAL;
488
489 if (amd_set_subcaches(cpu, val))
490 return -EINVAL;
491
492 return count;
493}
494
495static DEVICE_ATTR_RW(cache_disable_0);
496static DEVICE_ATTR_RW(cache_disable_1);
497static DEVICE_ATTR_RW(subcaches);
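/*
 * Hedged usage sketch, assuming the standard cacheinfo sysfs layout
 * (/sys/devices/system/cpu/cpuX/cache/index3/) for the L3 leaf:
 *
 *	# cat cache_disable_0		-> "FREE" or the disabled index
 *	# echo 42 > cache_disable_0	-> disable L3 index 42 via slot 0
 *	# echo 0x3 > subcaches		-> set the subcache mask to 0x3
 *
 * Writes need CAP_SYS_ADMIN and fail with -EEXIST/-EINVAL as implemented
 * above.
 */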
498
499static umode_t
500cache_private_attrs_is_visible(struct kobject *kobj,
501 struct attribute *attr, int unused)
502{
503 struct device *dev = kobj_to_dev(kobj);
504 struct cacheinfo *this_leaf = dev_get_drvdata(dev);
505 umode_t mode = attr->mode;
506
507 if (!this_leaf->priv)
508 return 0;
509
510 if ((attr == &dev_attr_subcaches.attr) &&
511 amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
512 return mode;
513
514 if ((attr == &dev_attr_cache_disable_0.attr ||
515 attr == &dev_attr_cache_disable_1.attr) &&
516 amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
517 return mode;
518
519 return 0;
520}
521
522static struct attribute_group cache_private_group = {
523 .is_visible = cache_private_attrs_is_visible,
524};
525
526static void init_amd_l3_attrs(void)
527{
528 int n = 1;
529 static struct attribute **amd_l3_attrs;
530
531 if (amd_l3_attrs) /* already initialized */
532 return;
533
534 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
535 n += 2;
536 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
537 n += 1;
538
539 amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
540 if (!amd_l3_attrs)
541 return;
542
543 n = 0;
544 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
545 amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
546 amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
547 }
548 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
549 amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
550
551 cache_private_group.attrs = amd_l3_attrs;
552}
553
554const struct attribute_group *
555cache_get_priv_group(struct cacheinfo *this_leaf)
556{
557 struct amd_northbridge *nb = this_leaf->priv;
558
559 if (this_leaf->level < 3 || !nb)
560 return NULL;
561
562 if (nb && nb->l3_cache.indices)
563 init_amd_l3_attrs();
564
565 return &cache_private_group;
566}
567
568static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
569{
570 int node;
571
572 /* only for L3, and not in virtualized environments */
573 if (index < 3)
574 return;
575
576 node = amd_get_nb_id(smp_processor_id());
577 this_leaf->nb = node_to_amd_nb(node);
578 if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
579 amd_calc_l3_indices(this_leaf->nb);
580}
581#else
582#define amd_init_l3_cache(x, y)
583#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
584
585static int
586cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
587{
588 union _cpuid4_leaf_eax eax;
589 union _cpuid4_leaf_ebx ebx;
590 union _cpuid4_leaf_ecx ecx;
591 unsigned edx;
592
593 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
594 if (boot_cpu_has(X86_FEATURE_TOPOEXT))
595 cpuid_count(0x8000001d, index, &eax.full,
596 &ebx.full, &ecx.full, &edx);
597 else
598 amd_cpuid4(index, &eax, &ebx, &ecx);
599 amd_init_l3_cache(this_leaf, index);
600 } else {
601 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
602 }
603
604 if (eax.split.type == CTYPE_NULL)
605 return -EIO; /* better error ? */
606
607 this_leaf->eax = eax;
608 this_leaf->ebx = ebx;
609 this_leaf->ecx = ecx;
610 this_leaf->size = (ecx.split.number_of_sets + 1) *
611 (ebx.split.coherency_line_size + 1) *
612 (ebx.split.physical_line_partition + 1) *
613 (ebx.split.ways_of_associativity + 1);
614 return 0;
615}
616
617static int find_num_cache_leaves(struct cpuinfo_x86 *c)
618{
619 unsigned int eax, ebx, ecx, edx, op;
620 union _cpuid4_leaf_eax cache_eax;
621 int i = -1;
622
623 if (c->x86_vendor == X86_VENDOR_AMD)
624 op = 0x8000001d;
625 else
626 op = 4;
627
628 do {
629 ++i;
630 /* Do cpuid(op) loop to find out num_cache_leaves */
631 cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
632 cache_eax.full = eax;
633 } while (cache_eax.split.type != CTYPE_NULL);
634 return i;
635}
636
637void init_amd_cacheinfo(struct cpuinfo_x86 *c)
638{
639
640 if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
641 num_cache_leaves = find_num_cache_leaves(c);
642 } else if (c->extended_cpuid_level >= 0x80000006) {
643 if (cpuid_edx(0x80000006) & 0xf000)
644 num_cache_leaves = 4;
645 else
646 num_cache_leaves = 3;
647 }
648}
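/*
 * Illustrative: without TOPOEXT the leaf count is inferred from CPUID
 * 0x80000006.  EDX[15:12] holds the L3 associativity code, so a non-zero
 * value in that field means an L3 exists and four leaves (L1I, L1D, L2,
 * L3) are emulated; otherwise only the first three are.
 */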
649
650unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
651{
652 /* Cache sizes */
653 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
654 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
655 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
656 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
657#ifdef CONFIG_SMP
658 unsigned int cpu = c->cpu_index;
659#endif
660
661 if (c->cpuid_level > 3) {
662 static int is_initialized;
663
664 if (is_initialized == 0) {
665 /* Init num_cache_leaves from boot CPU */
666 num_cache_leaves = find_num_cache_leaves(c);
667 is_initialized++;
668 }
669
670 /*
671 * Whenever possible use cpuid(4), deterministic cache
672 * parameters cpuid leaf to find the cache details
673 */
674 for (i = 0; i < num_cache_leaves; i++) {
675 struct _cpuid4_info_regs this_leaf = {};
676 int retval;
677
678 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
679 if (retval < 0)
680 continue;
681
682 switch (this_leaf.eax.split.level) {
683 case 1:
684 if (this_leaf.eax.split.type == CTYPE_DATA)
685 new_l1d = this_leaf.size/1024;
686 else if (this_leaf.eax.split.type == CTYPE_INST)
687 new_l1i = this_leaf.size/1024;
688 break;
689 case 2:
690 new_l2 = this_leaf.size/1024;
691 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
692 index_msb = get_count_order(num_threads_sharing);
693 l2_id = c->apicid & ~((1 << index_msb) - 1);
694 break;
695 case 3:
696 new_l3 = this_leaf.size/1024;
697 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
698 index_msb = get_count_order(num_threads_sharing);
699 l3_id = c->apicid & ~((1 << index_msb) - 1);
700 break;
701 default:
702 break;
703 }
704 }
705 }
706 /*
707 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
708 * trace cache
709 */
710 if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
711 /* supports eax=2 call */
712 int j, n;
713 unsigned int regs[4];
714 unsigned char *dp = (unsigned char *)regs;
715 int only_trace = 0;
716
717 if (num_cache_leaves != 0 && c->x86 == 15)
718 only_trace = 1;
719
720 /* Number of times to iterate */
721 n = cpuid_eax(2) & 0xFF;
722
723 for (i = 0 ; i < n ; i++) {
724		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
725
726 /* If bit 31 is set, this is an unknown format */
727 for (j = 0 ; j < 3 ; j++)
728 if (regs[j] & (1 << 31))
729 regs[j] = 0;
730
731 /* Byte 0 is level count, not a descriptor */
732 for (j = 1 ; j < 16 ; j++) {
733 unsigned char des = dp[j];
734 unsigned char k = 0;
735
736 /* look up this descriptor in the table */
737 while (cache_table[k].descriptor != 0) {
738 if (cache_table[k].descriptor == des) {
739 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
740 break;
741 switch (cache_table[k].cache_type) {
742 case LVL_1_INST:
743 l1i += cache_table[k].size;
744 break;
745 case LVL_1_DATA:
746 l1d += cache_table[k].size;
747 break;
748 case LVL_2:
749 l2 += cache_table[k].size;
750 break;
751 case LVL_3:
752 l3 += cache_table[k].size;
753 break;
754 case LVL_TRACE:
755 trace += cache_table[k].size;
756 break;
757 }
758
759 break;
760 }
761
762 k++;
763 }
764 }
765 }
766 }
767
768 if (new_l1d)
769 l1d = new_l1d;
770
771 if (new_l1i)
772 l1i = new_l1i;
773
774 if (new_l2) {
775 l2 = new_l2;
776#ifdef CONFIG_SMP
777 per_cpu(cpu_llc_id, cpu) = l2_id;
778#endif
779 }
780
781 if (new_l3) {
782 l3 = new_l3;
783#ifdef CONFIG_SMP
784 per_cpu(cpu_llc_id, cpu) = l3_id;
785#endif
786 }
787
788#ifdef CONFIG_SMP
789 /*
790 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
791 * turns means that the only possibility is SMT (as indicated in
792 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
793 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
794 * c->phys_proc_id.
795 */
796 if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
797 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
798#endif
799
800 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
801
802 return l2;
803}
804
805static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
806 struct _cpuid4_info_regs *base)
807{
808 struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
809 struct cacheinfo *this_leaf;
810 int i, sibling;
811
812 if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
813 unsigned int apicid, nshared, first, last;
814
815 this_leaf = this_cpu_ci->info_list + index;
816 nshared = base->eax.split.num_threads_sharing + 1;
817 apicid = cpu_data(cpu).apicid;
818 first = apicid - (apicid % nshared);
819 last = first + nshared - 1;
820
821 for_each_online_cpu(i) {
822 this_cpu_ci = get_cpu_cacheinfo(i);
823 if (!this_cpu_ci->info_list)
824 continue;
825
826 apicid = cpu_data(i).apicid;
827 if ((apicid < first) || (apicid > last))
828 continue;
829
830 this_leaf = this_cpu_ci->info_list + index;
831
832 for_each_online_cpu(sibling) {
833 apicid = cpu_data(sibling).apicid;
834 if ((apicid < first) || (apicid > last))
835 continue;
836 cpumask_set_cpu(sibling,
837 &this_leaf->shared_cpu_map);
838 }
839 }
840 } else if (index == 3) {
841 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
842 this_cpu_ci = get_cpu_cacheinfo(i);
843 if (!this_cpu_ci->info_list)
844 continue;
845 this_leaf = this_cpu_ci->info_list + index;
846 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
847 if (!cpu_online(sibling))
848 continue;
849 cpumask_set_cpu(sibling,
850 &this_leaf->shared_cpu_map);
851 }
852 }
853 } else
854 return 0;
855
856 return 1;
857}
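/*
 * Worked example (illustrative): with TOPOEXT, if a leaf reports
 * num_threads_sharing = 7 (nshared = 8) and the current CPU has
 * apicid 21, then
 *
 *	first = 21 - (21 % 8) = 16,  last = 16 + 8 - 1 = 23
 *
 * so every online CPU whose apicid falls within [16, 23] is added to the
 * leaf's shared_cpu_map.
 */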
858
859static void __cache_cpumap_setup(unsigned int cpu, int index,
860 struct _cpuid4_info_regs *base)
861{
862 struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
863 struct cacheinfo *this_leaf, *sibling_leaf;
864 unsigned long num_threads_sharing;
865 int index_msb, i;
866 struct cpuinfo_x86 *c = &cpu_data(cpu);
867
868 if (c->x86_vendor == X86_VENDOR_AMD) {
869 if (__cache_amd_cpumap_setup(cpu, index, base))
870 return;
871 }
872
873 this_leaf = this_cpu_ci->info_list + index;
874 num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
875
876 cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
877 if (num_threads_sharing == 1)
878 return;
879
880 index_msb = get_count_order(num_threads_sharing);
881
882 for_each_online_cpu(i)
883 if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
884 struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
885
886 if (i == cpu || !sib_cpu_ci->info_list)
887 continue;/* skip if itself or no cacheinfo */
888 sibling_leaf = sib_cpu_ci->info_list + index;
889 cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
890 cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
891 }
892}
893
894static void ci_leaf_init(struct cacheinfo *this_leaf,
895 struct _cpuid4_info_regs *base)
896{
897 this_leaf->level = base->eax.split.level;
898 this_leaf->type = cache_type_map[base->eax.split.type];
899 this_leaf->coherency_line_size =
900 base->ebx.split.coherency_line_size + 1;
901 this_leaf->ways_of_associativity =
902 base->ebx.split.ways_of_associativity + 1;
903 this_leaf->size = base->size;
904 this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
905 this_leaf->physical_line_partition =
906 base->ebx.split.physical_line_partition + 1;
907 this_leaf->priv = base->nb;
908}
909
910static int __init_cache_level(unsigned int cpu)
911{
912 struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
913
914 if (!num_cache_leaves)
915 return -ENOENT;
916 if (!this_cpu_ci)
917 return -EINVAL;
918 this_cpu_ci->num_levels = 3;
919 this_cpu_ci->num_leaves = num_cache_leaves;
920 return 0;
921}
922
923static int __populate_cache_leaves(unsigned int cpu)
924{
925 unsigned int idx, ret;
926 struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
927 struct cacheinfo *this_leaf = this_cpu_ci->info_list;
928 struct _cpuid4_info_regs id4_regs = {};
929
930 for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
931 ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
932 if (ret)
933 return ret;
934 ci_leaf_init(this_leaf++, &id4_regs);
935 __cache_cpumap_setup(cpu, idx, &id4_regs);
936 }
937 return 0;
938}
939
940DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
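/*
 * End-result sketch (illustrative, assuming the generic cacheinfo code in
 * drivers/base/cacheinfo.c): once populate_cache_leaves() has run, each
 * online CPU exposes entries like
 *
 *	/sys/devices/system/cpu/cpu0/cache/index0/{level,type,size,
 *		coherency_line_size,ways_of_associativity,number_of_sets,
 *		physical_line_partition,shared_cpu_map,shared_cpu_list}
 *
 * with index3 additionally carrying the AMD L3 attributes wired up via
 * cache_get_priv_group() above.
 */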
941DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)