/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/abs_addr.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define HPTE_LOCK_BIT 3

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

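/*
 * Issue a global tlbie for one page.  The segment size is shifted into
 * the operand at bit 8; for non-4K pages the per-size "penc" encoding
 * goes in at bit 12 and the L (large page) bit is set.  The
 * ASM_FTR_IFCLR() wrapper selects the ISA 2.06 tlbie encoding
 * (PPC_TLBIE) when CPU_FTR_ARCH_206 is set, and the plain tlbie form
 * otherwise.
 */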
static inline void __tlbie(unsigned long va, int psize, int ssize)
{
	unsigned int penc;

	/* clear top 16 bits, non SLS segment */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va &= ~0xffful;
		va |= ssize << 8;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}

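/*
 * Local (per-CPU) variant of the above.  The tlbiel instruction is
 * hand-encoded as ".long 0x7c000224", with RB shifted into bit 11 and
 * the L field into bit 21, presumably so the file still assembles with
 * toolchains that do not know the tlbiel mnemonic.
 */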
static inline void __tlbiel(unsigned long va, int psize, int ssize)
{
	unsigned int penc;

	/* clear top 16 bits, non SLS segment */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va &= ~0xffful;
		va |= ssize << 8;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		va |= 1; /* L */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}
}

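/*
 * Flush one page from the TLB, choosing between the local and global
 * forms.  tlbiel is only used when the caller asked for a local flush,
 * the CPU has MMU_FTR_TLBIEL and the page size supports it.  Global
 * tlbie is serialised with native_tlbie_lock unless the MMU is marked
 * MMU_FTR_LOCKLESS_TLBIE, since some hardware cannot take tlbies from
 * two processors at once.
 */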
static inline void tlbie(unsigned long va, int psize, int ssize, int local)
{
	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(va, psize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(va, psize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}

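/*
 * Each HPTE is locked via a software bit (HPTE_LOCK_BIT, bit 3 of the
 * first doubleword).  test_and_set_bit_lock() gives acquire semantics;
 * the inner test_bit() loop spins without writing the cacheline until
 * the bit looks free again.
 */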
static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = &hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		while (test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = &hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}

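/*
 * Insert a translation into the given PTEG.  Scan the eight slots for
 * an invalid entry, re-check it under the per-HPTE lock, then write
 * the second doubleword before the first so the valid bit only becomes
 * visible once the entry is complete (the eieio orders the two
 * stores).  Returns the slot number within the group, with bit 3 set
 * for a secondary-hash insert, or -1 if the group is full.
 */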
static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, va, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (!(hptep->v & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (!(hptep->v & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = hpte_r;
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = hpte_v;

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

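/*
 * Evict one entry from a full PTEG so the caller can retry an insert.
 * A pseudo-random starting slot is derived from the timebase, and the
 * first valid, non-bolted entry found is invalidated.  Returns the
 * slot offset within the group, or -1 if no suitable victim was found.
 */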
static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = hptep->v;

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

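/*
 * Update the protection (PP) and no-execute bits of the HPTE at @slot,
 * provided it still matches the expected VA, page size and segment
 * size.  Returns 0 on a hit and -1 on a miss; the TLB entry is flushed
 * in either case.
 */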
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long va, int psize, int ssize,
				 int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_v(va, psize, ssize);

	DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
		va, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
	}
	native_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too. */
	tlbie(va, psize, ssize, local);

	return ret;
}

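/*
 * Look up the slot of a bolted kernel mapping by searching the primary
 * PTEG for an entry matching the encoded VA.  Returns the global slot
 * number, or -1 if no match is found.
 */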
static long native_hpte_find(unsigned long va, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_v(va, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = hptep->v;

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vsid, va;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	va = hpt_va(ea, vsid, ssize);

	slot = native_hpte_find(va, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N));

	/* Ensure it is out of the tlb too. */
	tlbie(va, psize, ssize, 0);
}

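/*
 * Invalidate one HPTE and its TLB entry.  Clearing hptep->v both marks
 * the entry invalid and drops the per-HPTE lock; interrupts are
 * disabled around the lock/flush sequence.
 */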
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
				   int psize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);

	want_v = hpte_encode_v(va, psize, ssize);
	native_lock_hpte(hptep);
	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(va, psize, ssize, local);

	local_irq_restore(flags);
}

#define LP_SHIFT	12
#define LP_BITS		8
#define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)

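/*
 * Work out the page size, segment size and (as far as possible) the
 * virtual address described by a raw HPTE.  The page size comes from
 * the LP field in the second doubleword when HPTE_V_LARGE is set; for
 * page sizes below the 2^23 AVPN granularity the low VA bits are
 * recovered by reversing the hash function using the slot's PTEG
 * index.  Used by native_hpte_clear() below to feed __tlbie() at kexec
 * time.
 */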
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *ssize, unsigned long *va)
{
	unsigned long hpte_r = hpte->r;
	unsigned long hpte_v = hpte->v;
	unsigned long avpn;
	int i, size, shift, penc;

	if (!(hpte_v & HPTE_V_LARGE))
		size = MMU_PAGE_4K;
	else {
		for (i = 0; i < LP_BITS; i++) {
			if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
				break;
		}
		penc = LP_MASK(i+1) >> LP_SHIFT;
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* 4K pages are not represented by LP */
			if (size == MMU_PAGE_4K)
				continue;

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			if (penc == mmu_psize_defs[size].penc)
				break;
		}
	}

	/* This works for all page sizes, and for 256M and 1T segments */
	shift = mmu_psize_defs[size].shift;
	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;

	if (shift < 23) {
		unsigned long vpi, vsid, pteg;

		pteg = slot / HPTES_PER_GROUP;
		if (hpte_v & HPTE_V_SECONDARY)
			pteg = ~pteg;
		switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
		case MMU_SEGSIZE_256M:
			vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
			break;
		case MMU_SEGSIZE_1T:
			vsid = avpn >> 40;
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			break;
		default:
			avpn = vpi = size = 0;
		}
		avpn |= (vpi << mmu_psize_defs[size].shift);
	}

	*va = avpn;
	*psize = size;
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
}

/*
 * Clear all mappings on kexec.  All cpus are in real mode (or they will
 * be when they take an ISI), and we are the only one left.  We rely on
 * our kernel mapping being 0xC0's and the hardware ignoring those two
 * real bits.
 *
 * TODO: add batching support when enabled.  Remember, no dynamic memory
 * here, although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long slot, slots, flags;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v, va;
	unsigned long pteg_count;
	int psize, ssize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/* we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	raw_spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running, right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = hptep->v;

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &ssize, &va);
			hptep->v = 0;
			__tlbie(va, psize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	raw_spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush: we batch the tlbie's to avoid taking/releasing
 * the lock all the time.
 */
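/*
 * The flush runs in two passes: every HPTE named in the per-cpu
 * ppc64_tlb_batch is first invalidated under its own lock, then the
 * TLB entries are flushed in one go: tlbiel per page when the flush is
 * local and the page size allows it, otherwise tlbie while holding
 * native_tlbie_lock if the hardware needs it.
 */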
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long va, hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		va = batch->vaddr[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
			hash = hpt_hash(va, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_v(va, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			va = batch->vaddr[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize, va, index,
						    shift) {
				__tlbiel(va, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			va = batch->vaddr[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize, va, index,
						    shift) {
				__tlbie(va, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

#ifdef CONFIG_PPC_PSERIES
/* Disable TLB batching on nighthawk */
static inline int tlb_batching_enabled(void)
{
	struct device_node *root = of_find_node_by_path("/");
	int enabled = 1;

	if (root) {
		const char *model = of_get_property(root, "model", NULL);
		if (model && !strcmp(model, "IBM,9076-N81"))
			enabled = 0;
		of_node_put(root);
	}

	return enabled;
}
#else
static inline int tlb_batching_enabled(void)
{
	return 1;
}
#endif

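/*
 * Hook the native (bare-metal) hash-table operations into the machine
 * descriptor.  flush_hash_range is only installed when TLB batching is
 * not disabled for the platform (see tlb_batching_enabled() above).
 */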
void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	if (tlb_batching_enabled())
		ppc_md.flush_hash_range = native_flush_hash_range;
}