/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/string.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/fixmap.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <hv/hypervisor.h>
#include <arch/chip.h>

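/*
 * Everything below implements large-copy acceleration for chips whose
 * local cache is not kept coherent with remote homes: large copies
 * from remotely-cached pages are remapped and pulled through the
 * local cache one page at a time (see memcpy_multicache() below).
 */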
#if !CHIP_HAS_COHERENT_LOCAL_CACHE()

/* Defined in memcpy.S */
extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
extern unsigned long __copy_to_user_inatomic_asm(
	void __user *to, const void *from, unsigned long n);
extern unsigned long __copy_from_user_inatomic_asm(
	void *to, const void __user *from, unsigned long n);
extern unsigned long __copy_from_user_zeroing_asm(
	void *to, const void __user *from, unsigned long n);

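/*
 * Common signature shared by the assembly routines above.  The
 * user-copy variants return the number of bytes they could not copy;
 * __memcpy_asm() returns its destination pointer, which memcpy()
 * below casts back to void *.
 */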
typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);

/* Size above which to consider TLB games for performance */
#define LARGE_COPY_CUTOFF 2048

/* Communicate to the simulator what we are trying to do. */
#define sim_allow_multiple_caching(b) \
  __insn_mtspr(SPR_SIM_CONTROL, \
   SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
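/* (Real hardware is assumed to ignore writes to SPR_SIM_CONTROL.) */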

/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache.  (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
 */
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
	int cpu = get_cpu();
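	/* N.B.: get_cpu() also disables preemption until put_cpu() below. */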

	/*
	 * Disable interrupts so that we don't recurse into memcpy()
	 * in an interrupt handler, nor accidentally reference
	 * the PA of the source from an interrupt routine.  Also
	 * notify the simulator that we're playing games so we don't
	 * generate spurious coherency warnings.
	 */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/* Set up the new dest mapping */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/* Set up the new source mapping */
	type1 = kmap_atomic_idx_push();
	idx += (type0 - type1);
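	/* idx now refers to the fixmap slot used for the source mapping. */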
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Actually move the data. */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
	 * Remap the source as locally-cached and not OLOC'ed so that
	 * we can inval without also invaling the remote cpu's cache.
	 * This also avoids known errata with inv'ing cacheable oloc data.
	 */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*
	 * Do the actual invalidation, covering the full L2 cache line
	 * at the end since __memcpy_asm() is somewhat aggressive.
	 */
	__inv_buffer((void *)newsrc, len);

	/*
	 * We're done: notify the simulator that all is back to normal,
	 * and re-enable interrupts and pre-emption.
	 */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
	put_cpu();
}

/*
 * Identify large copies from remotely-cached memory, and copy them
 * via memcpy_multicache() if they look good, otherwise fall back
 * to the particular kind of copying passed as the memcpy_t function.
 */
static unsigned long fast_copy(void *dest, const void *source, int len,
			       memcpy_t func)
{
	/*
	 * Check if it's big enough to bother with.  We may end up doing a
	 * small copy via TLB manipulation if we're near a page boundary,
	 * but presumably we'll make it up when we hit the second page.
	 */
	while (len >= LARGE_COPY_CUTOFF) {
		int copy_size, bytes_left_on_page;
		pte_t *src_ptep, *dst_ptep;
		pte_t src_pte, dst_pte;
		struct page *src_page, *dst_page;

		/* Is the source page oloc'ed to a remote cpu? */
retry_source:
		src_ptep = virt_to_pte(current->mm, (unsigned long)source);
		if (src_ptep == NULL)
			break;
		src_pte = *src_ptep;
		if (!hv_pte_get_present(src_pte) ||
		    !hv_pte_get_readable(src_pte) ||
		    hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
			break;
		if (get_remote_cache_cpu(src_pte) == smp_processor_id())
			break;
		src_page = pfn_to_page(pte_pfn(src_pte));
		get_page(src_page);
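		/*
		 * With the page pinned, re-read the PTE; if it changed
		 * while we were taking the reference, drop the page and
		 * start over.
		 */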
		if (pte_val(src_pte) != pte_val(*src_ptep)) {
			put_page(src_page);
			goto retry_source;
		}
		if (pte_huge(src_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = pte_pfn(src_pte);
			pfn += (((unsigned long)source & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			src_pte = pfn_pte(pfn, src_pte);
			src_pte = pte_mksmall(src_pte);
		}

		/* Is the destination page writable? */
retry_dest:
		dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
		if (dst_ptep == NULL) {
			put_page(src_page);
			break;
		}
		dst_pte = *dst_ptep;
		if (!hv_pte_get_present(dst_pte) ||
		    !hv_pte_get_writable(dst_pte)) {
			put_page(src_page);
			break;
		}
		dst_page = pfn_to_page(pte_pfn(dst_pte));
		if (dst_page == src_page) {
			/*
			 * Source and dest are on the same page; this
			 * potentially exposes us to incoherence if any
			 * part of src and dest overlap on a cache line.
			 * Just give up rather than trying to be precise.
			 */
			put_page(src_page);
			break;
		}
		get_page(dst_page);
		if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
			put_page(dst_page);
			goto retry_dest;
		}
		if (pte_huge(dst_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = pte_pfn(dst_pte);
			pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			dst_pte = pfn_pte(pfn, dst_pte);
			dst_pte = pte_mksmall(dst_pte);
		}

		/* All looks good: create a cacheable PTE and copy from it */
		copy_size = len;
		bytes_left_on_page =
			PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
		bytes_left_on_page =
			PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
		memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);

		/* Release the pages */
		put_page(dst_page);
		put_page(src_page);

		/* Continue on the next page */
		dest += copy_size;
		source += copy_size;
		len -= copy_size;
	}

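	/* Copy whatever remains (or everything, if we bailed out above). */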
	return func(dest, source, len);
}

void *memcpy(void *to, const void *from, __kernel_size_t n)
{
	if (n < LARGE_COPY_CUTOFF)
		return (void *)__memcpy_asm(to, from, n);
	else
		return (void *)fast_copy(to, from, n, __memcpy_asm);
}

unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
				      unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_to_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
}

unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
					unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
}

unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
				       unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_zeroing_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
}

#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */