Linux Audio

Check our new training course

Open-source upstreaming

Need help getting support for your hardware into upstream Linux?
Loading...
v6.2
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Copyright IBM Corp. 2012
  4 *
  5 * Author(s):
  6 *   Jan Glauber <jang@linux.vnet.ibm.com>
  7 */
  8
  9#include <linux/kernel.h>
 10#include <linux/slab.h>
 11#include <linux/export.h>
 12#include <linux/iommu-helper.h>
 13#include <linux/dma-map-ops.h>
 14#include <linux/vmalloc.h>
 15#include <linux/pci.h>
 16#include <asm/pci_dma.h>
 17
 18static struct kmem_cache *dma_region_table_cache;	/* backs dma_alloc_cpu_table() */
 19static struct kmem_cache *dma_page_table_cache;	/* backs dma_alloc_page_table() */
 20static int s390_iommu_strict;	/* set to 1 by "s390_iommu=strict" */
 21static u64 s390_iommu_aperture;	/* computed in zpci_dma_init() */
 22static u32 s390_iommu_aperture_factor = 1;	/* "s390_iommu_aperture=" value */
 23
 24static int zpci_refresh_global(struct zpci_dev *zdev)
 25{
 26	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
 27				  zdev->iommu_pages * PAGE_SIZE);
 28}
 29
 30unsigned long *dma_alloc_cpu_table(void)
 31{
 32	unsigned long *table, *entry;
 33
 34	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
 35	if (!table)
 36		return NULL;
 37
 38	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
 39		*entry = ZPCI_TABLE_INVALID;
 40	return table;
 41}
 42
 43static void dma_free_cpu_table(void *table)
 44{
 45	kmem_cache_free(dma_region_table_cache, table);
 46}
 47
 48static unsigned long *dma_alloc_page_table(void)
 49{
 50	unsigned long *table, *entry;
 51
 52	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
 53	if (!table)
 54		return NULL;
 55
 56	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
 57		*entry = ZPCI_PTE_INVALID;
 58	return table;
 59}
 60
 61static void dma_free_page_table(void *table)
 62{
 63	kmem_cache_free(dma_page_table_cache, table);
 64}
 65
 66static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
 67{
 68	unsigned long old_rte, rte;
 69	unsigned long *sto;
 70
 71	rte = READ_ONCE(*rtep);
 72	if (reg_entry_isvalid(rte)) {
 73		sto = get_rt_sto(rte);
 74	} else {
 75		sto = dma_alloc_cpu_table();
 76		if (!sto)
 77			return NULL;
 78
 79		set_rt_sto(&rte, virt_to_phys(sto));
 80		validate_rt_entry(&rte);
 81		entry_clr_protected(&rte);
 82
 83		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
 84		if (old_rte != ZPCI_TABLE_INVALID) {
 85			/* Somone else was faster, use theirs */
 86			dma_free_cpu_table(sto);
 87			sto = get_rt_sto(old_rte);
 88		}
 89	}
 90	return sto;
 91}
 92
 93static unsigned long *dma_get_page_table_origin(unsigned long *step)
 94{
 95	unsigned long old_ste, ste;
 96	unsigned long *pto;
 97
 98	ste = READ_ONCE(*step);
 99	if (reg_entry_isvalid(ste)) {
100		pto = get_st_pto(ste);
101	} else {
102		pto = dma_alloc_page_table();
103		if (!pto)
104			return NULL;
105		set_st_pto(&ste, virt_to_phys(pto));
106		validate_st_entry(&ste);
107		entry_clr_protected(&ste);
108
109		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
110		if (old_ste != ZPCI_TABLE_INVALID) {
111			/* Somone else was faster, use theirs */
112			dma_free_page_table(pto);
113			pto = get_st_pto(old_ste);
114		}
115	}
116	return pto;
117}
118
119unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
120{
121	unsigned long *sto, *pto;
122	unsigned int rtx, sx, px;
123
124	rtx = calc_rtx(dma_addr);
125	sto = dma_get_seg_table_origin(&rto[rtx]);
126	if (!sto)
127		return NULL;
128
129	sx = calc_sx(dma_addr);
130	pto = dma_get_page_table_origin(&sto[sx]);
131	if (!pto)
132		return NULL;
133
134	px = calc_px(dma_addr);
135	return &pto[px];
136}
137
138void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
139{
140	unsigned long pte;
141
142	pte = READ_ONCE(*ptep);
143	if (flags & ZPCI_PTE_INVALID) {
144		invalidate_pt_entry(&pte);
145	} else {
146		set_pt_pfaa(&pte, page_addr);
147		validate_pt_entry(&pte);
148	}
149
150	if (flags & ZPCI_TABLE_PROTECTED)
151		entry_set_protected(&pte);
152	else
153		entry_clr_protected(&pte);
154
155	xchg(ptep, pte);
156}
157
158static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
159			      dma_addr_t dma_addr, size_t size, int flags)
160{
161	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
162	phys_addr_t page_addr = (pa & PAGE_MASK);
 
 
163	unsigned long *entry;
164	int i, rc = 0;
165
166	if (!nr_pages)
167		return -EINVAL;
168
169	if (!zdev->dma_table)
170		return -EINVAL;
 
 
 
171
172	for (i = 0; i < nr_pages; i++) {
173		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
174		if (!entry) {
175			rc = -ENOMEM;
176			goto undo_cpu_trans;
177		}
178		dma_update_cpu_trans(entry, page_addr, flags);
179		page_addr += PAGE_SIZE;
180		dma_addr += PAGE_SIZE;
181	}
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183undo_cpu_trans:
184	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
185		flags = ZPCI_PTE_INVALID;
186		while (i-- > 0) {
187			page_addr -= PAGE_SIZE;
188			dma_addr -= PAGE_SIZE;
189			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
190			if (!entry)
191				break;
192			dma_update_cpu_trans(entry, page_addr, flags);
193		}
194	}
195	return rc;
196}
197
198static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
199			   size_t size, int flags)
200{
201	unsigned long irqflags;
202	int ret;
203
204	/*
205	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
206	 * translations when previously invalid translation-table entries are
207	 * validated. With lazy unmap, rpcit is skipped for previously valid
208	 * entries, but a global rpcit is then required before any address can
209	 * be re-used, i.e. after each iommu bitmap wrap-around.
210	 */
211	if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
212		if (!zdev->tlb_refresh)
213			return 0;
214	} else {
215		if (!s390_iommu_strict)
216			return 0;
217	}
218
219	ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
220				 PAGE_ALIGN(size));
221	if (ret == -ENOMEM && !s390_iommu_strict) {
222		/* enable the hypervisor to free some resources */
223		if (zpci_refresh_global(zdev))
224			goto out;
225
226		spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
227		bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
228			      zdev->lazy_bitmap, zdev->iommu_pages);
229		bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
230		spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
231		ret = 0;
232	}
233out:
234	return ret;
235}
236
237static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
238			    dma_addr_t dma_addr, size_t size, int flags)
239{
240	int rc;
241
242	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
243	if (rc)
244		return rc;
245
246	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
247	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
248		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
249
 
 
250	return rc;
251}
252
253void dma_free_seg_table(unsigned long entry)
254{
255	unsigned long *sto = get_rt_sto(entry);
256	int sx;
257
258	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
259		if (reg_entry_isvalid(sto[sx]))
260			dma_free_page_table(get_st_pto(sto[sx]));
261
262	dma_free_cpu_table(sto);
263}
264
265void dma_cleanup_tables(unsigned long *table)
266{
267	int rtx;
268
269	if (!table)
270		return;
271
272	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
273		if (reg_entry_isvalid(table[rtx]))
274			dma_free_seg_table(table[rtx]);
275
276	dma_free_cpu_table(table);
277}
278
279static unsigned long __dma_alloc_iommu(struct device *dev,
280				       unsigned long start, int size)
281{
282	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
283
 
 
284	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
285				start, size, zdev->start_dma >> PAGE_SHIFT,
286				dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
287				0);
288}
289
290static dma_addr_t dma_alloc_address(struct device *dev, int size)
291{
292	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
293	unsigned long offset, flags;
 
294
295	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
296	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
297	if (offset == -1) {
298		if (!s390_iommu_strict) {
299			/* global flush before DMA addresses are reused */
300			if (zpci_refresh_global(zdev))
301				goto out_error;
302
303			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
304				      zdev->lazy_bitmap, zdev->iommu_pages);
305			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
306		}
307		/* wrap-around */
308		offset = __dma_alloc_iommu(dev, 0, size);
309		if (offset == -1)
310			goto out_error;
311	}
312	zdev->next_bit = offset + size;
313	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
314
315	return zdev->start_dma + offset * PAGE_SIZE;
316
317out_error:
 
 
 
318	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
319	return DMA_MAPPING_ERROR;
320}
321
322static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
323{
324	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
325	unsigned long flags, offset;
326
327	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
328
329	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
330	if (!zdev->iommu_bitmap)
331		goto out;
332
333	if (s390_iommu_strict)
334		bitmap_clear(zdev->iommu_bitmap, offset, size);
335	else
336		bitmap_set(zdev->lazy_bitmap, offset, size);
337
 
338out:
339	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
340}
341
342static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
343{
344	struct {
345		unsigned long rc;
346		unsigned long addr;
347	} __packed data = {rc, addr};
348
349	zpci_err_hex(&data, sizeof(data));
350}
351
352static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
353				     unsigned long offset, size_t size,
354				     enum dma_data_direction direction,
355				     unsigned long attrs)
356{
357	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
358	unsigned long pa = page_to_phys(page) + offset;
359	int flags = ZPCI_PTE_VALID;
360	unsigned long nr_pages;
361	dma_addr_t dma_addr;
362	int ret;
363
364	/* This rounds up number of pages based on size and offset */
365	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
366	dma_addr = dma_alloc_address(dev, nr_pages);
367	if (dma_addr == DMA_MAPPING_ERROR) {
368		ret = -ENOSPC;
369		goto out_err;
370	}
371
372	/* Use rounded up size */
373	size = nr_pages * PAGE_SIZE;
374
 
 
 
 
 
 
375	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
376		flags |= ZPCI_TABLE_PROTECTED;
377
378	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
379	if (ret)
380		goto out_free;
381
382	atomic64_add(nr_pages, &zdev->mapped_pages);
383	return dma_addr + (offset & ~PAGE_MASK);
384
385out_free:
386	dma_free_address(dev, dma_addr, nr_pages);
387out_err:
388	zpci_err("map error:\n");
389	zpci_err_dma(ret, pa);
390	return DMA_MAPPING_ERROR;
391}
392
393static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
394				 size_t size, enum dma_data_direction direction,
395				 unsigned long attrs)
396{
397	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
398	int npages, ret;
399
400	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
401	dma_addr = dma_addr & PAGE_MASK;
402	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
403			       ZPCI_PTE_INVALID);
404	if (ret) {
405		zpci_err("unmap error:\n");
406		zpci_err_dma(ret, dma_addr);
407		return;
408	}
409
410	atomic64_add(npages, &zdev->unmapped_pages);
411	dma_free_address(dev, dma_addr, npages);
 
412}
413
414static void *s390_dma_alloc(struct device *dev, size_t size,
415			    dma_addr_t *dma_handle, gfp_t flag,
416			    unsigned long attrs)
417{
418	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
419	struct page *page;
420	phys_addr_t pa;
421	dma_addr_t map;
422
423	size = PAGE_ALIGN(size);
424	page = alloc_pages(flag | __GFP_ZERO, get_order(size));
425	if (!page)
426		return NULL;
427
428	pa = page_to_phys(page);
429	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
 
 
430	if (dma_mapping_error(dev, map)) {
431		__free_pages(page, get_order(size));
432		return NULL;
433	}
434
435	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
436	if (dma_handle)
437		*dma_handle = map;
438	return phys_to_virt(pa);
439}
440
441static void s390_dma_free(struct device *dev, size_t size,
442			  void *vaddr, dma_addr_t dma_handle,
443			  unsigned long attrs)
444{
445	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
446
447	size = PAGE_ALIGN(size);
448	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
449	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
450	free_pages((unsigned long)vaddr, get_order(size));
451}
452
453/* Map a segment into a contiguous dma address area */
454static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
455			     size_t size, dma_addr_t *handle,
456			     enum dma_data_direction dir)
457{
458	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
459	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
460	dma_addr_t dma_addr_base, dma_addr;
461	int flags = ZPCI_PTE_VALID;
462	struct scatterlist *s;
463	phys_addr_t pa = 0;
464	int ret;
465
466	dma_addr_base = dma_alloc_address(dev, nr_pages);
467	if (dma_addr_base == DMA_MAPPING_ERROR)
468		return -ENOMEM;
469
470	dma_addr = dma_addr_base;
471	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
472		flags |= ZPCI_TABLE_PROTECTED;
473
474	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
475		pa = page_to_phys(sg_page(s));
476		ret = __dma_update_trans(zdev, pa, dma_addr,
477					 s->offset + s->length, flags);
478		if (ret)
 
 
 
479			goto unmap;
480
481		dma_addr += s->offset + s->length;
482	}
483	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
484	if (ret)
485		goto unmap;
486
487	*handle = dma_addr_base;
488	atomic64_add(nr_pages, &zdev->mapped_pages);
489
490	return ret;
491
492unmap:
493	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
494			 ZPCI_PTE_INVALID);
495	dma_free_address(dev, dma_addr_base, nr_pages);
496	zpci_err("map error:\n");
497	zpci_err_dma(ret, pa);
498	return ret;
499}
500
501static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
502			   int nr_elements, enum dma_data_direction dir,
503			   unsigned long attrs)
504{
505	struct scatterlist *s = sg, *start = sg, *dma = sg;
506	unsigned int max = dma_get_max_seg_size(dev);
507	unsigned int size = s->offset + s->length;
508	unsigned int offset = s->offset;
509	int count = 0, i, ret;
510
511	for (i = 1; i < nr_elements; i++) {
512		s = sg_next(s);
513
514		s->dma_length = 0;
515
516		if (s->offset || (size & ~PAGE_MASK) ||
517		    size + s->length > max) {
518			ret = __s390_dma_map_sg(dev, start, size,
519						&dma->dma_address, dir);
520			if (ret)
521				goto unmap;
522
523			dma->dma_address += offset;
524			dma->dma_length = size - offset;
525
526			size = offset = s->offset;
527			start = s;
528			dma = sg_next(dma);
529			count++;
530		}
531		size += s->length;
532	}
533	ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
534	if (ret)
535		goto unmap;
536
537	dma->dma_address += offset;
538	dma->dma_length = size - offset;
539
540	return count + 1;
541unmap:
542	for_each_sg(sg, s, count, i)
543		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
544				     dir, attrs);
545
546	return ret;
547}
548
549static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
550			      int nr_elements, enum dma_data_direction dir,
551			      unsigned long attrs)
552{
553	struct scatterlist *s;
554	int i;
555
556	for_each_sg(sg, s, nr_elements, i) {
557		if (s->dma_length)
558			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
559					     dir, attrs);
560		s->dma_address = 0;
561		s->dma_length = 0;
562	}
563}
564	
565int zpci_dma_init_device(struct zpci_dev *zdev)
566{
567	u8 status;
568	int rc;
569
570	/*
571	 * At this point, if the device is part of an IOMMU domain, this would
572	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
573	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
574	 */
575	WARN_ON(zdev->s390_domain);
576
577	spin_lock_init(&zdev->iommu_bitmap_lock);
 
578
579	zdev->dma_table = dma_alloc_cpu_table();
580	if (!zdev->dma_table) {
581		rc = -ENOMEM;
582		goto out;
583	}
584
585	/*
586	 * Restrict the iommu bitmap size to the minimum of the following:
587	 * - s390_iommu_aperture which defaults to high_memory
588	 * - 3-level pagetable address limit minus start_dma offset
589	 * - DMA address range allowed by the hardware (clp query pci fn)
590	 *
591	 * Also set zdev->end_dma to the actual end address of the usable
592	 * range, instead of the theoretical maximum as reported by hardware.
593	 *
594	 * This limits the number of concurrently usable DMA mappings since
595	 * for each DMA mapped memory address we need a DMA address including
596	 * extra DMA addresses for multiple mappings of the same memory address.
597	 */
598	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
599	zdev->iommu_size = min3(s390_iommu_aperture,
600				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
601				zdev->end_dma - zdev->start_dma + 1);
602	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
603	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
604	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
605	if (!zdev->iommu_bitmap) {
606		rc = -ENOMEM;
607		goto free_dma_table;
608	}
609	if (!s390_iommu_strict) {
610		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
611		if (!zdev->lazy_bitmap) {
612			rc = -ENOMEM;
613			goto free_bitmap;
614		}
615
616	}
617	if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
618			       virt_to_phys(zdev->dma_table), &status)) {
619		rc = -EIO;
620		goto free_bitmap;
621	}
622
623	return 0;
624free_bitmap:
625	vfree(zdev->iommu_bitmap);
626	zdev->iommu_bitmap = NULL;
627	vfree(zdev->lazy_bitmap);
628	zdev->lazy_bitmap = NULL;
629free_dma_table:
630	dma_free_cpu_table(zdev->dma_table);
631	zdev->dma_table = NULL;
632out:
633	return rc;
634}
635
636int zpci_dma_exit_device(struct zpci_dev *zdev)
637{
638	int cc = 0;
639
640	/*
641	 * At this point, if the device is part of an IOMMU domain, this would
642	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
643	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
644	 */
645	WARN_ON(zdev->s390_domain);
646	if (zdev_enabled(zdev))
647		cc = zpci_unregister_ioat(zdev, 0);
648	/*
649	 * cc == 3 indicates the function is gone already. This can happen
650	 * if the function was deconfigured/disabled suddenly and we have not
651	 * received a new handle yet.
652	 */
653	if (cc && cc != 3)
654		return -EIO;
655
 
656	dma_cleanup_tables(zdev->dma_table);
657	zdev->dma_table = NULL;
658	vfree(zdev->iommu_bitmap);
659	zdev->iommu_bitmap = NULL;
660	vfree(zdev->lazy_bitmap);
661	zdev->lazy_bitmap = NULL;
662	zdev->next_bit = 0;
663	return 0;
664}
665
666static int __init dma_alloc_cpu_table_caches(void)
667{
668	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
669					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
670					0, NULL);
671	if (!dma_region_table_cache)
672		return -ENOMEM;
673
674	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
675					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
676					0, NULL);
677	if (!dma_page_table_cache) {
678		kmem_cache_destroy(dma_region_table_cache);
679		return -ENOMEM;
680	}
681	return 0;
682}
683
684int __init zpci_dma_init(void)
685{
686	s390_iommu_aperture = (u64)virt_to_phys(high_memory);
687	if (!s390_iommu_aperture_factor)
688		s390_iommu_aperture = ULONG_MAX;
689	else
690		s390_iommu_aperture *= s390_iommu_aperture_factor;
691
692	return dma_alloc_cpu_table_caches();
693}
694
695void zpci_dma_exit(void)
696{
697	kmem_cache_destroy(dma_page_table_cache);
698	kmem_cache_destroy(dma_region_table_cache);
699}
700
701const struct dma_map_ops s390_pci_dma_ops = {
 
 
 
 
 
 
 
 
 
702	.alloc		= s390_dma_alloc,
703	.free		= s390_dma_free,
704	.map_sg		= s390_dma_map_sg,
705	.unmap_sg	= s390_dma_unmap_sg,
706	.map_page	= s390_dma_map_pages,
707	.unmap_page	= s390_dma_unmap_pages,
708	.mmap		= dma_common_mmap,
709	.get_sgtable	= dma_common_get_sgtable,
710	.alloc_pages	= dma_common_alloc_pages,
711	.free_pages	= dma_common_free_pages,
712	/* dma_supported is unconditionally true without a callback */
713};
714EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
715
716static int __init s390_iommu_setup(char *str)
717{
718	if (!strcmp(str, "strict"))
719		s390_iommu_strict = 1;
720	return 1;
721}
722
723__setup("s390_iommu=", s390_iommu_setup);
724
725static int __init s390_iommu_aperture_setup(char *str)
726{
727	if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
728		s390_iommu_aperture_factor = 1;
729	return 1;
730}
731
732__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
v4.6
 
  1/*
  2 * Copyright IBM Corp. 2012
  3 *
  4 * Author(s):
  5 *   Jan Glauber <jang@linux.vnet.ibm.com>
  6 */
  7
  8#include <linux/kernel.h>
  9#include <linux/slab.h>
 10#include <linux/export.h>
 11#include <linux/iommu-helper.h>
 12#include <linux/dma-mapping.h>
 13#include <linux/vmalloc.h>
 14#include <linux/pci.h>
 15#include <asm/pci_dma.h>
 16
 17static struct kmem_cache *dma_region_table_cache;	/* backs dma_alloc_cpu_table() */
 18static struct kmem_cache *dma_page_table_cache;	/* backs dma_alloc_page_table() */
 19static int s390_iommu_strict;	/* non-zero: no lazy unmap (see users below) */
 
 
 20
 21static int zpci_refresh_global(struct zpci_dev *zdev)
 22{
 23	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
 24				  zdev->iommu_pages * PAGE_SIZE);
 25}
 26
 27unsigned long *dma_alloc_cpu_table(void)
 28{
 29	unsigned long *table, *entry;
 30
 31	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
 32	if (!table)
 33		return NULL;
 34
 35	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
 36		*entry = ZPCI_TABLE_INVALID;
 37	return table;
 38}
 39
 40static void dma_free_cpu_table(void *table)
 41{
 42	kmem_cache_free(dma_region_table_cache, table);
 43}
 44
 45static unsigned long *dma_alloc_page_table(void)
 46{
 47	unsigned long *table, *entry;
 48
 49	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
 50	if (!table)
 51		return NULL;
 52
 53	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
 54		*entry = ZPCI_PTE_INVALID;
 55	return table;
 56}
 57
 58static void dma_free_page_table(void *table)
 59{
 60	kmem_cache_free(dma_page_table_cache, table);
 61}
 62
 63static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
 64{
 
 65	unsigned long *sto;
 66
 67	if (reg_entry_isvalid(*entry))
 68		sto = get_rt_sto(*entry);
 69	else {
 
 70		sto = dma_alloc_cpu_table();
 71		if (!sto)
 72			return NULL;
 73
 74		set_rt_sto(entry, sto);
 75		validate_rt_entry(entry);
 76		entry_clr_protected(entry);
 
 
 
 
 
 
 
 77	}
 78	return sto;
 79}
 80
 81static unsigned long *dma_get_page_table_origin(unsigned long *entry)
 82{
 
 83	unsigned long *pto;
 84
 85	if (reg_entry_isvalid(*entry))
 86		pto = get_st_pto(*entry);
 87	else {
 
 88		pto = dma_alloc_page_table();
 89		if (!pto)
 90			return NULL;
 91		set_st_pto(entry, pto);
 92		validate_st_entry(entry);
 93		entry_clr_protected(entry);
 
 
 
 
 
 
 
 94	}
 95	return pto;
 96}
 97
 98unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
 99{
100	unsigned long *sto, *pto;
101	unsigned int rtx, sx, px;
102
103	rtx = calc_rtx(dma_addr);
104	sto = dma_get_seg_table_origin(&rto[rtx]);
105	if (!sto)
106		return NULL;
107
108	sx = calc_sx(dma_addr);
109	pto = dma_get_page_table_origin(&sto[sx]);
110	if (!pto)
111		return NULL;
112
113	px = calc_px(dma_addr);
114	return &pto[px];
115}
116
117void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
118{
 
 
 
119	if (flags & ZPCI_PTE_INVALID) {
120		invalidate_pt_entry(entry);
121	} else {
122		set_pt_pfaa(entry, page_addr);
123		validate_pt_entry(entry);
124	}
125
126	if (flags & ZPCI_TABLE_PROTECTED)
127		entry_set_protected(entry);
128	else
129		entry_clr_protected(entry);
 
 
130}
131
132static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
133			    dma_addr_t dma_addr, size_t size, int flags)
134{
135	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
136	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
137	dma_addr_t start_dma_addr = dma_addr;
138	unsigned long irq_flags;
139	unsigned long *entry;
140	int i, rc = 0;
141
142	if (!nr_pages)
143		return -EINVAL;
144
145	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
146	if (!zdev->dma_table) {
147		rc = -EINVAL;
148		goto no_refresh;
149	}
150
151	for (i = 0; i < nr_pages; i++) {
152		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
153		if (!entry) {
154			rc = -ENOMEM;
155			goto undo_cpu_trans;
156		}
157		dma_update_cpu_trans(entry, page_addr, flags);
158		page_addr += PAGE_SIZE;
159		dma_addr += PAGE_SIZE;
160	}
161
162	/*
163	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
164	 * translations when previously invalid translation-table entries are
165	 * validated. With lazy unmap, it also is skipped for previously valid
166	 * entries, but a global rpcit is then required before any address can
167	 * be re-used, i.e. after each iommu bitmap wrap-around.
168	 */
169	if (!zdev->tlb_refresh &&
170			(!s390_iommu_strict ||
171			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
172		goto no_refresh;
173
174	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
175				nr_pages * PAGE_SIZE);
176undo_cpu_trans:
177	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
178		flags = ZPCI_PTE_INVALID;
179		while (i-- > 0) {
180			page_addr -= PAGE_SIZE;
181			dma_addr -= PAGE_SIZE;
182			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
183			if (!entry)
184				break;
185			dma_update_cpu_trans(entry, page_addr, flags);
186		}
187	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
189no_refresh:
190	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
191	return rc;
192}
193
194void dma_free_seg_table(unsigned long entry)
195{
196	unsigned long *sto = get_rt_sto(entry);
197	int sx;
198
199	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
200		if (reg_entry_isvalid(sto[sx]))
201			dma_free_page_table(get_st_pto(sto[sx]));
202
203	dma_free_cpu_table(sto);
204}
205
206void dma_cleanup_tables(unsigned long *table)
207{
208	int rtx;
209
210	if (!table)
211		return;
212
213	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
214		if (reg_entry_isvalid(table[rtx]))
215			dma_free_seg_table(table[rtx]);
216
217	dma_free_cpu_table(table);
218}
219
220static unsigned long __dma_alloc_iommu(struct device *dev,
221				       unsigned long start, int size)
222{
223	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
224	unsigned long boundary_size;
225
226	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
227			      PAGE_SIZE) >> PAGE_SHIFT;
228	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
229				start, size, 0, boundary_size, 0);
 
 
230}
231
232static unsigned long dma_alloc_iommu(struct device *dev, int size)
233{
234	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
235	unsigned long offset, flags;
236	int wrap = 0;
237
238	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
239	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
240	if (offset == -1) {
 
 
 
 
 
 
 
 
 
241		/* wrap-around */
242		offset = __dma_alloc_iommu(dev, 0, size);
243		wrap = 1;
 
244	}
 
 
245
246	if (offset != -1) {
247		zdev->next_bit = offset + size;
248		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
249			/* global flush after wrap-around with lazy unmap */
250			zpci_refresh_global(zdev);
251	}
252	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
253	return offset;
254}
255
256static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
257{
258	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
259	unsigned long flags;
 
 
260
261	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
262	if (!zdev->iommu_bitmap)
263		goto out;
264	bitmap_clear(zdev->iommu_bitmap, offset, size);
265	/*
266	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
267	 * until wrap-around.
268	 */
269	if (!s390_iommu_strict && offset >= zdev->next_bit)
270		zdev->next_bit = offset + size;
271out:
272	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
273}
274
275static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
276{
277	struct {
278		unsigned long rc;
279		unsigned long addr;
280	} __packed data = {rc, addr};
281
282	zpci_err_hex(&data, sizeof(data));
283}
284
285static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
286				     unsigned long offset, size_t size,
287				     enum dma_data_direction direction,
288				     struct dma_attrs *attrs)
289{
290	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
291	unsigned long nr_pages, iommu_page_index;
292	unsigned long pa = page_to_phys(page) + offset;
293	int flags = ZPCI_PTE_VALID;
 
294	dma_addr_t dma_addr;
295	int ret;
296
297	/* This rounds up number of pages based on size and offset */
298	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
299	iommu_page_index = dma_alloc_iommu(dev, nr_pages);
300	if (iommu_page_index == -1) {
301		ret = -ENOSPC;
302		goto out_err;
303	}
304
305	/* Use rounded up size */
306	size = nr_pages * PAGE_SIZE;
307
308	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
309	if (dma_addr + size > zdev->end_dma) {
310		ret = -ERANGE;
311		goto out_free;
312	}
313
314	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
315		flags |= ZPCI_TABLE_PROTECTED;
316
317	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
318	if (ret)
319		goto out_free;
320
321	atomic64_add(nr_pages, &zdev->mapped_pages);
322	return dma_addr + (offset & ~PAGE_MASK);
323
324out_free:
325	dma_free_iommu(dev, iommu_page_index, nr_pages);
326out_err:
327	zpci_err("map error:\n");
328	zpci_err_dma(ret, pa);
329	return DMA_ERROR_CODE;
330}
331
332static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
333				 size_t size, enum dma_data_direction direction,
334				 struct dma_attrs *attrs)
335{
336	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
337	unsigned long iommu_page_index;
338	int npages, ret;
339
340	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
341	dma_addr = dma_addr & PAGE_MASK;
342	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
343			       ZPCI_PTE_INVALID);
344	if (ret) {
345		zpci_err("unmap error:\n");
346		zpci_err_dma(ret, dma_addr);
347		return;
348	}
349
350	atomic64_add(npages, &zdev->unmapped_pages);
351	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
352	dma_free_iommu(dev, iommu_page_index, npages);
353}
354
355static void *s390_dma_alloc(struct device *dev, size_t size,
356			    dma_addr_t *dma_handle, gfp_t flag,
357			    struct dma_attrs *attrs)
358{
359	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
360	struct page *page;
361	unsigned long pa;
362	dma_addr_t map;
363
364	size = PAGE_ALIGN(size);
365	page = alloc_pages(flag, get_order(size));
366	if (!page)
367		return NULL;
368
369	pa = page_to_phys(page);
370	memset((void *) pa, 0, size);
371
372	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL);
373	if (dma_mapping_error(dev, map)) {
374		free_pages(pa, get_order(size));
375		return NULL;
376	}
377
378	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
379	if (dma_handle)
380		*dma_handle = map;
381	return (void *) pa;
382}
383
384static void s390_dma_free(struct device *dev, size_t size,
385			  void *pa, dma_addr_t dma_handle,
386			  struct dma_attrs *attrs)
387{
388	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
389
390	size = PAGE_ALIGN(size);
391	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
392	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
393	free_pages((unsigned long) pa, get_order(size));
394}
395
396static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
397			   int nr_elements, enum dma_data_direction dir,
398			   struct dma_attrs *attrs)
 
399{
400	int mapped_elements = 0;
 
 
 
401	struct scatterlist *s;
402	int i;
 
 
 
 
 
 
 
 
 
403
404	for_each_sg(sg, s, nr_elements, i) {
405		struct page *page = sg_page(s);
406		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
407						    s->length, dir, NULL);
408		if (!dma_mapping_error(dev, s->dma_address)) {
409			s->dma_length = s->length;
410			mapped_elements++;
411		} else
412			goto unmap;
 
 
413	}
414out:
415	return mapped_elements;
 
 
 
 
 
 
416
417unmap:
418	for_each_sg(sg, s, mapped_elements, i) {
419		if (s->dma_address)
420			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
421					     dir, NULL);
422		s->dma_address = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423		s->dma_length = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424	}
425	mapped_elements = 0;
426	goto out;
 
 
 
 
 
 
 
 
 
 
 
 
427}
428
429static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
430			      int nr_elements, enum dma_data_direction dir,
431			      struct dma_attrs *attrs)
432{
433	struct scatterlist *s;
434	int i;
435
436	for_each_sg(sg, s, nr_elements, i) {
437		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
 
 
438		s->dma_address = 0;
439		s->dma_length = 0;
440	}
441}
442
443int zpci_dma_init_device(struct zpci_dev *zdev)
444{
 
445	int rc;
446
447	/*
448	 * At this point, if the device is part of an IOMMU domain, this would
449	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
450	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
451	 */
452	WARN_ON(zdev->s390_domain);
453
454	spin_lock_init(&zdev->iommu_bitmap_lock);
455	spin_lock_init(&zdev->dma_table_lock);
456
457	zdev->dma_table = dma_alloc_cpu_table();
458	if (!zdev->dma_table) {
459		rc = -ENOMEM;
460		goto out;
461	}
462
463	/*
464	 * Restrict the iommu bitmap size to the minimum of the following:
465	 * - main memory size
466	 * - 3-level pagetable address limit minus start_dma offset
467	 * - DMA address range allowed by the hardware (clp query pci fn)
468	 *
469	 * Also set zdev->end_dma to the actual end address of the usable
470	 * range, instead of the theoretical maximum as reported by hardware.
 
 
 
 
471	 */
472	zdev->iommu_size = min3((u64) high_memory,
 
473				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
474				zdev->end_dma - zdev->start_dma + 1);
475	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
476	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
477	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
478	if (!zdev->iommu_bitmap) {
479		rc = -ENOMEM;
480		goto free_dma_table;
481	}
 
 
 
 
 
 
482
483	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
484				(u64) zdev->dma_table);
485	if (rc)
 
486		goto free_bitmap;
 
487
488	return 0;
489free_bitmap:
490	vfree(zdev->iommu_bitmap);
491	zdev->iommu_bitmap = NULL;
 
 
492free_dma_table:
493	dma_free_cpu_table(zdev->dma_table);
494	zdev->dma_table = NULL;
495out:
496	return rc;
497}
498
499void zpci_dma_exit_device(struct zpci_dev *zdev)
500{
 
 
501	/*
502	 * At this point, if the device is part of an IOMMU domain, this would
503	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
504	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
505	 */
506	WARN_ON(zdev->s390_domain);
 
 
 
 
 
 
 
 
 
507
508	zpci_unregister_ioat(zdev, 0);
509	dma_cleanup_tables(zdev->dma_table);
510	zdev->dma_table = NULL;
511	vfree(zdev->iommu_bitmap);
512	zdev->iommu_bitmap = NULL;
 
 
513	zdev->next_bit = 0;
 
514}
515
516static int __init dma_alloc_cpu_table_caches(void)
517{
518	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
519					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
520					0, NULL);
521	if (!dma_region_table_cache)
522		return -ENOMEM;
523
524	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
525					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
526					0, NULL);
527	if (!dma_page_table_cache) {
528		kmem_cache_destroy(dma_region_table_cache);
529		return -ENOMEM;
530	}
531	return 0;
532}
533
534int __init zpci_dma_init(void)
535{
 
 
 
 
 
 
536	return dma_alloc_cpu_table_caches();
537}
538
539void zpci_dma_exit(void)
540{
541	kmem_cache_destroy(dma_page_table_cache);
542	kmem_cache_destroy(dma_region_table_cache);
543}
544
545#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)
546
547static int __init dma_debug_do_init(void)
548{
549	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
550	return 0;
551}
552fs_initcall(dma_debug_do_init);
553
554struct dma_map_ops s390_pci_dma_ops = {
555	.alloc		= s390_dma_alloc,
556	.free		= s390_dma_free,
557	.map_sg		= s390_dma_map_sg,
558	.unmap_sg	= s390_dma_unmap_sg,
559	.map_page	= s390_dma_map_pages,
560	.unmap_page	= s390_dma_unmap_pages,
561	/* if we support direct DMA this must be conditional */
562	.is_phys	= 0,
 
 
563	/* dma_supported is unconditionally true without a callback */
564};
565EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
566
567static int __init s390_iommu_setup(char *str)
568{
569	if (!strncmp(str, "strict", 6))
570		s390_iommu_strict = 1;
571	return 0;
572}
573
574__setup("s390_iommu=", s390_iommu_setup);