Linux Audio

Check our new training course

Loading...
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * IOMMU API for s390 PCI devices
  4 *
  5 * Copyright IBM Corp. 2015
  6 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  7 */
  8
  9#include <linux/pci.h>
 10#include <linux/iommu.h>
 11#include <linux/iommu-helper.h>
 
 12#include <linux/sizes.h>
 13#include <linux/rculist.h>
 14#include <linux/rcupdate.h>
 15#include <asm/pci_dma.h>
 16
 17#include "dma-iommu.h"
 18
 19static const struct iommu_ops s390_iommu_ops;
 20
 21static struct kmem_cache *dma_region_table_cache;
 22static struct kmem_cache *dma_page_table_cache;
 23
 24static u64 s390_iommu_aperture;
 25static u32 s390_iommu_aperture_factor = 1;
 26
 27struct s390_domain {
 28	struct iommu_domain	domain;
 29	struct list_head	devices;
 30	struct zpci_iommu_ctrs	ctrs;
 31	unsigned long		*dma_table;
 
 32	spinlock_t		list_lock;
 33	struct rcu_head		rcu;
 34};
 35
 36static inline unsigned int calc_rtx(dma_addr_t ptr)
 37{
 38	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
 39}
 40
 41static inline unsigned int calc_sx(dma_addr_t ptr)
 42{
 43	return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
 44}
 45
 46static inline unsigned int calc_px(dma_addr_t ptr)
 47{
 48	return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
 49}
 50
 51static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
 52{
 53	*entry &= ZPCI_PTE_FLAG_MASK;
 54	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
 55}
 56
 57static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
 58{
 59	*entry &= ZPCI_RTE_FLAG_MASK;
 60	*entry |= (sto & ZPCI_RTE_ADDR_MASK);
 61	*entry |= ZPCI_TABLE_TYPE_RTX;
 62}
 63
 64static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
 65{
 66	*entry &= ZPCI_STE_FLAG_MASK;
 67	*entry |= (pto & ZPCI_STE_ADDR_MASK);
 68	*entry |= ZPCI_TABLE_TYPE_SX;
 69}
 70
 71static inline void validate_rt_entry(unsigned long *entry)
 72{
 73	*entry &= ~ZPCI_TABLE_VALID_MASK;
 74	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
 75	*entry |= ZPCI_TABLE_VALID;
 76	*entry |= ZPCI_TABLE_LEN_RTX;
 77}
 78
 79static inline void validate_st_entry(unsigned long *entry)
 80{
 81	*entry &= ~ZPCI_TABLE_VALID_MASK;
 82	*entry |= ZPCI_TABLE_VALID;
 83}
 84
 85static inline void invalidate_pt_entry(unsigned long *entry)
 86{
 87	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
 88	*entry &= ~ZPCI_PTE_VALID_MASK;
 89	*entry |= ZPCI_PTE_INVALID;
 90}
 91
 92static inline void validate_pt_entry(unsigned long *entry)
 93{
 94	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
 95	*entry &= ~ZPCI_PTE_VALID_MASK;
 96	*entry |= ZPCI_PTE_VALID;
 97}
 98
 99static inline void entry_set_protected(unsigned long *entry)
100{
101	*entry &= ~ZPCI_TABLE_PROT_MASK;
102	*entry |= ZPCI_TABLE_PROTECTED;
103}
104
105static inline void entry_clr_protected(unsigned long *entry)
106{
107	*entry &= ~ZPCI_TABLE_PROT_MASK;
108	*entry |= ZPCI_TABLE_UNPROTECTED;
109}
110
111static inline int reg_entry_isvalid(unsigned long entry)
112{
113	return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
114}
115
116static inline int pt_entry_isvalid(unsigned long entry)
117{
118	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
119}
120
121static inline unsigned long *get_rt_sto(unsigned long entry)
122{
123	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
124		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
125	else
126		return NULL;
127}
128
129static inline unsigned long *get_st_pto(unsigned long entry)
130{
131	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
132		return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
133	else
134		return NULL;
135}
136
137static int __init dma_alloc_cpu_table_caches(void)
138{
139	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
140						   ZPCI_TABLE_SIZE,
141						   ZPCI_TABLE_ALIGN,
142						   0, NULL);
143	if (!dma_region_table_cache)
144		return -ENOMEM;
145
146	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
147						 ZPCI_PT_SIZE,
148						 ZPCI_PT_ALIGN,
149						 0, NULL);
150	if (!dma_page_table_cache) {
151		kmem_cache_destroy(dma_region_table_cache);
152		return -ENOMEM;
153	}
154	return 0;
155}
156
157static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
158{
159	unsigned long *table, *entry;
160
161	table = kmem_cache_alloc(dma_region_table_cache, gfp);
162	if (!table)
163		return NULL;
164
165	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
166		*entry = ZPCI_TABLE_INVALID;
167	return table;
168}
169
170static void dma_free_cpu_table(void *table)
171{
172	kmem_cache_free(dma_region_table_cache, table);
173}
174
175static void dma_free_page_table(void *table)
176{
177	kmem_cache_free(dma_page_table_cache, table);
178}
179
180static void dma_free_seg_table(unsigned long entry)
181{
182	unsigned long *sto = get_rt_sto(entry);
183	int sx;
184
185	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
186		if (reg_entry_isvalid(sto[sx]))
187			dma_free_page_table(get_st_pto(sto[sx]));
188
189	dma_free_cpu_table(sto);
190}
191
192static void dma_cleanup_tables(unsigned long *table)
193{
194	int rtx;
195
196	if (!table)
197		return;
198
199	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
200		if (reg_entry_isvalid(table[rtx]))
201			dma_free_seg_table(table[rtx]);
202
203	dma_free_cpu_table(table);
204}
205
206static unsigned long *dma_alloc_page_table(gfp_t gfp)
207{
208	unsigned long *table, *entry;
209
210	table = kmem_cache_alloc(dma_page_table_cache, gfp);
211	if (!table)
212		return NULL;
213
214	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
215		*entry = ZPCI_PTE_INVALID;
216	return table;
217}
218
219static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
220{
221	unsigned long old_rte, rte;
222	unsigned long *sto;
223
224	rte = READ_ONCE(*rtep);
225	if (reg_entry_isvalid(rte)) {
226		sto = get_rt_sto(rte);
227	} else {
228		sto = dma_alloc_cpu_table(gfp);
229		if (!sto)
230			return NULL;
231
232		set_rt_sto(&rte, virt_to_phys(sto));
233		validate_rt_entry(&rte);
234		entry_clr_protected(&rte);
235
236		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
237		if (old_rte != ZPCI_TABLE_INVALID) {
238			/* Somone else was faster, use theirs */
239			dma_free_cpu_table(sto);
240			sto = get_rt_sto(old_rte);
241		}
242	}
243	return sto;
244}
245
246static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
247{
248	unsigned long old_ste, ste;
249	unsigned long *pto;
250
251	ste = READ_ONCE(*step);
252	if (reg_entry_isvalid(ste)) {
253		pto = get_st_pto(ste);
254	} else {
255		pto = dma_alloc_page_table(gfp);
256		if (!pto)
257			return NULL;
258		set_st_pto(&ste, virt_to_phys(pto));
259		validate_st_entry(&ste);
260		entry_clr_protected(&ste);
261
262		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
263		if (old_ste != ZPCI_TABLE_INVALID) {
264			/* Somone else was faster, use theirs */
265			dma_free_page_table(pto);
266			pto = get_st_pto(old_ste);
267		}
268	}
269	return pto;
270}
271
272static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
273{
274	unsigned long *sto, *pto;
275	unsigned int rtx, sx, px;
276
277	rtx = calc_rtx(dma_addr);
278	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
279	if (!sto)
280		return NULL;
281
282	sx = calc_sx(dma_addr);
283	pto = dma_get_page_table_origin(&sto[sx], gfp);
284	if (!pto)
285		return NULL;
286
287	px = calc_px(dma_addr);
288	return &pto[px];
289}
290
291static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
292{
293	unsigned long pte;
294
295	pte = READ_ONCE(*ptep);
296	if (flags & ZPCI_PTE_INVALID) {
297		invalidate_pt_entry(&pte);
298	} else {
299		set_pt_pfaa(&pte, page_addr);
300		validate_pt_entry(&pte);
301	}
302
303	if (flags & ZPCI_TABLE_PROTECTED)
304		entry_set_protected(&pte);
305	else
306		entry_clr_protected(&pte);
307
308	xchg(ptep, pte);
309}
310
311static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
312{
313	return container_of(dom, struct s390_domain, domain);
314}
315
316static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
317{
318	struct zpci_dev *zdev = to_zpci_dev(dev);
319
320	switch (cap) {
321	case IOMMU_CAP_CACHE_COHERENCY:
322		return true;
323	case IOMMU_CAP_DEFERRED_FLUSH:
324		return zdev->pft != PCI_FUNC_TYPE_ISM;
325	default:
326		return false;
327	}
328}
329
330static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
331{
332	struct s390_domain *s390_domain;
333
 
 
 
334	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
335	if (!s390_domain)
336		return NULL;
337
338	s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
339	if (!s390_domain->dma_table) {
340		kfree(s390_domain);
341		return NULL;
342	}
343	s390_domain->domain.geometry.force_aperture = true;
344	s390_domain->domain.geometry.aperture_start = 0;
345	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
346
 
347	spin_lock_init(&s390_domain->list_lock);
348	INIT_LIST_HEAD_RCU(&s390_domain->devices);
349
350	return &s390_domain->domain;
351}
352
353static void s390_iommu_rcu_free_domain(struct rcu_head *head)
354{
355	struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);
356
357	dma_cleanup_tables(s390_domain->dma_table);
358	kfree(s390_domain);
359}
360
361static void s390_domain_free(struct iommu_domain *domain)
362{
363	struct s390_domain *s390_domain = to_s390_domain(domain);
364
365	rcu_read_lock();
366	WARN_ON(!list_empty(&s390_domain->devices));
367	rcu_read_unlock();
368
369	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
370}
371
372static void s390_iommu_detach_device(struct iommu_domain *domain,
373				     struct device *dev)
374{
375	struct s390_domain *s390_domain = to_s390_domain(domain);
376	struct zpci_dev *zdev = to_zpci_dev(dev);
377	unsigned long flags;
378
379	spin_lock_irqsave(&s390_domain->list_lock, flags);
380	list_del_rcu(&zdev->iommu_list);
381	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
382
383	zpci_unregister_ioat(zdev, 0);
384	zdev->s390_domain = NULL;
385	zdev->dma_table = NULL;
386}
387
388static int s390_iommu_attach_device(struct iommu_domain *domain,
389				    struct device *dev)
390{
391	struct s390_domain *s390_domain = to_s390_domain(domain);
392	struct zpci_dev *zdev = to_zpci_dev(dev);
 
393	unsigned long flags;
394	u8 status;
395	int cc;
396
397	if (!zdev)
398		return -ENODEV;
399
400	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
401		domain->geometry.aperture_end < zdev->start_dma))
402		return -EINVAL;
403
404	if (zdev->s390_domain)
405		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
406
407	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
408				virt_to_phys(s390_domain->dma_table), &status);
409	/*
410	 * If the device is undergoing error recovery the reset code
411	 * will re-establish the new domain.
412	 */
413	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
414		return -EIO;
415
416	zdev->dma_table = s390_domain->dma_table;
417	zdev->s390_domain = s390_domain;
 
 
 
 
418
419	spin_lock_irqsave(&s390_domain->list_lock, flags);
420	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
422
423	return 0;
 
 
 
 
 
 
424}
425
426static void s390_iommu_get_resv_regions(struct device *dev,
427					struct list_head *list)
428{
429	struct zpci_dev *zdev = to_zpci_dev(dev);
430	struct iommu_resv_region *region;
 
 
 
 
 
 
431
432	if (zdev->start_dma) {
433		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
434						 IOMMU_RESV_RESERVED, GFP_KERNEL);
435		if (!region)
436			return;
437		list_add_tail(&region->list, list);
 
 
 
438	}
 
439
440	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
441		region = iommu_alloc_resv_region(zdev->end_dma + 1,
442						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
443						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
444		if (!region)
445			return;
446		list_add_tail(&region->list, list);
447	}
448}
449
450static struct iommu_device *s390_iommu_probe_device(struct device *dev)
451{
452	struct zpci_dev *zdev;
453
454	if (!dev_is_pci(dev))
455		return ERR_PTR(-ENODEV);
456
457	zdev = to_zpci_dev(dev);
458
459	if (zdev->start_dma > zdev->end_dma ||
460	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
461		return ERR_PTR(-EINVAL);
462
463	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
464		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
 
 
 
 
465
466	if (zdev->tlb_refresh)
467		dev->iommu->shadow_on_flush = 1;
468
469	return &zdev->iommu_dev;
470}
471
472static void s390_iommu_release_device(struct device *dev)
473{
474	struct zpci_dev *zdev = to_zpci_dev(dev);
 
475
476	/*
477	 * release_device is expected to detach any domain currently attached
478	 * to the device, but keep it attached to other devices in the group.
 
 
 
 
 
 
 
479	 */
480	if (zdev)
481		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
482}
483
484static int zpci_refresh_all(struct zpci_dev *zdev)
485{
486	return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
487				  zdev->end_dma - zdev->start_dma + 1);
488}
489
490static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
491{
492	struct s390_domain *s390_domain = to_s390_domain(domain);
493	struct zpci_dev *zdev;
494
495	rcu_read_lock();
496	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
497		atomic64_inc(&s390_domain->ctrs.global_rpcits);
498		zpci_refresh_all(zdev);
499	}
500	rcu_read_unlock();
501}
502
503static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
504				  struct iommu_iotlb_gather *gather)
505{
506	struct s390_domain *s390_domain = to_s390_domain(domain);
507	size_t size = gather->end - gather->start + 1;
508	struct zpci_dev *zdev;
509
510	/* If gather was never added to there is nothing to flush */
511	if (!gather->end)
512		return;
513
514	rcu_read_lock();
515	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
516		atomic64_inc(&s390_domain->ctrs.sync_rpcits);
517		zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
518				   size);
519	}
520	rcu_read_unlock();
521}
522
523static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
524				     unsigned long iova, size_t size)
 
525{
526	struct s390_domain *s390_domain = to_s390_domain(domain);
527	struct zpci_dev *zdev;
528	int ret = 0;
529
530	rcu_read_lock();
531	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
532		if (!zdev->tlb_refresh)
533			continue;
534		atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
535		ret = zpci_refresh_trans((u64)zdev->fh << 32,
536					 iova, size);
537		/*
538		 * let the hypervisor discover invalidated entries
539		 * allowing it to free IOVAs and unpin pages
540		 */
541		if (ret == -ENOMEM) {
542			ret = zpci_refresh_all(zdev);
543			if (ret)
544				break;
545		}
546	}
547	rcu_read_unlock();
548
549	return ret;
550}
 
551
552static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
553				     phys_addr_t pa, dma_addr_t dma_addr,
554				     unsigned long nr_pages, int flags,
555				     gfp_t gfp)
556{
557	phys_addr_t page_addr = pa & PAGE_MASK;
558	unsigned long *entry;
559	unsigned long i;
560	int rc;
561
 
562	for (i = 0; i < nr_pages; i++) {
563		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
564					   gfp);
565		if (unlikely(!entry)) {
566			rc = -ENOMEM;
567			goto undo_cpu_trans;
568		}
569		dma_update_cpu_trans(entry, page_addr, flags);
570		page_addr += PAGE_SIZE;
571		dma_addr += PAGE_SIZE;
572	}
573
574	return 0;
575
576undo_cpu_trans:
577	while (i-- > 0) {
578		dma_addr -= PAGE_SIZE;
579		entry = dma_walk_cpu_trans(s390_domain->dma_table,
580					   dma_addr, gfp);
581		if (!entry)
582			break;
583		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
584	}
 
585
586	return rc;
587}
588
589static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
590				       dma_addr_t dma_addr, unsigned long nr_pages)
591{
592	unsigned long *entry;
593	unsigned long i;
594	int rc = 0;
595
596	for (i = 0; i < nr_pages; i++) {
597		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
598					   GFP_ATOMIC);
599		if (unlikely(!entry)) {
600			rc = -EINVAL;
601			break;
602		}
603		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
604		dma_addr += PAGE_SIZE;
605	}
 
606
607	return rc;
608}
609
610static int s390_iommu_map_pages(struct iommu_domain *domain,
611				unsigned long iova, phys_addr_t paddr,
612				size_t pgsize, size_t pgcount,
613				int prot, gfp_t gfp, size_t *mapped)
614{
615	struct s390_domain *s390_domain = to_s390_domain(domain);
616	size_t size = pgcount << __ffs(pgsize);
617	int flags = ZPCI_PTE_VALID, rc = 0;
618
619	if (pgsize != SZ_4K)
620		return -EINVAL;
621
622	if (iova < s390_domain->domain.geometry.aperture_start ||
623	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
624		return -EINVAL;
625
626	if (!IS_ALIGNED(iova | paddr, pgsize))
627		return -EINVAL;
628
629	if (!(prot & IOMMU_WRITE))
630		flags |= ZPCI_TABLE_PROTECTED;
631
632	rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
633				     pgcount, flags, gfp);
634	if (!rc) {
635		*mapped = size;
636		atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
637	}
638
639	return rc;
640}
641
642static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
643					   dma_addr_t iova)
644{
645	struct s390_domain *s390_domain = to_s390_domain(domain);
646	unsigned long *rto, *sto, *pto;
647	unsigned long ste, pte, rte;
648	unsigned int rtx, sx, px;
649	phys_addr_t phys = 0;
650
651	if (iova < domain->geometry.aperture_start ||
652	    iova > domain->geometry.aperture_end)
653		return 0;
654
655	rtx = calc_rtx(iova);
656	sx = calc_sx(iova);
657	px = calc_px(iova);
658	rto = s390_domain->dma_table;
659
660	rte = READ_ONCE(rto[rtx]);
661	if (reg_entry_isvalid(rte)) {
662		sto = get_rt_sto(rte);
663		ste = READ_ONCE(sto[sx]);
664		if (reg_entry_isvalid(ste)) {
665			pto = get_st_pto(ste);
666			pte = READ_ONCE(pto[px]);
667			if (pt_entry_isvalid(pte))
668				phys = pte & ZPCI_PTE_ADDR_MASK;
669		}
670	}
 
671
672	return phys;
673}
674
675static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
676				     unsigned long iova,
677				     size_t pgsize, size_t pgcount,
678				     struct iommu_iotlb_gather *gather)
679{
680	struct s390_domain *s390_domain = to_s390_domain(domain);
681	size_t size = pgcount << __ffs(pgsize);
 
682	int rc;
683
684	if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
685	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
686		return 0;
687
688	rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
 
689	if (rc)
690		return 0;
691
692	iommu_iotlb_gather_add_range(gather, iova, size);
693	atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);
694
695	return size;
696}
697
698static void s390_iommu_probe_finalize(struct device *dev)
699{
700	iommu_setup_dma_ops(dev, 0, U64_MAX);
701}
702
703struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
704{
705	if (!zdev || !zdev->s390_domain)
706		return NULL;
707	return &zdev->s390_domain->ctrs;
708}
709
710int zpci_init_iommu(struct zpci_dev *zdev)
711{
712	u64 aperture_size;
713	int rc = 0;
714
715	rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
716				    "s390-iommu.%08x", zdev->fid);
717	if (rc)
718		goto out_err;
719
720	rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
721	if (rc)
722		goto out_sysfs;
723
724	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
725	aperture_size = min3(s390_iommu_aperture,
726			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
727			     zdev->end_dma - zdev->start_dma + 1);
728	zdev->end_dma = zdev->start_dma + aperture_size - 1;
729
730	return 0;
731
732out_sysfs:
733	iommu_device_sysfs_remove(&zdev->iommu_dev);
734
735out_err:
736	return rc;
737}
738
739void zpci_destroy_iommu(struct zpci_dev *zdev)
740{
741	iommu_device_unregister(&zdev->iommu_dev);
742	iommu_device_sysfs_remove(&zdev->iommu_dev);
743}
744
745static int __init s390_iommu_setup(char *str)
746{
747	if (!strcmp(str, "strict")) {
748		pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
749		iommu_set_dma_strict();
750	}
751	return 1;
752}
753
754__setup("s390_iommu=", s390_iommu_setup);
755
756static int __init s390_iommu_aperture_setup(char *str)
757{
758	if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
759		s390_iommu_aperture_factor = 1;
760	return 1;
761}
762
763__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
764
765static int __init s390_iommu_init(void)
766{
767	int rc;
768
769	iommu_dma_forcedac = true;
770	s390_iommu_aperture = (u64)virt_to_phys(high_memory);
771	if (!s390_iommu_aperture_factor)
772		s390_iommu_aperture = ULONG_MAX;
773	else
774		s390_iommu_aperture *= s390_iommu_aperture_factor;
775
776	rc = dma_alloc_cpu_table_caches();
777	if (rc)
778		return rc;
779
780	return rc;
781}
782subsys_initcall(s390_iommu_init);
783
784static const struct iommu_ops s390_iommu_ops = {
785	.capable = s390_iommu_capable,
786	.domain_alloc_paging = s390_domain_alloc_paging,
787	.probe_device = s390_iommu_probe_device,
788	.probe_finalize = s390_iommu_probe_finalize,
789	.release_device = s390_iommu_release_device,
790	.device_group = generic_device_group,
791	.pgsize_bitmap = SZ_4K,
792	.get_resv_regions = s390_iommu_get_resv_regions,
793	.default_domain_ops = &(const struct iommu_domain_ops) {
794		.attach_dev	= s390_iommu_attach_device,
795		.map_pages	= s390_iommu_map_pages,
796		.unmap_pages	= s390_iommu_unmap_pages,
797		.flush_iotlb_all = s390_iommu_flush_iotlb_all,
798		.iotlb_sync      = s390_iommu_iotlb_sync,
799		.iotlb_sync_map  = s390_iommu_iotlb_sync_map,
800		.iova_to_phys	= s390_iommu_iova_to_phys,
801		.free		= s390_domain_free,
802	}
803};
v4.6
 
  1/*
  2 * IOMMU API for s390 PCI devices
  3 *
  4 * Copyright IBM Corp. 2015
  5 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  6 */
  7
  8#include <linux/pci.h>
  9#include <linux/iommu.h>
 10#include <linux/iommu-helper.h>
 11#include <linux/pci.h>
 12#include <linux/sizes.h>
 
 
 13#include <asm/pci_dma.h>
 14
 15/*
 16 * Physically contiguous memory regions can be mapped with 4 KiB alignment,
 17 * we allow all page sizes that are an order of 4KiB (no special large page
 18 * support so far).
 19 */
 20#define S390_IOMMU_PGSIZES	(~0xFFFUL)
 
 
 
 21
 22struct s390_domain {
 23	struct iommu_domain	domain;
 24	struct list_head	devices;
 
 25	unsigned long		*dma_table;
 26	spinlock_t		dma_table_lock;
 27	spinlock_t		list_lock;
 
 28};
 29
 30struct s390_domain_device {
 31	struct list_head	list;
 32	struct zpci_dev		*zdev;
 33};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 34
 35static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
 36{
 37	return container_of(dom, struct s390_domain, domain);
 38}
 39
 40static bool s390_iommu_capable(enum iommu_cap cap)
 41{
 
 
 42	switch (cap) {
 43	case IOMMU_CAP_CACHE_COHERENCY:
 44		return true;
 45	case IOMMU_CAP_INTR_REMAP:
 46		return true;
 47	default:
 48		return false;
 49	}
 50}
 51
 52static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 53{
 54	struct s390_domain *s390_domain;
 55
 56	if (domain_type != IOMMU_DOMAIN_UNMANAGED)
 57		return NULL;
 58
 59	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
 60	if (!s390_domain)
 61		return NULL;
 62
 63	s390_domain->dma_table = dma_alloc_cpu_table();
 64	if (!s390_domain->dma_table) {
 65		kfree(s390_domain);
 66		return NULL;
 67	}
 
 
 
 68
 69	spin_lock_init(&s390_domain->dma_table_lock);
 70	spin_lock_init(&s390_domain->list_lock);
 71	INIT_LIST_HEAD(&s390_domain->devices);
 72
 73	return &s390_domain->domain;
 74}
 75
 
 
 
 
 
 
 
 
 76static void s390_domain_free(struct iommu_domain *domain)
 77{
 78	struct s390_domain *s390_domain = to_s390_domain(domain);
 79
 80	dma_cleanup_tables(s390_domain->dma_table);
 81	kfree(s390_domain);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 82}
 83
 84static int s390_iommu_attach_device(struct iommu_domain *domain,
 85				    struct device *dev)
 86{
 87	struct s390_domain *s390_domain = to_s390_domain(domain);
 88	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
 89	struct s390_domain_device *domain_device;
 90	unsigned long flags;
 91	int rc;
 
 92
 93	if (!zdev)
 94		return -ENODEV;
 95
 96	domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
 97	if (!domain_device)
 98		return -ENOMEM;
 
 
 
 99
100	if (zdev->dma_table)
101		zpci_dma_exit_device(zdev);
 
 
 
 
 
 
102
103	zdev->dma_table = s390_domain->dma_table;
104	rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
105				zdev->start_dma + zdev->iommu_size - 1,
106				(u64) zdev->dma_table);
107	if (rc)
108		goto out_restore;
109
110	spin_lock_irqsave(&s390_domain->list_lock, flags);
111	/* First device defines the DMA range limits */
112	if (list_empty(&s390_domain->devices)) {
113		domain->geometry.aperture_start = zdev->start_dma;
114		domain->geometry.aperture_end = zdev->end_dma;
115		domain->geometry.force_aperture = true;
116	/* Allow only devices with identical DMA range limits */
117	} else if (domain->geometry.aperture_start != zdev->start_dma ||
118		   domain->geometry.aperture_end != zdev->end_dma) {
119		rc = -EINVAL;
120		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
121		goto out_restore;
122	}
123	domain_device->zdev = zdev;
124	zdev->s390_domain = s390_domain;
125	list_add(&domain_device->list, &s390_domain->devices);
126	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
127
128	return 0;
129
130out_restore:
131	zpci_dma_init_device(zdev);
132	kfree(domain_device);
133
134	return rc;
135}
136
137static void s390_iommu_detach_device(struct iommu_domain *domain,
138				     struct device *dev)
139{
140	struct s390_domain *s390_domain = to_s390_domain(domain);
141	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
142	struct s390_domain_device *domain_device, *tmp;
143	unsigned long flags;
144	int found = 0;
145
146	if (!zdev)
147		return;
148
149	spin_lock_irqsave(&s390_domain->list_lock, flags);
150	list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
151				 list) {
152		if (domain_device->zdev == zdev) {
153			list_del(&domain_device->list);
154			kfree(domain_device);
155			found = 1;
156			break;
157		}
158	}
159	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
160
161	if (found) {
162		zdev->s390_domain = NULL;
163		zpci_unregister_ioat(zdev, 0);
164		zpci_dma_init_device(zdev);
 
 
 
165	}
166}
167
/*
 * Add @dev to an IOMMU group, allocating a new group if the device does not
 * have one yet. The local group reference is dropped before returning.
 *
 * Returns 0 on success or a negative error code.
 */
static int s390_iommu_add_device(struct device *dev)
{
	struct iommu_group *grp = iommu_group_get(dev);
	int rc;

	if (!grp) {
		grp = iommu_group_alloc();
		if (IS_ERR(grp))
			return PTR_ERR(grp);
	}

	rc = iommu_group_add_device(grp, dev);
	iommu_group_put(grp);

	return rc;
}
185
186static void s390_iommu_remove_device(struct device *dev)
187{
188	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
189	struct iommu_domain *domain;
190
191	/*
192	 * This is a workaround for a scenario where the IOMMU API common code
193	 * "forgets" to call the detach_dev callback: After binding a device
194	 * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
195	 * the attach_dev), removing the device via
196	 * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
197	 * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
198	 * notifier.
199	 *
200	 * So let's call detach_dev from here if it hasn't been called before.
201	 */
202	if (zdev && zdev->s390_domain) {
203		domain = iommu_get_domain_for_dev(dev);
204		if (domain)
205			s390_iommu_detach_device(domain, dev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206	}
 
 
207
208	iommu_group_remove_device(dev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209}
210
211static int s390_iommu_update_trans(struct s390_domain *s390_domain,
212				   unsigned long pa, dma_addr_t dma_addr,
213				   size_t size, int flags)
214{
215	struct s390_domain_device *domain_device;
216	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
217	dma_addr_t start_dma_addr = dma_addr;
218	unsigned long irq_flags, nr_pages, i;
219	unsigned long *entry;
220	int rc = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
222	if (dma_addr < s390_domain->domain.geometry.aperture_start ||
223	    dma_addr + size > s390_domain->domain.geometry.aperture_end)
224		return -EINVAL;
225
226	nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
227	if (!nr_pages)
228		return 0;
 
 
 
 
 
 
229
230	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
231	for (i = 0; i < nr_pages; i++) {
232		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
233		if (!entry) {
 
234			rc = -ENOMEM;
235			goto undo_cpu_trans;
236		}
237		dma_update_cpu_trans(entry, page_addr, flags);
238		page_addr += PAGE_SIZE;
239		dma_addr += PAGE_SIZE;
240	}
241
242	spin_lock(&s390_domain->list_lock);
243	list_for_each_entry(domain_device, &s390_domain->devices, list) {
244		rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
245					start_dma_addr, nr_pages * PAGE_SIZE);
246		if (rc)
 
 
 
247			break;
 
248	}
249	spin_unlock(&s390_domain->list_lock);
250
251undo_cpu_trans:
252	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
253		flags = ZPCI_PTE_INVALID;
254		while (i-- > 0) {
255			page_addr -= PAGE_SIZE;
256			dma_addr -= PAGE_SIZE;
257			entry = dma_walk_cpu_trans(s390_domain->dma_table,
258						   dma_addr);
259			if (!entry)
260				break;
261			dma_update_cpu_trans(entry, page_addr, flags);
 
 
 
 
 
262		}
 
 
263	}
264	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
265
266	return rc;
267}
268
269static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
270			  phys_addr_t paddr, size_t size, int prot)
 
 
271{
272	struct s390_domain *s390_domain = to_s390_domain(domain);
 
273	int flags = ZPCI_PTE_VALID, rc = 0;
274
275	if (!(prot & IOMMU_READ))
 
 
 
 
 
 
 
276		return -EINVAL;
277
278	if (!(prot & IOMMU_WRITE))
279		flags |= ZPCI_TABLE_PROTECTED;
280
281	rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
282				     size, flags);
 
 
 
 
283
284	return rc;
285}
286
287static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
288					   dma_addr_t iova)
289{
290	struct s390_domain *s390_domain = to_s390_domain(domain);
291	unsigned long *sto, *pto, *rto, flags;
 
292	unsigned int rtx, sx, px;
293	phys_addr_t phys = 0;
294
295	if (iova < domain->geometry.aperture_start ||
296	    iova > domain->geometry.aperture_end)
297		return 0;
298
299	rtx = calc_rtx(iova);
300	sx = calc_sx(iova);
301	px = calc_px(iova);
302	rto = s390_domain->dma_table;
303
304	spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
305	if (rto && reg_entry_isvalid(rto[rtx])) {
306		sto = get_rt_sto(rto[rtx]);
307		if (sto && reg_entry_isvalid(sto[sx])) {
308			pto = get_st_pto(sto[sx]);
309			if (pto && pt_entry_isvalid(pto[px]))
310				phys = pto[px] & ZPCI_PTE_ADDR_MASK;
 
 
311		}
312	}
313	spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);
314
315	return phys;
316}
317
318static size_t s390_iommu_unmap(struct iommu_domain *domain,
319			       unsigned long iova, size_t size)
 
 
320{
321	struct s390_domain *s390_domain = to_s390_domain(domain);
322	int flags = ZPCI_PTE_INVALID;
323	phys_addr_t paddr;
324	int rc;
325
326	paddr = s390_iommu_iova_to_phys(domain, iova);
327	if (!paddr)
328		return 0;
329
330	rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
331				     size, flags);
332	if (rc)
333		return 0;
334
 
 
 
335	return size;
336}
337
338static struct iommu_ops s390_iommu_ops = {
339	.capable = s390_iommu_capable,
340	.domain_alloc = s390_domain_alloc,
341	.domain_free = s390_domain_free,
342	.attach_dev = s390_iommu_attach_device,
343	.detach_dev = s390_iommu_detach_device,
344	.map = s390_iommu_map,
345	.unmap = s390_iommu_unmap,
346	.iova_to_phys = s390_iommu_iova_to_phys,
347	.add_device = s390_iommu_add_device,
348	.remove_device = s390_iommu_remove_device,
349	.pgsize_bitmap = S390_IOMMU_PGSIZES,
350};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
352static int __init s390_iommu_init(void)
353{
354	return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
 
 
 
 
 
 
 
 
 
 
 
 
 
355}
356subsys_initcall(s390_iommu_init);