Linux Audio

Check our new training course

Loading...
v6.2
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * IOMMU API for s390 PCI devices
  4 *
  5 * Copyright IBM Corp. 2015
  6 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  7 */
  8
  9#include <linux/pci.h>
 10#include <linux/iommu.h>
 11#include <linux/iommu-helper.h>
 12#include <linux/sizes.h>
 13#include <linux/rculist.h>
 14#include <linux/rcupdate.h>
 15#include <asm/pci_dma.h>
 16
 
 
 17static const struct iommu_ops s390_iommu_ops;
 18
 
 
 
 
 
 
 19struct s390_domain {
 20	struct iommu_domain	domain;
 21	struct list_head	devices;
 
 22	unsigned long		*dma_table;
 23	spinlock_t		list_lock;
 24	struct rcu_head		rcu;
 25};
 26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 27static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
 28{
 29	return container_of(dom, struct s390_domain, domain);
 30}
 31
 32static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
 33{
 
 
 34	switch (cap) {
 35	case IOMMU_CAP_CACHE_COHERENCY:
 36		return true;
 37	case IOMMU_CAP_INTR_REMAP:
 38		return true;
 39	default:
 40		return false;
 41	}
 42}
 43
 44static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 45{
 46	struct s390_domain *s390_domain;
 47
 48	if (domain_type != IOMMU_DOMAIN_UNMANAGED)
 49		return NULL;
 50
 51	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
 52	if (!s390_domain)
 53		return NULL;
 54
 55	s390_domain->dma_table = dma_alloc_cpu_table();
 56	if (!s390_domain->dma_table) {
 57		kfree(s390_domain);
 58		return NULL;
 59	}
 60	s390_domain->domain.geometry.force_aperture = true;
 61	s390_domain->domain.geometry.aperture_start = 0;
 62	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
 63
 64	spin_lock_init(&s390_domain->list_lock);
 65	INIT_LIST_HEAD_RCU(&s390_domain->devices);
 66
 67	return &s390_domain->domain;
 68}
 69
 70static void s390_iommu_rcu_free_domain(struct rcu_head *head)
 71{
 72	struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);
 73
 74	dma_cleanup_tables(s390_domain->dma_table);
 75	kfree(s390_domain);
 76}
 77
 78static void s390_domain_free(struct iommu_domain *domain)
 79{
 80	struct s390_domain *s390_domain = to_s390_domain(domain);
 81
 82	rcu_read_lock();
 83	WARN_ON(!list_empty(&s390_domain->devices));
 84	rcu_read_unlock();
 85
 86	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
 87}
 88
 89static void __s390_iommu_detach_device(struct zpci_dev *zdev)
 
 90{
 91	struct s390_domain *s390_domain = zdev->s390_domain;
 
 92	unsigned long flags;
 93
 94	if (!s390_domain)
 95		return;
 96
 97	spin_lock_irqsave(&s390_domain->list_lock, flags);
 98	list_del_rcu(&zdev->iommu_list);
 99	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
100
101	zpci_unregister_ioat(zdev, 0);
102	zdev->s390_domain = NULL;
103	zdev->dma_table = NULL;
104}
105
106static int s390_iommu_attach_device(struct iommu_domain *domain,
107				    struct device *dev)
108{
109	struct s390_domain *s390_domain = to_s390_domain(domain);
110	struct zpci_dev *zdev = to_zpci_dev(dev);
111	unsigned long flags;
112	u8 status;
113	int cc;
114
115	if (!zdev)
116		return -ENODEV;
117
118	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
119		domain->geometry.aperture_end < zdev->start_dma))
120		return -EINVAL;
121
122	if (zdev->s390_domain)
123		__s390_iommu_detach_device(zdev);
124	else if (zdev->dma_table)
125		zpci_dma_exit_device(zdev);
126
127	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
128				virt_to_phys(s390_domain->dma_table), &status);
129	/*
130	 * If the device is undergoing error recovery the reset code
131	 * will re-establish the new domain.
132	 */
133	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
134		return -EIO;
135	zdev->dma_table = s390_domain->dma_table;
136
137	zdev->dma_table = s390_domain->dma_table;
138	zdev->s390_domain = s390_domain;
139
140	spin_lock_irqsave(&s390_domain->list_lock, flags);
141	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
142	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
143
144	return 0;
145}
146
147static void s390_iommu_detach_device(struct iommu_domain *domain,
148				     struct device *dev)
149{
150	struct zpci_dev *zdev = to_zpci_dev(dev);
151
152	WARN_ON(zdev->s390_domain != to_s390_domain(domain));
153
154	__s390_iommu_detach_device(zdev);
155	zpci_dma_init_device(zdev);
156}
157
158static void s390_iommu_get_resv_regions(struct device *dev,
159					struct list_head *list)
160{
161	struct zpci_dev *zdev = to_zpci_dev(dev);
162	struct iommu_resv_region *region;
163
164	if (zdev->start_dma) {
165		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
166						 IOMMU_RESV_RESERVED, GFP_KERNEL);
167		if (!region)
168			return;
169		list_add_tail(&region->list, list);
170	}
171
172	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
173		region = iommu_alloc_resv_region(zdev->end_dma + 1,
174						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
175						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
176		if (!region)
177			return;
178		list_add_tail(&region->list, list);
179	}
180}
181
182static struct iommu_device *s390_iommu_probe_device(struct device *dev)
183{
184	struct zpci_dev *zdev;
185
186	if (!dev_is_pci(dev))
187		return ERR_PTR(-ENODEV);
188
189	zdev = to_zpci_dev(dev);
190
191	if (zdev->start_dma > zdev->end_dma ||
192	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
193		return ERR_PTR(-EINVAL);
194
195	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
196		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
197
 
 
 
198	return &zdev->iommu_dev;
199}
200
/* Release callback: detaches the device from its domain, if it has one. */
static void s390_iommu_release_device(struct device *dev)
{
	struct zpci_dev *zdev = to_zpci_dev(dev);

	if (!zdev)
		return;

	/*
	 * release_device is expected to detach any domain currently attached
	 * to the device, but keep it attached to other devices in the group.
	 */
	__s390_iommu_detach_device(zdev);
}
212
213static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
214{
215	struct s390_domain *s390_domain = to_s390_domain(domain);
216	struct zpci_dev *zdev;
217
218	rcu_read_lock();
219	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
220		zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
221				   zdev->end_dma - zdev->start_dma + 1);
222	}
223	rcu_read_unlock();
224}
225
226static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
227				  struct iommu_iotlb_gather *gather)
228{
229	struct s390_domain *s390_domain = to_s390_domain(domain);
230	size_t size = gather->end - gather->start + 1;
231	struct zpci_dev *zdev;
232
233	/* If gather was never added to there is nothing to flush */
234	if (!gather->end)
235		return;
236
237	rcu_read_lock();
238	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
 
239		zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
240				   size);
241	}
242	rcu_read_unlock();
243}
244
245static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
246				      unsigned long iova, size_t size)
247{
248	struct s390_domain *s390_domain = to_s390_domain(domain);
249	struct zpci_dev *zdev;
 
250
251	rcu_read_lock();
252	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
253		if (!zdev->tlb_refresh)
254			continue;
255		zpci_refresh_trans((u64)zdev->fh << 32,
256				   iova, size);
 
 
 
 
 
 
 
 
 
 
257	}
258	rcu_read_unlock();
 
 
259}
260
261static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
262				     phys_addr_t pa, dma_addr_t dma_addr,
263				     unsigned long nr_pages, int flags)
 
264{
265	phys_addr_t page_addr = pa & PAGE_MASK;
266	unsigned long *entry;
267	unsigned long i;
268	int rc;
269
270	for (i = 0; i < nr_pages; i++) {
271		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
 
272		if (unlikely(!entry)) {
273			rc = -ENOMEM;
274			goto undo_cpu_trans;
275		}
276		dma_update_cpu_trans(entry, page_addr, flags);
277		page_addr += PAGE_SIZE;
278		dma_addr += PAGE_SIZE;
279	}
280
281	return 0;
282
283undo_cpu_trans:
284	while (i-- > 0) {
285		dma_addr -= PAGE_SIZE;
286		entry = dma_walk_cpu_trans(s390_domain->dma_table,
287					   dma_addr);
288		if (!entry)
289			break;
290		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
291	}
292
293	return rc;
294}
295
296static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
297				       dma_addr_t dma_addr, unsigned long nr_pages)
298{
299	unsigned long *entry;
300	unsigned long i;
301	int rc = 0;
302
303	for (i = 0; i < nr_pages; i++) {
304		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
 
305		if (unlikely(!entry)) {
306			rc = -EINVAL;
307			break;
308		}
309		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
310		dma_addr += PAGE_SIZE;
311	}
312
313	return rc;
314}
315
316static int s390_iommu_map_pages(struct iommu_domain *domain,
317				unsigned long iova, phys_addr_t paddr,
318				size_t pgsize, size_t pgcount,
319				int prot, gfp_t gfp, size_t *mapped)
320{
321	struct s390_domain *s390_domain = to_s390_domain(domain);
322	size_t size = pgcount << __ffs(pgsize);
323	int flags = ZPCI_PTE_VALID, rc = 0;
324
325	if (pgsize != SZ_4K)
326		return -EINVAL;
327
328	if (iova < s390_domain->domain.geometry.aperture_start ||
329	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
330		return -EINVAL;
331
332	if (!IS_ALIGNED(iova | paddr, pgsize))
333		return -EINVAL;
334
335	if (!(prot & IOMMU_READ))
336		return -EINVAL;
337
338	if (!(prot & IOMMU_WRITE))
339		flags |= ZPCI_TABLE_PROTECTED;
340
341	rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
342				       pgcount, flags);
343	if (!rc)
344		*mapped = size;
 
 
345
346	return rc;
347}
348
349static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
350					   dma_addr_t iova)
351{
352	struct s390_domain *s390_domain = to_s390_domain(domain);
353	unsigned long *rto, *sto, *pto;
354	unsigned long ste, pte, rte;
355	unsigned int rtx, sx, px;
356	phys_addr_t phys = 0;
357
358	if (iova < domain->geometry.aperture_start ||
359	    iova > domain->geometry.aperture_end)
360		return 0;
361
362	rtx = calc_rtx(iova);
363	sx = calc_sx(iova);
364	px = calc_px(iova);
365	rto = s390_domain->dma_table;
366
367	rte = READ_ONCE(rto[rtx]);
368	if (reg_entry_isvalid(rte)) {
369		sto = get_rt_sto(rte);
370		ste = READ_ONCE(sto[sx]);
371		if (reg_entry_isvalid(ste)) {
372			pto = get_st_pto(ste);
373			pte = READ_ONCE(pto[px]);
374			if (pt_entry_isvalid(pte))
375				phys = pte & ZPCI_PTE_ADDR_MASK;
376		}
377	}
378
379	return phys;
380}
381
382static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
383				     unsigned long iova,
384				     size_t pgsize, size_t pgcount,
385				     struct iommu_iotlb_gather *gather)
386{
387	struct s390_domain *s390_domain = to_s390_domain(domain);
388	size_t size = pgcount << __ffs(pgsize);
389	int rc;
390
391	if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
392	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
393		return 0;
394
395	rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
396	if (rc)
397		return 0;
398
399	iommu_iotlb_gather_add_range(gather, iova, size);
 
400
401	return size;
402}
403
 
 
 
 
 
 
 
 
 
 
 
 
404int zpci_init_iommu(struct zpci_dev *zdev)
405{
 
406	int rc = 0;
407
408	rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
409				    "s390-iommu.%08x", zdev->fid);
410	if (rc)
411		goto out_err;
412
413	rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
414	if (rc)
415		goto out_sysfs;
416
 
 
 
 
 
 
417	return 0;
418
419out_sysfs:
420	iommu_device_sysfs_remove(&zdev->iommu_dev);
421
422out_err:
423	return rc;
424}
425
426void zpci_destroy_iommu(struct zpci_dev *zdev)
427{
428	iommu_device_unregister(&zdev->iommu_dev);
429	iommu_device_sysfs_remove(&zdev->iommu_dev);
430}
431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432static const struct iommu_ops s390_iommu_ops = {
433	.capable = s390_iommu_capable,
434	.domain_alloc = s390_domain_alloc,
435	.probe_device = s390_iommu_probe_device,
 
436	.release_device = s390_iommu_release_device,
437	.device_group = generic_device_group,
438	.pgsize_bitmap = SZ_4K,
439	.get_resv_regions = s390_iommu_get_resv_regions,
440	.default_domain_ops = &(const struct iommu_domain_ops) {
441		.attach_dev	= s390_iommu_attach_device,
442		.detach_dev	= s390_iommu_detach_device,
443		.map_pages	= s390_iommu_map_pages,
444		.unmap_pages	= s390_iommu_unmap_pages,
445		.flush_iotlb_all = s390_iommu_flush_iotlb_all,
446		.iotlb_sync      = s390_iommu_iotlb_sync,
447		.iotlb_sync_map  = s390_iommu_iotlb_sync_map,
448		.iova_to_phys	= s390_iommu_iova_to_phys,
449		.free		= s390_domain_free,
450	}
451};
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * IOMMU API for s390 PCI devices
  4 *
  5 * Copyright IBM Corp. 2015
  6 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  7 */
  8
  9#include <linux/pci.h>
 10#include <linux/iommu.h>
 11#include <linux/iommu-helper.h>
 12#include <linux/sizes.h>
 13#include <linux/rculist.h>
 14#include <linux/rcupdate.h>
 15#include <asm/pci_dma.h>
 16
 17#include "dma-iommu.h"
 18
 19static const struct iommu_ops s390_iommu_ops;
 20
 21static struct kmem_cache *dma_region_table_cache;
 22static struct kmem_cache *dma_page_table_cache;
 23
 24static u64 s390_iommu_aperture;
 25static u32 s390_iommu_aperture_factor = 1;
 26
 27struct s390_domain {
 28	struct iommu_domain	domain;
 29	struct list_head	devices;
 30	struct zpci_iommu_ctrs	ctrs;
 31	unsigned long		*dma_table;
 32	spinlock_t		list_lock;
 33	struct rcu_head		rcu;
 34};
 35
 36static inline unsigned int calc_rtx(dma_addr_t ptr)
 37{
 38	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
 39}
 40
 41static inline unsigned int calc_sx(dma_addr_t ptr)
 42{
 43	return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
 44}
 45
 46static inline unsigned int calc_px(dma_addr_t ptr)
 47{
 48	return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
 49}
 50
 51static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
 52{
 53	*entry &= ZPCI_PTE_FLAG_MASK;
 54	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
 55}
 56
 57static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
 58{
 59	*entry &= ZPCI_RTE_FLAG_MASK;
 60	*entry |= (sto & ZPCI_RTE_ADDR_MASK);
 61	*entry |= ZPCI_TABLE_TYPE_RTX;
 62}
 63
 64static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
 65{
 66	*entry &= ZPCI_STE_FLAG_MASK;
 67	*entry |= (pto & ZPCI_STE_ADDR_MASK);
 68	*entry |= ZPCI_TABLE_TYPE_SX;
 69}
 70
 71static inline void validate_rt_entry(unsigned long *entry)
 72{
 73	*entry &= ~ZPCI_TABLE_VALID_MASK;
 74	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
 75	*entry |= ZPCI_TABLE_VALID;
 76	*entry |= ZPCI_TABLE_LEN_RTX;
 77}
 78
 79static inline void validate_st_entry(unsigned long *entry)
 80{
 81	*entry &= ~ZPCI_TABLE_VALID_MASK;
 82	*entry |= ZPCI_TABLE_VALID;
 83}
 84
 85static inline void invalidate_pt_entry(unsigned long *entry)
 86{
 87	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
 88	*entry &= ~ZPCI_PTE_VALID_MASK;
 89	*entry |= ZPCI_PTE_INVALID;
 90}
 91
 92static inline void validate_pt_entry(unsigned long *entry)
 93{
 94	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
 95	*entry &= ~ZPCI_PTE_VALID_MASK;
 96	*entry |= ZPCI_PTE_VALID;
 97}
 98
 99static inline void entry_set_protected(unsigned long *entry)
100{
101	*entry &= ~ZPCI_TABLE_PROT_MASK;
102	*entry |= ZPCI_TABLE_PROTECTED;
103}
104
105static inline void entry_clr_protected(unsigned long *entry)
106{
107	*entry &= ~ZPCI_TABLE_PROT_MASK;
108	*entry |= ZPCI_TABLE_UNPROTECTED;
109}
110
111static inline int reg_entry_isvalid(unsigned long entry)
112{
113	return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
114}
115
116static inline int pt_entry_isvalid(unsigned long entry)
117{
118	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
119}
120
121static inline unsigned long *get_rt_sto(unsigned long entry)
122{
123	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
124		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
125	else
126		return NULL;
127}
128
129static inline unsigned long *get_st_pto(unsigned long entry)
130{
131	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
132		return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
133	else
134		return NULL;
135}
136
137static int __init dma_alloc_cpu_table_caches(void)
138{
139	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
140						   ZPCI_TABLE_SIZE,
141						   ZPCI_TABLE_ALIGN,
142						   0, NULL);
143	if (!dma_region_table_cache)
144		return -ENOMEM;
145
146	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
147						 ZPCI_PT_SIZE,
148						 ZPCI_PT_ALIGN,
149						 0, NULL);
150	if (!dma_page_table_cache) {
151		kmem_cache_destroy(dma_region_table_cache);
152		return -ENOMEM;
153	}
154	return 0;
155}
156
157static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
158{
159	unsigned long *table, *entry;
160
161	table = kmem_cache_alloc(dma_region_table_cache, gfp);
162	if (!table)
163		return NULL;
164
165	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
166		*entry = ZPCI_TABLE_INVALID;
167	return table;
168}
169
170static void dma_free_cpu_table(void *table)
171{
172	kmem_cache_free(dma_region_table_cache, table);
173}
174
175static void dma_free_page_table(void *table)
176{
177	kmem_cache_free(dma_page_table_cache, table);
178}
179
180static void dma_free_seg_table(unsigned long entry)
181{
182	unsigned long *sto = get_rt_sto(entry);
183	int sx;
184
185	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
186		if (reg_entry_isvalid(sto[sx]))
187			dma_free_page_table(get_st_pto(sto[sx]));
188
189	dma_free_cpu_table(sto);
190}
191
192static void dma_cleanup_tables(unsigned long *table)
193{
194	int rtx;
195
196	if (!table)
197		return;
198
199	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
200		if (reg_entry_isvalid(table[rtx]))
201			dma_free_seg_table(table[rtx]);
202
203	dma_free_cpu_table(table);
204}
205
206static unsigned long *dma_alloc_page_table(gfp_t gfp)
207{
208	unsigned long *table, *entry;
209
210	table = kmem_cache_alloc(dma_page_table_cache, gfp);
211	if (!table)
212		return NULL;
213
214	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
215		*entry = ZPCI_PTE_INVALID;
216	return table;
217}
218
219static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
220{
221	unsigned long old_rte, rte;
222	unsigned long *sto;
223
224	rte = READ_ONCE(*rtep);
225	if (reg_entry_isvalid(rte)) {
226		sto = get_rt_sto(rte);
227	} else {
228		sto = dma_alloc_cpu_table(gfp);
229		if (!sto)
230			return NULL;
231
232		set_rt_sto(&rte, virt_to_phys(sto));
233		validate_rt_entry(&rte);
234		entry_clr_protected(&rte);
235
236		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
237		if (old_rte != ZPCI_TABLE_INVALID) {
238			/* Somone else was faster, use theirs */
239			dma_free_cpu_table(sto);
240			sto = get_rt_sto(old_rte);
241		}
242	}
243	return sto;
244}
245
246static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
247{
248	unsigned long old_ste, ste;
249	unsigned long *pto;
250
251	ste = READ_ONCE(*step);
252	if (reg_entry_isvalid(ste)) {
253		pto = get_st_pto(ste);
254	} else {
255		pto = dma_alloc_page_table(gfp);
256		if (!pto)
257			return NULL;
258		set_st_pto(&ste, virt_to_phys(pto));
259		validate_st_entry(&ste);
260		entry_clr_protected(&ste);
261
262		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
263		if (old_ste != ZPCI_TABLE_INVALID) {
264			/* Somone else was faster, use theirs */
265			dma_free_page_table(pto);
266			pto = get_st_pto(old_ste);
267		}
268	}
269	return pto;
270}
271
272static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
273{
274	unsigned long *sto, *pto;
275	unsigned int rtx, sx, px;
276
277	rtx = calc_rtx(dma_addr);
278	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
279	if (!sto)
280		return NULL;
281
282	sx = calc_sx(dma_addr);
283	pto = dma_get_page_table_origin(&sto[sx], gfp);
284	if (!pto)
285		return NULL;
286
287	px = calc_px(dma_addr);
288	return &pto[px];
289}
290
291static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
292{
293	unsigned long pte;
294
295	pte = READ_ONCE(*ptep);
296	if (flags & ZPCI_PTE_INVALID) {
297		invalidate_pt_entry(&pte);
298	} else {
299		set_pt_pfaa(&pte, page_addr);
300		validate_pt_entry(&pte);
301	}
302
303	if (flags & ZPCI_TABLE_PROTECTED)
304		entry_set_protected(&pte);
305	else
306		entry_clr_protected(&pte);
307
308	xchg(ptep, pte);
309}
310
311static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
312{
313	return container_of(dom, struct s390_domain, domain);
314}
315
316static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
317{
318	struct zpci_dev *zdev = to_zpci_dev(dev);
319
320	switch (cap) {
321	case IOMMU_CAP_CACHE_COHERENCY:
322		return true;
323	case IOMMU_CAP_DEFERRED_FLUSH:
324		return zdev->pft != PCI_FUNC_TYPE_ISM;
325	default:
326		return false;
327	}
328}
329
330static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
331{
332	struct s390_domain *s390_domain;
333
 
 
 
334	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
335	if (!s390_domain)
336		return NULL;
337
338	s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
339	if (!s390_domain->dma_table) {
340		kfree(s390_domain);
341		return NULL;
342	}
343	s390_domain->domain.geometry.force_aperture = true;
344	s390_domain->domain.geometry.aperture_start = 0;
345	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
346
347	spin_lock_init(&s390_domain->list_lock);
348	INIT_LIST_HEAD_RCU(&s390_domain->devices);
349
350	return &s390_domain->domain;
351}
352
353static void s390_iommu_rcu_free_domain(struct rcu_head *head)
354{
355	struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);
356
357	dma_cleanup_tables(s390_domain->dma_table);
358	kfree(s390_domain);
359}
360
361static void s390_domain_free(struct iommu_domain *domain)
362{
363	struct s390_domain *s390_domain = to_s390_domain(domain);
364
365	rcu_read_lock();
366	WARN_ON(!list_empty(&s390_domain->devices));
367	rcu_read_unlock();
368
369	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
370}
371
372static void s390_iommu_detach_device(struct iommu_domain *domain,
373				     struct device *dev)
374{
375	struct s390_domain *s390_domain = to_s390_domain(domain);
376	struct zpci_dev *zdev = to_zpci_dev(dev);
377	unsigned long flags;
378
 
 
 
379	spin_lock_irqsave(&s390_domain->list_lock, flags);
380	list_del_rcu(&zdev->iommu_list);
381	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
382
383	zpci_unregister_ioat(zdev, 0);
384	zdev->s390_domain = NULL;
385	zdev->dma_table = NULL;
386}
387
388static int s390_iommu_attach_device(struct iommu_domain *domain,
389				    struct device *dev)
390{
391	struct s390_domain *s390_domain = to_s390_domain(domain);
392	struct zpci_dev *zdev = to_zpci_dev(dev);
393	unsigned long flags;
394	u8 status;
395	int cc;
396
397	if (!zdev)
398		return -ENODEV;
399
400	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
401		domain->geometry.aperture_end < zdev->start_dma))
402		return -EINVAL;
403
404	if (zdev->s390_domain)
405		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
 
 
406
407	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
408				virt_to_phys(s390_domain->dma_table), &status);
409	/*
410	 * If the device is undergoing error recovery the reset code
411	 * will re-establish the new domain.
412	 */
413	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
414		return -EIO;
 
415
416	zdev->dma_table = s390_domain->dma_table;
417	zdev->s390_domain = s390_domain;
418
419	spin_lock_irqsave(&s390_domain->list_lock, flags);
420	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
421	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
422
423	return 0;
424}
425
 
 
 
 
 
 
 
 
 
 
 
426static void s390_iommu_get_resv_regions(struct device *dev,
427					struct list_head *list)
428{
429	struct zpci_dev *zdev = to_zpci_dev(dev);
430	struct iommu_resv_region *region;
431
432	if (zdev->start_dma) {
433		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
434						 IOMMU_RESV_RESERVED, GFP_KERNEL);
435		if (!region)
436			return;
437		list_add_tail(&region->list, list);
438	}
439
440	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
441		region = iommu_alloc_resv_region(zdev->end_dma + 1,
442						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
443						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
444		if (!region)
445			return;
446		list_add_tail(&region->list, list);
447	}
448}
449
450static struct iommu_device *s390_iommu_probe_device(struct device *dev)
451{
452	struct zpci_dev *zdev;
453
454	if (!dev_is_pci(dev))
455		return ERR_PTR(-ENODEV);
456
457	zdev = to_zpci_dev(dev);
458
459	if (zdev->start_dma > zdev->end_dma ||
460	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
461		return ERR_PTR(-EINVAL);
462
463	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
464		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
465
466	if (zdev->tlb_refresh)
467		dev->iommu->shadow_on_flush = 1;
468
469	return &zdev->iommu_dev;
470}
471
472static void s390_iommu_release_device(struct device *dev)
473{
474	struct zpci_dev *zdev = to_zpci_dev(dev);
475
476	/*
477	 * release_device is expected to detach any domain currently attached
478	 * to the device, but keep it attached to other devices in the group.
479	 */
480	if (zdev)
481		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
482}
483
484static int zpci_refresh_all(struct zpci_dev *zdev)
485{
486	return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
487				  zdev->end_dma - zdev->start_dma + 1);
488}
489
490static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
491{
492	struct s390_domain *s390_domain = to_s390_domain(domain);
493	struct zpci_dev *zdev;
494
495	rcu_read_lock();
496	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
497		atomic64_inc(&s390_domain->ctrs.global_rpcits);
498		zpci_refresh_all(zdev);
499	}
500	rcu_read_unlock();
501}
502
503static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
504				  struct iommu_iotlb_gather *gather)
505{
506	struct s390_domain *s390_domain = to_s390_domain(domain);
507	size_t size = gather->end - gather->start + 1;
508	struct zpci_dev *zdev;
509
510	/* If gather was never added to there is nothing to flush */
511	if (!gather->end)
512		return;
513
514	rcu_read_lock();
515	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
516		atomic64_inc(&s390_domain->ctrs.sync_rpcits);
517		zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
518				   size);
519	}
520	rcu_read_unlock();
521}
522
523static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
524				     unsigned long iova, size_t size)
525{
526	struct s390_domain *s390_domain = to_s390_domain(domain);
527	struct zpci_dev *zdev;
528	int ret = 0;
529
530	rcu_read_lock();
531	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
532		if (!zdev->tlb_refresh)
533			continue;
534		atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
535		ret = zpci_refresh_trans((u64)zdev->fh << 32,
536					 iova, size);
537		/*
538		 * let the hypervisor discover invalidated entries
539		 * allowing it to free IOVAs and unpin pages
540		 */
541		if (ret == -ENOMEM) {
542			ret = zpci_refresh_all(zdev);
543			if (ret)
544				break;
545		}
546	}
547	rcu_read_unlock();
548
549	return ret;
550}
551
552static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
553				     phys_addr_t pa, dma_addr_t dma_addr,
554				     unsigned long nr_pages, int flags,
555				     gfp_t gfp)
556{
557	phys_addr_t page_addr = pa & PAGE_MASK;
558	unsigned long *entry;
559	unsigned long i;
560	int rc;
561
562	for (i = 0; i < nr_pages; i++) {
563		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
564					   gfp);
565		if (unlikely(!entry)) {
566			rc = -ENOMEM;
567			goto undo_cpu_trans;
568		}
569		dma_update_cpu_trans(entry, page_addr, flags);
570		page_addr += PAGE_SIZE;
571		dma_addr += PAGE_SIZE;
572	}
573
574	return 0;
575
576undo_cpu_trans:
577	while (i-- > 0) {
578		dma_addr -= PAGE_SIZE;
579		entry = dma_walk_cpu_trans(s390_domain->dma_table,
580					   dma_addr, gfp);
581		if (!entry)
582			break;
583		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
584	}
585
586	return rc;
587}
588
589static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
590				       dma_addr_t dma_addr, unsigned long nr_pages)
591{
592	unsigned long *entry;
593	unsigned long i;
594	int rc = 0;
595
596	for (i = 0; i < nr_pages; i++) {
597		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
598					   GFP_ATOMIC);
599		if (unlikely(!entry)) {
600			rc = -EINVAL;
601			break;
602		}
603		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
604		dma_addr += PAGE_SIZE;
605	}
606
607	return rc;
608}
609
610static int s390_iommu_map_pages(struct iommu_domain *domain,
611				unsigned long iova, phys_addr_t paddr,
612				size_t pgsize, size_t pgcount,
613				int prot, gfp_t gfp, size_t *mapped)
614{
615	struct s390_domain *s390_domain = to_s390_domain(domain);
616	size_t size = pgcount << __ffs(pgsize);
617	int flags = ZPCI_PTE_VALID, rc = 0;
618
619	if (pgsize != SZ_4K)
620		return -EINVAL;
621
622	if (iova < s390_domain->domain.geometry.aperture_start ||
623	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
624		return -EINVAL;
625
626	if (!IS_ALIGNED(iova | paddr, pgsize))
627		return -EINVAL;
628
 
 
 
629	if (!(prot & IOMMU_WRITE))
630		flags |= ZPCI_TABLE_PROTECTED;
631
632	rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
633				     pgcount, flags, gfp);
634	if (!rc) {
635		*mapped = size;
636		atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
637	}
638
639	return rc;
640}
641
642static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
643					   dma_addr_t iova)
644{
645	struct s390_domain *s390_domain = to_s390_domain(domain);
646	unsigned long *rto, *sto, *pto;
647	unsigned long ste, pte, rte;
648	unsigned int rtx, sx, px;
649	phys_addr_t phys = 0;
650
651	if (iova < domain->geometry.aperture_start ||
652	    iova > domain->geometry.aperture_end)
653		return 0;
654
655	rtx = calc_rtx(iova);
656	sx = calc_sx(iova);
657	px = calc_px(iova);
658	rto = s390_domain->dma_table;
659
660	rte = READ_ONCE(rto[rtx]);
661	if (reg_entry_isvalid(rte)) {
662		sto = get_rt_sto(rte);
663		ste = READ_ONCE(sto[sx]);
664		if (reg_entry_isvalid(ste)) {
665			pto = get_st_pto(ste);
666			pte = READ_ONCE(pto[px]);
667			if (pt_entry_isvalid(pte))
668				phys = pte & ZPCI_PTE_ADDR_MASK;
669		}
670	}
671
672	return phys;
673}
674
675static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
676				     unsigned long iova,
677				     size_t pgsize, size_t pgcount,
678				     struct iommu_iotlb_gather *gather)
679{
680	struct s390_domain *s390_domain = to_s390_domain(domain);
681	size_t size = pgcount << __ffs(pgsize);
682	int rc;
683
684	if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
685	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
686		return 0;
687
688	rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
689	if (rc)
690		return 0;
691
692	iommu_iotlb_gather_add_range(gather, iova, size);
693	atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);
694
695	return size;
696}
697
698static void s390_iommu_probe_finalize(struct device *dev)
699{
700	iommu_setup_dma_ops(dev, 0, U64_MAX);
701}
702
703struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
704{
705	if (!zdev || !zdev->s390_domain)
706		return NULL;
707	return &zdev->s390_domain->ctrs;
708}
709
710int zpci_init_iommu(struct zpci_dev *zdev)
711{
712	u64 aperture_size;
713	int rc = 0;
714
715	rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
716				    "s390-iommu.%08x", zdev->fid);
717	if (rc)
718		goto out_err;
719
720	rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
721	if (rc)
722		goto out_sysfs;
723
724	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
725	aperture_size = min3(s390_iommu_aperture,
726			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
727			     zdev->end_dma - zdev->start_dma + 1);
728	zdev->end_dma = zdev->start_dma + aperture_size - 1;
729
730	return 0;
731
732out_sysfs:
733	iommu_device_sysfs_remove(&zdev->iommu_dev);
734
735out_err:
736	return rc;
737}
738
739void zpci_destroy_iommu(struct zpci_dev *zdev)
740{
741	iommu_device_unregister(&zdev->iommu_dev);
742	iommu_device_sysfs_remove(&zdev->iommu_dev);
743}
744
745static int __init s390_iommu_setup(char *str)
746{
747	if (!strcmp(str, "strict")) {
748		pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
749		iommu_set_dma_strict();
750	}
751	return 1;
752}
753
754__setup("s390_iommu=", s390_iommu_setup);
755
756static int __init s390_iommu_aperture_setup(char *str)
757{
758	if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
759		s390_iommu_aperture_factor = 1;
760	return 1;
761}
762
763__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
764
765static int __init s390_iommu_init(void)
766{
767	int rc;
768
769	iommu_dma_forcedac = true;
770	s390_iommu_aperture = (u64)virt_to_phys(high_memory);
771	if (!s390_iommu_aperture_factor)
772		s390_iommu_aperture = ULONG_MAX;
773	else
774		s390_iommu_aperture *= s390_iommu_aperture_factor;
775
776	rc = dma_alloc_cpu_table_caches();
777	if (rc)
778		return rc;
779
780	return rc;
781}
782subsys_initcall(s390_iommu_init);
783
784static const struct iommu_ops s390_iommu_ops = {
785	.capable = s390_iommu_capable,
786	.domain_alloc_paging = s390_domain_alloc_paging,
787	.probe_device = s390_iommu_probe_device,
788	.probe_finalize = s390_iommu_probe_finalize,
789	.release_device = s390_iommu_release_device,
790	.device_group = generic_device_group,
791	.pgsize_bitmap = SZ_4K,
792	.get_resv_regions = s390_iommu_get_resv_regions,
793	.default_domain_ops = &(const struct iommu_domain_ops) {
794		.attach_dev	= s390_iommu_attach_device,
 
795		.map_pages	= s390_iommu_map_pages,
796		.unmap_pages	= s390_iommu_unmap_pages,
797		.flush_iotlb_all = s390_iommu_flush_iotlb_all,
798		.iotlb_sync      = s390_iommu_iotlb_sync,
799		.iotlb_sync_map  = s390_iommu_iotlb_sync_map,
800		.iova_to_phys	= s390_iommu_iova_to_phys,
801		.free		= s390_domain_free,
802	}
803};