Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0+
  2// Copyright 2017 IBM Corp.
  3#include <linux/sched/mm.h>
  4#include <linux/mutex.h>
  5#include <linux/mmu_context.h>
  6#include <asm/copro.h>
  7#include <asm/pnv-ocxl.h>
  8#include <misc/ocxl.h>
  9#include "ocxl_internal.h"
 10#include "trace.h"
 11
 12
 13#define SPA_PASID_BITS		15
 14#define SPA_PASID_MAX		((1 << SPA_PASID_BITS) - 1)
 15#define SPA_PE_MASK		SPA_PASID_MAX
 16#define SPA_SPA_SIZE_LOG	22 /* Each SPA is 4 Mb */
 17
 18#define SPA_CFG_SF		(1ull << (63-0))
 19#define SPA_CFG_TA		(1ull << (63-1))
 20#define SPA_CFG_HV		(1ull << (63-3))
 21#define SPA_CFG_UV		(1ull << (63-4))
 22#define SPA_CFG_XLAT_hpt	(0ull << (63-6)) /* Hashed page table (HPT) mode */
 23#define SPA_CFG_XLAT_roh	(2ull << (63-6)) /* Radix on HPT mode */
 24#define SPA_CFG_XLAT_ror	(3ull << (63-6)) /* Radix on Radix mode */
 25#define SPA_CFG_PR		(1ull << (63-49))
 26#define SPA_CFG_TC		(1ull << (63-54))
 27#define SPA_CFG_DR		(1ull << (63-59))
 28
 29#define SPA_XSL_TF		(1ull << (63-3))  /* Translation fault */
 30#define SPA_XSL_S		(1ull << (63-38)) /* Store operation */
 31
 32#define SPA_PE_VALID		0x80000000
 33
 34
 35struct pe_data {
 36	struct mm_struct *mm;
 37	/* callback to trigger when a translation fault occurs */
 38	void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
 39	/* opaque pointer to be passed to the above callback */
 40	void *xsl_err_data;
 41	struct rcu_head rcu;
 42};
 43
 44struct spa {
 45	struct ocxl_process_element *spa_mem;
 46	int spa_order;
 47	struct mutex spa_lock;
 48	struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
 49	char *irq_name;
 50	int virq;
 51	void __iomem *reg_dsisr;
 52	void __iomem *reg_dar;
 53	void __iomem *reg_tfc;
 54	void __iomem *reg_pe_handle;
 55	/*
 56	 * The following field are used by the memory fault
 57	 * interrupt handler. We can only have one interrupt at a
 58	 * time. The NPU won't raise another interrupt until the
 59	 * previous one has been ack'd by writing to the TFC register
 60	 */
 61	struct xsl_fault {
 62		struct work_struct fault_work;
 63		u64 pe;
 64		u64 dsisr;
 65		u64 dar;
 66		struct pe_data pe_data;
 67	} xsl_fault;
 68};
 69
 70/*
 71 * A opencapi link can be used be by several PCI functions. We have
 72 * one link per device slot.
 73 *
 74 * A linked list of opencapi links should suffice, as there's a
 75 * limited number of opencapi slots on a system and lookup is only
 76 * done when the device is probed
 77 */
 78struct link {
 79	struct list_head list;
 80	struct kref ref;
 81	int domain;
 82	int bus;
 83	int dev;
 84	atomic_t irq_available;
 85	struct spa *spa;
 86	void *platform_data;
 87};
 88static struct list_head links_list = LIST_HEAD_INIT(links_list);
 89static DEFINE_MUTEX(links_list_lock);
 90
 91enum xsl_response {
 92	CONTINUE,
 93	ADDRESS_ERROR,
 94	RESTART,
 95};
 96
 97
 98static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
 99{
100	u64 reg;
101
102	*dsisr = in_be64(spa->reg_dsisr);
103	*dar = in_be64(spa->reg_dar);
104	reg = in_be64(spa->reg_pe_handle);
105	*pe = reg & SPA_PE_MASK;
106}
107
108static void ack_irq(struct spa *spa, enum xsl_response r)
109{
110	u64 reg = 0;
111
112	/* continue is not supported */
113	if (r == RESTART)
114		reg = PPC_BIT(31);
115	else if (r == ADDRESS_ERROR)
116		reg = PPC_BIT(30);
117	else
118		WARN(1, "Invalid irq response %d\n", r);
119
120	if (reg) {
121		trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe,
122				spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg);
123		out_be64(spa->reg_tfc, reg);
124	}
125}
126
127static void xsl_fault_handler_bh(struct work_struct *fault_work)
128{
129	unsigned int flt = 0;
130	unsigned long access, flags, inv_flags = 0;
131	enum xsl_response r;
132	struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
133					fault_work);
134	struct spa *spa = container_of(fault, struct spa, xsl_fault);
135
136	int rc;
137
138	/*
139	 * We need to release a reference on the mm whenever exiting this
140	 * function (taken in the memory fault interrupt handler)
141	 */
142	rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
143				&flt);
144	if (rc) {
145		pr_debug("copro_handle_mm_fault failed: %d\n", rc);
146		if (fault->pe_data.xsl_err_cb) {
147			fault->pe_data.xsl_err_cb(
148				fault->pe_data.xsl_err_data,
149				fault->dar, fault->dsisr);
150		}
151		r = ADDRESS_ERROR;
152		goto ack;
153	}
154
155	if (!radix_enabled()) {
156		/*
157		 * update_mmu_cache() will not have loaded the hash
158		 * since current->trap is not a 0x400 or 0x300, so
159		 * just call hash_page_mm() here.
160		 */
161		access = _PAGE_PRESENT | _PAGE_READ;
162		if (fault->dsisr & SPA_XSL_S)
163			access |= _PAGE_WRITE;
164
165		if (REGION_ID(fault->dar) != USER_REGION_ID)
166			access |= _PAGE_PRIVILEGED;
167
168		local_irq_save(flags);
169		hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
170			inv_flags);
171		local_irq_restore(flags);
172	}
173	r = RESTART;
174ack:
175	mmdrop(fault->pe_data.mm);
176	ack_irq(spa, r);
177}
178
179static irqreturn_t xsl_fault_handler(int irq, void *data)
180{
181	struct link *link = (struct link *) data;
182	struct spa *spa = link->spa;
183	u64 dsisr, dar, pe_handle;
184	struct pe_data *pe_data;
185	struct ocxl_process_element *pe;
186	int lpid, pid, tid;
187
188	read_irq(spa, &dsisr, &dar, &pe_handle);
189	trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1);
190
191	WARN_ON(pe_handle > SPA_PE_MASK);
192	pe = spa->spa_mem + pe_handle;
193	lpid = be32_to_cpu(pe->lpid);
194	pid = be32_to_cpu(pe->pid);
195	tid = be32_to_cpu(pe->tid);
196	/* We could be reading all null values here if the PE is being
197	 * removed while an interrupt kicks in. It's not supposed to
198	 * happen if the driver notified the AFU to terminate the
199	 * PASID, and the AFU waited for pending operations before
200	 * acknowledging. But even if it happens, we won't find a
201	 * memory context below and fail silently, so it should be ok.
202	 */
203	if (!(dsisr & SPA_XSL_TF)) {
204		WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
205		ack_irq(spa, ADDRESS_ERROR);
206		return IRQ_HANDLED;
207	}
208
209	rcu_read_lock();
210	pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
211	if (!pe_data) {
212		/*
213		 * Could only happen if the driver didn't notify the
214		 * AFU about PASID termination before removing the PE,
215		 * or the AFU didn't wait for all memory access to
216		 * have completed.
217		 *
218		 * Either way, we fail early, but we shouldn't log an
219		 * error message, as it is a valid (if unexpected)
220		 * scenario
221		 */
222		rcu_read_unlock();
223		pr_debug("Unknown mm context for xsl interrupt\n");
224		ack_irq(spa, ADDRESS_ERROR);
225		return IRQ_HANDLED;
226	}
227	WARN_ON(pe_data->mm->context.id != pid);
228
229	spa->xsl_fault.pe = pe_handle;
230	spa->xsl_fault.dar = dar;
231	spa->xsl_fault.dsisr = dsisr;
232	spa->xsl_fault.pe_data = *pe_data;
233	mmgrab(pe_data->mm); /* mm count is released by bottom half */
234
235	rcu_read_unlock();
236	schedule_work(&spa->xsl_fault.fault_work);
237	return IRQ_HANDLED;
238}
239
240static void unmap_irq_registers(struct spa *spa)
241{
242	pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
243				spa->reg_pe_handle);
244}
245
246static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
247{
248	return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
249				&spa->reg_tfc, &spa->reg_pe_handle);
250}
251
252static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
253{
254	struct spa *spa = link->spa;
255	int rc;
256	int hwirq;
257
258	rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
259	if (rc)
260		return rc;
261
262	rc = map_irq_registers(dev, spa);
263	if (rc)
264		return rc;
265
266	spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
267				link->domain, link->bus, link->dev);
268	if (!spa->irq_name) {
269		unmap_irq_registers(spa);
270		dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
271		return -ENOMEM;
272	}
273	/*
274	 * At some point, we'll need to look into allowing a higher
275	 * number of interrupts. Could we have an IRQ domain per link?
276	 */
277	spa->virq = irq_create_mapping(NULL, hwirq);
278	if (!spa->virq) {
279		kfree(spa->irq_name);
280		unmap_irq_registers(spa);
281		dev_err(&dev->dev,
282			"irq_create_mapping failed for translation interrupt\n");
283		return -EINVAL;
284	}
285
286	dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
287
288	rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
289			link);
290	if (rc) {
291		irq_dispose_mapping(spa->virq);
292		kfree(spa->irq_name);
293		unmap_irq_registers(spa);
294		dev_err(&dev->dev,
295			"request_irq failed for translation interrupt: %d\n",
296			rc);
297		return -EINVAL;
298	}
299	return 0;
300}
301
302static void release_xsl_irq(struct link *link)
303{
304	struct spa *spa = link->spa;
305
306	if (spa->virq) {
307		free_irq(spa->virq, link);
308		irq_dispose_mapping(spa->virq);
309	}
310	kfree(spa->irq_name);
311	unmap_irq_registers(spa);
312}
313
314static int alloc_spa(struct pci_dev *dev, struct link *link)
315{
316	struct spa *spa;
317
318	spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
319	if (!spa)
320		return -ENOMEM;
321
322	mutex_init(&spa->spa_lock);
323	INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
324	INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
325
326	spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
327	spa->spa_mem = (struct ocxl_process_element *)
328		__get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
329	if (!spa->spa_mem) {
330		dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
331		kfree(spa);
332		return -ENOMEM;
333	}
334	pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
335		link->dev, spa->spa_mem);
336
337	link->spa = spa;
338	return 0;
339}
340
341static void free_spa(struct link *link)
342{
343	struct spa *spa = link->spa;
344
345	pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
346		link->dev);
347
348	if (spa && spa->spa_mem) {
349		free_pages((unsigned long) spa->spa_mem, spa->spa_order);
350		kfree(spa);
351		link->spa = NULL;
352	}
353}
354
355static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
356{
357	struct link *link;
358	int rc;
359
360	link = kzalloc(sizeof(struct link), GFP_KERNEL);
361	if (!link)
362		return -ENOMEM;
363
364	kref_init(&link->ref);
365	link->domain = pci_domain_nr(dev->bus);
366	link->bus = dev->bus->number;
367	link->dev = PCI_SLOT(dev->devfn);
368	atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
369
370	rc = alloc_spa(dev, link);
371	if (rc)
372		goto err_free;
373
374	rc = setup_xsl_irq(dev, link);
375	if (rc)
376		goto err_spa;
377
378	/* platform specific hook */
379	rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
380				&link->platform_data);
381	if (rc)
382		goto err_xsl_irq;
383
384	*out_link = link;
385	return 0;
386
387err_xsl_irq:
388	release_xsl_irq(link);
389err_spa:
390	free_spa(link);
391err_free:
392	kfree(link);
393	return rc;
394}
395
/*
 * Tear down a link: translation fault interrupt first, then the
 * SPA, then the link structure itself.
 */
static void free_link(struct link *link)
{
	release_xsl_irq(link);
	free_spa(link);

	kfree(link);
}
402
403int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
404{
405	int rc = 0;
406	struct link *link;
407
408	mutex_lock(&links_list_lock);
409	list_for_each_entry(link, &links_list, list) {
410		/* The functions of a device all share the same link */
411		if (link->domain == pci_domain_nr(dev->bus) &&
412			link->bus == dev->bus->number &&
413			link->dev == PCI_SLOT(dev->devfn)) {
414			kref_get(&link->ref);
415			*link_handle = link;
416			goto unlock;
417		}
418	}
419	rc = alloc_link(dev, PE_mask, &link);
420	if (rc)
421		goto unlock;
422
423	list_add(&link->list, &links_list);
424	*link_handle = link;
425unlock:
426	mutex_unlock(&links_list_lock);
427	return rc;
428}
429EXPORT_SYMBOL_GPL(ocxl_link_setup);
430
431static void release_xsl(struct kref *ref)
432{
433	struct link *link = container_of(ref, struct link, ref);
434
435	list_del(&link->list);
436	/* call platform code before releasing data */
437	pnv_ocxl_spa_release(link->platform_data);
438	free_link(link);
439}
440
441void ocxl_link_release(struct pci_dev *dev, void *link_handle)
442{
443	struct link *link = (struct link *) link_handle;
444
445	mutex_lock(&links_list_lock);
446	kref_put(&link->ref, release_xsl);
447	mutex_unlock(&links_list_lock);
448}
449EXPORT_SYMBOL_GPL(ocxl_link_release);
450
451static u64 calculate_cfg_state(bool kernel)
452{
453	u64 state;
454
455	state = SPA_CFG_DR;
456	if (mfspr(SPRN_LPCR) & LPCR_TC)
457		state |= SPA_CFG_TC;
458	if (radix_enabled())
459		state |= SPA_CFG_XLAT_ror;
460	else
461		state |= SPA_CFG_XLAT_hpt;
462	state |= SPA_CFG_HV;
463	if (kernel) {
464		if (mfmsr() & MSR_SF)
465			state |= SPA_CFG_SF;
466	} else {
467		state |= SPA_CFG_PR;
468		if (!test_tsk_thread_flag(current, TIF_32BIT))
469			state |= SPA_CFG_SF;
470	}
471	return state;
472}
473
474int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
475		u64 amr, struct mm_struct *mm,
476		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
477		void *xsl_err_data)
478{
479	struct link *link = (struct link *) link_handle;
480	struct spa *spa = link->spa;
481	struct ocxl_process_element *pe;
482	int pe_handle, rc = 0;
483	struct pe_data *pe_data;
484
485	BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
486	if (pasid > SPA_PASID_MAX)
487		return -EINVAL;
488
489	mutex_lock(&spa->spa_lock);
490	pe_handle = pasid & SPA_PE_MASK;
491	pe = spa->spa_mem + pe_handle;
492
493	if (pe->software_state) {
494		rc = -EBUSY;
495		goto unlock;
496	}
497
498	pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
499	if (!pe_data) {
500		rc = -ENOMEM;
501		goto unlock;
502	}
503
504	pe_data->mm = mm;
505	pe_data->xsl_err_cb = xsl_err_cb;
506	pe_data->xsl_err_data = xsl_err_data;
507
508	memset(pe, 0, sizeof(struct ocxl_process_element));
509	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
510	pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
511	pe->pid = cpu_to_be32(pidr);
512	pe->tid = cpu_to_be32(tidr);
513	pe->amr = cpu_to_be64(amr);
514	pe->software_state = cpu_to_be32(SPA_PE_VALID);
515
516	mm_context_add_copro(mm);
517	/*
518	 * Barrier is to make sure PE is visible in the SPA before it
519	 * is used by the device. It also helps with the global TLBI
520	 * invalidation
521	 */
522	mb();
523	radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
524
525	/*
526	 * The mm must stay valid for as long as the device uses it. We
527	 * lower the count when the context is removed from the SPA.
528	 *
529	 * We grab mm_count (and not mm_users), as we don't want to
530	 * end up in a circular dependency if a process mmaps its
531	 * mmio, therefore incrementing the file ref count when
532	 * calling mmap(), and forgets to unmap before exiting. In
533	 * that scenario, when the kernel handles the death of the
534	 * process, the file is not cleaned because unmap was not
535	 * called, and the mm wouldn't be freed because we would still
536	 * have a reference on mm_users. Incrementing mm_count solves
537	 * the problem.
538	 */
539	mmgrab(mm);
540	trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
541unlock:
542	mutex_unlock(&spa->spa_lock);
543	return rc;
544}
545EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
546
547int ocxl_link_remove_pe(void *link_handle, int pasid)
548{
549	struct link *link = (struct link *) link_handle;
550	struct spa *spa = link->spa;
551	struct ocxl_process_element *pe;
552	struct pe_data *pe_data;
553	int pe_handle, rc;
554
555	if (pasid > SPA_PASID_MAX)
556		return -EINVAL;
557
558	/*
559	 * About synchronization with our memory fault handler:
560	 *
561	 * Before removing the PE, the driver is supposed to have
562	 * notified the AFU, which should have cleaned up and make
563	 * sure the PASID is no longer in use, including pending
564	 * interrupts. However, there's no way to be sure...
565	 *
566	 * We clear the PE and remove the context from our radix
567	 * tree. From that point on, any new interrupt for that
568	 * context will fail silently, which is ok. As mentioned
569	 * above, that's not expected, but it could happen if the
570	 * driver or AFU didn't do the right thing.
571	 *
572	 * There could still be a bottom half running, but we don't
573	 * need to wait/flush, as it is managing a reference count on
574	 * the mm it reads from the radix tree.
575	 */
576	pe_handle = pasid & SPA_PE_MASK;
577	pe = spa->spa_mem + pe_handle;
578
579	mutex_lock(&spa->spa_lock);
580
581	if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
582		rc = -EINVAL;
583		goto unlock;
584	}
585
586	trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid,
587				be32_to_cpu(pe->pid), be32_to_cpu(pe->tid));
588
589	memset(pe, 0, sizeof(struct ocxl_process_element));
590	/*
591	 * The barrier makes sure the PE is removed from the SPA
592	 * before we clear the NPU context cache below, so that the
593	 * old PE cannot be reloaded erroneously.
594	 */
595	mb();
596
597	/*
598	 * hook to platform code
599	 * On powerpc, the entry needs to be cleared from the context
600	 * cache of the NPU.
601	 */
602	rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
603	WARN_ON(rc);
604
605	pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
606	if (!pe_data) {
607		WARN(1, "Couldn't find pe data when removing PE\n");
608	} else {
609		mm_context_remove_copro(pe_data->mm);
610		mmdrop(pe_data->mm);
611		kfree_rcu(pe_data, rcu);
612	}
613unlock:
614	mutex_unlock(&spa->spa_lock);
615	return rc;
616}
617EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
618
619int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
620{
621	struct link *link = (struct link *) link_handle;
622	int rc, irq;
623	u64 addr;
624
625	if (atomic_dec_if_positive(&link->irq_available) < 0)
626		return -ENOSPC;
627
628	rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
629	if (rc) {
630		atomic_inc(&link->irq_available);
631		return rc;
632	}
633
634	*hw_irq = irq;
635	*trigger_addr = addr;
636	return 0;
637}
638EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
639
640void ocxl_link_free_irq(void *link_handle, int hw_irq)
641{
642	struct link *link = (struct link *) link_handle;
643
644	pnv_ocxl_free_xive_irq(hw_irq);
645	atomic_inc(&link->irq_available);
646}
647EXPORT_SYMBOL_GPL(ocxl_link_free_irq);