cxllib.c - drivers/misc/cxl/cxllib.c - Linux diff v6.8

  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * Copyright 2017 IBM Corp.
 
 
 
 
 
  4 */
  5
  6#include <linux/hugetlb.h>
  7#include <linux/sched/mm.h>
  8#include <asm/opal-api.h>
  9#include <asm/pnv-pci.h>
 10#include <misc/cxllib.h>
 11
 12#include "cxl.h"
 13
 /* Value of dummy_read_addr while no dummy read buffer has been set up. */
#define CXL_INVALID_DRA                 (~0ull)
/* Length of the dummy read buffer, in bytes. */
#define CXL_DUMMY_READ_SIZE             128
/* log2 of the dummy read buffer alignment (1 << 8 = 256 bytes). */
#define CXL_DUMMY_READ_ALIGN            8
/* Reported to callers as cxllib_xsl_config.bar_addr. */
#define CXL_CAPI_WINDOW_START           0x2000000000000ull
/* Reported to callers as cxllib_xsl_config.log_bar_size. */
#define CXL_CAPI_WINDOW_LOG_SIZE        48
/* Reported to callers as cxllib_xsl_config.version. */
#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
 20
 21
 22bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
 23{
 24	int rc;
 25	u32 phb_index;
 26	u64 chip_id, capp_unit_id;
 27
 28	/* No flags currently supported */
 29	if (flags)
 30		return false;
 31
 32	if (!cpu_has_feature(CPU_FTR_HVMODE))
 33		return false;
 34
 35	if (!cxl_is_power9())
 36		return false;
 37
 38	if (cxl_slot_is_switched(dev))
 39		return false;
 40
 41	/* on p9, some pci slots are not connected to a CAPP unit */
 42	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
 43	if (rc)
 44		return false;
 45
 46	return true;
 47}
 48EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
 49
 50static DEFINE_MUTEX(dra_mutex);
 51static u64 dummy_read_addr = CXL_INVALID_DRA;
 52
 53static int allocate_dummy_read_buf(void)
 54{
 55	u64 buf, vaddr;
 56	size_t buf_size;
 57
 58	/*
 59	 * Dummy read buffer is 128-byte long, aligned on a
 60	 * 256-byte boundary and we need the physical address.
 61	 */
 62	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
 63	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
 64	if (!buf)
 65		return -ENOMEM;
 66
 67	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
 68					(~0ull << CXL_DUMMY_READ_ALIGN);
 69
 70	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
 71		"Dummy read buffer alignment issue");
 72	dummy_read_addr = virt_to_phys((void *) vaddr);
 73	return 0;
 74}
 75
 76int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
 77{
 78	int rc;
 79	u32 phb_index;
 80	u64 chip_id, capp_unit_id;
 81
 82	if (!cpu_has_feature(CPU_FTR_HVMODE))
 83		return -EINVAL;
 84
 85	mutex_lock(&dra_mutex);
 86	if (dummy_read_addr == CXL_INVALID_DRA) {
 87		rc = allocate_dummy_read_buf();
 88		if (rc) {
 89			mutex_unlock(&dra_mutex);
 90			return rc;
 91		}
 92	}
 93	mutex_unlock(&dra_mutex);
 94
 95	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
 96	if (rc)
 97		return rc;
 98
 99	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
100	if (rc)
101		return rc;
 
 
 
 
102
103	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
104	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
105	cfg->bar_addr = CXL_CAPI_WINDOW_START;
106	cfg->dra = dummy_read_addr;
107	return 0;
108}
109EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
110
111int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
112			unsigned long flags)
113{
114	int rc = 0;
115
116	if (!cpu_has_feature(CPU_FTR_HVMODE))
117		return -EINVAL;
118
119	switch (mode) {
120	case CXL_MODE_PCI:
121		/*
122		 * We currently don't support going back to PCI mode
123		 * However, we'll turn the invalidations off, so that
124		 * the firmware doesn't have to ack them and can do
125		 * things like reset, etc.. with no worries.
126		 * So always return EPERM (can't go back to PCI) or
127		 * EBUSY if we couldn't even turn off snooping
128		 */
129		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
130		if (rc)
131			rc = -EBUSY;
132		else
133			rc = -EPERM;
134		break;
135	case CXL_MODE_CXL:
136		/* DMA only supported on TVT1 for the time being */
137		if (flags != CXL_MODE_DMA_TVT1)
138			return -EINVAL;
139		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
140		if (rc)
141			return rc;
142		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
143		break;
144	default:
145		rc = -EINVAL;
146	}
147	return rc;
148}
149EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
150
151/*
152 * When switching the PHB to capi mode, the TVT#1 entry for
153 * the Partitionable Endpoint is set in bypass mode, like
154 * in PCI mode.
155 * Configure the device dma to use TVT#1, which is done
156 * by calling dma_set_mask() with a mask large enough.
157 */
158int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
159{
160	int rc;
161
162	if (flags)
163		return -EINVAL;
164
165	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
166	return rc;
167}
168EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
169
170int cxllib_get_PE_attributes(struct task_struct *task,
171			     unsigned long translation_mode,
172			     struct cxllib_pe_attributes *attr)
173{
 
 
174	if (translation_mode != CXL_TRANSLATED_MODE &&
175		translation_mode != CXL_REAL_MODE)
176		return -EINVAL;
177
178	attr->sr = cxl_calculate_sr(false,
179				task == NULL,
180				translation_mode == CXL_REAL_MODE,
181				true);
182	attr->lpid = mfspr(SPRN_LPID);
183	if (task) {
184		struct mm_struct *mm = get_task_mm(task);
185		if (mm == NULL)
186			return -EINVAL;
187		/*
188		 * Caller is keeping a reference on mm_users for as long
189		 * as XSL uses the memory context
190		 */
191		attr->pid = mm->context.id;
192		mmput(mm);
193		attr->tid = task->thread.tidr;
194	} else {
195		attr->pid = 0;
196		attr->tid = 0;
197	}
198	return 0;
199}
200EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
201
202static int get_vma_info(struct mm_struct *mm, u64 addr,
203			u64 *vma_start, u64 *vma_end,
204			unsigned long *page_size)
205{
206	struct vm_area_struct *vma = NULL;
207	int rc = 0;
208
209	mmap_read_lock(mm);
210
211	vma = find_vma(mm, addr);
212	if (!vma) {
213		rc = -EFAULT;
214		goto out;
215	}
216	*page_size = vma_kernel_pagesize(vma);
217	*vma_start = vma->vm_start;
218	*vma_end = vma->vm_end;
219out:
220	mmap_read_unlock(mm);
221	return rc;
222}
223
224int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
225{
226	int rc;
227	u64 dar, vma_start, vma_end;
228	unsigned long page_size;
229
230	if (mm == NULL)
231		return -EFAULT;
232
233	/*
234	 * The buffer we have to process can extend over several pages
235	 * and may also cover several VMAs.
236	 * We iterate over all the pages. The page size could vary
237	 * between VMAs.
238	 */
239	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
240	if (rc)
241		return rc;
242
243	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
244	     dar += page_size) {
245		if (dar < vma_start || dar >= vma_end) {
246			/*
247			 * We don't hold mm->mmap_lock while iterating, since
248			 * the lock is required by one of the lower-level page
 
249			 * fault processing functions and it could
250			 * create a deadlock.
251			 *
252			 * It means the VMAs can be altered between 2
253			 * loop iterations and we could theoretically
254			 * miss a page (however unlikely). But that's
255			 * not really a problem, as the driver will
256			 * retry access, get another page fault on the
257			 * missing page and call us again.
258			 */
259			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
260					&page_size);
261			if (rc)
262				return rc;
263		}
264
265		rc = cxl_handle_mm_fault(mm, flags, dar);
266		if (rc)
267			return -EFAULT;
268	}
269	return 0;
270}
271EXPORT_SYMBOL_GPL(cxllib_handle_fault);

 
  1/*
  2 * Copyright 2017 IBM Corp.
  3 *
  4 * This program is free software; you can redistribute it and/or
  5 * modify it under the terms of the GNU General Public License
  6 * as published by the Free Software Foundation; either version
  7 * 2 of the License, or (at your option) any later version.
  8 */
  9
 10#include <linux/hugetlb.h>
 11#include <linux/sched/mm.h>
 
 12#include <asm/pnv-pci.h>
 13#include <misc/cxllib.h>
 14
 15#include "cxl.h"
 16
 /* Value of dummy_read_addr while no dummy read buffer has been set up. */
#define CXL_INVALID_DRA                 (~0ull)
/* Length of the dummy read buffer, in bytes. */
#define CXL_DUMMY_READ_SIZE             128
/* log2 of the dummy read buffer alignment (1 << 8 = 256 bytes). */
#define CXL_DUMMY_READ_ALIGN            8
/* Reported to callers as cxllib_xsl_config.bar_addr. */
#define CXL_CAPI_WINDOW_START           0x2000000000000ull
/* Reported to callers as cxllib_xsl_config.log_bar_size. */
#define CXL_CAPI_WINDOW_LOG_SIZE        48
/* Reported to callers as cxllib_xsl_config.version. */
#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
 23
 24
 25bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
 26{
 27	int rc;
 28	u32 phb_index;
 29	u64 chip_id, capp_unit_id;
 30
 31	/* No flags currently supported */
 32	if (flags)
 33		return false;
 34
 35	if (!cpu_has_feature(CPU_FTR_HVMODE))
 36		return false;
 37
 38	if (!cxl_is_power9())
 39		return false;
 40
 41	if (cxl_slot_is_switched(dev))
 42		return false;
 43
 44	/* on p9, some pci slots are not connected to a CAPP unit */
 45	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
 46	if (rc)
 47		return false;
 48
 49	return true;
 50}
 51EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
 52
 53static DEFINE_MUTEX(dra_mutex);
 54static u64 dummy_read_addr = CXL_INVALID_DRA;
 55
 56static int allocate_dummy_read_buf(void)
 57{
 58	u64 buf, vaddr;
 59	size_t buf_size;
 60
 61	/*
 62	 * Dummy read buffer is 128-byte long, aligned on a
 63	 * 256-byte boundary and we need the physical address.
 64	 */
 65	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
 66	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
 67	if (!buf)
 68		return -ENOMEM;
 69
 70	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
 71					(~0ull << CXL_DUMMY_READ_ALIGN);
 72
 73	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
 74		"Dummy read buffer alignment issue");
 75	dummy_read_addr = virt_to_phys((void *) vaddr);
 76	return 0;
 77}
 78
 79int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
 80{
 81	int rc;
 82	u32 phb_index;
 83	u64 chip_id, capp_unit_id;
 84
 85	if (!cpu_has_feature(CPU_FTR_HVMODE))
 86		return -EINVAL;
 87
 88	mutex_lock(&dra_mutex);
 89	if (dummy_read_addr == CXL_INVALID_DRA) {
 90		rc = allocate_dummy_read_buf();
 91		if (rc) {
 92			mutex_unlock(&dra_mutex);
 93			return rc;
 94		}
 95	}
 96	mutex_unlock(&dra_mutex);
 97
 98	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
 99	if (rc)
100		return rc;
101
102	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
103	if (rc)
104		return rc;
105	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
106		/* workaround for DD1 - nbwind = capiind */
107		cfg->dsnctl |= ((u64)0x02 << (63-47));
108	}
109
110	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
111	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
112	cfg->bar_addr = CXL_CAPI_WINDOW_START;
113	cfg->dra = dummy_read_addr;
114	return 0;
115}
116EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
117
118int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
119			unsigned long flags)
120{
121	int rc = 0;
122
123	if (!cpu_has_feature(CPU_FTR_HVMODE))
124		return -EINVAL;
125
126	switch (mode) {
127	case CXL_MODE_PCI:
128		/*
129		 * We currently don't support going back to PCI mode
130		 * However, we'll turn the invalidations off, so that
131		 * the firmware doesn't have to ack them and can do
132		 * things like reset, etc.. with no worries.
133		 * So always return EPERM (can't go back to PCI) or
134		 * EBUSY if we couldn't even turn off snooping
135		 */
136		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
137		if (rc)
138			rc = -EBUSY;
139		else
140			rc = -EPERM;
141		break;
142	case CXL_MODE_CXL:
143		/* DMA only supported on TVT1 for the time being */
144		if (flags != CXL_MODE_DMA_TVT1)
145			return -EINVAL;
146		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
147		if (rc)
148			return rc;
149		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
150		break;
151	default:
152		rc = -EINVAL;
153	}
154	return rc;
155}
156EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
157
158/*
159 * When switching the PHB to capi mode, the TVT#1 entry for
160 * the Partitionable Endpoint is set in bypass mode, like
161 * in PCI mode.
162 * Configure the device dma to use TVT#1, which is done
163 * by calling dma_set_mask() with a mask large enough.
164 */
165int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
166{
167	int rc;
168
169	if (flags)
170		return -EINVAL;
171
172	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
173	return rc;
174}
175EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
176
177int cxllib_get_PE_attributes(struct task_struct *task,
178			     unsigned long translation_mode,
179			     struct cxllib_pe_attributes *attr)
180{
181	struct mm_struct *mm = NULL;
182
183	if (translation_mode != CXL_TRANSLATED_MODE &&
184		translation_mode != CXL_REAL_MODE)
185		return -EINVAL;
186
187	attr->sr = cxl_calculate_sr(false,
188				task == NULL,
189				translation_mode == CXL_REAL_MODE,
190				true);
191	attr->lpid = mfspr(SPRN_LPID);
192	if (task) {
193		mm = get_task_mm(task);
194		if (mm == NULL)
195			return -EINVAL;
196		/*
197		 * Caller is keeping a reference on mm_users for as long
198		 * as XSL uses the memory context
199		 */
200		attr->pid = mm->context.id;
201		mmput(mm);
202		attr->tid = task->thread.tidr;
203	} else {
204		attr->pid = 0;
205		attr->tid = 0;
206	}
207	return 0;
208}
209EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
210
211static int get_vma_info(struct mm_struct *mm, u64 addr,
212			u64 *vma_start, u64 *vma_end,
213			unsigned long *page_size)
214{
215	struct vm_area_struct *vma = NULL;
216	int rc = 0;
217
218	down_read(&mm->mmap_sem);
219
220	vma = find_vma(mm, addr);
221	if (!vma) {
222		rc = -EFAULT;
223		goto out;
224	}
225	*page_size = vma_kernel_pagesize(vma);
226	*vma_start = vma->vm_start;
227	*vma_end = vma->vm_end;
228out:
229	up_read(&mm->mmap_sem);
230	return rc;
231}
232
233int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
234{
235	int rc;
236	u64 dar, vma_start, vma_end;
237	unsigned long page_size;
238
239	if (mm == NULL)
240		return -EFAULT;
241
242	/*
243	 * The buffer we have to process can extend over several pages
244	 * and may also cover several VMAs.
245	 * We iterate over all the pages. The page size could vary
246	 * between VMAs.
247	 */
248	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
249	if (rc)
250		return rc;
251
252	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
253	     dar += page_size) {
254		if (dar < vma_start || dar >= vma_end) {
255			/*
256			 * We don't hold the mm->mmap_sem semaphore
257			 * while iterating, since the semaphore is
258			 * required by one of the lower-level page
259			 * fault processing functions and it could
260			 * create a deadlock.
261			 *
262			 * It means the VMAs can be altered between 2
263			 * loop iterations and we could theoretically
264			 * miss a page (however unlikely). But that's
265			 * not really a problem, as the driver will
266			 * retry access, get another page fault on the
267			 * missing page and call us again.
268			 */
269			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
270					&page_size);
271			if (rc)
272				return rc;
273		}
274
275		rc = cxl_handle_mm_fault(mm, flags, dar);
276		if (rc)
277			return -EFAULT;
278	}
279	return 0;
280}
281EXPORT_SYMBOL_GPL(cxllib_handle_fault);