Loading...
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright 2017 IBM Corp.
4 */
5
6#include <linux/hugetlb.h>
7#include <linux/sched/mm.h>
8#include <asm/opal-api.h>
9#include <asm/pnv-pci.h>
10#include <misc/cxllib.h>
11
12#include "cxl.h"
13
/* Sentinel held by dummy_read_addr until the dummy read buffer is set up */
#define CXL_INVALID_DRA			(~0ull)
/* Length, in bytes, of the dummy read buffer */
#define CXL_DUMMY_READ_SIZE		128
/* log2 of the dummy read buffer alignment: 1 << 8 = 256 bytes */
#define CXL_DUMMY_READ_ALIGN		8
/* Reported to callers as bar_addr / log_bar_size of the XSL config */
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
/* Reported to callers as the version of the XSL config */
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1
20
21
22bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
23{
24 int rc;
25 u32 phb_index;
26 u64 chip_id, capp_unit_id;
27
28 /* No flags currently supported */
29 if (flags)
30 return false;
31
32 if (!cpu_has_feature(CPU_FTR_HVMODE))
33 return false;
34
35 if (!cxl_is_power9())
36 return false;
37
38 if (cxl_slot_is_switched(dev))
39 return false;
40
41 /* on p9, some pci slots are not connected to a CAPP unit */
42 rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
43 if (rc)
44 return false;
45
46 return true;
47}
48EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
49
50static DEFINE_MUTEX(dra_mutex);
51static u64 dummy_read_addr = CXL_INVALID_DRA;
52
53static int allocate_dummy_read_buf(void)
54{
55 u64 buf, vaddr;
56 size_t buf_size;
57
58 /*
59 * Dummy read buffer is 128-byte long, aligned on a
60 * 256-byte boundary and we need the physical address.
61 */
62 buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
63 buf = (u64) kzalloc(buf_size, GFP_KERNEL);
64 if (!buf)
65 return -ENOMEM;
66
67 vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
68 (~0ull << CXL_DUMMY_READ_ALIGN);
69
70 WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
71 "Dummy read buffer alignment issue");
72 dummy_read_addr = virt_to_phys((void *) vaddr);
73 return 0;
74}
75
76int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
77{
78 int rc;
79 u32 phb_index;
80 u64 chip_id, capp_unit_id;
81
82 if (!cpu_has_feature(CPU_FTR_HVMODE))
83 return -EINVAL;
84
85 mutex_lock(&dra_mutex);
86 if (dummy_read_addr == CXL_INVALID_DRA) {
87 rc = allocate_dummy_read_buf();
88 if (rc) {
89 mutex_unlock(&dra_mutex);
90 return rc;
91 }
92 }
93 mutex_unlock(&dra_mutex);
94
95 rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
96 if (rc)
97 return rc;
98
99 rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
100 if (rc)
101 return rc;
102
103 cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
104 cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
105 cfg->bar_addr = CXL_CAPI_WINDOW_START;
106 cfg->dra = dummy_read_addr;
107 return 0;
108}
109EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
110
111int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
112 unsigned long flags)
113{
114 int rc = 0;
115
116 if (!cpu_has_feature(CPU_FTR_HVMODE))
117 return -EINVAL;
118
119 switch (mode) {
120 case CXL_MODE_PCI:
121 /*
122 * We currently don't support going back to PCI mode
123 * However, we'll turn the invalidations off, so that
124 * the firmware doesn't have to ack them and can do
125 * things like reset, etc.. with no worries.
126 * So always return EPERM (can't go back to PCI) or
127 * EBUSY if we couldn't even turn off snooping
128 */
129 rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
130 if (rc)
131 rc = -EBUSY;
132 else
133 rc = -EPERM;
134 break;
135 case CXL_MODE_CXL:
136 /* DMA only supported on TVT1 for the time being */
137 if (flags != CXL_MODE_DMA_TVT1)
138 return -EINVAL;
139 rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
140 if (rc)
141 return rc;
142 rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
143 break;
144 default:
145 rc = -EINVAL;
146 }
147 return rc;
148}
149EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
150
151/*
152 * When switching the PHB to capi mode, the TVT#1 entry for
153 * the Partitionable Endpoint is set in bypass mode, like
154 * in PCI mode.
155 * Configure the device dma to use TVT#1, which is done
156 * by calling dma_set_mask() with a mask large enough.
157 */
158int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
159{
160 int rc;
161
162 if (flags)
163 return -EINVAL;
164
165 rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
166 return rc;
167}
168EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
169
170int cxllib_get_PE_attributes(struct task_struct *task,
171 unsigned long translation_mode,
172 struct cxllib_pe_attributes *attr)
173{
174 if (translation_mode != CXL_TRANSLATED_MODE &&
175 translation_mode != CXL_REAL_MODE)
176 return -EINVAL;
177
178 attr->sr = cxl_calculate_sr(false,
179 task == NULL,
180 translation_mode == CXL_REAL_MODE,
181 true);
182 attr->lpid = mfspr(SPRN_LPID);
183 if (task) {
184 struct mm_struct *mm = get_task_mm(task);
185 if (mm == NULL)
186 return -EINVAL;
187 /*
188 * Caller is keeping a reference on mm_users for as long
189 * as XSL uses the memory context
190 */
191 attr->pid = mm->context.id;
192 mmput(mm);
193 attr->tid = task->thread.tidr;
194 } else {
195 attr->pid = 0;
196 attr->tid = 0;
197 }
198 return 0;
199}
200EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
201
202static int get_vma_info(struct mm_struct *mm, u64 addr,
203 u64 *vma_start, u64 *vma_end,
204 unsigned long *page_size)
205{
206 struct vm_area_struct *vma = NULL;
207 int rc = 0;
208
209 mmap_read_lock(mm);
210
211 vma = find_vma(mm, addr);
212 if (!vma) {
213 rc = -EFAULT;
214 goto out;
215 }
216 *page_size = vma_kernel_pagesize(vma);
217 *vma_start = vma->vm_start;
218 *vma_end = vma->vm_end;
219out:
220 mmap_read_unlock(mm);
221 return rc;
222}
223
224int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
225{
226 int rc;
227 u64 dar, vma_start, vma_end;
228 unsigned long page_size;
229
230 if (mm == NULL)
231 return -EFAULT;
232
233 /*
234 * The buffer we have to process can extend over several pages
235 * and may also cover several VMAs.
236 * We iterate over all the pages. The page size could vary
237 * between VMAs.
238 */
239 rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
240 if (rc)
241 return rc;
242
243 for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
244 dar += page_size) {
245 if (dar < vma_start || dar >= vma_end) {
246 /*
247 * We don't hold mm->mmap_lock while iterating, since
248 * the lock is required by one of the lower-level page
249 * fault processing functions and it could
250 * create a deadlock.
251 *
252 * It means the VMAs can be altered between 2
253 * loop iterations and we could theoretically
254 * miss a page (however unlikely). But that's
255 * not really a problem, as the driver will
256 * retry access, get another page fault on the
257 * missing page and call us again.
258 */
259 rc = get_vma_info(mm, dar, &vma_start, &vma_end,
260 &page_size);
261 if (rc)
262 return rc;
263 }
264
265 rc = cxl_handle_mm_fault(mm, flags, dar);
266 if (rc)
267 return -EFAULT;
268 }
269 return 0;
270}
271EXPORT_SYMBOL_GPL(cxllib_handle_fault);
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright 2017 IBM Corp.
4 */
5
6#include <linux/hugetlb.h>
7#include <linux/sched/mm.h>
8#include <asm/pnv-pci.h>
9#include <misc/cxllib.h>
10
11#include "cxl.h"
12
/* Sentinel held by dummy_read_addr until the dummy read buffer is set up */
#define CXL_INVALID_DRA			(~0ull)
/* Length, in bytes, of the dummy read buffer */
#define CXL_DUMMY_READ_SIZE		128
/* log2 of the dummy read buffer alignment: 1 << 8 = 256 bytes */
#define CXL_DUMMY_READ_ALIGN		8
/* Reported to callers as bar_addr / log_bar_size of the XSL config */
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
/* Reported to callers as the version of the XSL config */
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1
19
20
21bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
22{
23 int rc;
24 u32 phb_index;
25 u64 chip_id, capp_unit_id;
26
27 /* No flags currently supported */
28 if (flags)
29 return false;
30
31 if (!cpu_has_feature(CPU_FTR_HVMODE))
32 return false;
33
34 if (!cxl_is_power9())
35 return false;
36
37 if (cxl_slot_is_switched(dev))
38 return false;
39
40 /* on p9, some pci slots are not connected to a CAPP unit */
41 rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
42 if (rc)
43 return false;
44
45 return true;
46}
47EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
48
49static DEFINE_MUTEX(dra_mutex);
50static u64 dummy_read_addr = CXL_INVALID_DRA;
51
52static int allocate_dummy_read_buf(void)
53{
54 u64 buf, vaddr;
55 size_t buf_size;
56
57 /*
58 * Dummy read buffer is 128-byte long, aligned on a
59 * 256-byte boundary and we need the physical address.
60 */
61 buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
62 buf = (u64) kzalloc(buf_size, GFP_KERNEL);
63 if (!buf)
64 return -ENOMEM;
65
66 vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
67 (~0ull << CXL_DUMMY_READ_ALIGN);
68
69 WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
70 "Dummy read buffer alignment issue");
71 dummy_read_addr = virt_to_phys((void *) vaddr);
72 return 0;
73}
74
75int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
76{
77 int rc;
78 u32 phb_index;
79 u64 chip_id, capp_unit_id;
80
81 if (!cpu_has_feature(CPU_FTR_HVMODE))
82 return -EINVAL;
83
84 mutex_lock(&dra_mutex);
85 if (dummy_read_addr == CXL_INVALID_DRA) {
86 rc = allocate_dummy_read_buf();
87 if (rc) {
88 mutex_unlock(&dra_mutex);
89 return rc;
90 }
91 }
92 mutex_unlock(&dra_mutex);
93
94 rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
95 if (rc)
96 return rc;
97
98 rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
99 if (rc)
100 return rc;
101
102 cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
103 cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
104 cfg->bar_addr = CXL_CAPI_WINDOW_START;
105 cfg->dra = dummy_read_addr;
106 return 0;
107}
108EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
109
110int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
111 unsigned long flags)
112{
113 int rc = 0;
114
115 if (!cpu_has_feature(CPU_FTR_HVMODE))
116 return -EINVAL;
117
118 switch (mode) {
119 case CXL_MODE_PCI:
120 /*
121 * We currently don't support going back to PCI mode
122 * However, we'll turn the invalidations off, so that
123 * the firmware doesn't have to ack them and can do
124 * things like reset, etc.. with no worries.
125 * So always return EPERM (can't go back to PCI) or
126 * EBUSY if we couldn't even turn off snooping
127 */
128 rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
129 if (rc)
130 rc = -EBUSY;
131 else
132 rc = -EPERM;
133 break;
134 case CXL_MODE_CXL:
135 /* DMA only supported on TVT1 for the time being */
136 if (flags != CXL_MODE_DMA_TVT1)
137 return -EINVAL;
138 rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
139 if (rc)
140 return rc;
141 rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
142 break;
143 default:
144 rc = -EINVAL;
145 }
146 return rc;
147}
148EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
149
150/*
151 * When switching the PHB to capi mode, the TVT#1 entry for
152 * the Partitionable Endpoint is set in bypass mode, like
153 * in PCI mode.
154 * Configure the device dma to use TVT#1, which is done
155 * by calling dma_set_mask() with a mask large enough.
156 */
157int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
158{
159 int rc;
160
161 if (flags)
162 return -EINVAL;
163
164 rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
165 return rc;
166}
167EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
168
169int cxllib_get_PE_attributes(struct task_struct *task,
170 unsigned long translation_mode,
171 struct cxllib_pe_attributes *attr)
172{
173 struct mm_struct *mm = NULL;
174
175 if (translation_mode != CXL_TRANSLATED_MODE &&
176 translation_mode != CXL_REAL_MODE)
177 return -EINVAL;
178
179 attr->sr = cxl_calculate_sr(false,
180 task == NULL,
181 translation_mode == CXL_REAL_MODE,
182 true);
183 attr->lpid = mfspr(SPRN_LPID);
184 if (task) {
185 mm = get_task_mm(task);
186 if (mm == NULL)
187 return -EINVAL;
188 /*
189 * Caller is keeping a reference on mm_users for as long
190 * as XSL uses the memory context
191 */
192 attr->pid = mm->context.id;
193 mmput(mm);
194 attr->tid = task->thread.tidr;
195 } else {
196 attr->pid = 0;
197 attr->tid = 0;
198 }
199 return 0;
200}
201EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
202
203static int get_vma_info(struct mm_struct *mm, u64 addr,
204 u64 *vma_start, u64 *vma_end,
205 unsigned long *page_size)
206{
207 struct vm_area_struct *vma = NULL;
208 int rc = 0;
209
210 down_read(&mm->mmap_sem);
211
212 vma = find_vma(mm, addr);
213 if (!vma) {
214 rc = -EFAULT;
215 goto out;
216 }
217 *page_size = vma_kernel_pagesize(vma);
218 *vma_start = vma->vm_start;
219 *vma_end = vma->vm_end;
220out:
221 up_read(&mm->mmap_sem);
222 return rc;
223}
224
225int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
226{
227 int rc;
228 u64 dar, vma_start, vma_end;
229 unsigned long page_size;
230
231 if (mm == NULL)
232 return -EFAULT;
233
234 /*
235 * The buffer we have to process can extend over several pages
236 * and may also cover several VMAs.
237 * We iterate over all the pages. The page size could vary
238 * between VMAs.
239 */
240 rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
241 if (rc)
242 return rc;
243
244 for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
245 dar += page_size) {
246 if (dar < vma_start || dar >= vma_end) {
247 /*
248 * We don't hold the mm->mmap_sem semaphore
249 * while iterating, since the semaphore is
250 * required by one of the lower-level page
251 * fault processing functions and it could
252 * create a deadlock.
253 *
254 * It means the VMAs can be altered between 2
255 * loop iterations and we could theoretically
256 * miss a page (however unlikely). But that's
257 * not really a problem, as the driver will
258 * retry access, get another page fault on the
259 * missing page and call us again.
260 */
261 rc = get_vma_info(mm, dar, &vma_start, &vma_end,
262 &page_size);
263 if (rc)
264 return rc;
265 }
266
267 rc = cxl_handle_mm_fault(mm, flags, dar);
268 if (rc)
269 return -EFAULT;
270 }
271 return 0;
272}
273EXPORT_SYMBOL_GPL(cxllib_handle_fault);