v6.2
  1// SPDX-License-Identifier: GPL-2.0-only
  2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
  3#include <linux/io-64-nonatomic-lo-hi.h>
  4#include <linux/moduleparam.h>
  5#include <linux/module.h>
  6#include <linux/delay.h>
  7#include <linux/sizes.h>
  8#include <linux/mutex.h>
  9#include <linux/list.h>
 10#include <linux/pci.h>
 11#include <linux/pci-doe.h>
 12#include <linux/aer.h>
 13#include <linux/io.h>
 14#include "cxlmem.h"
 15#include "cxlpci.h"
 16#include "cxl.h"
 17#define CREATE_TRACE_POINTS
 18#include <trace/events/cxl.h>
 19
 20/**
 21 * DOC: cxl pci
 22 *
 23 * This implements the PCI exclusive functionality for a CXL device as it is
 24 * defined by the Compute Express Link specification. CXL devices may surface
 25 * certain functionality even if it isn't CXL enabled. While this driver is
 26 * focused around the PCI specific aspects of a CXL device, it binds to the
 27 * specific CXL memory device class code, and therefore the implementation of
 28 * cxl_pci is focused around CXL memory devices.
 29 *
 30 * The driver has several responsibilities, mainly:
 31 *  - Create the memX device and register on the CXL bus.
 32 *  - Enumerate the device's register interfaces and map them.
 33 *  - Register an nvdimm bridge device with cxl_core.
 34 *  - Register a CXL mailbox with cxl_core.
 35 */
 36
 37#define cxl_doorbell_busy(cxlds)                                                \
 38	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
 39	 CXLDEV_MBOX_CTRL_DOORBELL)
 40
 41/* CXL 2.0 - 8.2.8.4 */
 42#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
 43
 44/*
 45 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 46 * dictate how long to wait for the mailbox to become ready. The new
 47 * field allows the device to tell software the amount of time to wait
 48 * before mailbox ready. This field per the spec theoretically allows
 49 * for up to 255 seconds. 255 seconds is unreasonably long; it's longer
 50 * than the maximum SATA port link recovery wait. Default to 60 seconds
 51 * until someone builds a CXL device that needs more time in practice.
 52 */
 53static unsigned short mbox_ready_timeout = 60;
 54module_param(mbox_ready_timeout, ushort, 0644);
 55MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
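/*
 * Usage note (illustrative, not upstream text): because the parameter above
 * is registered with mode 0644, it can be set at load time or adjusted later
 * through sysfs. Assuming the driver is built as the cxl_pci module:
 *
 *	modprobe cxl_pci mbox_ready_timeout=120
 *	echo 120 > /sys/module/cxl_pci/parameters/mbox_ready_timeout
 *
 * A runtime write only affects devices probed after the change, since the
 * value is sampled in cxl_pci_setup_mailbox() at probe time.
 */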
 56
 57static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 58{
 59	const unsigned long start = jiffies;
 60	unsigned long end = start;
 61
 62	while (cxl_doorbell_busy(cxlds)) {
 63		end = jiffies;
 64
 65		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
 66			/* Check again in case preempted before timeout test */
 67			if (!cxl_doorbell_busy(cxlds))
 68				break;
 69			return -ETIMEDOUT;
 70		}
 71		cpu_relax();
 72	}
 73
 74	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
 75		jiffies_to_msecs(end) - jiffies_to_msecs(start));
 76	return 0;
 77}
 78
 79#define cxl_err(dev, status, msg)                                        \
 80	dev_err_ratelimited(dev, msg ", device state %s%s\n",                  \
 81			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
 82			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 83
 84#define cxl_cmd_err(dev, cmd, status, msg)                               \
 85	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n",    \
 86			    (cmd)->opcode,                                     \
 87			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
 88			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 89
 90/**
 91 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 92 * @cxlds: The device state to communicate with.
 93 * @mbox_cmd: Command to send to the memory device.
 94 *
 95 * Context: Any context. Expects mbox_mutex to be held.
 96 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 97 *         Caller should check the return code in @mbox_cmd to make sure it
 98 *         succeeded.
 99 *
100 * This is a generic form of the CXL mailbox send command thus only using the
101 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
102 * devices, and perhaps other types of CXL devices, may have further information
103 * available upon error conditions. Driver facilities wishing to send mailbox
104 * commands should use the wrapper command.
105 *
106 * The CXL spec allows for up to two mailboxes. The intention is for the primary
107 * mailbox to be OS controlled and the secondary mailbox to be used by system
108 * firmware. This allows the OS and firmware to communicate with the device and
109 * not need to coordinate with each other. The driver only uses the primary
110 * mailbox.
111 */
112static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
113				   struct cxl_mbox_cmd *mbox_cmd)
114{
115	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
116	struct device *dev = cxlds->dev;
117	u64 cmd_reg, status_reg;
118	size_t out_len;
119	int rc;
120
121	lockdep_assert_held(&cxlds->mbox_mutex);
122
123	/*
124	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
125	 *   1. Caller reads MB Control Register to verify doorbell is clear
126	 *   2. Caller writes Command Register
127	 *   3. Caller writes Command Payload Registers if input payload is non-empty
128	 *   4. Caller writes MB Control Register to set doorbell
129	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
130	 *   6. Caller reads MB Status Register to fetch Return code
131	 *   7. If command successful, Caller reads Command Register to get Payload Length
132	 *   8. If output payload is non-empty, host reads Command Payload Registers
133	 *
134	 * Hardware is free to do whatever it wants before the doorbell is rung,
135	 * and isn't allowed to change anything after it clears the doorbell. As
136	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
137	 * also happen in any order (though some orders might not make sense).
138	 */
139
140	/* #1 */
141	if (cxl_doorbell_busy(cxlds)) {
142		u64 md_status =
143			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
144
145		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
146			    "mailbox queue busy");
147		return -EBUSY;
148	}
149
150	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
151			     mbox_cmd->opcode);
152	if (mbox_cmd->size_in) {
153		if (WARN_ON(!mbox_cmd->payload_in))
154			return -EINVAL;
155
156		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
157				      mbox_cmd->size_in);
158		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
159	}
160
161	/* #2, #3 */
162	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
163
164	/* #4 */
165	dev_dbg(dev, "Sending command\n");
166	writel(CXLDEV_MBOX_CTRL_DOORBELL,
167	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
168
169	/* #5 */
170	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
171	if (rc == -ETIMEDOUT) {
172		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
173
174		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
175		return rc;
176	}
177
178	/* #6 */
179	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
180	mbox_cmd->return_code =
181		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
182
183	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
184		dev_dbg(dev, "Mailbox operation had an error: %s\n",
185			cxl_mbox_cmd_rc2str(mbox_cmd));
186		return 0; /* completed but caller must check return_code */
187	}
188
189	/* #7 */
190	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
191	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
192
193	/* #8 */
194	if (out_len && mbox_cmd->payload_out) {
195		/*
196		 * Sanitize the copy. If hardware misbehaves, out_len per the
197		 * spec can actually be greater than the max allowed size (21
198		 * bits available but spec defined 1M max). The caller also may
199		 * have requested less data than the hardware supplied even
200		 * within spec.
201		 */
202		size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len);
203
204		memcpy_fromio(mbox_cmd->payload_out, payload, n);
205		mbox_cmd->size_out = n;
206	} else {
207		mbox_cmd->size_out = 0;
208	}
209
210	return 0;
211}
212
213static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
214{
215	int rc;
216
217	mutex_lock_io(&cxlds->mbox_mutex);
218	rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
219	mutex_unlock(&cxlds->mbox_mutex);
220
221	return rc;
222}
223
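/*
 * Example (illustrative sketch only): kernel-internal callers reach this
 * code through the ->mbox_send() hook installed in cxl_pci_setup_mailbox()
 * below, building a struct cxl_mbox_cmd and checking both the transport
 * return value and the device's return_code, roughly:
 *
 *	struct cxl_mbox_identify id;
 *	struct cxl_mbox_cmd cmd = {
 *		.opcode = CXL_MBOX_OP_IDENTIFY,
 *		.size_out = sizeof(id),
 *		.payload_out = &id,
 *	};
 *	int rc = cxlds->mbox_send(cxlds, &cmd);
 *
 *	if (rc == 0 && cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS)
 *		rc = -ENXIO;
 *
 * The struct and opcode names are taken from cxlmem.h; real callers go
 * through the cxl_core wrappers rather than invoking the hook directly.
 */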
224static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
225{
226	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
227	unsigned long timeout;
228	u64 md_status;
229
230	timeout = jiffies + mbox_ready_timeout * HZ;
231	do {
232		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
233		if (md_status & CXLMDEV_MBOX_IF_READY)
234			break;
235		if (msleep_interruptible(100))
236			break;
237	} while (!time_after(jiffies, timeout));
238
239	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
240		cxl_err(cxlds->dev, md_status,
241			"timeout awaiting mailbox ready");
242		return -ETIMEDOUT;
243	}
244
245	/*
246	 * A command may be in flight from a previous driver instance,
247	 * think kexec, do one doorbell wait so that
248	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
249	 * source for future doorbell busy events.
250	 */
251	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
252		cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
253		return -ETIMEDOUT;
254	}
255
256	cxlds->mbox_send = cxl_pci_mbox_send;
257	cxlds->payload_size =
258		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
259
260	/*
261	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
262	 *
263	 * If the size is too small, mandatory commands will not work and so
264	 * there's no point in going forward. If the size is too large, there's
 265 * no harm in soft limiting it.
266	 */
267	cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M);
268	if (cxlds->payload_size < 256) {
269		dev_err(cxlds->dev, "Mailbox is too small (%zub)",
270			cxlds->payload_size);
271		return -ENXIO;
272	}
273
274	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
275		cxlds->payload_size);
276
277	return 0;
278}
279
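/*
 * Worked example for the capability decode above (CXL 2.0 8.2.8.4.3): the
 * Payload Size field encodes 2^n bytes, so a field value of 8 yields the
 * 256 byte minimum and a value of 20 yields the 1M ceiling enforced by the
 * min_t() soft limit in cxl_pci_setup_mailbox() above.
 */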
280static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
281{
282	struct device *dev = &pdev->dev;
283
284	map->base = ioremap(map->resource, map->max_size);
285	if (!map->base) {
286		dev_err(dev, "failed to map registers\n");
287		return -ENOMEM;
288	}
289
290	dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
291	return 0;
292}
293
294static void cxl_unmap_regblock(struct pci_dev *pdev,
295			       struct cxl_register_map *map)
296{
297	iounmap(map->base);
298	map->base = NULL;
299}
300
301static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
302{
303	struct cxl_component_reg_map *comp_map;
304	struct cxl_device_reg_map *dev_map;
305	struct device *dev = &pdev->dev;
306	void __iomem *base = map->base;
307
308	switch (map->reg_type) {
309	case CXL_REGLOC_RBI_COMPONENT:
310		comp_map = &map->component_map;
311		cxl_probe_component_regs(dev, base, comp_map);
312		if (!comp_map->hdm_decoder.valid) {
313			dev_err(dev, "HDM decoder registers not found\n");
314			return -ENXIO;
315		}
316
317		if (!comp_map->ras.valid)
318			dev_dbg(dev, "RAS registers not found\n");
319
320		dev_dbg(dev, "Set up component registers\n");
321		break;
322	case CXL_REGLOC_RBI_MEMDEV:
323		dev_map = &map->device_map;
324		cxl_probe_device_regs(dev, base, dev_map);
325		if (!dev_map->status.valid || !dev_map->mbox.valid ||
326		    !dev_map->memdev.valid) {
327			dev_err(dev, "registers not found: %s%s%s\n",
328				!dev_map->status.valid ? "status " : "",
329				!dev_map->mbox.valid ? "mbox " : "",
330				!dev_map->memdev.valid ? "memdev " : "");
331			return -ENXIO;
332		}
333
334		dev_dbg(dev, "Probing device registers...\n");
335		break;
336	default:
337		break;
338	}
339
340	return 0;
341}
342
343static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
344			  struct cxl_register_map *map)
345{
346	int rc;
347
348	rc = cxl_find_regblock(pdev, type, map);
349	if (rc)
350		return rc;
351
352	rc = cxl_map_regblock(pdev, map);
353	if (rc)
354		return rc;
355
356	rc = cxl_probe_regs(pdev, map);
357	cxl_unmap_regblock(pdev, map);
358
359	return rc;
360}
361
362static void cxl_pci_destroy_doe(void *mbs)
363{
364	xa_destroy(mbs);
365}
366
367static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
368{
369	struct device *dev = cxlds->dev;
370	struct pci_dev *pdev = to_pci_dev(dev);
371	u16 off = 0;
372
373	xa_init(&cxlds->doe_mbs);
374	if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
375		dev_err(dev, "Failed to create XArray for DOE's\n");
376		return;
377	}
378
379	/*
380	 * Mailbox creation is best effort.  Higher layers must determine if
381	 * the lack of a mailbox for their protocol is a device failure or not.
382	 */
383	pci_doe_for_each_off(pdev, off) {
384		struct pci_doe_mb *doe_mb;
385
386		doe_mb = pcim_doe_create_mb(pdev, off);
387		if (IS_ERR(doe_mb)) {
388			dev_err(dev, "Failed to create MB object for MB @ %x\n",
389				off);
390			continue;
391		}
392
393		if (!pci_request_config_region_exclusive(pdev, off,
394							 PCI_DOE_CAP_SIZEOF,
395							 dev_name(dev)))
396			pci_err(pdev, "Failed to exclude DOE registers\n");
397
398		if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
399			dev_err(dev, "xa_insert failed to insert MB @ %x\n",
400				off);
401			continue;
402		}
403
404		dev_dbg(dev, "Created DOE mailbox @%x\n", off);
405	}
406}
407
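/*
 * Example (illustrative sketch): consumers find a mailbox again by walking
 * the xarray populated above, which is keyed by the DOE capability offset,
 * and asking each mailbox whether it speaks the protocol they need. The
 * CDAT lookup in cxl_core looks roughly like:
 *
 *	struct pci_doe_mb *doe_mb;
 *	unsigned long index;
 *
 *	xa_for_each(&cxlds->doe_mbs, index, doe_mb)
 *		if (pci_doe_supports_prot(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
 *					  CXL_DOE_PROTOCOL_TABLE_ACCESS))
 *			break;
 *
 * CXL_DOE_PROTOCOL_TABLE_ACCESS comes from cxlpci.h; if the loop completes
 * without a match, doe_mb ends up NULL.
 */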
408/*
409 * Assume that any RCIEP that emits the CXL memory expander class code
410 * is an RCD
411 */
412static bool is_cxl_restricted(struct pci_dev *pdev)
413{
414	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
415}
416
417static void disable_aer(void *pdev)
418{
419	pci_disable_pcie_error_reporting(pdev);
420}
421
422static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
423{
424	struct cxl_register_map map;
425	struct cxl_memdev *cxlmd;
426	struct cxl_dev_state *cxlds;
427	int rc;
428
429	/*
430	 * Double check the anonymous union trickery in struct cxl_regs
431	 * FIXME switch to struct_group()
432	 */
433	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
434		     offsetof(struct cxl_regs, device_regs.memdev));
435
436	rc = pcim_enable_device(pdev);
437	if (rc)
438		return rc;
439
440	cxlds = cxl_dev_state_create(&pdev->dev);
441	if (IS_ERR(cxlds))
442		return PTR_ERR(cxlds);
443	pci_set_drvdata(pdev, cxlds);
444
445	cxlds->rcd = is_cxl_restricted(pdev);
446	cxlds->serial = pci_get_dsn(pdev);
447	cxlds->cxl_dvsec = pci_find_dvsec_capability(
448		pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
449	if (!cxlds->cxl_dvsec)
450		dev_warn(&pdev->dev,
451			 "Device DVSEC not present, skip CXL.mem init\n");
452
453	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
454	if (rc)
455		return rc;
456
457	rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map);
458	if (rc)
459		return rc;
460
461	/*
462	 * If the component registers can't be found, the cxl_pci driver may
463	 * still be useful for management functions so don't return an error.
464	 */
465	cxlds->component_reg_phys = CXL_RESOURCE_NONE;
466	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
467	if (rc)
468		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
469
470	cxlds->component_reg_phys = map.resource;
471
472	devm_cxl_pci_create_doe(cxlds);
473
474	rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
475				    &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
476	if (rc)
477		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
478
479	rc = cxl_pci_setup_mailbox(cxlds);
480	if (rc)
481		return rc;
482
483	rc = cxl_enumerate_cmds(cxlds);
484	if (rc)
485		return rc;
486
487	rc = cxl_dev_state_identify(cxlds);
488	if (rc)
489		return rc;
490
491	rc = cxl_mem_create_range_info(cxlds);
492	if (rc)
493		return rc;
494
495	cxlmd = devm_cxl_add_memdev(cxlds);
496	if (IS_ERR(cxlmd))
497		return PTR_ERR(cxlmd);
498
499	if (cxlds->regs.ras) {
500		pci_enable_pcie_error_reporting(pdev);
501		rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
502		if (rc)
503			return rc;
504	}
505	pci_save_state(pdev);
506
507	return rc;
508}
509
510static const struct pci_device_id cxl_mem_pci_tbl[] = {
511	/* PCI class code for CXL.mem Type-3 Devices */
512	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
513	{ /* terminate list */ },
514};
515MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
516
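/*
 * Note (illustrative): PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF works
 * out to class code 0x050210, the CXL memory device class/prog-if, and the
 * ~0 mask makes the match exact. Candidate devices can be listed from
 * userspace with something like:
 *
 *	lspci -nn -d ::0502
 */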
517/* CXL spec rev3.0 8.2.4.16.1 */
518static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
519{
520	void __iomem *addr;
521	u32 *log_addr;
522	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
523
524	addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
525	log_addr = log;
526
527	for (i = 0; i < log_u32_size; i++) {
528		*log_addr = readl(addr);
529		log_addr++;
530		addr += sizeof(u32);
531	}
532}
533
534/*
535 * Log the state of the RAS status registers and prepare them to log the
 536 * next error status. Return true if a reset is needed.
537 */
538static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
539{
540	struct cxl_memdev *cxlmd = cxlds->cxlmd;
541	struct device *dev = &cxlmd->dev;
542	u32 hl[CXL_HEADERLOG_SIZE_U32];
543	void __iomem *addr;
544	u32 status;
545	u32 fe;
546
547	if (!cxlds->regs.ras)
548		return false;
549
550	addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
551	status = readl(addr);
552	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
553		return false;
554
555	/* If multiple errors, log header points to first error from ctrl reg */
556	if (hweight32(status) > 1) {
557		void __iomem *rcc_addr =
558			cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
559
560		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
561				   readl(rcc_addr)));
562	} else {
563		fe = status;
564	}
565
566	header_log_copy(cxlds, hl);
567	trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
568	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
569
570	return true;
571}
572
573static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
574					   pci_channel_state_t state)
575{
576	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
577	struct cxl_memdev *cxlmd = cxlds->cxlmd;
578	struct device *dev = &cxlmd->dev;
579	bool ue;
580
581	/*
582	 * A frozen channel indicates an impending reset which is fatal to
583	 * CXL.mem operation, and will likely crash the system. On the off
584	 * chance the situation is recoverable dump the status of the RAS
585	 * capability registers and bounce the active state of the memdev.
586	 */
587	ue = cxl_report_and_clear(cxlds);
588
589	switch (state) {
590	case pci_channel_io_normal:
591		if (ue) {
592			device_release_driver(dev);
593			return PCI_ERS_RESULT_NEED_RESET;
594		}
595		return PCI_ERS_RESULT_CAN_RECOVER;
596	case pci_channel_io_frozen:
597		dev_warn(&pdev->dev,
598			 "%s: frozen state error detected, disable CXL.mem\n",
599			 dev_name(dev));
600		device_release_driver(dev);
601		return PCI_ERS_RESULT_NEED_RESET;
602	case pci_channel_io_perm_failure:
603		dev_warn(&pdev->dev,
604			 "failure state error detected, request disconnect\n");
605		return PCI_ERS_RESULT_DISCONNECT;
606	}
607	return PCI_ERS_RESULT_NEED_RESET;
608}
609
610static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
611{
612	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
613	struct cxl_memdev *cxlmd = cxlds->cxlmd;
614	struct device *dev = &cxlmd->dev;
615
616	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
617		 dev_name(dev));
618	pci_restore_state(pdev);
619	if (device_attach(dev) <= 0)
620		return PCI_ERS_RESULT_DISCONNECT;
621	return PCI_ERS_RESULT_RECOVERED;
622}
623
624static void cxl_error_resume(struct pci_dev *pdev)
625{
626	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
627	struct cxl_memdev *cxlmd = cxlds->cxlmd;
628	struct device *dev = &cxlmd->dev;
629
630	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
631		 dev->driver ? "successful" : "failed");
632}
633
634static void cxl_cor_error_detected(struct pci_dev *pdev)
635{
636	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
637	struct cxl_memdev *cxlmd = cxlds->cxlmd;
638	struct device *dev = &cxlmd->dev;
639	void __iomem *addr;
640	u32 status;
641
642	if (!cxlds->regs.ras)
643		return;
644
645	addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
646	status = readl(addr);
647	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
648		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
649		trace_cxl_aer_correctable_error(dev, status);
650	}
651}
652
653static const struct pci_error_handlers cxl_error_handlers = {
654	.error_detected	= cxl_error_detected,
655	.slot_reset	= cxl_slot_reset,
656	.resume		= cxl_error_resume,
657	.cor_error_detected	= cxl_cor_error_detected,
658};
659
660static struct pci_driver cxl_pci_driver = {
661	.name			= KBUILD_MODNAME,
662	.id_table		= cxl_mem_pci_tbl,
663	.probe			= cxl_pci_probe,
664	.err_handler		= &cxl_error_handlers,
665	.driver	= {
666		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
667	},
668};
669
670MODULE_LICENSE("GPL v2");
671module_pci_driver(cxl_pci_driver);
672MODULE_IMPORT_NS(CXL);
v5.14.15
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
   3#include <uapi/linux/cxl_mem.h>
   4#include <linux/security.h>
   5#include <linux/debugfs.h>
   6#include <linux/module.h>
   7#include <linux/sizes.h>
   8#include <linux/mutex.h>
   9#include <linux/list.h>
  10#include <linux/cdev.h>
  11#include <linux/idr.h>
  12#include <linux/pci.h>
  13#include <linux/io.h>
  14#include <linux/io-64-nonatomic-lo-hi.h>
  15#include "cxlmem.h"
  16#include "pci.h"
  17#include "cxl.h"
  18
  19/**
  20 * DOC: cxl pci
  21 *
  22 * This implements the PCI exclusive functionality for a CXL device as it is
  23 * defined by the Compute Express Link specification. CXL devices may surface
  24 * certain functionality even if it isn't CXL enabled.
  25 *
  26 * The driver has several responsibilities, mainly:
  27 *  - Create the memX device and register on the CXL bus.
  28 *  - Enumerate the device's register interfaces and map them.
  29 *  - Probe the device attributes to establish sysfs interface.
  30 *  - Provide an IOCTL interface to userspace to communicate with the device for
  31 *    things like firmware update.
  32 */
  33
  34#define cxl_doorbell_busy(cxlm)                                                \
  35	(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
  36	 CXLDEV_MBOX_CTRL_DOORBELL)
  37
  38/* CXL 2.0 - 8.2.8.4 */
  39#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
  40
  41enum opcode {
  42	CXL_MBOX_OP_INVALID		= 0x0000,
  43	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
  44	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
  45	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
  46	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
  47	CXL_MBOX_OP_GET_LOG		= 0x0401,
  48	CXL_MBOX_OP_IDENTIFY		= 0x4000,
  49	CXL_MBOX_OP_GET_PARTITION_INFO	= 0x4100,
  50	CXL_MBOX_OP_SET_PARTITION_INFO	= 0x4101,
  51	CXL_MBOX_OP_GET_LSA		= 0x4102,
  52	CXL_MBOX_OP_SET_LSA		= 0x4103,
  53	CXL_MBOX_OP_GET_HEALTH_INFO	= 0x4200,
  54	CXL_MBOX_OP_GET_ALERT_CONFIG	= 0x4201,
  55	CXL_MBOX_OP_SET_ALERT_CONFIG	= 0x4202,
  56	CXL_MBOX_OP_GET_SHUTDOWN_STATE	= 0x4203,
  57	CXL_MBOX_OP_SET_SHUTDOWN_STATE	= 0x4204,
  58	CXL_MBOX_OP_GET_POISON		= 0x4300,
  59	CXL_MBOX_OP_INJECT_POISON	= 0x4301,
  60	CXL_MBOX_OP_CLEAR_POISON	= 0x4302,
  61	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
  62	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
  63	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
  64	CXL_MBOX_OP_MAX			= 0x10000
  65};
  66
  67/**
  68 * struct mbox_cmd - A command to be submitted to hardware.
  69 * @opcode: (input) The command set and command submitted to hardware.
  70 * @payload_in: (input) Pointer to the input payload.
  71 * @payload_out: (output) Pointer to the output payload. Must be allocated by
  72 *		 the caller.
  73 * @size_in: (input) Number of bytes to load from @payload_in.
  74 * @size_out: (input) Max number of bytes loaded into @payload_out.
  75 *            (output) Number of bytes generated by the device. For fixed size
  76 *            outputs commands this is always expected to be deterministic. For
  77 *            variable sized output commands, it tells the exact number of bytes
  78 *            written.
  79 * @return_code: (output) Error code returned from hardware.
  80 *
  81 * This is the primary mechanism used to send commands to the hardware.
  82 * All the fields except @payload_* correspond exactly to the fields described in
  83 * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
  84 * @payload_out are written to, and read from the Command Payload Registers
  85 * defined in CXL 2.0 8.2.8.4.8.
  86 */
  87struct mbox_cmd {
  88	u16 opcode;
  89	void *payload_in;
  90	void *payload_out;
  91	size_t size_in;
  92	size_t size_out;
  93	u16 return_code;
  94#define CXL_MBOX_SUCCESS 0
  95};
  96
  97static int cxl_mem_major;
  98static DEFINE_IDA(cxl_memdev_ida);
  99static DECLARE_RWSEM(cxl_memdev_rwsem);
 100static struct dentry *cxl_debugfs;
 101static bool cxl_raw_allow_all;
 102
 103enum {
 104	CEL_UUID,
 105	VENDOR_DEBUG_UUID,
 106};
 107
 108/* See CXL 2.0 Table 170. Get Log Input Payload */
 109static const uuid_t log_uuid[] = {
 110	[CEL_UUID] = UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96,
 111			       0xb1, 0x62, 0x3b, 0x3f, 0x17),
 112	[VENDOR_DEBUG_UUID] = UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f,
 113					0xd6, 0x07, 0x19, 0x40, 0x3d, 0x86),
 114};
 115
 116/**
 117 * struct cxl_mem_command - Driver representation of a memory device command
 118 * @info: Command information as it exists for the UAPI
 119 * @opcode: The actual bits used for the mailbox protocol
  120 * @flags: Set of flags affecting driver behavior.
 121 *
 122 *  * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
 123 *    will be enabled by the driver regardless of what hardware may have
 124 *    advertised.
 125 *
 126 * The cxl_mem_command is the driver's internal representation of commands that
 127 * are supported by the driver. Some of these commands may not be supported by
 128 * the hardware. The driver will use @info to validate the fields passed in by
 129 * the user then submit the @opcode to the hardware.
 130 *
 131 * See struct cxl_command_info.
 132 */
 133struct cxl_mem_command {
 134	struct cxl_command_info info;
 135	enum opcode opcode;
 136	u32 flags;
 137#define CXL_CMD_FLAG_NONE 0
 138#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
 139};
 140
 141#define CXL_CMD(_id, sin, sout, _flags)                                        \
 142	[CXL_MEM_COMMAND_ID_##_id] = {                                         \
 143	.info =	{                                                              \
 144			.id = CXL_MEM_COMMAND_ID_##_id,                        \
 145			.size_in = sin,                                        \
 146			.size_out = sout,                                      \
 147		},                                                             \
 148	.opcode = CXL_MBOX_OP_##_id,                                           \
 149	.flags = _flags,                                                       \
 150	}
 151
 152/*
 153 * This table defines the supported mailbox commands for the driver. This table
 154 * is made up of a UAPI structure. Non-negative values as parameters in the
 155 * table will be validated against the user's input. For example, if size_in is
 156 * 0, and the user passed in 1, it is an error.
 157 */
 158static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
 159	CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
 160#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
 161	CXL_CMD(RAW, ~0, ~0, 0),
 162#endif
 163	CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
 164	CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
 165	CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
 166	CXL_CMD(GET_LSA, 0x8, ~0, 0),
 167	CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
 168	CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
 169	CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
 170	CXL_CMD(SET_LSA, ~0, 0, 0),
 171	CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
 172	CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
 173	CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
 174	CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
 175	CXL_CMD(GET_POISON, 0x10, ~0, 0),
 176	CXL_CMD(INJECT_POISON, 0x8, 0, 0),
 177	CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
 178	CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
 179	CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
 180	CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
 181};
 182
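/*
 * For reference, each CXL_CMD() entry above is just a designated
 * initializer; CXL_CMD(GET_LSA, 0x8, ~0, 0), for example, expands to:
 *
 *	[CXL_MEM_COMMAND_ID_GET_LSA] = {
 *		.info = {
 *			.id = CXL_MEM_COMMAND_ID_GET_LSA,
 *			.size_in = 0x8,
 *			.size_out = ~0,
 *		},
 *		.opcode = CXL_MBOX_OP_GET_LSA,
 *		.flags = 0,
 *	},
 *
 * where a size of ~0 marks a variable-length payload that is validated by
 * the caller instead of the table.
 */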
 183/*
 184 * Commands that RAW doesn't permit. The rationale for each:
 185 *
 186 * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
 187 * coordination of transaction timeout values at the root bridge level.
 188 *
 189 * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
 190 * and needs to be coordinated with HDM updates.
 191 *
 192 * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
 193 * driver and any writes from userspace invalidates those contents.
 194 *
 195 * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
  196 * to the device after it is marked clean, userspace cannot make that
 197 * assertion.
 198 *
 199 * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
 200 * is kept up to date with patrol notifications and error management.
 201 */
 202static u16 cxl_disabled_raw_commands[] = {
 203	CXL_MBOX_OP_ACTIVATE_FW,
 204	CXL_MBOX_OP_SET_PARTITION_INFO,
 205	CXL_MBOX_OP_SET_LSA,
 206	CXL_MBOX_OP_SET_SHUTDOWN_STATE,
 207	CXL_MBOX_OP_SCAN_MEDIA,
 208	CXL_MBOX_OP_GET_SCAN_MEDIA,
 209};
 210
 211/*
 212 * Command sets that RAW doesn't permit. All opcodes in this set are
 213 * disabled because they pass plain text security payloads over the
 214 * user/kernel boundary. This functionality is intended to be wrapped
 215 * behind the keys ABI which allows for encrypted payloads in the UAPI
 216 */
 217static u8 security_command_sets[] = {
 218	0x44, /* Sanitize */
 219	0x45, /* Persistent Memory Data-at-rest Security */
 220	0x46, /* Security Passthrough */
 221};
 222
 223#define cxl_for_each_cmd(cmd)                                                  \
 224	for ((cmd) = &mem_commands[0];                                         \
 225	     ((cmd) - mem_commands) < ARRAY_SIZE(mem_commands); (cmd)++)
 226
 227#define cxl_cmd_count ARRAY_SIZE(mem_commands)
 228
 229static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
 230{
 231	const unsigned long start = jiffies;
 232	unsigned long end = start;
 233
 234	while (cxl_doorbell_busy(cxlm)) {
 235		end = jiffies;
 236
 237		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
 238			/* Check again in case preempted before timeout test */
 239			if (!cxl_doorbell_busy(cxlm))
 240				break;
 241			return -ETIMEDOUT;
 242		}
 243		cpu_relax();
 244	}
 245
 246	dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
 247		jiffies_to_msecs(end) - jiffies_to_msecs(start));
 248	return 0;
 249}
 250
 251static bool cxl_is_security_command(u16 opcode)
 252{
 253	int i;
 254
 255	for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
 256		if (security_command_sets[i] == (opcode >> 8))
 257			return true;
 258	return false;
 259}
 260
 261static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
 262				 struct mbox_cmd *mbox_cmd)
 263{
 264	struct device *dev = &cxlm->pdev->dev;
 265
 266	dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
 267		mbox_cmd->opcode, mbox_cmd->size_in);
 268}
 269
 270/**
 271 * __cxl_mem_mbox_send_cmd() - Execute a mailbox command
 272 * @cxlm: The CXL memory device to communicate with.
 273 * @mbox_cmd: Command to send to the memory device.
 274 *
 275 * Context: Any context. Expects mbox_mutex to be held.
 276 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 277 *         Caller should check the return code in @mbox_cmd to make sure it
 278 *         succeeded.
 279 *
 280 * This is a generic form of the CXL mailbox send command thus only using the
 281 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
  282 * devices, and perhaps other types of CXL devices, may have further information
 283 * available upon error conditions. Driver facilities wishing to send mailbox
 284 * commands should use the wrapper command.
 285 *
 286 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 287 * mailbox to be OS controlled and the secondary mailbox to be used by system
 288 * firmware. This allows the OS and firmware to communicate with the device and
 289 * not need to coordinate with each other. The driver only uses the primary
 290 * mailbox.
 291 */
 292static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
 293				   struct mbox_cmd *mbox_cmd)
 294{
 295	void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
 296	u64 cmd_reg, status_reg;
 297	size_t out_len;
 298	int rc;
 299
 300	lockdep_assert_held(&cxlm->mbox_mutex);
 301
 302	/*
 303	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
 304	 *   1. Caller reads MB Control Register to verify doorbell is clear
 305	 *   2. Caller writes Command Register
 306	 *   3. Caller writes Command Payload Registers if input payload is non-empty
 307	 *   4. Caller writes MB Control Register to set doorbell
 308	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
 309	 *   6. Caller reads MB Status Register to fetch Return code
 310	 *   7. If command successful, Caller reads Command Register to get Payload Length
 311	 *   8. If output payload is non-empty, host reads Command Payload Registers
 312	 *
 313	 * Hardware is free to do whatever it wants before the doorbell is rung,
 314	 * and isn't allowed to change anything after it clears the doorbell. As
 315	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
 316	 * also happen in any order (though some orders might not make sense).
 317	 */
 318
 319	/* #1 */
 320	if (cxl_doorbell_busy(cxlm)) {
 321		dev_err_ratelimited(&cxlm->pdev->dev,
 322				    "Mailbox re-busy after acquiring\n");
 323		return -EBUSY;
 324	}
 325
 326	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
 327			     mbox_cmd->opcode);
 328	if (mbox_cmd->size_in) {
 329		if (WARN_ON(!mbox_cmd->payload_in))
 330			return -EINVAL;
 331
 332		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
 333				      mbox_cmd->size_in);
 334		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
 335	}
 336
 337	/* #2, #3 */
 338	writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 339
 340	/* #4 */
 341	dev_dbg(&cxlm->pdev->dev, "Sending command\n");
 342	writel(CXLDEV_MBOX_CTRL_DOORBELL,
 343	       cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
 344
 345	/* #5 */
 346	rc = cxl_mem_wait_for_doorbell(cxlm);
 347	if (rc == -ETIMEDOUT) {
 348		cxl_mem_mbox_timeout(cxlm, mbox_cmd);
 349		return rc;
 350	}
 351
 352	/* #6 */
 353	status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
 354	mbox_cmd->return_code =
 355		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
 356
 357	if (mbox_cmd->return_code != 0) {
 358		dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
 359		return 0;
 360	}
 361
 362	/* #7 */
 363	cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 364	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
 365
 366	/* #8 */
 367	if (out_len && mbox_cmd->payload_out) {
 368		/*
 369		 * Sanitize the copy. If hardware misbehaves, out_len per the
 370		 * spec can actually be greater than the max allowed size (21
 371		 * bits available but spec defined 1M max). The caller also may
 372		 * have requested less data than the hardware supplied even
 373		 * within spec.
 374		 */
 375		size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);
 376
 377		memcpy_fromio(mbox_cmd->payload_out, payload, n);
 378		mbox_cmd->size_out = n;
 379	} else {
 380		mbox_cmd->size_out = 0;
 381	}
 382
 383	return 0;
 384}
 385
 386/**
 387 * cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
 388 * @cxlm: The memory device to gain access to.
 389 *
 390 * Context: Any context. Takes the mbox_mutex.
 391 * Return: 0 if exclusive access was acquired.
 392 */
 393static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
 394{
 395	struct device *dev = &cxlm->pdev->dev;
 396	u64 md_status;
 397	int rc;
 398
 399	mutex_lock_io(&cxlm->mbox_mutex);
 400
 401	/*
 402	 * XXX: There is some amount of ambiguity in the 2.0 version of the spec
 403	 * around the mailbox interface ready (8.2.8.5.1.1).  The purpose of the
 404	 * bit is to allow firmware running on the device to notify the driver
 405	 * that it's ready to receive commands. It is unclear if the bit needs
  406	 * to be read for each mailbox transaction, i.e. the firmware can switch
 407	 * it on and off as needed. Second, there is no defined timeout for
 408	 * mailbox ready, like there is for the doorbell interface.
 409	 *
 410	 * Assumptions:
 411	 * 1. The firmware might toggle the Mailbox Interface Ready bit, check
 412	 *    it for every command.
 413	 *
 414	 * 2. If the doorbell is clear, the firmware should have first set the
 415	 *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
 416	 *    to be ready is sufficient.
 417	 */
 418	rc = cxl_mem_wait_for_doorbell(cxlm);
 419	if (rc) {
 420		dev_warn(dev, "Mailbox interface not ready\n");
 421		goto out;
 422	}
 423
 424	md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
 425	if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
 426		dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
 427		rc = -EBUSY;
 428		goto out;
 429	}
 430
 431	/*
 432	 * Hardware shouldn't allow a ready status but also have failure bits
  433	 * set. Spit out an error; this should be a bug report
 434	 */
 435	rc = -EFAULT;
 436	if (md_status & CXLMDEV_DEV_FATAL) {
 437		dev_err(dev, "mbox: reported ready, but fatal\n");
 438		goto out;
 439	}
 440	if (md_status & CXLMDEV_FW_HALT) {
 441		dev_err(dev, "mbox: reported ready, but halted\n");
 442		goto out;
 443	}
 444	if (CXLMDEV_RESET_NEEDED(md_status)) {
 445		dev_err(dev, "mbox: reported ready, but reset needed\n");
 446		goto out;
 447	}
 448
 449	/* with lock held */
 450	return 0;
 451
 452out:
 453	mutex_unlock(&cxlm->mbox_mutex);
 454	return rc;
 455}
 456
 457/**
 458 * cxl_mem_mbox_put() - Release exclusive access to the mailbox.
 459 * @cxlm: The CXL memory device to communicate with.
 460 *
 461 * Context: Any context. Expects mbox_mutex to be held.
 462 */
 463static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
 464{
 465	mutex_unlock(&cxlm->mbox_mutex);
 466}
 467
 468/**
 469 * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
 470 * @cxlm: The CXL memory device to communicate with.
 471 * @cmd: The validated command.
 472 * @in_payload: Pointer to userspace's input payload.
 473 * @out_payload: Pointer to userspace's output payload.
 474 * @size_out: (Input) Max payload size to copy out.
 475 *            (Output) Payload size hardware generated.
 476 * @retval: Hardware generated return code from the operation.
 477 *
 478 * Return:
 479 *  * %0	- Mailbox transaction succeeded. This implies the mailbox
 480 *		  protocol completed successfully not that the operation itself
 481 *		  was successful.
 482 *  * %-ENOMEM  - Couldn't allocate a bounce buffer.
 483 *  * %-EFAULT	- Something happened with copy_to/from_user.
 484 *  * %-EINTR	- Mailbox acquisition interrupted.
 485 *  * %-EXXX	- Transaction level failures.
 486 *
 487 * Creates the appropriate mailbox command and dispatches it on behalf of a
 488 * userspace request. The input and output payloads are copied between
 489 * userspace.
 490 *
 491 * See cxl_send_cmd().
 492 */
 493static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
 494					const struct cxl_mem_command *cmd,
 495					u64 in_payload, u64 out_payload,
 496					s32 *size_out, u32 *retval)
 497{
 498	struct device *dev = &cxlm->pdev->dev;
 499	struct mbox_cmd mbox_cmd = {
 500		.opcode = cmd->opcode,
 501		.size_in = cmd->info.size_in,
 502		.size_out = cmd->info.size_out,
 503	};
 504	int rc;
 505
 506	if (cmd->info.size_out) {
 507		mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
 508		if (!mbox_cmd.payload_out)
 509			return -ENOMEM;
 510	}
 511
 512	if (cmd->info.size_in) {
 513		mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
 514						   cmd->info.size_in);
 515		if (IS_ERR(mbox_cmd.payload_in)) {
 516			kvfree(mbox_cmd.payload_out);
 517			return PTR_ERR(mbox_cmd.payload_in);
 518		}
 519	}
 520
 521	rc = cxl_mem_mbox_get(cxlm);
 522	if (rc)
 523		goto out;
 524
 525	dev_dbg(dev,
 526		"Submitting %s command for user\n"
 527		"\topcode: %x\n"
 528		"\tsize: %ub\n",
 529		cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
 530		cmd->info.size_in);
 531
 532	dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
 533		      "raw command path used\n");
 534
 535	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 536	cxl_mem_mbox_put(cxlm);
 537	if (rc)
 538		goto out;
 539
 540	/*
 541	 * @size_out contains the max size that's allowed to be written back out
  542	 * to userspace. While the hardware may have produced more output than
  543	 * this, the excess is ignored.
 544	 */
 545	if (mbox_cmd.size_out) {
 546		dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
 547			      "Invalid return size\n");
 548		if (copy_to_user(u64_to_user_ptr(out_payload),
 549				 mbox_cmd.payload_out, mbox_cmd.size_out)) {
 550			rc = -EFAULT;
 551			goto out;
 552		}
 553	}
 554
 555	*size_out = mbox_cmd.size_out;
 556	*retval = mbox_cmd.return_code;
 557
 558out:
 559	kvfree(mbox_cmd.payload_in);
 560	kvfree(mbox_cmd.payload_out);
 561	return rc;
 562}
 563
 564static bool cxl_mem_raw_command_allowed(u16 opcode)
 565{
 566	int i;
 567
 568	if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
 569		return false;
 570
 571	if (security_locked_down(LOCKDOWN_PCI_ACCESS))
 572		return false;
 573
 574	if (cxl_raw_allow_all)
 575		return true;
 576
 577	if (cxl_is_security_command(opcode))
 578		return false;
 579
 580	for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
 581		if (cxl_disabled_raw_commands[i] == opcode)
 582			return false;
 583
 584	return true;
 585}
 586
 587/**
 588 * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
 589 * @cxlm: &struct cxl_mem device whose mailbox will be used.
 590 * @send_cmd: &struct cxl_send_command copied in from userspace.
 591 * @out_cmd: Sanitized and populated &struct cxl_mem_command.
 592 *
 593 * Return:
 594 *  * %0	- @out_cmd is ready to send.
 595 *  * %-ENOTTY	- Invalid command specified.
 596 *  * %-EINVAL	- Reserved fields or invalid values were used.
 597 *  * %-ENOMEM	- Input or output buffer wasn't sized properly.
 598 *  * %-EPERM	- Attempted to use a protected command.
 599 *
 600 * The result of this command is a fully validated command in @out_cmd that is
 601 * safe to send to the hardware.
 602 *
 603 * See handle_mailbox_cmd_from_user()
 604 */
 605static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
 606				      const struct cxl_send_command *send_cmd,
 607				      struct cxl_mem_command *out_cmd)
 608{
 609	const struct cxl_command_info *info;
 610	struct cxl_mem_command *c;
 611
 612	if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
 613		return -ENOTTY;
 614
 615	/*
 616	 * The user can never specify an input payload larger than what hardware
 617	 * supports, but output can be arbitrarily large (simply write out as
 618	 * much data as the hardware provides).
 619	 */
 620	if (send_cmd->in.size > cxlm->payload_size)
 621		return -EINVAL;
 622
 623	/*
 624	 * Checks are bypassed for raw commands but a WARN/taint will occur
 625	 * later in the callchain
 626	 */
 627	if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
 628		const struct cxl_mem_command temp = {
 629			.info = {
 630				.id = CXL_MEM_COMMAND_ID_RAW,
 631				.flags = 0,
 632				.size_in = send_cmd->in.size,
 633				.size_out = send_cmd->out.size,
 634			},
 635			.opcode = send_cmd->raw.opcode
 636		};
 637
 638		if (send_cmd->raw.rsvd)
 639			return -EINVAL;
 640
 641		/*
 642		 * Unlike supported commands, the output size of RAW commands
 643		 * gets passed along without further checking, so it must be
 644		 * validated here.
 645		 */
 646		if (send_cmd->out.size > cxlm->payload_size)
 647			return -EINVAL;
 648
 649		if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
 650			return -EPERM;
 651
 652		memcpy(out_cmd, &temp, sizeof(temp));
 653
 654		return 0;
 655	}
 656
 657	if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
 658		return -EINVAL;
 659
 660	if (send_cmd->rsvd)
 661		return -EINVAL;
 662
 663	if (send_cmd->in.rsvd || send_cmd->out.rsvd)
 664		return -EINVAL;
 665
 666	/* Convert user's command into the internal representation */
 667	c = &mem_commands[send_cmd->id];
 668	info = &c->info;
 669
 670	/* Check that the command is enabled for hardware */
 671	if (!test_bit(info->id, cxlm->enabled_cmds))
 672		return -ENOTTY;
 673
 674	/* Check the input buffer is the expected size */
 675	if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
 676		return -ENOMEM;
 677
 678	/* Check the output buffer is at least large enough */
 679	if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
 680		return -ENOMEM;
 681
 682	memcpy(out_cmd, c, sizeof(*c));
 683	out_cmd->info.size_in = send_cmd->in.size;
 684	/*
 685	 * XXX: out_cmd->info.size_out will be controlled by the driver, and the
 686	 * specified number of bytes @send_cmd->out.size will be copied back out
 687	 * to userspace.
 688	 */
 689
 690	return 0;
 691}
 692
 693static int cxl_query_cmd(struct cxl_memdev *cxlmd,
 694			 struct cxl_mem_query_commands __user *q)
 695{
 696	struct device *dev = &cxlmd->dev;
 697	struct cxl_mem_command *cmd;
 698	u32 n_commands;
 699	int j = 0;
 700
 701	dev_dbg(dev, "Query IOCTL\n");
 702
 703	if (get_user(n_commands, &q->n_commands))
 704		return -EFAULT;
 705
 706	/* returns the total number if 0 elements are requested. */
 707	if (n_commands == 0)
 708		return put_user(cxl_cmd_count, &q->n_commands);
 709
 710	/*
  711	 * otherwise, return min(n_commands, total commands) cxl_command_info
 712	 * structures.
 713	 */
 714	cxl_for_each_cmd(cmd) {
 715		const struct cxl_command_info *info = &cmd->info;
 716
 717		if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
 718			return -EFAULT;
 719
 720		if (j == n_commands)
 721			break;
 722	}
 723
 724	return 0;
 725}
 726
 727static int cxl_send_cmd(struct cxl_memdev *cxlmd,
 728			struct cxl_send_command __user *s)
 729{
 730	struct cxl_mem *cxlm = cxlmd->cxlm;
 731	struct device *dev = &cxlmd->dev;
 732	struct cxl_send_command send;
 733	struct cxl_mem_command c;
 734	int rc;
 735
 736	dev_dbg(dev, "Send IOCTL\n");
 737
 738	if (copy_from_user(&send, s, sizeof(send)))
 739		return -EFAULT;
 740
 741	rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
 742	if (rc)
 743		return rc;
 744
 745	/* Prepare to handle a full payload for variable sized output */
 746	if (c.info.size_out < 0)
 747		c.info.size_out = cxlm->payload_size;
 748
 749	rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
 750					  send.out.payload, &send.out.size,
 751					  &send.retval);
 752	if (rc)
 753		return rc;
 754
 755	if (copy_to_user(s, &send, sizeof(send)))
 756		return -EFAULT;
 757
 758	return 0;
 759}
 760
 761static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
 762			       unsigned long arg)
 763{
 764	switch (cmd) {
 765	case CXL_MEM_QUERY_COMMANDS:
 766		return cxl_query_cmd(cxlmd, (void __user *)arg);
 767	case CXL_MEM_SEND_COMMAND:
 768		return cxl_send_cmd(cxlmd, (void __user *)arg);
 769	default:
 770		return -ENOTTY;
 771	}
 772}
 773
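/*
 * Illustrative userspace sketch (not part of the driver): the two ioctls
 * above are issued against the memdev character device, typically
 * /dev/cxl/memX, using the UAPI types from <linux/cxl_mem.h>. Error
 * handling is omitted:
 *
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/cxl_mem.h>
 *
 *	struct cxl_mem_query_commands *q;
 *	int fd = open("/dev/cxl/mem0", O_RDWR);
 *
 *	q = calloc(1, sizeof(*q));
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);
 *	q = realloc(q, sizeof(*q) + q->n_commands * sizeof(q->commands[0]));
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);
 *
 * The first call passes n_commands == 0, so the kernel just writes back the
 * total; the second call fills in that many cxl_command_info entries.
 */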
 774static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
 775			     unsigned long arg)
 776{
 777	struct cxl_memdev *cxlmd = file->private_data;
 778	int rc = -ENXIO;
 779
 780	down_read(&cxl_memdev_rwsem);
 781	if (cxlmd->cxlm)
 782		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
 783	up_read(&cxl_memdev_rwsem);
 784
 785	return rc;
 786}
 787
 788static int cxl_memdev_open(struct inode *inode, struct file *file)
 789{
 790	struct cxl_memdev *cxlmd =
 791		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
 792
 793	get_device(&cxlmd->dev);
 794	file->private_data = cxlmd;
 795
 796	return 0;
 797}
 798
 799static int cxl_memdev_release_file(struct inode *inode, struct file *file)
 800{
 801	struct cxl_memdev *cxlmd =
 802		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
 803
 804	put_device(&cxlmd->dev);
 805
 806	return 0;
 807}
 808
 809static struct cxl_memdev *to_cxl_memdev(struct device *dev)
 810{
 811	return container_of(dev, struct cxl_memdev, dev);
 812}
 813
 814static void cxl_memdev_shutdown(struct device *dev)
 815{
 816	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 817
 818	down_write(&cxl_memdev_rwsem);
 819	cxlmd->cxlm = NULL;
 820	up_write(&cxl_memdev_rwsem);
 821}
 822
 823static const struct cdevm_file_operations cxl_memdev_fops = {
 824	.fops = {
 825		.owner = THIS_MODULE,
 826		.unlocked_ioctl = cxl_memdev_ioctl,
 827		.open = cxl_memdev_open,
 828		.release = cxl_memdev_release_file,
 829		.compat_ioctl = compat_ptr_ioctl,
 830		.llseek = noop_llseek,
 831	},
 832	.shutdown = cxl_memdev_shutdown,
 833};
 834
 835static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
 836{
 837	struct cxl_mem_command *c;
 838
 839	cxl_for_each_cmd(c)
 840		if (c->opcode == opcode)
 841			return c;
 842
 843	return NULL;
 844}
 845
 846/**
 847 * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
 848 * @cxlm: The CXL memory device to communicate with.
 849 * @opcode: Opcode for the mailbox command.
 850 * @in: The input payload for the mailbox command.
 851 * @in_size: The length of the input payload
 852 * @out: Caller allocated buffer for the output.
 853 * @out_size: Expected size of output.
 854 *
 855 * Context: Any context. Will acquire and release mbox_mutex.
 856 * Return:
 857 *  * %>=0	- Number of bytes returned in @out.
 858 *  * %-E2BIG	- Payload is too large for hardware.
 859 *  * %-EBUSY	- Couldn't acquire exclusive mailbox access.
 860 *  * %-EFAULT	- Hardware error occurred.
 861 *  * %-ENXIO	- Command completed, but device reported an error.
 862 *  * %-EIO	- Unexpected output size.
 863 *
 864 * Mailbox commands may execute successfully yet the device itself reported an
 865 * error. While this distinction can be useful for commands from userspace, the
 866 * kernel will only be able to use results when both are successful.
 867 *
 868 * See __cxl_mem_mbox_send_cmd()
 869 */
 870static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode,
 871				 void *in, size_t in_size,
 872				 void *out, size_t out_size)
 873{
 874	const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
 875	struct mbox_cmd mbox_cmd = {
 876		.opcode = opcode,
 877		.payload_in = in,
 878		.size_in = in_size,
 879		.size_out = out_size,
 880		.payload_out = out,
 881	};
 882	int rc;
 883
 884	if (out_size > cxlm->payload_size)
 885		return -E2BIG;
 886
 887	rc = cxl_mem_mbox_get(cxlm);
 888	if (rc)
 889		return rc;
 890
 891	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 892	cxl_mem_mbox_put(cxlm);
 893	if (rc)
 894		return rc;
 895
 896	/* TODO: Map return code to proper kernel style errno */
 897	if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
 898		return -ENXIO;
 899
 900	/*
 901	 * Variable sized commands can't be validated and so it's up to the
 902	 * caller to do that if they wish.
 903	 */
 904	if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
 905		return -EIO;
 906
 907	return 0;
 908}
 909
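/*
 * Example (illustrative sketch): this helper is what the rest of this file
 * uses for kernel-initiated commands, e.g. pulling a fixed-size output into
 * a caller-provided buffer:
 *
 *	u8 health[0x12];
 *	int rc;
 *
 *	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_HEALTH_INFO,
 *				   NULL, 0, health, sizeof(health));
 *	if (rc)
 *		return rc;
 *
 * For fixed-size commands the output buffer must match the size_out in the
 * command table above (0x12 for GET_HEALTH_INFO), otherwise -EIO is
 * returned.
 */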
 910static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
 911{
 912	const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
 913
 914	cxlm->payload_size =
 915		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
 916
 917	/*
 918	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
 919	 *
 920	 * If the size is too small, mandatory commands will not work and so
 921	 * there's no point in going forward. If the size is too large, there's
  922	 * no harm in soft limiting it.
 923	 */
 924	cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
 925	if (cxlm->payload_size < 256) {
 926		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
 927			cxlm->payload_size);
 928		return -ENXIO;
 929	}
 930
 931	dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
 932		cxlm->payload_size);
 933
 934	return 0;
 935}
 936
 937static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev)
 938{
 939	struct device *dev = &pdev->dev;
 940	struct cxl_mem *cxlm;
 941
 942	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
 943	if (!cxlm) {
 944		dev_err(dev, "No memory available\n");
 945		return ERR_PTR(-ENOMEM);
 946	}
 947
 948	mutex_init(&cxlm->mbox_mutex);
 949	cxlm->pdev = pdev;
 950	cxlm->enabled_cmds =
 951		devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count),
 952				   sizeof(unsigned long),
 953				   GFP_KERNEL | __GFP_ZERO);
 954	if (!cxlm->enabled_cmds) {
 955		dev_err(dev, "No memory available for bitmap\n");
 956		return ERR_PTR(-ENOMEM);
 957	}
 958
 959	return cxlm;
 960}
 961
 962static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm,
 963					  u8 bar, u64 offset)
 964{
 965	struct pci_dev *pdev = cxlm->pdev;
 966	struct device *dev = &pdev->dev;
 967	void __iomem *addr;
 968
 969	/* Basic sanity check that BAR is big enough */
 970	if (pci_resource_len(pdev, bar) < offset) {
 971		dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
 972			&pdev->resource[bar], (unsigned long long)offset);
 973		return IOMEM_ERR_PTR(-ENXIO);
 974	}
 975
 976	addr = pci_iomap(pdev, bar, 0);
 977	if (!addr) {
 978		dev_err(dev, "failed to map registers\n");
 979		return addr;
 980	}
 981
 982	dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %#llx\n",
 983		bar, offset);
 984
 985	return addr;
 986}
 987
 988static void cxl_mem_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
 989{
 990	pci_iounmap(cxlm->pdev, base);
 991}
 992
 993static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
 994{
 995	int pos;
 996
 997	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
 998	if (!pos)
 999		return 0;
1000
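	/* Walk the DVSEC list for an entry matching the CXL vendor ID and @dvsec */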
1001	while (pos) {
1002		u16 vendor, id;
1003
1004		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
1005		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
1006		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
1007			return pos;
1008
1009		pos = pci_find_next_ext_capability(pdev, pos,
1010						   PCI_EXT_CAP_ID_DVSEC);
1011	}
1012
1013	return 0;
1014}
1015
1016static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
1017			  struct cxl_register_map *map)
1018{
1019	struct pci_dev *pdev = cxlm->pdev;
1020	struct device *dev = &pdev->dev;
1021	struct cxl_component_reg_map *comp_map;
1022	struct cxl_device_reg_map *dev_map;
1023
1024	switch (map->reg_type) {
1025	case CXL_REGLOC_RBI_COMPONENT:
1026		comp_map = &map->component_map;
1027		cxl_probe_component_regs(dev, base, comp_map);
1028		if (!comp_map->hdm_decoder.valid) {
1029			dev_err(dev, "HDM decoder registers not found\n");
1030			return -ENXIO;
1031		}
1032
1033		dev_dbg(dev, "Set up component registers\n");
1034		break;
1035	case CXL_REGLOC_RBI_MEMDEV:
1036		dev_map = &map->device_map;
1037		cxl_probe_device_regs(dev, base, dev_map);
1038		if (!dev_map->status.valid || !dev_map->mbox.valid ||
1039		    !dev_map->memdev.valid) {
1040			dev_err(dev, "registers not found: %s%s%s\n",
1041				!dev_map->status.valid ? "status " : "",
1042				!dev_map->mbox.valid ? "mbox " : "",
1043				!dev_map->memdev.valid ? "memdev " : "");
1044			return -ENXIO;
1045		}
1046
1047		dev_dbg(dev, "Probing device registers...\n");
1048		break;
1049	default:
1050		break;
1051	}
1052
1053	return 0;
1054}
1055
1056static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
1057{
1058	struct pci_dev *pdev = cxlm->pdev;
1059	struct device *dev = &pdev->dev;
1060
1061	switch (map->reg_type) {
1062	case CXL_REGLOC_RBI_COMPONENT:
1063		cxl_map_component_regs(pdev, &cxlm->regs.component, map);
1064		dev_dbg(dev, "Mapping component registers...\n");
1065		break;
1066	case CXL_REGLOC_RBI_MEMDEV:
1067		cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map);
 1068		dev_dbg(dev, "Mapping device registers...\n");
1069		break;
1070	default:
1071		break;
1072	}
1073
1074	return 0;
1075}
1076
1077static void cxl_decode_register_block(u32 reg_lo, u32 reg_hi,
1078				      u8 *bar, u64 *offset, u8 *reg_type)
1079{
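	/*
	 * The low dword packs the BAR indicator and block type into its low
	 * bits; the remaining bits, combined with the high dword, form the
	 * 64-bit block offset.
	 */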
1080	*offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
1081	*bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
1082	*reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
1083}
1084
1085/**
 1086 * cxl_mem_setup_regs() - Set up necessary MMIO.
1087 * @cxlm: The CXL memory device to communicate with.
1088 *
1089 * Return: 0 if all necessary registers mapped.
1090 *
1091 * A memory device is required by spec to implement a certain set of MMIO
1092 * regions. The purpose of this function is to enumerate and map those
1093 * registers.
1094 */
1095static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
1096{
1097	struct pci_dev *pdev = cxlm->pdev;
1098	struct device *dev = &pdev->dev;
1099	u32 regloc_size, regblocks;
1100	void __iomem *base;
1101	int regloc, i;
1102	struct cxl_register_map *map, *n;
1103	LIST_HEAD(register_maps);
1104	int ret = 0;
1105
1106	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
1107	if (!regloc) {
1108		dev_err(dev, "register location dvsec not found\n");
1109		return -ENXIO;
1110	}
1111
1112	if (pci_request_mem_regions(pdev, pci_name(pdev)))
1113		return -ENODEV;
1114
1115	/* Get the size of the Register Locator DVSEC */
1116	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
1117	regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
1118
1119	regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
1120	regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
1121
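	/* Each register block entry in the DVSEC is a pair of dwords (8 bytes) */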
1122	for (i = 0; i < regblocks; i++, regloc += 8) {
1123		u32 reg_lo, reg_hi;
1124		u8 reg_type;
1125		u64 offset;
1126		u8 bar;
1127
1128		map = kzalloc(sizeof(*map), GFP_KERNEL);
1129		if (!map) {
1130			ret = -ENOMEM;
1131			goto free_maps;
1132		}
1133
1134		list_add(&map->list, &register_maps);
1135
1136		pci_read_config_dword(pdev, regloc, &reg_lo);
1137		pci_read_config_dword(pdev, regloc + 4, &reg_hi);
1138
1139		cxl_decode_register_block(reg_lo, reg_hi, &bar, &offset,
1140					  &reg_type);
1141
1142		dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n",
1143			bar, offset, reg_type);
1144
1145		base = cxl_mem_map_regblock(cxlm, bar, offset);
 1146		if (IS_ERR(base)) {
 1147			ret = PTR_ERR(base);
1148			goto free_maps;
1149		}
1150
1151		map->barno = bar;
1152		map->block_offset = offset;
1153		map->reg_type = reg_type;
1154
1155		ret = cxl_probe_regs(cxlm, base + offset, map);
1156
1157		/* Always unmap the regblock regardless of probe success */
1158		cxl_mem_unmap_regblock(cxlm, base);
1159
1160		if (ret)
1161			goto free_maps;
1162	}
1163
1164	pci_release_mem_regions(pdev);
1165
1166	list_for_each_entry(map, &register_maps, list) {
1167		ret = cxl_map_regs(cxlm, map);
1168		if (ret)
1169			goto free_maps;
1170	}
1171
1172free_maps:
1173	list_for_each_entry_safe(map, n, &register_maps, list) {
1174		list_del(&map->list);
1175		kfree(map);
1176	}
1177
1178	return ret;
1179}
1180
1181static void cxl_memdev_release(struct device *dev)
1182{
1183	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1184
1185	ida_free(&cxl_memdev_ida, cxlmd->id);
1186	kfree(cxlmd);
1187}
1188
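/* Place memdev character device nodes under a "cxl/" subdirectory of /dev */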
1189static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
1190				kgid_t *gid)
1191{
1192	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
1193}
1194
1195static ssize_t firmware_version_show(struct device *dev,
1196				     struct device_attribute *attr, char *buf)
1197{
1198	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1199	struct cxl_mem *cxlm = cxlmd->cxlm;
1200
1201	return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
1202}
1203static DEVICE_ATTR_RO(firmware_version);
1204
1205static ssize_t payload_max_show(struct device *dev,
1206				struct device_attribute *attr, char *buf)
1207{
1208	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1209	struct cxl_mem *cxlm = cxlmd->cxlm;
1210
1211	return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
1212}
1213static DEVICE_ATTR_RO(payload_max);
1214
1215static ssize_t label_storage_size_show(struct device *dev,
1216				struct device_attribute *attr, char *buf)
1217{
1218	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1219	struct cxl_mem *cxlm = cxlmd->cxlm;
1220
1221	return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
1222}
1223static DEVICE_ATTR_RO(label_storage_size);
1224
1225static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
1226			     char *buf)
1227{
1228	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1229	struct cxl_mem *cxlm = cxlmd->cxlm;
1230	unsigned long long len = range_len(&cxlm->ram_range);
1231
1232	return sysfs_emit(buf, "%#llx\n", len);
1233}
1234
1235static struct device_attribute dev_attr_ram_size =
1236	__ATTR(size, 0444, ram_size_show, NULL);
1237
1238static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
1239			      char *buf)
1240{
1241	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1242	struct cxl_mem *cxlm = cxlmd->cxlm;
1243	unsigned long long len = range_len(&cxlm->pmem_range);
1244
1245	return sysfs_emit(buf, "%#llx\n", len);
1246}
1247
1248static struct device_attribute dev_attr_pmem_size =
1249	__ATTR(size, 0444, pmem_size_show, NULL);
1250
1251static struct attribute *cxl_memdev_attributes[] = {
1252	&dev_attr_firmware_version.attr,
1253	&dev_attr_payload_max.attr,
1254	&dev_attr_label_storage_size.attr,
1255	NULL,
1256};
1257
1258static struct attribute *cxl_memdev_pmem_attributes[] = {
1259	&dev_attr_pmem_size.attr,
1260	NULL,
1261};
1262
1263static struct attribute *cxl_memdev_ram_attributes[] = {
1264	&dev_attr_ram_size.attr,
1265	NULL,
1266};
1267
1268static struct attribute_group cxl_memdev_attribute_group = {
1269	.attrs = cxl_memdev_attributes,
1270};
1271
1272static struct attribute_group cxl_memdev_ram_attribute_group = {
1273	.name = "ram",
1274	.attrs = cxl_memdev_ram_attributes,
1275};
1276
1277static struct attribute_group cxl_memdev_pmem_attribute_group = {
1278	.name = "pmem",
1279	.attrs = cxl_memdev_pmem_attributes,
1280};
1281
1282static const struct attribute_group *cxl_memdev_attribute_groups[] = {
1283	&cxl_memdev_attribute_group,
1284	&cxl_memdev_ram_attribute_group,
1285	&cxl_memdev_pmem_attribute_group,
1286	NULL,
1287};
1288
1289static const struct device_type cxl_memdev_type = {
1290	.name = "cxl_memdev",
1291	.release = cxl_memdev_release,
1292	.devnode = cxl_memdev_devnode,
1293	.groups = cxl_memdev_attribute_groups,
1294};
1295
1296static void cxl_memdev_unregister(void *_cxlmd)
1297{
1298	struct cxl_memdev *cxlmd = _cxlmd;
1299	struct device *dev = &cxlmd->dev;
1300	struct cdev *cdev = &cxlmd->cdev;
1301	const struct cdevm_file_operations *cdevm_fops;
1302
1303	cdevm_fops = container_of(cdev->ops, typeof(*cdevm_fops), fops);
1304	cdevm_fops->shutdown(dev);
1305
1306	cdev_device_del(&cxlmd->cdev, dev);
1307	put_device(dev);
1308}
1309
1310static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
1311					   const struct file_operations *fops)
1312{
1313	struct pci_dev *pdev = cxlm->pdev;
1314	struct cxl_memdev *cxlmd;
1315	struct device *dev;
1316	struct cdev *cdev;
1317	int rc;
1318
1319	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
1320	if (!cxlmd)
1321		return ERR_PTR(-ENOMEM);
1322
 1323	rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
1324	if (rc < 0)
1325		goto err;
1326	cxlmd->id = rc;
1327
1328	dev = &cxlmd->dev;
1329	device_initialize(dev);
1330	dev->parent = &pdev->dev;
1331	dev->bus = &cxl_bus_type;
1332	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
1333	dev->type = &cxl_memdev_type;
1334	device_set_pm_not_required(dev);
1335
1336	cdev = &cxlmd->cdev;
1337	cdev_init(cdev, fops);
1338	return cxlmd;
1339
1340err:
1341	kfree(cxlmd);
1342	return ERR_PTR(rc);
1343}
1344
1345static struct cxl_memdev *
1346devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
1347		    const struct cdevm_file_operations *cdevm_fops)
1348{
1349	struct cxl_memdev *cxlmd;
1350	struct device *dev;
1351	struct cdev *cdev;
1352	int rc;
1353
1354	cxlmd = cxl_memdev_alloc(cxlm, &cdevm_fops->fops);
1355	if (IS_ERR(cxlmd))
1356		return cxlmd;
1357
1358	dev = &cxlmd->dev;
1359	rc = dev_set_name(dev, "mem%d", cxlmd->id);
1360	if (rc)
1361		goto err;
1362
1363	/*
1364	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
1365	 * needed as this is ordered with cdev_add() publishing the device.
1366	 */
1367	cxlmd->cxlm = cxlm;
1368
1369	cdev = &cxlmd->cdev;
1370	rc = cdev_device_add(cdev, dev);
1371	if (rc)
1372		goto err;
1373
1374	rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
1375	if (rc)
1376		return ERR_PTR(rc);
1377	return cxlmd;
1378
1379err:
1380	/*
 1381	 * The cdev was briefly live; shut down any ioctl operations that
1382	 * saw that state.
1383	 */
1384	cdevm_fops->shutdown(dev);
1385	put_device(dev);
1386	return ERR_PTR(rc);
1387}
1388
1389static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
1390{
1391	u32 remaining = size;
1392	u32 offset = 0;
1393
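	/* Fetch the log in chunks no larger than the mailbox payload size */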
1394	while (remaining) {
1395		u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
1396		struct cxl_mbox_get_log {
1397			uuid_t uuid;
1398			__le32 offset;
1399			__le32 length;
1400		} __packed log = {
1401			.uuid = *uuid,
1402			.offset = cpu_to_le32(offset),
1403			.length = cpu_to_le32(xfer_size)
1404		};
1405		int rc;
1406
1407		rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
1408					   sizeof(log), out, xfer_size);
1409		if (rc < 0)
1410			return rc;
1411
1412		out += xfer_size;
1413		remaining -= xfer_size;
1414		offset += xfer_size;
1415	}
1416
1417	return 0;
1418}
1419
1420/**
1421 * cxl_walk_cel() - Walk through the Command Effects Log.
1422 * @cxlm: Device.
1423 * @size: Length of the Command Effects Log.
1424 * @cel: CEL
1425 *
1426 * Iterate over each entry in the CEL and determine if the driver supports the
1427 * command. If so, the command is enabled for the device and can be used later.
1428 */
1429static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
1430{
1431	struct cel_entry {
1432		__le16 opcode;
1433		__le16 effect;
 1434	} __packed *cel_entry;
1435	const int cel_entries = size / sizeof(*cel_entry);
1436	int i;
1437
1438	cel_entry = (struct cel_entry *)cel;
1439
1440	for (i = 0; i < cel_entries; i++) {
1441		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
1442		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
1443
1444		if (!cmd) {
1445			dev_dbg(&cxlm->pdev->dev,
1446				"Opcode 0x%04x unsupported by driver", opcode);
1447			continue;
1448		}
1449
1450		set_bit(cmd->info.id, cxlm->enabled_cmds);
1451	}
1452}
1453
1454struct cxl_mbox_get_supported_logs {
1455	__le16 entries;
1456	u8 rsvd[6];
1457	struct gsl_entry {
1458		uuid_t uuid;
1459		__le32 size;
1460	} __packed entry[];
1461} __packed;
1462
1463static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
1464{
1465	struct cxl_mbox_get_supported_logs *ret;
1466	int rc;
1467
1468	ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
1469	if (!ret)
1470		return ERR_PTR(-ENOMEM);
1471
1472	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
1473				   0, ret, cxlm->payload_size);
1474	if (rc < 0) {
1475		kvfree(ret);
1476		return ERR_PTR(rc);
1477	}
1478
1479	return ret;
1480}
1481
1482/**
1483 * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
1484 * @cxlm: The device.
1485 *
 1486 * Return: 0 if enumeration completed successfully.
1487 *
1488 * CXL devices have optional support for certain commands. This function will
1489 * determine the set of supported commands for the hardware and update the
1490 * enabled_cmds bitmap in the @cxlm.
1491 */
1492static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
1493{
1494	struct cxl_mbox_get_supported_logs *gsl;
1495	struct device *dev = &cxlm->pdev->dev;
1496	struct cxl_mem_command *cmd;
1497	int i, rc;
1498
1499	gsl = cxl_get_gsl(cxlm);
1500	if (IS_ERR(gsl))
1501		return PTR_ERR(gsl);
1502
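	/* Assume failure until the Command Effects Log is found and parsed */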
1503	rc = -ENOENT;
1504	for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
1505		u32 size = le32_to_cpu(gsl->entry[i].size);
1506		uuid_t uuid = gsl->entry[i].uuid;
1507		u8 *log;
1508
1509		dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
1510
1511		if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
1512			continue;
1513
1514		log = kvmalloc(size, GFP_KERNEL);
1515		if (!log) {
1516			rc = -ENOMEM;
1517			goto out;
1518		}
1519
1520		rc = cxl_xfer_log(cxlm, &uuid, size, log);
1521		if (rc) {
1522			kvfree(log);
1523			goto out;
1524		}
1525
1526		cxl_walk_cel(cxlm, size, log);
1527		kvfree(log);
1528
1529		/* In case CEL was bogus, enable some default commands. */
1530		cxl_for_each_cmd(cmd)
1531			if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
1532				set_bit(cmd->info.id, cxlm->enabled_cmds);
1533
1534		/* Found the required CEL */
1535		rc = 0;
1536	}
1537
1538out:
1539	kvfree(gsl);
1540	return rc;
1541}
1542
1543/**
1544 * cxl_mem_identify() - Send the IDENTIFY command to the device.
1545 * @cxlm: The device to identify.
1546 *
1547 * Return: 0 if identify was executed successfully.
1548 *
1549 * This will dispatch the identify command to the device and on success populate
1550 * structures to be exported to sysfs.
1551 */
1552static int cxl_mem_identify(struct cxl_mem *cxlm)
1553{
1554	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
1555	struct cxl_mbox_identify {
1556		char fw_revision[0x10];
1557		__le64 total_capacity;
1558		__le64 volatile_capacity;
1559		__le64 persistent_capacity;
1560		__le64 partition_align;
1561		__le16 info_event_log_size;
1562		__le16 warning_event_log_size;
1563		__le16 failure_event_log_size;
1564		__le16 fatal_event_log_size;
1565		__le32 lsa_size;
1566		u8 poison_list_max_mer[3];
1567		__le16 inject_poison_limit;
1568		u8 poison_caps;
1569		u8 qos_telemetry_caps;
1570	} __packed id;
1571	int rc;
1572
1573	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
1574				   sizeof(id));
1575	if (rc < 0)
1576		return rc;
1577
1578	/*
1579	 * TODO: enumerate DPA map, as 'ram' and 'pmem' do not alias.
1580	 * For now, only the capacity is exported in sysfs
1581	 */
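	/* Capacity fields in the Identify payload are in multiples of 256MB (SZ_256M) */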
1582	cxlm->ram_range.start = 0;
1583	cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1;
1584
1585	cxlm->pmem_range.start = 0;
1586	cxlm->pmem_range.end =
1587		le64_to_cpu(id.persistent_capacity) * SZ_256M - 1;
1588
1589	cxlm->lsa_size = le32_to_cpu(id.lsa_size);
1590	memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
1591
1592	return 0;
1593}
1594
1595static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1596{
1597	struct cxl_memdev *cxlmd;
1598	struct cxl_mem *cxlm;
1599	int rc;
1600
1601	rc = pcim_enable_device(pdev);
1602	if (rc)
1603		return rc;
1604
1605	cxlm = cxl_mem_create(pdev);
1606	if (IS_ERR(cxlm))
1607		return PTR_ERR(cxlm);
1608
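	/* Bring-up order: map registers, size the mailbox, enumerate commands, identify */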
1609	rc = cxl_mem_setup_regs(cxlm);
1610	if (rc)
1611		return rc;
1612
1613	rc = cxl_mem_setup_mailbox(cxlm);
1614	if (rc)
1615		return rc;
1616
1617	rc = cxl_mem_enumerate_cmds(cxlm);
1618	if (rc)
1619		return rc;
1620
1621	rc = cxl_mem_identify(cxlm);
1622	if (rc)
1623		return rc;
1624
1625	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm, &cxl_memdev_fops);
1626	if (IS_ERR(cxlmd))
1627		return PTR_ERR(cxlmd);
1628
1629	if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
1630		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
1631
1632	return rc;
1633}
1634
1635static const struct pci_device_id cxl_mem_pci_tbl[] = {
1636	/* PCI class code for CXL.mem Type-3 Devices */
1637	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
1638	{ /* terminate list */ },
1639};
1640MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
1641
1642static struct pci_driver cxl_mem_driver = {
1643	.name			= KBUILD_MODNAME,
1644	.id_table		= cxl_mem_pci_tbl,
1645	.probe			= cxl_mem_probe,
1646	.driver	= {
1647		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
1648	},
1649};
1650
1651static __init int cxl_mem_init(void)
1652{
1653	struct dentry *mbox_debugfs;
1654	dev_t devt;
1655	int rc;
1656
1657	/* Double check the anonymous union trickery in struct cxl_regs */
1658	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
1659		     offsetof(struct cxl_regs, device_regs.memdev));
1660
1661	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
1662	if (rc)
1663		return rc;
1664
1665	cxl_mem_major = MAJOR(devt);
1666
1667	rc = pci_register_driver(&cxl_mem_driver);
1668	if (rc) {
1669		unregister_chrdev_region(MKDEV(cxl_mem_major, 0),
1670					 CXL_MEM_MAX_DEVS);
1671		return rc;
1672	}
1673
1674	cxl_debugfs = debugfs_create_dir("cxl", NULL);
1675	mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
1676	debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
1677			    &cxl_raw_allow_all);
1678
1679	return 0;
1680}
1681
1682static __exit void cxl_mem_exit(void)
1683{
1684	debugfs_remove_recursive(cxl_debugfs);
1685	pci_unregister_driver(&cxl_mem_driver);
1686	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
1687}
1688
1689MODULE_LICENSE("GPL v2");
1690module_init(cxl_mem_init);
1691module_exit(cxl_mem_exit);
1692MODULE_IMPORT_NS(CXL);