v6.13.7 (drivers/cxl/pci.c)
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
   3#include <linux/unaligned.h>
   4#include <linux/io-64-nonatomic-lo-hi.h>
   5#include <linux/moduleparam.h>
   6#include <linux/module.h>
   7#include <linux/delay.h>
   8#include <linux/sizes.h>
   9#include <linux/mutex.h>
   10#include <linux/list.h>
  11#include <linux/pci.h>
  12#include <linux/aer.h>
  13#include <linux/io.h>
  14#include <cxl/mailbox.h>
  15#include "cxlmem.h"
  16#include "cxlpci.h"
  17#include "cxl.h"
  18#include "pmu.h"
  19
  20/**
  21 * DOC: cxl pci
  22 *
  23 * This implements the PCI exclusive functionality for a CXL device as it is
  24 * defined by the Compute Express Link specification. CXL devices may surface
  25 * certain functionality even if it isn't CXL enabled. While this driver is
  26 * focused around the PCI specific aspects of a CXL device, it binds to the
  27 * specific CXL memory device class code, and therefore the implementation of
  28 * cxl_pci is focused around CXL memory devices.
  29 *
  30 * The driver has several responsibilities, mainly:
   31 *  - Create the memX device and register it on the CXL bus.
   32 *  - Enumerate the device's register interface and map it.
   33 *  - Register an nvdimm bridge device with cxl_core.
   34 *  - Register a CXL mailbox with cxl_core.
   35 */
  36
  37#define cxl_doorbell_busy(cxlds)                                                \
  38	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
  39	 CXLDEV_MBOX_CTRL_DOORBELL)
  40
  41/* CXL 2.0 - 8.2.8.4 */
  42#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
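/*
 * Note: despite the _MS suffix, this constant is compared directly against
 * jiffies in the doorbell wait below, so 2 * HZ means two seconds rather
 * than two milliseconds.
 */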
   43
  44/*
  45 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
  46 * dictate how long to wait for the mailbox to become ready. The new
  47 * field allows the device to tell software the amount of time to wait
  48 * before mailbox ready. This field per the spec theoretically allows
   49 * for up to 255 seconds. 255 seconds is unreasonably long; it's longer
  50 * than the maximum SATA port link recovery wait. Default to 60 seconds
  51 * until someone builds a CXL device that needs more time in practice.
  52 */
  53static unsigned short mbox_ready_timeout = 60;
  54module_param(mbox_ready_timeout, ushort, 0644);
  55MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
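/*
 * Example: the timeout can be raised at load time, e.g.
 * "modprobe cxl_pci mbox_ready_timeout=120", or later via
 * /sys/module/cxl_pci/parameters/mbox_ready_timeout (the parameter is
 * 0644); runtime changes only affect devices probed afterwards.
 */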
 
 
 
 
 
 
 
 
  56
  57static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
  58{
  59	const unsigned long start = jiffies;
  60	unsigned long end = start;
  61
  62	while (cxl_doorbell_busy(cxlds)) {
  63		end = jiffies;
  64
  65		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
  66			/* Check again in case preempted before timeout test */
  67			if (!cxl_doorbell_busy(cxlds))
  68				break;
  69			return -ETIMEDOUT;
  70		}
  71		cpu_relax();
  72	}
  73
  74	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
  75		jiffies_to_msecs(end) - jiffies_to_msecs(start));
  76	return 0;
  77}
  78
  79#define cxl_err(dev, status, msg)                                        \
  80	dev_err_ratelimited(dev, msg ", device state %s%s\n",                  \
  81			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
  82			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
  83
  84#define cxl_cmd_err(dev, cmd, status, msg)                               \
  85	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n",    \
  86			    (cmd)->opcode,                                     \
  87			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
  88			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
  89
  90/*
  91 * Threaded irq dev_id's must be globally unique.  cxl_dev_id provides a unique
  92 * wrapper object for each irq within the same cxlds.
  93 */
  94struct cxl_dev_id {
  95	struct cxl_dev_state *cxlds;
  96};
  97
  98static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
  99			   irq_handler_t thread_fn)
 100{
 101	struct device *dev = cxlds->dev;
 102	struct cxl_dev_id *dev_id;
 103
 104	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
 105	if (!dev_id)
 106		return -ENOMEM;
 107	dev_id->cxlds = cxlds;
 108
 109	return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
 110					 IRQF_SHARED | IRQF_ONESHOT, NULL,
 111					 dev_id);
 112}
 113
 114static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
 115{
 116	u64 reg;
 117
 118	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
 119	return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
 120}
 121
 122static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
 123{
 124	u64 reg;
 125	u16 opcode;
 126	struct cxl_dev_id *dev_id = id;
 127	struct cxl_dev_state *cxlds = dev_id->cxlds;
 128	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
 129	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 130
 131	if (!cxl_mbox_background_complete(cxlds))
 132		return IRQ_NONE;
 133
 134	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
 135	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
 136	if (opcode == CXL_MBOX_OP_SANITIZE) {
 137		mutex_lock(&cxl_mbox->mbox_mutex);
 138		if (mds->security.sanitize_node)
 139			mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
 140		mutex_unlock(&cxl_mbox->mbox_mutex);
 141	} else {
 142		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
 143		rcuwait_wake_up(&cxl_mbox->mbox_wait);
 144	}
 145
  146	return IRQ_HANDLED;
 147}
 148
 149/*
 150 * Sanitization operation polling mode.
 151 */
 152static void cxl_mbox_sanitize_work(struct work_struct *work)
 153{
 154	struct cxl_memdev_state *mds =
 155		container_of(work, typeof(*mds), security.poll_dwork.work);
 156	struct cxl_dev_state *cxlds = &mds->cxlds;
 157	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
 158
 159	mutex_lock(&cxl_mbox->mbox_mutex);
 160	if (cxl_mbox_background_complete(cxlds)) {
 161		mds->security.poll_tmo_secs = 0;
 162		if (mds->security.sanitize_node)
 163			sysfs_notify_dirent(mds->security.sanitize_node);
 164		mds->security.sanitize_active = false;
 165
 166		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
 167	} else {
 168		int timeout = mds->security.poll_tmo_secs + 10;
 169
 170		mds->security.poll_tmo_secs = min(15 * 60, timeout);
 171		schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
 172	}
 173	mutex_unlock(&cxl_mbox->mbox_mutex);
 174}
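/*
 * Resulting poll cadence: __cxl_pci_mbox_send_cmd() schedules the first
 * check ~1 second after the Sanitize command is accepted; each pass above
 * then backs off by another 10 seconds (11s, 21s, 31s, ...), with the
 * interval capped at roughly 15 minutes.
 */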
 175
 176/**
 177 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 178 * @cxl_mbox: CXL mailbox context
 179 * @mbox_cmd: Command to send to the memory device.
 180 *
 181 * Context: Any context. Expects mbox_mutex to be held.
 182 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 183 *         Caller should check the return code in @mbox_cmd to make sure it
 184 *         succeeded.
 185 *
 186 * This is a generic form of the CXL mailbox send command thus only using the
 187 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 188 * devices, and perhaps other types of CXL devices may have further information
 189 * available upon error conditions. Driver facilities wishing to send mailbox
 190 * commands should use the wrapper command.
 191 *
 192 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 193 * mailbox to be OS controlled and the secondary mailbox to be used by system
 194 * firmware. This allows the OS and firmware to communicate with the device and
 195 * not need to coordinate with each other. The driver only uses the primary
 196 * mailbox.
 197 */
 198static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox,
 199				   struct cxl_mbox_cmd *mbox_cmd)
 200{
 201	struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox);
 202	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 203	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
 204	struct device *dev = cxlds->dev;
 205	u64 cmd_reg, status_reg;
 206	size_t out_len;
 207	int rc;
 208
 209	lockdep_assert_held(&cxl_mbox->mbox_mutex);
 210
 211	/*
 212	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
 213	 *   1. Caller reads MB Control Register to verify doorbell is clear
 214	 *   2. Caller writes Command Register
 215	 *   3. Caller writes Command Payload Registers if input payload is non-empty
 216	 *   4. Caller writes MB Control Register to set doorbell
 217	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
 218	 *   6. Caller reads MB Status Register to fetch Return code
 219	 *   7. If command successful, Caller reads Command Register to get Payload Length
 220	 *   8. If output payload is non-empty, host reads Command Payload Registers
 221	 *
 222	 * Hardware is free to do whatever it wants before the doorbell is rung,
 223	 * and isn't allowed to change anything after it clears the doorbell. As
 224	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
 225	 * also happen in any order (though some orders might not make sense).
 226	 */
 227
 228	/* #1 */
 229	if (cxl_doorbell_busy(cxlds)) {
 230		u64 md_status =
 231			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
 232
 233		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
 234			    "mailbox queue busy");
 235		return -EBUSY;
 236	}
 237
 238	/*
 239	 * With sanitize polling, hardware might be done and the poller still
  240	 * not be in sync. Ensure no new command is sent until the poller catches
  241	 * up; keep the hardware semantics and only allow device health status.
 242	 */
 243	if (mds->security.poll_tmo_secs > 0) {
 244		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
 245			return -EBUSY;
 246	}
 247
 248	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
 249			     mbox_cmd->opcode);
 250	if (mbox_cmd->size_in) {
 251		if (WARN_ON(!mbox_cmd->payload_in))
 252			return -EINVAL;
 253
 254		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
 255				      mbox_cmd->size_in);
 256		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
 257	}
 258
 259	/* #2, #3 */
 260	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 261
 262	/* #4 */
 263	dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
 264	writel(CXLDEV_MBOX_CTRL_DOORBELL,
 265	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
 266
 267	/* #5 */
 268	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
 269	if (rc == -ETIMEDOUT) {
 270		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
 271
 272		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
 273		return rc;
 274	}
 275
 276	/* #6 */
 277	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
 278	mbox_cmd->return_code =
 279		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
 280
 281	/*
 282	 * Handle the background command in a synchronous manner.
 283	 *
 284	 * All other mailbox commands will serialize/queue on the mbox_mutex,
 285	 * which we currently hold. Furthermore this also guarantees that
 286	 * cxl_mbox_background_complete() checks are safe amongst each other,
 287	 * in that no new bg operation can occur in between.
 288	 *
 289	 * Background operations are timesliced in accordance with the nature
 290	 * of the command. In the event of timeout, the mailbox state is
 291	 * indeterminate until the next successful command submission and the
 292	 * driver can get back in sync with the hardware state.
 293	 */
 294	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
 295		u64 bg_status_reg;
 296		int i, timeout;
 297
 298		/*
 299		 * Sanitization is a special case which monopolizes the device
 300		 * and cannot be timesliced. Handle asynchronously instead,
 301		 * and allow userspace to poll(2) for completion.
 302		 */
 303		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
 304			if (mds->security.sanitize_active)
 305				return -EBUSY;
 306
 307			/* give first timeout a second */
 308			timeout = 1;
 309			mds->security.poll_tmo_secs = timeout;
 310			mds->security.sanitize_active = true;
 311			schedule_delayed_work(&mds->security.poll_dwork,
 312					      timeout * HZ);
 313			dev_dbg(dev, "Sanitization operation started\n");
 314			goto success;
 315		}
 316
 317		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
 318			mbox_cmd->opcode);
 319
 320		timeout = mbox_cmd->poll_interval_ms;
 321		for (i = 0; i < mbox_cmd->poll_count; i++) {
 322			if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait,
 323						       cxl_mbox_background_complete(cxlds),
 324						       TASK_UNINTERRUPTIBLE,
 325						       msecs_to_jiffies(timeout)) > 0)
 326				break;
 327		}
 328
 329		if (!cxl_mbox_background_complete(cxlds)) {
 330			dev_err(dev, "timeout waiting for background (%d ms)\n",
 331				timeout * mbox_cmd->poll_count);
 332			return -ETIMEDOUT;
 333		}
 334
 335		bg_status_reg = readq(cxlds->regs.mbox +
 336				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
 337		mbox_cmd->return_code =
 338			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
 339				  bg_status_reg);
 340		dev_dbg(dev,
 341			"Mailbox background operation (0x%04x) completed\n",
 342			mbox_cmd->opcode);
 343	}
 344
 345	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
 346		dev_dbg(dev, "Mailbox operation had an error: %s\n",
 347			cxl_mbox_cmd_rc2str(mbox_cmd));
 348		return 0; /* completed but caller must check return_code */
 349	}
 350
 351success:
 352	/* #7 */
 353	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 354	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
 355
 356	/* #8 */
 357	if (out_len && mbox_cmd->payload_out) {
 358		/*
 359		 * Sanitize the copy. If hardware misbehaves, out_len per the
 360		 * spec can actually be greater than the max allowed size (21
 361		 * bits available but spec defined 1M max). The caller also may
 362		 * have requested less data than the hardware supplied even
 363		 * within spec.
 364		 */
 365		size_t n;
 366
 367		n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len);
 368		memcpy_fromio(mbox_cmd->payload_out, payload, n);
 369		mbox_cmd->size_out = n;
 370	} else {
 371		mbox_cmd->size_out = 0;
 372	}
 373
 374	return 0;
 375}
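/*
 * Illustrative sketch (not a driver entry point): how a driver-internal
 * caller typically reaches the path above. cxl_internal_send_cmd()
 * validates the command and dispatches it through cxl_mbox->mbox_send,
 * i.e. cxl_pci_mbox_send() below. The output buffer here is a plain
 * placeholder, not the spec-defined Get Health Info payload layout.
 */
static int __maybe_unused example_get_health_info(struct cxl_memdev_state *mds)
{
	u8 out[0x12];	/* placeholder output size for illustration */
	struct cxl_mbox_cmd cmd = {
		.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
		.payload_out = out,
		.size_out = sizeof(out),
	};
	int rc;

	rc = cxl_internal_send_cmd(&mds->cxlds.cxl_mbox, &cmd);
	if (rc < 0)
		return rc;

	/* cmd.size_out now reflects the byte count the device returned */
	return 0;
}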
 376
 377static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
  378			     struct cxl_mbox_cmd *cmd)
  379{
 380	int rc;
 381
 382	mutex_lock_io(&cxl_mbox->mbox_mutex);
 383	rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd);
 384	mutex_unlock(&cxl_mbox->mbox_mutex);
  385
 386	return rc;
 387}
 388
  389static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
 390{
 391	struct cxl_dev_state *cxlds = &mds->cxlds;
 392	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
 393	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
 394	struct device *dev = cxlds->dev;
 395	unsigned long timeout;
 396	int irq, msgnum;
 397	u64 md_status;
 398	u32 ctrl;
 399
 400	timeout = jiffies + mbox_ready_timeout * HZ;
 401	do {
 402		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
 403		if (md_status & CXLMDEV_MBOX_IF_READY)
 404			break;
 405		if (msleep_interruptible(100))
 406			break;
  407	} while (!time_after(jiffies, timeout));
 408
 409	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
 410		cxl_err(dev, md_status, "timeout awaiting mailbox ready");
 411		return -ETIMEDOUT;
 
 412	}
 413
 414	/*
 415	 * A command may be in flight from a previous driver instance,
 416	 * think kexec, do one doorbell wait so that
 417	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
 418	 * source for future doorbell busy events.
 419	 */
 420	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
 421		cxl_err(dev, md_status, "timeout awaiting mailbox idle");
 422		return -ETIMEDOUT;
 423	}
 424
 425	cxl_mbox->mbox_send = cxl_pci_mbox_send;
 426	cxl_mbox->payload_size =
  427		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
 428
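	/*
	 * The capability field encodes the payload size as a power of two:
	 * an encoded value of 8 yields 1 << 8 = 256 bytes (the minimum
	 * accepted below), while 20 yields 1 MiB (the soft cap applied below).
	 */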
 429	/*
 430	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
 431	 *
 432	 * If the size is too small, mandatory commands will not work and so
 433	 * there's no point in going forward. If the size is too large, there's
  434	 * no harm in soft limiting it.
 435	 */
 436	cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M);
 437	if (cxl_mbox->payload_size < 256) {
 438		dev_err(dev, "Mailbox is too small (%zub)",
 439			cxl_mbox->payload_size);
  440		return -ENXIO;
 441	}
 442
 443	dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size);
 
 444
  445	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
 446
 447	/* background command interrupts are optional */
 448	if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail)
 449		return 0;
 450
 451	msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
 452	irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
 453	if (irq < 0)
 454		return 0;
 455
 456	if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq))
 457		return 0;
 458
 459	dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n");
 460	/* enable background command mbox irq support */
 461	ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
 462	ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
 463	writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
 464
  465	return 0;
 466}
 467
 468/*
 469 * Assume that any RCIEP that emits the CXL memory expander class code
  470 * is an RCD
 471 */
  472static bool is_cxl_restricted(struct pci_dev *pdev)
 473{
 474	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
 475}
 476
 477static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
 478				  struct cxl_register_map *map,
 479				  struct cxl_dport *dport)
 480{
 481	resource_size_t component_reg_phys;
 482
 483	*map = (struct cxl_register_map) {
 484		.host = &pdev->dev,
 485		.resource = CXL_RESOURCE_NONE,
  486	};
 487
 488	struct cxl_port *port __free(put_cxl_port) =
 489		cxl_pci_find_port(pdev, &dport);
 490	if (!port)
  491		return -EPROBE_DEFER;
 492
 493	component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport);
 494	if (component_reg_phys == CXL_RESOURCE_NONE)
  495		return -ENXIO;
 496
 497	map->resource = component_reg_phys;
 498	map->reg_type = CXL_REGLOC_RBI_COMPONENT;
  499	map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE;
 500
 501	return 0;
 502}
 503
 504static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
 505			      struct cxl_register_map *map)
 506{
 507	int rc;
 508
  509	rc = cxl_find_regblock(pdev, type, map);
 510
 511	/*
 512	 * If the Register Locator DVSEC does not exist, check if it
 513	 * is an RCH and try to extract the Component Registers from
 514	 * an RCRB.
 515	 */
 516	if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) {
 517		struct cxl_dport *dport;
 518		struct cxl_port *port __free(put_cxl_port) =
 519			cxl_pci_find_port(pdev, &dport);
 520		if (!port)
 521			return -EPROBE_DEFER;
 522
 523		rc = cxl_rcrb_get_comp_regs(pdev, map, dport);
 524		if (rc)
 525			return rc;
 526
 527		rc = cxl_dport_map_rcd_linkcap(pdev, dport);
 528		if (rc)
 529			return rc;
 530
 531	} else if (rc) {
 532		return rc;
 533	}
 534
 535	return cxl_setup_regs(map);
 536}
 537
 538static int cxl_pci_ras_unmask(struct pci_dev *pdev)
 
 539{
 540	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
 541	void __iomem *addr;
 542	u32 orig_val, val, mask;
 543	u16 cap;
 544	int rc;
 545
 546	if (!cxlds->regs.ras) {
 547		dev_dbg(&pdev->dev, "No RAS registers.\n");
 548		return 0;
 549	}
 550
 551	/* BIOS has PCIe AER error control */
 552	if (!pcie_aer_is_native(pdev))
 553		return 0;
 554
 555	rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
 556	if (rc)
 557		return rc;
 558
 559	if (cap & PCI_EXP_DEVCTL_URRE) {
 560		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
 561		orig_val = readl(addr);
 562
 563		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK |
 564		       CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
 565		val = orig_val & ~mask;
 566		writel(val, addr);
 567		dev_dbg(&pdev->dev,
 568			"Uncorrectable RAS Errors Mask: %#x -> %#x\n",
 569			orig_val, val);
 570	}
 571
 572	if (cap & PCI_EXP_DEVCTL_CERE) {
 573		addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
 574		orig_val = readl(addr);
 575		val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
 576		writel(val, addr);
 577		dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
 578			orig_val, val);
 579	}
 580
 581	return 0;
 582}
 583
  584static void free_event_buf(void *buf)
  585{
  586	kvfree(buf);
 587}
 588
 589/*
 590 * There is a single buffer for reading event logs from the mailbox.  All logs
 591 * share this buffer protected by the mds->event_log_lock.
 592 */
 593static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
 594{
 595	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
 596	struct cxl_get_event_payload *buf;
 597
 598	buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
 599	if (!buf)
 600		return -ENOMEM;
 601	mds->event.buf = buf;
 602
 603	return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
 604}
 605
 606static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
 607{
 608	int nvecs;
 
 609
 610	/*
 611	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
 612	 * not generate INTx messages if that function participates in
 613	 * CXL.cache or CXL.mem.
 614	 *
 615	 * Additionally pci_alloc_irq_vectors() handles calling
  616	 * pci_free_irq_vectors() automatically despite not being a
  617	 * pcim_* interface.  See pci_setup_msi_context().
 618	 */
 619	nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
 620				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
 621	if (nvecs < 1) {
 622		dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
 623		return false;
 624	}
 625	return true;
 626}
 627
 628static irqreturn_t cxl_event_thread(int irq, void *id)
 629{
 630	struct cxl_dev_id *dev_id = id;
 631	struct cxl_dev_state *cxlds = dev_id->cxlds;
 632	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 633	u32 status;
 634
 635	do {
 636		/*
 637		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
 638		 * ignore the reserved upper 32 bits
 639		 */
 640		status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
 641		/* Ignore logs unknown to the driver */
 642		status &= CXLDEV_EVENT_STATUS_ALL;
 643		if (!status)
 644			break;
 645		cxl_mem_get_event_records(mds, status);
 646		cond_resched();
 647	} while (status);
 648
 649	return IRQ_HANDLED;
 650}
 651
 652static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
 653{
 654	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
 655	int irq;
 656
 657	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
 658		return -ENXIO;
 
 659
 660	irq =  pci_irq_vector(pdev,
 661			      FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
 662	if (irq < 0)
 663		return irq;
 664
 665	return cxl_request_irq(cxlds, irq, cxl_event_thread);
 666}
 667
 668static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
 669				    struct cxl_event_interrupt_policy *policy)
 670{
 671	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
 672	struct cxl_mbox_cmd mbox_cmd = {
 673		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
 674		.payload_out = policy,
 675		.size_out = sizeof(*policy),
 676	};
 677	int rc;
 678
 679	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
 680	if (rc < 0)
 681		dev_err(mds->cxlds.dev,
  682			"Failed to get event interrupt policy : %d", rc);
 683
  684	return rc;
 685}
 686
 687static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
 688				    struct cxl_event_interrupt_policy *policy)
 689{
 690	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
  691	struct cxl_mbox_cmd mbox_cmd;
 692	int rc;
 693
 694	*policy = (struct cxl_event_interrupt_policy) {
 695		.info_settings = CXL_INT_MSI_MSIX,
 696		.warn_settings = CXL_INT_MSI_MSIX,
 697		.failure_settings = CXL_INT_MSI_MSIX,
 698		.fatal_settings = CXL_INT_MSI_MSIX,
 699	};
 700
 701	mbox_cmd = (struct cxl_mbox_cmd) {
 702		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
 703		.payload_in = policy,
 704		.size_in = sizeof(*policy),
 705	};
 706
 707	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
 708	if (rc < 0) {
 709		dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
 710			rc);
 711		return rc;
 712	}
 713
 714	/* Retrieve final interrupt settings */
  715	return cxl_event_get_int_policy(mds, policy);
 716}
 717
 718static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
 719{
 720	struct cxl_dev_state *cxlds = &mds->cxlds;
 721	struct cxl_event_interrupt_policy policy;
 722	int rc;
 723
 724	rc = cxl_event_config_msgnums(mds, &policy);
 725	if (rc)
 726		return rc;
 727
 728	rc = cxl_event_req_irq(cxlds, policy.info_settings);
 729	if (rc) {
 730		dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
  731		return rc;
 732	}
 733
 734	rc = cxl_event_req_irq(cxlds, policy.warn_settings);
 735	if (rc) {
 736		dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
 737		return rc;
 738	}
 739
 740	rc = cxl_event_req_irq(cxlds, policy.failure_settings);
 741	if (rc) {
 742		dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
  743		return rc;
 744	}
 745
 746	rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
 747	if (rc) {
 748		dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
  749		return rc;
 750	}
 751
 752	return 0;
 753}
 754
  755static bool cxl_event_int_is_fw(u8 setting)
  756{
  757	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);
  758
  759	return mode == CXL_INT_FW;
 760}
 761
 762static int cxl_event_config(struct pci_host_bridge *host_bridge,
 763			    struct cxl_memdev_state *mds, bool irq_avail)
 764{
 765	struct cxl_event_interrupt_policy policy;
 766	int rc;
 767
 768	/*
 769	 * When BIOS maintains CXL error reporting control, it will process
 770	 * event records.  Only one agent can do so.
 771	 */
 772	if (!host_bridge->native_cxl_error)
 773		return 0;
 774
 775	if (!irq_avail) {
 776		dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n");
 777		return 0;
 778	}
 779
 780	rc = cxl_event_get_int_policy(mds, &policy);
 781	if (rc)
 782		return rc;
 783
 784	if (cxl_event_int_is_fw(policy.info_settings) ||
 785	    cxl_event_int_is_fw(policy.warn_settings) ||
 786	    cxl_event_int_is_fw(policy.failure_settings) ||
 787	    cxl_event_int_is_fw(policy.fatal_settings)) {
 788		dev_err(mds->cxlds.dev,
 789			"FW still in control of Event Logs despite _OSC settings\n");
 790		return -EBUSY;
 791	}
 792
 793	rc = cxl_mem_alloc_event_buf(mds);
 794	if (rc)
 795		return rc;
 796
 797	rc = cxl_event_irqsetup(mds);
 798	if (rc)
  799		return rc;
 800
  801	cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
 802
 803	return 0;
 804}
 805
 806static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds)
 807{
 808	int rc;
 809
 810	/*
 811	 * Fail the init if there's no mailbox. For a type3 this is out of spec.
 812	 */
 813	if (!cxlds->reg_map.device_map.mbox.valid)
 814		return -ENODEV;
 815
 816	rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev);
 817	if (rc)
  818		return rc;
 819
 820	return 0;
 821}
 822
 823static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width)
 
 824{
 825	struct cxl_dev_state *cxlds = dev_get_drvdata(dev);
 826	struct cxl_memdev *cxlmd = cxlds->cxlmd;
 827	struct device *root_dev;
 828	struct cxl_dport *dport;
 829	struct cxl_port *root __free(put_cxl_port) =
 830		cxl_mem_find_port(cxlmd, &dport);
 831
  832	if (!root)
  833		return -ENXIO;
 834
 835	root_dev = root->uport_dev;
 836	if (!root_dev)
 837		return -ENXIO;
 838
 839	if (!dport->regs.rcd_pcie_cap)
  840		return -ENXIO;
 841
 842	guard(device)(root_dev);
 843	if (!root_dev->driver)
 844		return -ENXIO;
 845
 846	switch (width) {
 847	case 2:
 848		return sysfs_emit(buf, "%#x\n",
 849				  readw(dport->regs.rcd_pcie_cap + offset));
 850	case 4:
 851		return sysfs_emit(buf, "%#x\n",
 852				  readl(dport->regs.rcd_pcie_cap + offset));
 853	default:
  854		return -EINVAL;
  855	}
 856}
 857
 858static ssize_t rcd_link_cap_show(struct device *dev,
 859				 struct device_attribute *attr, char *buf)
 860{
  861	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, sizeof(u32));
 862}
 863static DEVICE_ATTR_RO(rcd_link_cap);
 864
 865static ssize_t rcd_link_ctrl_show(struct device *dev,
 866				  struct device_attribute *attr, char *buf)
 867{
 868	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, sizeof(u16));
 869}
 870static DEVICE_ATTR_RO(rcd_link_ctrl);
 871
 872static ssize_t rcd_link_status_show(struct device *dev,
 873				    struct device_attribute *attr, char *buf)
 874{
  875	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, sizeof(u16));
 876}
 877static DEVICE_ATTR_RO(rcd_link_status);
 878
 879static struct attribute *cxl_rcd_attrs[] = {
 880	&dev_attr_rcd_link_cap.attr,
 881	&dev_attr_rcd_link_ctrl.attr,
 882	&dev_attr_rcd_link_status.attr,
 883	NULL
 884};
 885
  886static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n)
 887{
 888	struct device *dev = kobj_to_dev(kobj);
 889	struct pci_dev *pdev = to_pci_dev(dev);
 890
 891	if (is_cxl_restricted(pdev))
 892		return a->mode;
 
 893
  894	return 0;
 895}
 896
 897static struct attribute_group cxl_rcd_group = {
 898	.attrs = cxl_rcd_attrs,
 899	.is_visible = cxl_rcd_visible,
 900};
 901__ATTRIBUTE_GROUPS(cxl_rcd);
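/*
 * Usage sketch (device address is hypothetical): for an RCD, the
 * attributes above appear on the PCI device itself, e.g.:
 *
 *   # cat /sys/bus/pci/devices/0000:35:00.0/rcd_link_status
 */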
 902
 903static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 904{
 905	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
 906	struct cxl_memdev_state *mds;
 907	struct cxl_dev_state *cxlds;
 908	struct cxl_register_map map;
 909	struct cxl_memdev *cxlmd;
 910	int i, rc, pmu_count;
 911	bool irq_avail;
 912
 913	/*
 914	 * Double check the anonymous union trickery in struct cxl_regs
 915	 * FIXME switch to struct_group()
 916	 */
 917	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
 918		     offsetof(struct cxl_regs, device_regs.memdev));
 919
 920	rc = pcim_enable_device(pdev);
 921	if (rc)
 922		return rc;
 923	pci_set_master(pdev);
 924
 925	mds = cxl_memdev_state_create(&pdev->dev);
 926	if (IS_ERR(mds))
 927		return PTR_ERR(mds);
 928	cxlds = &mds->cxlds;
 929	pci_set_drvdata(pdev, cxlds);
 930
 931	cxlds->rcd = is_cxl_restricted(pdev);
 932	cxlds->serial = pci_get_dsn(pdev);
 933	cxlds->cxl_dvsec = pci_find_dvsec_capability(
 934		pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
 935	if (!cxlds->cxl_dvsec)
 936		dev_warn(&pdev->dev,
 937			 "Device DVSEC not present, skip CXL.mem init\n");
 938
 939	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
 940	if (rc)
 941		return rc;
 
 942
 943	rc = cxl_map_device_regs(&map, &cxlds->regs.device_regs);
 944	if (rc)
 945		return rc;
 
 946
 947	/*
 948	 * If the component registers can't be found, the cxl_pci driver may
 949	 * still be useful for management functions so don't return an error.
 950	 */
 951	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
 952				&cxlds->reg_map);
 953	if (rc)
 954		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
 955	else if (!cxlds->reg_map.component_map.ras.valid)
 956		dev_dbg(&pdev->dev, "RAS registers not found\n");
 957
 958	rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component,
 959				    BIT(CXL_CM_CAP_CAP_ID_RAS));
 960	if (rc)
 961		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
 962
 963	rc = cxl_pci_type3_init_mailbox(cxlds);
 964	if (rc)
 965		return rc;
 
 966
 967	rc = cxl_await_media_ready(cxlds);
 968	if (rc == 0)
 969		cxlds->media_ready = true;
 970	else
 971		dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
 
 972
  973	irq_avail = cxl_alloc_irq_vectors(pdev);
 974
 975	rc = cxl_pci_setup_mailbox(mds, irq_avail);
 976	if (rc)
  977		return rc;
 978
 979	rc = cxl_enumerate_cmds(mds);
 980	if (rc)
 981		return rc;
 982
 983	rc = cxl_set_timestamp(mds);
 984	if (rc)
 985		return rc;
 986
 987	rc = cxl_poison_state_init(mds);
 988	if (rc)
  989		return rc;
 990
 991	rc = cxl_dev_state_identify(mds);
 992	if (rc)
 993		return rc;
 994
 995	rc = cxl_mem_create_range_info(mds);
 996	if (rc)
 997		return rc;
 
 998
  999	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
1000	if (IS_ERR(cxlmd))
1001		return PTR_ERR(cxlmd);
1002
1003	rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
 
1004	if (rc)
1005		return rc;
1006
1007	rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
1008	if (rc)
1009		return rc;
 
 
1010
1011	pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
1012	for (i = 0; i < pmu_count; i++) {
1013		struct cxl_pmu_regs pmu_regs;
 
1014
1015		rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i);
1016		if (rc) {
1017			dev_dbg(&pdev->dev, "Could not find PMU regblock\n");
1018			break;
1019		}
1020
1021		rc = cxl_map_pmu_regs(&map, &pmu_regs);
1022		if (rc) {
1023			dev_dbg(&pdev->dev, "Could not map PMU regs\n");
1024			break;
 1025		}
1026
1027		rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV);
1028		if (rc) {
1029			dev_dbg(&pdev->dev, "Could not add PMU instance\n");
1030			break;
1031		}
1032	}
1033
1034	rc = cxl_event_config(host_bridge, mds, irq_avail);
1035	if (rc)
1036		return rc;
1037
1038	if (cxl_pci_ras_unmask(pdev))
1039		dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
1040
 1041	pci_save_state(pdev);
 1042
 1043	return rc;
1044}
1045
1046static const struct pci_device_id cxl_mem_pci_tbl[] = {
1047	/* PCI class code for CXL.mem Type-3 Devices */
1048	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
1049	{ /* terminate list */ },
1050};
1051MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
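/*
 * Worked example: assuming the usual definitions PCI_CLASS_MEMORY_CXL ==
 * 0x0502 and CXL_MEMORY_PROGIF == 0x10, the entry above matches the full
 * 24-bit class code 0x050210 exactly, since the mask is ~0.
 */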
 
 
1052
1053static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
1054{
1055	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
1056	struct cxl_memdev *cxlmd = cxlds->cxlmd;
 1057	struct device *dev = &cxlmd->dev;
1058
1059	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
1060		 dev_name(dev));
1061	pci_restore_state(pdev);
1062	if (device_attach(dev) <= 0)
1063		return PCI_ERS_RESULT_DISCONNECT;
1064	return PCI_ERS_RESULT_RECOVERED;
1065}
1066
 1067static void cxl_error_resume(struct pci_dev *pdev)
1068{
1069	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
1070	struct cxl_memdev *cxlmd = cxlds->cxlmd;
 1071	struct device *dev = &cxlmd->dev;
1072
1073	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
 1074		 dev->driver ? "successful" : "failed");
1075}
1076
 1077static void cxl_reset_done(struct pci_dev *pdev)
1078{
1079	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
1080	struct cxl_memdev *cxlmd = cxlds->cxlmd;
 1081	struct device *dev = &pdev->dev;
1082
1083	/*
1084	 * FLR does not expect to touch the HDM decoders and related
1085	 * registers.  SBR, however, will wipe all device configurations.
1086	 * Issue a warning if there was an active decoder before the reset
1087	 * that no longer exists.
1088	 */
1089	guard(device)(&cxlmd->dev);
1090	if (cxlmd->endpoint &&
1091	    cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) {
1092		dev_crit(dev, "SBR happened without memory regions removal.\n");
1093		dev_crit(dev, "System may be unstable if regions hosted system memory.\n");
1094		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
1095	}
1096}
1097
1098static const struct pci_error_handlers cxl_error_handlers = {
1099	.error_detected	= cxl_error_detected,
1100	.slot_reset	= cxl_slot_reset,
1101	.resume		= cxl_error_resume,
1102	.cor_error_detected	= cxl_cor_error_detected,
1103	.reset_done	= cxl_reset_done,
1104};
1105
1106static struct pci_driver cxl_pci_driver = {
1107	.name			= KBUILD_MODNAME,
1108	.id_table		= cxl_mem_pci_tbl,
1109	.probe			= cxl_pci_probe,
1110	.err_handler		= &cxl_error_handlers,
1111	.dev_groups		= cxl_rcd_groups,
1112	.driver	= {
1113		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
1114	},
1115};
1116
1117#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
1118static void cxl_handle_cper_event(enum cxl_event_type ev_type,
1119				  struct cxl_cper_event_rec *rec)
 
1120{
1121	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
1122	struct pci_dev *pdev __free(pci_dev_put) = NULL;
1123	enum cxl_event_log_type log_type;
1124	struct cxl_dev_state *cxlds;
1125	unsigned int devfn;
1126	u32 hdr_flags;
1127
1128	pr_debug("CPER event %d for device %u:%u:%u.%u\n", ev_type,
1129		 device_id->segment_num, device_id->bus_num,
1130		 device_id->device_num, device_id->func_num);
1131
1132	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
1133	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
1134					   device_id->bus_num, devfn);
1135	if (!pdev)
1136		return;
1137
1138	guard(device)(&pdev->dev);
1139	if (pdev->driver != &cxl_pci_driver)
1140		return;
1141
1142	cxlds = pci_get_drvdata(pdev);
1143	if (!cxlds)
1144		return;
1145
1146	/* Fabricate a log type */
1147	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
1148	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
1149
1150	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
1151			       &uuid_null, &rec->event);
1152}
1153
1154static void cxl_cper_work_fn(struct work_struct *work)
1155{
 1156	struct cxl_cper_work_data wd;
1157
1158	while (cxl_cper_kfifo_get(&wd))
1159		cxl_handle_cper_event(wd.event_type, &wd.rec);
1160}
1161static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn);
1162
 1163static int __init cxl_pci_driver_init(void)
 1164{
1165	int rc;
1166
 1167	rc = pci_register_driver(&cxl_pci_driver);
1168	if (rc)
1169		return rc;
1170
1171	rc = cxl_cper_register_work(&cxl_cper_work);
1172	if (rc)
1173		pci_unregister_driver(&cxl_pci_driver);
1174
 1175	return rc;
1176}
1177
1178static void __exit cxl_pci_driver_exit(void)
1179{
1180	cxl_cper_unregister_work(&cxl_cper_work);
1181	cancel_work_sync(&cxl_cper_work);
1182	pci_unregister_driver(&cxl_pci_driver);
1183}
1184
1185module_init(cxl_pci_driver_init);
1186module_exit(cxl_pci_driver_exit);
1187MODULE_DESCRIPTION("CXL: PCI manageability");
1188MODULE_LICENSE("GPL v2");
1189MODULE_IMPORT_NS("CXL");
 
 
v5.14.15 (drivers/cxl/pci.c)
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
   3#include <uapi/linux/cxl_mem.h>
   4#include <linux/security.h>
   5#include <linux/debugfs.h>
   6#include <linux/module.h>
 
   7#include <linux/sizes.h>
   8#include <linux/mutex.h>
   9#include <linux/list.h>
  10#include <linux/cdev.h>
  11#include <linux/idr.h>
  12#include <linux/pci.h>
 
  13#include <linux/io.h>
  14#include <linux/io-64-nonatomic-lo-hi.h>
  15#include "cxlmem.h"
  16#include "pci.h"
  17#include "cxl.h"
 
  18
  19/**
  20 * DOC: cxl pci
  21 *
  22 * This implements the PCI exclusive functionality for a CXL device as it is
  23 * defined by the Compute Express Link specification. CXL devices may surface
   24 * certain functionality even if it isn't CXL enabled.
  25 *
  26 * The driver has several responsibilities, mainly:
   27 *  - Create the memX device and register it on the CXL bus.
   28 *  - Enumerate the device's register interface and map it.
   29 *  - Probe the device attributes to establish a sysfs interface.
  30 *  - Provide an IOCTL interface to userspace to communicate with the device for
  31 *    things like firmware update.
  32 */
  33
  34#define cxl_doorbell_busy(cxlm)                                                \
  35	(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
  36	 CXLDEV_MBOX_CTRL_DOORBELL)
  37
  38/* CXL 2.0 - 8.2.8.4 */
  39#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
  40
  41enum opcode {
  42	CXL_MBOX_OP_INVALID		= 0x0000,
  43	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
  44	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
  45	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
  46	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
  47	CXL_MBOX_OP_GET_LOG		= 0x0401,
  48	CXL_MBOX_OP_IDENTIFY		= 0x4000,
  49	CXL_MBOX_OP_GET_PARTITION_INFO	= 0x4100,
  50	CXL_MBOX_OP_SET_PARTITION_INFO	= 0x4101,
  51	CXL_MBOX_OP_GET_LSA		= 0x4102,
  52	CXL_MBOX_OP_SET_LSA		= 0x4103,
  53	CXL_MBOX_OP_GET_HEALTH_INFO	= 0x4200,
  54	CXL_MBOX_OP_GET_ALERT_CONFIG	= 0x4201,
  55	CXL_MBOX_OP_SET_ALERT_CONFIG	= 0x4202,
  56	CXL_MBOX_OP_GET_SHUTDOWN_STATE	= 0x4203,
  57	CXL_MBOX_OP_SET_SHUTDOWN_STATE	= 0x4204,
  58	CXL_MBOX_OP_GET_POISON		= 0x4300,
  59	CXL_MBOX_OP_INJECT_POISON	= 0x4301,
  60	CXL_MBOX_OP_CLEAR_POISON	= 0x4302,
  61	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
  62	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
  63	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
  64	CXL_MBOX_OP_MAX			= 0x10000
  65};
  66
  67/**
  68 * struct mbox_cmd - A command to be submitted to hardware.
  69 * @opcode: (input) The command set and command submitted to hardware.
  70 * @payload_in: (input) Pointer to the input payload.
  71 * @payload_out: (output) Pointer to the output payload. Must be allocated by
  72 *		 the caller.
  73 * @size_in: (input) Number of bytes to load from @payload_in.
  74 * @size_out: (input) Max number of bytes loaded into @payload_out.
  75 *            (output) Number of bytes generated by the device. For fixed size
  76 *            outputs commands this is always expected to be deterministic. For
  77 *            variable sized output commands, it tells the exact number of bytes
  78 *            written.
  79 * @return_code: (output) Error code returned from hardware.
  80 *
  81 * This is the primary mechanism used to send commands to the hardware.
  82 * All the fields except @payload_* correspond exactly to the fields described in
  83 * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
  84 * @payload_out are written to, and read from the Command Payload Registers
  85 * defined in CXL 2.0 8.2.8.4.8.
  86 */
  87struct mbox_cmd {
  88	u16 opcode;
  89	void *payload_in;
  90	void *payload_out;
  91	size_t size_in;
  92	size_t size_out;
  93	u16 return_code;
  94#define CXL_MBOX_SUCCESS 0
  95};
  96
  97static int cxl_mem_major;
  98static DEFINE_IDA(cxl_memdev_ida);
  99static DECLARE_RWSEM(cxl_memdev_rwsem);
 100static struct dentry *cxl_debugfs;
 101static bool cxl_raw_allow_all;
 102
 103enum {
 104	CEL_UUID,
 105	VENDOR_DEBUG_UUID,
 106};
 107
 108/* See CXL 2.0 Table 170. Get Log Input Payload */
 109static const uuid_t log_uuid[] = {
 110	[CEL_UUID] = UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96,
 111			       0xb1, 0x62, 0x3b, 0x3f, 0x17),
 112	[VENDOR_DEBUG_UUID] = UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f,
 113					0xd6, 0x07, 0x19, 0x40, 0x3d, 0x86),
 114};
 115
 116/**
 117 * struct cxl_mem_command - Driver representation of a memory device command
 118 * @info: Command information as it exists for the UAPI
 119 * @opcode: The actual bits used for the mailbox protocol
  120 * @flags: Set of flags affecting driver behavior.
 121 *
 122 *  * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
 123 *    will be enabled by the driver regardless of what hardware may have
 124 *    advertised.
 125 *
 126 * The cxl_mem_command is the driver's internal representation of commands that
 127 * are supported by the driver. Some of these commands may not be supported by
 128 * the hardware. The driver will use @info to validate the fields passed in by
 129 * the user then submit the @opcode to the hardware.
 130 *
 131 * See struct cxl_command_info.
 132 */
 133struct cxl_mem_command {
 134	struct cxl_command_info info;
 135	enum opcode opcode;
 136	u32 flags;
 137#define CXL_CMD_FLAG_NONE 0
 138#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
 139};
 140
 141#define CXL_CMD(_id, sin, sout, _flags)                                        \
 142	[CXL_MEM_COMMAND_ID_##_id] = {                                         \
 143	.info =	{                                                              \
 144			.id = CXL_MEM_COMMAND_ID_##_id,                        \
 145			.size_in = sin,                                        \
 146			.size_out = sout,                                      \
 147		},                                                             \
 148	.opcode = CXL_MBOX_OP_##_id,                                           \
 149	.flags = _flags,                                                       \
 150	}
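/*
 * For reference, CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE)
 * expands to the designated initializer:
 *
 *	[CXL_MEM_COMMAND_ID_IDENTIFY] = {
 *		.info = {
 *			.id = CXL_MEM_COMMAND_ID_IDENTIFY,
 *			.size_in = 0,
 *			.size_out = 0x43,
 *		},
 *		.opcode = CXL_MBOX_OP_IDENTIFY,
 *		.flags = CXL_CMD_FLAG_FORCE_ENABLE,
 *	},
 */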
 151
 152/*
 153 * This table defines the supported mailbox commands for the driver. This table
 154 * is made up of a UAPI structure. Non-negative values as parameters in the
 155 * table will be validated against the user's input. For example, if size_in is
 156 * 0, and the user passed in 1, it is an error.
 157 */
 158static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
 159	CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
 160#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
 161	CXL_CMD(RAW, ~0, ~0, 0),
 162#endif
 163	CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
 164	CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
 165	CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
 166	CXL_CMD(GET_LSA, 0x8, ~0, 0),
 167	CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
 168	CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
 169	CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
 170	CXL_CMD(SET_LSA, ~0, 0, 0),
 171	CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
 172	CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
 173	CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
 174	CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
 175	CXL_CMD(GET_POISON, 0x10, ~0, 0),
 176	CXL_CMD(INJECT_POISON, 0x8, 0, 0),
 177	CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
 178	CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
 179	CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
 180	CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
 181};
 182
 183/*
 184 * Commands that RAW doesn't permit. The rationale for each:
 185 *
 186 * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
 187 * coordination of transaction timeout values at the root bridge level.
 188 *
 189 * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
 190 * and needs to be coordinated with HDM updates.
 191 *
 192 * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
 193 * driver and any writes from userspace invalidates those contents.
 194 *
 195 * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
 196 * to the device after it is marked clean, userspace can not make that
 197 * assertion.
 198 *
 199 * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
 200 * is kept up to date with patrol notifications and error management.
 201 */
 202static u16 cxl_disabled_raw_commands[] = {
 203	CXL_MBOX_OP_ACTIVATE_FW,
 204	CXL_MBOX_OP_SET_PARTITION_INFO,
 205	CXL_MBOX_OP_SET_LSA,
 206	CXL_MBOX_OP_SET_SHUTDOWN_STATE,
 207	CXL_MBOX_OP_SCAN_MEDIA,
 208	CXL_MBOX_OP_GET_SCAN_MEDIA,
 209};
 210
 211/*
 212 * Command sets that RAW doesn't permit. All opcodes in this set are
 213 * disabled because they pass plain text security payloads over the
 214 * user/kernel boundary. This functionality is intended to be wrapped
  215 * behind the keys ABI which allows for encrypted payloads in the UAPI.
 216 */
 217static u8 security_command_sets[] = {
 218	0x44, /* Sanitize */
 219	0x45, /* Persistent Memory Data-at-rest Security */
 220	0x46, /* Security Passthrough */
 221};
 222
 223#define cxl_for_each_cmd(cmd)                                                  \
 224	for ((cmd) = &mem_commands[0];                                         \
 225	     ((cmd) - mem_commands) < ARRAY_SIZE(mem_commands); (cmd)++)
 226
 227#define cxl_cmd_count ARRAY_SIZE(mem_commands)
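/*
 * Illustrative sketch: iterating the command table with the helper above.
 */
static unsigned int __maybe_unused example_count_force_enabled(void)
{
	struct cxl_mem_command *cmd;
	unsigned int n = 0;

	cxl_for_each_cmd(cmd)
		if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
			n++;

	return n;
}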
 228
 229static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
 230{
 231	const unsigned long start = jiffies;
 232	unsigned long end = start;
 233
 234	while (cxl_doorbell_busy(cxlm)) {
 235		end = jiffies;
 236
 237		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
 238			/* Check again in case preempted before timeout test */
 239			if (!cxl_doorbell_busy(cxlm))
 240				break;
 241			return -ETIMEDOUT;
 242		}
 243		cpu_relax();
 244	}
 245
 246	dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
 247		jiffies_to_msecs(end) - jiffies_to_msecs(start));
 248	return 0;
 249}
 250
  251static bool cxl_is_security_command(u16 opcode)
 252{
  253	int i;
 254
 255	for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
 256		if (security_command_sets[i] == (opcode >> 8))
 257			return true;
 258	return false;
 259}
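/*
 * Example: an opcode such as 0x4500 has (opcode >> 8) == 0x45, which is
 * listed above as the Persistent Memory Data-at-rest Security command
 * set, so it is reported as a security command.
 */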
 260
 261static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
  262				 struct mbox_cmd *mbox_cmd)
 263{
  264	struct device *dev = &cxlm->pdev->dev;
 265
 266	dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
  267		mbox_cmd->opcode, mbox_cmd->size_in);
 268}
 269
 270/**
 271 * __cxl_mem_mbox_send_cmd() - Execute a mailbox command
 272 * @cxlm: The CXL memory device to communicate with.
 273 * @mbox_cmd: Command to send to the memory device.
 274 *
 275 * Context: Any context. Expects mbox_mutex to be held.
 276 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 277 *         Caller should check the return code in @mbox_cmd to make sure it
 278 *         succeeded.
 279 *
 280 * This is a generic form of the CXL mailbox send command thus only using the
 281 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 282 * devices, and perhaps other types of CXL devices may have further information
 283 * available upon error conditions. Driver facilities wishing to send mailbox
 284 * commands should use the wrapper command.
 285 *
 286 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 287 * mailbox to be OS controlled and the secondary mailbox to be used by system
 288 * firmware. This allows the OS and firmware to communicate with the device and
 289 * not need to coordinate with each other. The driver only uses the primary
 290 * mailbox.
 291 */
 292static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
 293				   struct mbox_cmd *mbox_cmd)
 294{
  295	void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
 296	u64 cmd_reg, status_reg;
 297	size_t out_len;
 298	int rc;
 299
 300	lockdep_assert_held(&cxlm->mbox_mutex);
 301
 302	/*
 303	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
 304	 *   1. Caller reads MB Control Register to verify doorbell is clear
 305	 *   2. Caller writes Command Register
 306	 *   3. Caller writes Command Payload Registers if input payload is non-empty
 307	 *   4. Caller writes MB Control Register to set doorbell
 308	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
 309	 *   6. Caller reads MB Status Register to fetch Return code
 310	 *   7. If command successful, Caller reads Command Register to get Payload Length
 311	 *   8. If output payload is non-empty, host reads Command Payload Registers
 312	 *
 313	 * Hardware is free to do whatever it wants before the doorbell is rung,
 314	 * and isn't allowed to change anything after it clears the doorbell. As
 315	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
 316	 * also happen in any order (though some orders might not make sense).
 317	 */
 318
 319	/* #1 */
 320	if (cxl_doorbell_busy(cxlm)) {
 321		dev_err_ratelimited(&cxlm->pdev->dev,
  322				    "Mailbox re-busy after acquiring\n");
 323		return -EBUSY;
 324	}
  325
 326	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
 327			     mbox_cmd->opcode);
 328	if (mbox_cmd->size_in) {
 329		if (WARN_ON(!mbox_cmd->payload_in))
 330			return -EINVAL;
 331
 332		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
 333				      mbox_cmd->size_in);
 334		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
 335	}
 336
 337	/* #2, #3 */
 338	writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 339
 340	/* #4 */
 341	dev_dbg(&cxlm->pdev->dev, "Sending command\n");
 342	writel(CXLDEV_MBOX_CTRL_DOORBELL,
 343	       cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
 344
 345	/* #5 */
 346	rc = cxl_mem_wait_for_doorbell(cxlm);
 347	if (rc == -ETIMEDOUT) {
  348		cxl_mem_mbox_timeout(cxlm, mbox_cmd);
 349		return rc;
 350	}
 351
 352	/* #6 */
 353	status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
 354	mbox_cmd->return_code =
 355		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
 356
 357	if (mbox_cmd->return_code != 0) {
 358		dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
  359		return 0;
 360	}
 361
 
 362	/* #7 */
 363	cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 364	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
 365
 366	/* #8 */
 367	if (out_len && mbox_cmd->payload_out) {
 368		/*
 369		 * Sanitize the copy. If hardware misbehaves, out_len per the
 370		 * spec can actually be greater than the max allowed size (21
 371		 * bits available but spec defined 1M max). The caller also may
 372		 * have requested less data than the hardware supplied even
 373		 * within spec.
 374		 */
 375		size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);
 376
 
 377		memcpy_fromio(mbox_cmd->payload_out, payload, n);
 378		mbox_cmd->size_out = n;
 379	} else {
 380		mbox_cmd->size_out = 0;
 381	}
 382
 383	return 0;
 384}
 385
 386/**
 387 * cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
 388 * @cxlm: The memory device to gain access to.
 389 *
 390 * Context: Any context. Takes the mbox_mutex.
 391 * Return: 0 if exclusive access was acquired.
 392 */
 393static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
 394{
 395	struct device *dev = &cxlm->pdev->dev;
 396	u64 md_status;
 397	int rc;
 398
 399	mutex_lock_io(&cxlm->mbox_mutex);
 
 
 400
 401	/*
 402	 * XXX: There is some amount of ambiguity in the 2.0 version of the spec
 403	 * around the mailbox interface ready (8.2.8.5.1.1).  The purpose of the
 404	 * bit is to allow firmware running on the device to notify the driver
 405	 * that it's ready to receive commands. It is unclear if the bit needs
  406	 * to be read for each mailbox transaction, i.e. the firmware can switch
 407	 * it on and off as needed. Second, there is no defined timeout for
 408	 * mailbox ready, like there is for the doorbell interface.
 409	 *
 410	 * Assumptions:
 411	 * 1. The firmware might toggle the Mailbox Interface Ready bit, check
 412	 *    it for every command.
 413	 *
 414	 * 2. If the doorbell is clear, the firmware should have first set the
 415	 *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
 416	 *    to be ready is sufficient.
 417	 */
 418	rc = cxl_mem_wait_for_doorbell(cxlm);
 419	if (rc) {
 420		dev_warn(dev, "Mailbox interface not ready\n");
 421		goto out;
 422	}
 423
 424	md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
 425	if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
 426		dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
 427		rc = -EBUSY;
 428		goto out;
 429	}
 430
 431	/*
 432	 * Hardware shouldn't report a ready status while failure bits are
 433	 * set. Emit an error; this warrants a bug report.
 434	 */
 435	rc = -EFAULT;
 436	if (md_status & CXLMDEV_DEV_FATAL) {
 437		dev_err(dev, "mbox: reported ready, but fatal\n");
 438		goto out;
 439	}
 440	if (md_status & CXLMDEV_FW_HALT) {
 441		dev_err(dev, "mbox: reported ready, but halted\n");
 442		goto out;
 443	}
 444	if (CXLMDEV_RESET_NEEDED(md_status)) {
 445		dev_err(dev, "mbox: reported ready, but reset needed\n");
 446		goto out;
 447	}
 448
 449	/* with lock held */
 450	return 0;
 451
 452out:
 453	mutex_unlock(&cxlm->mbox_mutex);
 454	return rc;
 455}
 456
 457/**
 458 * cxl_mem_mbox_put() - Release exclusive access to the mailbox.
 459 * @cxlm: The CXL memory device to communicate with.
 460 *
 461 * Context: Process context. Expects mbox_mutex to be held.
 462 */
 463static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
 464{
 465	mutex_unlock(&cxlm->mbox_mutex);
 466}
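/*
 * A minimal sketch of how the primitives above compose, mirroring what
 * cxl_mem_mbox_send_cmd() does later in this file (not a new API, just
 * the expected call order; error handling trimmed):
 *
 *	struct mbox_cmd mbox_cmd = { .opcode = opcode, ... };
 *	int rc;
 *
 *	rc = cxl_mem_mbox_get(cxlm);
 *	if (rc)
 *		return rc;
 *	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 *	cxl_mem_mbox_put(cxlm);
 */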
 467
 468/**
 469 * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
 470 * @cxlm: The CXL memory device to communicate with.
 471 * @cmd: The validated command.
 472 * @in_payload: Pointer to userspace's input payload.
 473 * @out_payload: Pointer to userspace's output payload.
 474 * @size_out: (Input) Max payload size to copy out.
 475 *            (Output) Payload size hardware generated.
 476 * @retval: Hardware generated return code from the operation.
 477 *
 478 * Return:
 479 *  * %0	- Mailbox transaction succeeded. This implies the mailbox
 480 *		  protocol completed successfully, not that the operation itself
 481 *		  was successful.
 482 *  * %-ENOMEM  - Couldn't allocate a bounce buffer.
 483 *  * %-EFAULT	- Copying to or from userspace failed.
 484 *  * %-EINTR	- Mailbox acquisition interrupted.
 485 *  * %-EXXX	- Transaction level failures.
 486 *
 487 * Creates the appropriate mailbox command and dispatches it on behalf of a
 488 * userspace request. The input and output payloads are copied to and
 489 * from userspace.
 490 *
 491 * See cxl_send_cmd().
 492 */
 493static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
 494					const struct cxl_mem_command *cmd,
 495					u64 in_payload, u64 out_payload,
 496					s32 *size_out, u32 *retval)
 497{
 498	struct device *dev = &cxlm->pdev->dev;
 499	struct mbox_cmd mbox_cmd = {
 500		.opcode = cmd->opcode,
 501		.size_in = cmd->info.size_in,
 502		.size_out = cmd->info.size_out,
 503	};
 504	int rc;
 505
 506	if (cmd->info.size_out) {
 507		mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
 508		if (!mbox_cmd.payload_out)
 509			return -ENOMEM;
 510	}
 511
 512	if (cmd->info.size_in) {
 513		mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
 514						   cmd->info.size_in);
 515		if (IS_ERR(mbox_cmd.payload_in)) {
 516			kvfree(mbox_cmd.payload_out);
 517			return PTR_ERR(mbox_cmd.payload_in);
 518		}
 519	}
 520
 521	rc = cxl_mem_mbox_get(cxlm);
 522	if (rc)
 523		goto out;
 524
 525	dev_dbg(dev,
 526		"Submitting %s command for user\n"
 527		"\topcode: %x\n"
 528		"\tsize: %ub\n",
 529		cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
 530		cmd->info.size_in);
 531
 532	dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
 533		      "raw command path used\n");
 534
 535	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 536	cxl_mem_mbox_put(cxlm);
 537	if (rc)
 538		goto out;
 539
 540	/*
 541	 * @size_out contains the max size that's allowed to be written back out
 542	 * to userspace. While the hardware may have produced more output than
 543	 * this, the excess must be ignored.
 544	 */
 545	if (mbox_cmd.size_out) {
 546		dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
 547			      "Invalid return size\n");
 548		if (copy_to_user(u64_to_user_ptr(out_payload),
 549				 mbox_cmd.payload_out, mbox_cmd.size_out)) {
 550			rc = -EFAULT;
 551			goto out;
 552		}
 553	}
 554
 555	*size_out = mbox_cmd.size_out;
 556	*retval = mbox_cmd.return_code;
 557
 558out:
 559	kvfree(mbox_cmd.payload_in);
 560	kvfree(mbox_cmd.payload_out);
 561	return rc;
 562}
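/*
 * A minimal sketch of the userspace side of this path, which is reached
 * via the CXL_MEM_SEND_COMMAND ioctl on the memX character device. Field
 * names follow the uses of struct cxl_send_command in this file; the
 * command id and output buffer are illustrative only:
 *
 *	struct cxl_send_command send = {
 *		.id = CXL_MEM_COMMAND_ID_IDENTIFY,
 *		.out.size = sizeof(buf),
 *		.out.payload = (__u64)(uintptr_t)buf,
 *	};
 *
 *	if (ioctl(fd, CXL_MEM_SEND_COMMAND, &send) == 0) {
 *		// send.retval holds the device return code,
 *		// send.out.size the bytes actually written to buf
 *	}
 */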
 563
 564static bool cxl_mem_raw_command_allowed(u16 opcode)
 565{
 566	int i;
 567
 568	if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
 569		return false;
 570
 571	if (security_locked_down(LOCKDOWN_PCI_ACCESS))
 572		return false;
 573
 574	if (cxl_raw_allow_all)
 575		return true;
 576
 577	if (cxl_is_security_command(opcode))
 578		return false;
 579
 580	for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
 581		if (cxl_disabled_raw_commands[i] == opcode)
 582			return false;
 583
 584	return true;
 585}
 586
 587/**
 588 * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
 589 * @cxlm: &struct cxl_mem device whose mailbox will be used.
 590 * @send_cmd: &struct cxl_send_command copied in from userspace.
 591 * @out_cmd: Sanitized and populated &struct cxl_mem_command.
 592 *
 593 * Return:
 594 *  * %0	- @out_cmd is ready to send.
 595 *  * %-ENOTTY	- Invalid command specified.
 596 *  * %-EINVAL	- Reserved fields or invalid values were used.
 597 *  * %-ENOMEM	- Input or output buffer wasn't sized properly.
 598 *  * %-EPERM	- Attempted to use a protected command.
 599 *
 600 * The result of this command is a fully validated command in @out_cmd that is
 601 * safe to send to the hardware.
 602 *
 603 * See handle_mailbox_cmd_from_user()
 604 */
 605static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
 606				      const struct cxl_send_command *send_cmd,
 607				      struct cxl_mem_command *out_cmd)
 608{
 609	const struct cxl_command_info *info;
 610	struct cxl_mem_command *c;
 611
 612	if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
 613		return -ENOTTY;
 614
 615	/*
 616	 * The user can never specify an input payload larger than what hardware
 617	 * supports, but output can be arbitrarily large (simply write out as
 618	 * much data as the hardware provides).
 619	 */
 620	if (send_cmd->in.size > cxlm->payload_size)
 621		return -EINVAL;
 622
 623	/*
 624	 * Checks are bypassed for raw commands but a WARN/taint will occur
 625	 * later in the call chain.
 626	 */
 627	if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
 628		const struct cxl_mem_command temp = {
 629			.info = {
 630				.id = CXL_MEM_COMMAND_ID_RAW,
 631				.flags = 0,
 632				.size_in = send_cmd->in.size,
 633				.size_out = send_cmd->out.size,
 634			},
 635			.opcode = send_cmd->raw.opcode
 636		};
 637
 638		if (send_cmd->raw.rsvd)
 639			return -EINVAL;
 640
 641		/*
 642		 * Unlike supported commands, the output size of RAW commands
 643		 * gets passed along without further checking, so it must be
 644		 * validated here.
 645		 */
 646		if (send_cmd->out.size > cxlm->payload_size)
 647			return -EINVAL;
 648
 649		if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
 650			return -EPERM;
 651
 652		memcpy(out_cmd, &temp, sizeof(temp));
 653
 654		return 0;
 655	}
 656
 657	if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
 658		return -EINVAL;
 659
 660	if (send_cmd->rsvd)
 661		return -EINVAL;
 662
 663	if (send_cmd->in.rsvd || send_cmd->out.rsvd)
 664		return -EINVAL;
 665
 666	/* Convert user's command into the internal representation */
 667	c = &mem_commands[send_cmd->id];
 668	info = &c->info;
 669
 670	/* Check that the command is enabled for hardware */
 671	if (!test_bit(info->id, cxlm->enabled_cmds))
 672		return -ENOTTY;
 673
 674	/* Check the input buffer is the expected size */
 675	if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
 676		return -ENOMEM;
 677
 678	/* Check the output buffer is at least large enough */
 679	if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
 680		return -ENOMEM;
 681
 682	memcpy(out_cmd, c, sizeof(*c));
 683	out_cmd->info.size_in = send_cmd->in.size;
 684	/*
 685	 * XXX: out_cmd->info.size_out will be controlled by the driver, and the
 686	 * specified number of bytes @send_cmd->out.size will be copied back out
 687	 * to userspace.
 688	 */
 689
 690	return 0;
 691}
 692
 693static int cxl_query_cmd(struct cxl_memdev *cxlmd,
 694			 struct cxl_mem_query_commands __user *q)
 695{
 696	struct device *dev = &cxlmd->dev;
 697	struct cxl_mem_command *cmd;
 698	u32 n_commands;
 699	int j = 0;
 700
 701	dev_dbg(dev, "Query IOCTL\n");
 702
 703	if (get_user(n_commands, &q->n_commands))
 704		return -EFAULT;
 705
 706	/* returns the total number if 0 elements are requested. */
 707	if (n_commands == 0)
 708		return put_user(cxl_cmd_count, &q->n_commands);
 709
 710	/*
 711	 * Otherwise, return min(n_commands, total commands) cxl_command_info
 712	 * structures.
 713	 */
 714	cxl_for_each_cmd(cmd) {
 715		const struct cxl_command_info *info = &cmd->info;
 716
 717		if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
 718			return -EFAULT;
 
 719
 720		if (j == n_commands)
 721			break;
 722	}
 723
 724	return 0;
 725}
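/*
 * Sketch of the intended two-call pattern for CXL_MEM_QUERY_COMMANDS,
 * based on the n_commands handling above (illustrative, not a literal
 * test program):
 *
 *	struct cxl_mem_query_commands *q = calloc(1, sizeof(*q));
 *
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);	// 0 entries: learn the count
 *	n = q->n_commands;
 *	q = realloc(q, sizeof(*q) + n * sizeof(q->commands[0]));
 *	q->n_commands = n;
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);	// fetch up to n entries
 */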
 726
 727static int cxl_send_cmd(struct cxl_memdev *cxlmd,
 728			struct cxl_send_command __user *s)
 729{
 730	struct cxl_mem *cxlm = cxlmd->cxlm;
 731	struct device *dev = &cxlmd->dev;
 732	struct cxl_send_command send;
 733	struct cxl_mem_command c;
 734	int rc;
 735
 736	dev_dbg(dev, "Send IOCTL\n");
 737
 738	if (copy_from_user(&send, s, sizeof(send)))
 739		return -EFAULT;
 
 740
 741	rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
 742	if (rc)
 743		return rc;
 744
 745	/* Prepare to handle a full payload for variable sized output */
 746	if (c.info.size_out < 0)
 747		c.info.size_out = cxlm->payload_size;
 748
 749	rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
 750					  send.out.payload, &send.out.size,
 751					  &send.retval);
 752	if (rc)
 753		return rc;
 754
 755	if (copy_to_user(s, &send, sizeof(send)))
 756		return -EFAULT;
 757
 758	return 0;
 759}
 760
 761static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
 762			       unsigned long arg)
 763{
 764	switch (cmd) {
 765	case CXL_MEM_QUERY_COMMANDS:
 766		return cxl_query_cmd(cxlmd, (void __user *)arg);
 767	case CXL_MEM_SEND_COMMAND:
 768		return cxl_send_cmd(cxlmd, (void __user *)arg);
 769	default:
 770		return -ENOTTY;
 771	}
 772}
 773
 774static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
 775			     unsigned long arg)
 776{
 777	struct cxl_memdev *cxlmd = file->private_data;
 778	int rc = -ENXIO;
 779
 780	down_read(&cxl_memdev_rwsem);
 781	if (cxlmd->cxlm)
 782		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
 783	up_read(&cxl_memdev_rwsem);
 784
 785	return rc;
 786}
 787
 788static int cxl_memdev_open(struct inode *inode, struct file *file)
 789{
 790	struct cxl_memdev *cxlmd =
 791		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
 792
 793	get_device(&cxlmd->dev);
 794	file->private_data = cxlmd;
 795
 796	return 0;
 797}
 798
 799static int cxl_memdev_release_file(struct inode *inode, struct file *file)
 800{
 801	struct cxl_memdev *cxlmd =
 802		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
 803
 804	put_device(&cxlmd->dev);
 805
 806	return 0;
 807}
 808
 809static struct cxl_memdev *to_cxl_memdev(struct device *dev)
 810{
 811	return container_of(dev, struct cxl_memdev, dev);
 812}
 813
 814static void cxl_memdev_shutdown(struct device *dev)
 815{
 816	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 817
 818	down_write(&cxl_memdev_rwsem);
 819	cxlmd->cxlm = NULL;
 820	up_write(&cxl_memdev_rwsem);
 821}
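/*
 * Note on synchronization: the down_write() here pairs with the
 * down_read() in cxl_memdev_ioctl() above. An ioctl either completes
 * against a valid cxlmd->cxlm before this shutdown runs, or it observes
 * the NULL pointer afterwards and fails with -ENXIO rather than touching
 * a departing cxl_mem.
 */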
 822
 823static const struct cdevm_file_operations cxl_memdev_fops = {
 824	.fops = {
 825		.owner = THIS_MODULE,
 826		.unlocked_ioctl = cxl_memdev_ioctl,
 827		.open = cxl_memdev_open,
 828		.release = cxl_memdev_release_file,
 829		.compat_ioctl = compat_ptr_ioctl,
 830		.llseek = noop_llseek,
 831	},
 832	.shutdown = cxl_memdev_shutdown,
 833};
 834
 835static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
 836{
 837	struct cxl_mem_command *c;
 838
 839	cxl_for_each_cmd(c)
 840		if (c->opcode == opcode)
 841			return c;
 842
 843	return NULL;
 844}
 845
 846/**
 847 * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
 848 * @cxlm: The CXL memory device to communicate with.
 849 * @opcode: Opcode for the mailbox command.
 850 * @in: The input payload for the mailbox command.
 851 * @in_size: The length of the input payload
 852 * @out: Caller allocated buffer for the output.
 853 * @out_size: Expected size of output.
 854 *
 855 * Context: Process context (may sleep). Will acquire and release mbox_mutex.
 856 * Return:
 857 *  * %0	- Success; any command output has been placed in @out.
 858 *  * %-E2BIG	- Payload is too large for hardware.
 859 *  * %-EBUSY	- Couldn't acquire exclusive mailbox access.
 860 *  * %-EFAULT	- Hardware error occurred.
 861 *  * %-ENXIO	- Command completed, but device reported an error.
 862 *  * %-EIO	- Unexpected output size.
 863 *
 864 * Mailbox commands may execute successfully yet the device itself may report an
 865 * error. While this distinction can be useful for commands from userspace, the
 866 * kernel will only be able to use results when both are successful.
 867 *
 868 * See __cxl_mem_mbox_send_cmd()
 869 */
 870static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode,
 871				 void *in, size_t in_size,
 872				 void *out, size_t out_size)
 873{
 874	const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
 875	struct mbox_cmd mbox_cmd = {
 876		.opcode = opcode,
 877		.payload_in = in,
 878		.size_in = in_size,
 879		.size_out = out_size,
 880		.payload_out = out,
 881	};
 882	int rc;
 883
 884	if (out_size > cxlm->payload_size)
 885		return -E2BIG;
 886
 887	rc = cxl_mem_mbox_get(cxlm);
 888	if (rc)
 889		return rc;
 890
 891	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 892	cxl_mem_mbox_put(cxlm);
 893	if (rc)
 894		return rc;
 
 895
 896	/* TODO: Map return code to proper kernel style errno */
 897	if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
 898		return -ENXIO;
 899
 900	/*
 901	 * Variable sized commands can't be validated and so it's up to the
 902	 * caller to do that if they wish.
 903	 */
 904	if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
 905		return -EIO;
 906
 907	return 0;
 908}
 909
 910static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
 911{
 912	const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
 913
 914	cxlm->payload_size =
 915		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
 
 916
 917	/*
 918	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
 919	 *
 920	 * If the size is too small, mandatory commands will not work and so
 921	 * there's no point in going forward. If the size is too large, there's
 922	 * no harm in soft limiting it.
 923	 */
 924	cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
 925	if (cxlm->payload_size < 256) {
 926		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
 927			cxlm->payload_size);
 928		return -ENXIO;
 929	}
 930
 931	dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
 932		cxlm->payload_size);
 933
 934	return 0;
 935}
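/*
 * Worked example of the encoding handled above: the capability field
 * stores log2 of the payload size, so a value of 8 decodes to 1 << 8 =
 * 256 bytes (the minimum accepted above) and 20 decodes to 1 MB (the
 * soft cap applied via min_t()). Anything smaller than 256 bytes is
 * rejected with -ENXIO.
 */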
 936
 937static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev)
 938{
 939	struct device *dev = &pdev->dev;
 940	struct cxl_mem *cxlm;
 941
 942	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
 943	if (!cxlm) {
 944		dev_err(dev, "No memory available\n");
 945		return ERR_PTR(-ENOMEM);
 946	}
 947
 948	mutex_init(&cxlm->mbox_mutex);
 949	cxlm->pdev = pdev;
 950	cxlm->enabled_cmds =
 951		devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count),
 952				   sizeof(unsigned long),
 953				   GFP_KERNEL | __GFP_ZERO);
 954	if (!cxlm->enabled_cmds) {
 955		dev_err(dev, "No memory available for bitmap\n");
 956		return ERR_PTR(-ENOMEM);
 957	}
 958
 959	return cxlm;
 960}
 961
 962static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm,
 963					  u8 bar, u64 offset)
 964{
 965	struct pci_dev *pdev = cxlm->pdev;
 966	struct device *dev = &pdev->dev;
 967	void __iomem *addr;
 968
 969	/* Basic sanity check that BAR is big enough */
 970	if (pci_resource_len(pdev, bar) < offset) {
 971		dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
 972			&pdev->resource[bar], (unsigned long long)offset);
 973		return IOMEM_ERR_PTR(-ENXIO);
 974	}
 975
 976	addr = pci_iomap(pdev, bar, 0);
 977	if (!addr) {
 978		dev_err(dev, "failed to map registers\n");
 979		return IOMEM_ERR_PTR(-ENOMEM);
 980	}
 981
 982	dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %#llx\n",
 983		bar, offset);
 984
 985	return addr;
 986}
 987
 988static void cxl_mem_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
 989{
 990	pci_iounmap(cxlm->pdev, base);
 991}
 992
 993static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
 994{
 995	int pos;
 996
 997	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
 998	if (!pos)
 999		return 0;
 
1000
1001	while (pos) {
1002		u16 vendor, id;
 
1003
1004		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
1005		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
1006		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
1007			return pos;
1008
1009		pos = pci_find_next_ext_capability(pdev, pos,
1010						   PCI_EXT_CAP_ID_DVSEC);
1011	}
1012
1013	return 0;
1014}
 
1015
1016static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
1017			  struct cxl_register_map *map)
1018{
1019	struct pci_dev *pdev = cxlm->pdev;
1020	struct device *dev = &pdev->dev;
1021	struct cxl_component_reg_map *comp_map;
1022	struct cxl_device_reg_map *dev_map;
1023
1024	switch (map->reg_type) {
1025	case CXL_REGLOC_RBI_COMPONENT:
1026		comp_map = &map->component_map;
1027		cxl_probe_component_regs(dev, base, comp_map);
1028		if (!comp_map->hdm_decoder.valid) {
1029			dev_err(dev, "HDM decoder registers not found\n");
1030			return -ENXIO;
1031		}
1032
1033		dev_dbg(dev, "Set up component registers\n");
1034		break;
1035	case CXL_REGLOC_RBI_MEMDEV:
1036		dev_map = &map->device_map;
1037		cxl_probe_device_regs(dev, base, dev_map);
1038		if (!dev_map->status.valid || !dev_map->mbox.valid ||
1039		    !dev_map->memdev.valid) {
1040			dev_err(dev, "registers not found: %s%s%s\n",
1041				!dev_map->status.valid ? "status " : "",
1042				!dev_map->mbox.valid ? "mbox " : "",
1043				!dev_map->memdev.valid ? "memdev " : "");
1044			return -ENXIO;
1045		}
1046
1047		dev_dbg(dev, "Probing device registers...\n");
1048		break;
1049	default:
1050		break;
1051	}
1052
1053	return 0;
1054}
1055
1056static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
1057{
1058	struct pci_dev *pdev = cxlm->pdev;
1059	struct device *dev = &pdev->dev;
1060
1061	switch (map->reg_type) {
1062	case CXL_REGLOC_RBI_COMPONENT:
1063		cxl_map_component_regs(pdev, &cxlm->regs.component, map);
1064		dev_dbg(dev, "Mapping component registers...\n");
1065		break;
1066	case CXL_REGLOC_RBI_MEMDEV:
1067		cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map);
1068		dev_dbg(dev, "Mapping device registers...\n");
1069		break;
1070	default:
1071		break;
1072	}
1073
1074	return 0;
1075}
1076
1077static void cxl_decode_register_block(u32 reg_lo, u32 reg_hi,
1078				      u8 *bar, u64 *offset, u8 *reg_type)
1079{
1080	*offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
1081	*bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
1082	*reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
1083}
1084
1085/**
1086 * cxl_mem_setup_regs() - Set up necessary MMIO.
1087 * @cxlm: The CXL memory device to communicate with.
1088 *
1089 * Return: 0 if all necessary registers mapped.
1090 *
1091 * A memory device is required by spec to implement a certain set of MMIO
1092 * regions. The purpose of this function is to enumerate and map those
1093 * registers.
1094 */
1095static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
1096{
1097	struct pci_dev *pdev = cxlm->pdev;
1098	struct device *dev = &pdev->dev;
1099	u32 regloc_size, regblocks;
1100	void __iomem *base;
1101	int regloc, i;
1102	struct cxl_register_map *map, *n;
1103	LIST_HEAD(register_maps);
1104	int ret = 0;
1105
1106	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
1107	if (!regloc) {
1108		dev_err(dev, "register location dvsec not found\n");
1109		return -ENXIO;
1110	}
1111
1112	if (pci_request_mem_regions(pdev, pci_name(pdev)))
1113		return -ENODEV;
 
1114
1115	/* Get the size of the Register Locator DVSEC */
1116	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
1117	regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
1118
1119	regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
1120	regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
1121
1122	for (i = 0; i < regblocks; i++, regloc += 8) {
1123		u32 reg_lo, reg_hi;
1124		u8 reg_type;
1125		u64 offset;
1126		u8 bar;
1127
1128		map = kzalloc(sizeof(*map), GFP_KERNEL);
1129		if (!map) {
1130			ret = -ENOMEM;
1131			goto release_regions;
1132		}
1133
1134		list_add(&map->list, &register_maps);
1135
1136		pci_read_config_dword(pdev, regloc, &reg_lo);
1137		pci_read_config_dword(pdev, regloc + 4, &reg_hi);
1138
1139		cxl_decode_register_block(reg_lo, reg_hi, &bar, &offset,
1140					  &reg_type);
1141
1142		dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n",
1143			bar, offset, reg_type);
1144
1145		base = cxl_mem_map_regblock(cxlm, bar, offset);
1146		if (IS_ERR(base)) {
1147			ret = PTR_ERR(base);
1148			goto release_regions;
1149		}
1150
1151		map->barno = bar;
1152		map->block_offset = offset;
1153		map->reg_type = reg_type;
1154
1155		ret = cxl_probe_regs(cxlm, base + offset, map);
1156
1157		/* Always unmap the regblock regardless of probe success */
1158		cxl_mem_unmap_regblock(cxlm, base);
1159
1160		if (ret)
1161			goto release_regions;
1162	}
1163
1164	pci_release_mem_regions(pdev);
1165
1166	list_for_each_entry(map, &register_maps, list) {
1167		ret = cxl_map_regs(cxlm, map);
1168		if (ret)
1169			goto free_maps;
1170	}
1171
	goto free_maps;

release_regions:
	pci_release_mem_regions(pdev);
1172free_maps:
1173	list_for_each_entry_safe(map, n, &register_maps, list) {
1174		list_del(&map->list);
1175		kfree(map);
1176	}
1177
1178	return ret;
1179}
1180
1181static void cxl_memdev_release(struct device *dev)
1182{
1183	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1184
1185	ida_free(&cxl_memdev_ida, cxlmd->id);
1186	kfree(cxlmd);
1187}
 
1188
1189static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
1190				kgid_t *gid)
1191{
1192	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
1193}
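/*
 * With the "cxl/%s" format above and the "mem%d" name assigned in
 * devm_cxl_add_memdev(), the node userspace opens ends up as e.g.
 * /dev/cxl/mem0.
 */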
 
1194
1195static ssize_t firmware_version_show(struct device *dev,
1196				     struct device_attribute *attr, char *buf)
1197{
1198	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1199	struct cxl_mem *cxlm = cxlmd->cxlm;
1200
1201	return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
1202}
1203static DEVICE_ATTR_RO(firmware_version);
1204
1205static ssize_t payload_max_show(struct device *dev,
1206				struct device_attribute *attr, char *buf)
1207{
1208	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1209	struct cxl_mem *cxlm = cxlmd->cxlm;
 
1210
1211	return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
1212}
1213static DEVICE_ATTR_RO(payload_max);
1214
1215static ssize_t label_storage_size_show(struct device *dev,
1216				struct device_attribute *attr, char *buf)
1217{
1218	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1219	struct cxl_mem *cxlm = cxlmd->cxlm;
1220
1221	return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
1222}
1223static DEVICE_ATTR_RO(label_storage_size);
1224
1225static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
1226			     char *buf)
1227{
1228	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1229	struct cxl_mem *cxlm = cxlmd->cxlm;
1230	unsigned long long len = range_len(&cxlm->ram_range);
1231
1232	return sysfs_emit(buf, "%#llx\n", len);
1233}
1234
1235static struct device_attribute dev_attr_ram_size =
1236	__ATTR(size, 0444, ram_size_show, NULL);
1237
1238static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
1239			      char *buf)
1240{
1241	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1242	struct cxl_mem *cxlm = cxlmd->cxlm;
1243	unsigned long long len = range_len(&cxlm->pmem_range);
1244
1245	return sysfs_emit(buf, "%#llx\n", len);
1246}
1247
1248static struct device_attribute dev_attr_pmem_size =
1249	__ATTR(size, 0444, pmem_size_show, NULL);
1250
1251static struct attribute *cxl_memdev_attributes[] = {
1252	&dev_attr_firmware_version.attr,
1253	&dev_attr_payload_max.attr,
1254	&dev_attr_label_storage_size.attr,
1255	NULL,
1256};
1257
1258static struct attribute *cxl_memdev_pmem_attributes[] = {
1259	&dev_attr_pmem_size.attr,
1260	NULL,
1261};
1262
1263static struct attribute *cxl_memdev_ram_attributes[] = {
1264	&dev_attr_ram_size.attr,
1265	NULL,
1266};
1267
1268static struct attribute_group cxl_memdev_attribute_group = {
1269	.attrs = cxl_memdev_attributes,
1270};
1271
1272static struct attribute_group cxl_memdev_ram_attribute_group = {
1273	.name = "ram",
1274	.attrs = cxl_memdev_ram_attributes,
1275};
1276
1277static struct attribute_group cxl_memdev_pmem_attribute_group = {
1278	.name = "pmem",
1279	.attrs = cxl_memdev_pmem_attributes,
1280};
1281
1282static const struct attribute_group *cxl_memdev_attribute_groups[] = {
1283	&cxl_memdev_attribute_group,
1284	&cxl_memdev_ram_attribute_group,
1285	&cxl_memdev_pmem_attribute_group,
1286	NULL,
1287};
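/*
 * For illustration, the groups above surface the following sysfs layout
 * under each memX device (names taken from the attributes and group
 * names declared above):
 *
 *	memX/firmware_version
 *	memX/payload_max
 *	memX/label_storage_size
 *	memX/ram/size
 *	memX/pmem/size
 */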
1288
1289static const struct device_type cxl_memdev_type = {
1290	.name = "cxl_memdev",
1291	.release = cxl_memdev_release,
1292	.devnode = cxl_memdev_devnode,
1293	.groups = cxl_memdev_attribute_groups,
1294};
1295
1296static void cxl_memdev_unregister(void *_cxlmd)
1297{
1298	struct cxl_memdev *cxlmd = _cxlmd;
1299	struct device *dev = &cxlmd->dev;
1300	struct cdev *cdev = &cxlmd->cdev;
1301	const struct cdevm_file_operations *cdevm_fops;
1302
1303	cdevm_fops = container_of(cdev->ops, typeof(*cdevm_fops), fops);
1304	cdevm_fops->shutdown(dev);
 
1305
1306	cdev_device_del(&cxlmd->cdev, dev);
1307	put_device(dev);
1308}
1309
1310static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
1311					   const struct file_operations *fops)
1312{
1313	struct pci_dev *pdev = cxlm->pdev;
1314	struct cxl_memdev *cxlmd;
1315	struct device *dev;
1316	struct cdev *cdev;
1317	int rc;
1318
1319	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
1320	if (!cxlmd)
1321		return ERR_PTR(-ENOMEM);
1322
1323	rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
1324	if (rc < 0)
1325		goto err;
1326	cxlmd->id = rc;
1327
1328	dev = &cxlmd->dev;
1329	device_initialize(dev);
1330	dev->parent = &pdev->dev;
1331	dev->bus = &cxl_bus_type;
1332	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
1333	dev->type = &cxl_memdev_type;
1334	device_set_pm_not_required(dev);
1335
1336	cdev = &cxlmd->cdev;
1337	cdev_init(cdev, fops);
1338	return cxlmd;
1339
1340err:
1341	kfree(cxlmd);
1342	return ERR_PTR(rc);
1343}
1344
1345static struct cxl_memdev *
1346devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
1347		    const struct cdevm_file_operations *cdevm_fops)
1348{
1349	struct cxl_memdev *cxlmd;
1350	struct device *dev;
1351	struct cdev *cdev;
1352	int rc;
1353
1354	cxlmd = cxl_memdev_alloc(cxlm, &cdevm_fops->fops);
1355	if (IS_ERR(cxlmd))
1356		return cxlmd;
1357
1358	dev = &cxlmd->dev;
1359	rc = dev_set_name(dev, "mem%d", cxlmd->id);
1360	if (rc)
1361		goto err;
1362
1363	/*
1364	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
1365	 * needed as this is ordered with cdev_add() publishing the device.
1366	 */
1367	cxlmd->cxlm = cxlm;
1368
1369	cdev = &cxlmd->cdev;
1370	rc = cdev_device_add(cdev, dev);
1371	if (rc)
1372		goto err;
1373
1374	rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
1375	if (rc)
1376		return ERR_PTR(rc);
1377	return cxlmd;
 
1378
1379err:
1380	/*
1381	 * The cdev was briefly live; shut down any ioctl operations that
1382	 * saw that state.
1383	 */
1384	cdevm_fops->shutdown(dev);
1385	put_device(dev);
1386	return ERR_PTR(rc);
1387}
1388
1389static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
1390{
1391	u32 remaining = size;
1392	u32 offset = 0;
1393
1394	while (remaining) {
1395		u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
1396		struct cxl_mbox_get_log {
1397			uuid_t uuid;
1398			__le32 offset;
1399			__le32 length;
1400		} __packed log = {
1401			.uuid = *uuid,
1402			.offset = cpu_to_le32(offset),
1403			.length = cpu_to_le32(xfer_size)
1404		};
1405		int rc;
1406
1407		rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
1408					   sizeof(log), out, xfer_size);
1409		if (rc < 0)
1410			return rc;
1411
1412		out += xfer_size;
1413		remaining -= xfer_size;
1414		offset += xfer_size;
1415	}
1416
1417	return 0;
1418}
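/*
 * The chunking above is bounded by the mailbox payload size negotiated
 * in cxl_mem_setup_mailbox(). As an illustration, a 64KB log fits in a
 * single Get Log transfer with a 1MB payload size, but takes 256 round
 * trips with the 256 byte minimum mailbox.
 */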
1419
1420/**
1421 * cxl_walk_cel() - Walk through the Command Effects Log.
1422 * @cxlm: Device.
1423 * @size: Length of the Command Effects Log.
1424 * @cel: CEL
1425 *
1426 * Iterate over each entry in the CEL and determine if the driver supports the
1427 * command. If so, the command is enabled for the device and can be used later.
1428 */
1429static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
1430{
1431	struct cel_entry {
1432		__le16 opcode;
1433		__le16 effect;
1434	} __packed *cel_entry;
1435	const int cel_entries = size / sizeof(*cel_entry);
1436	int i;
1437
1438	cel_entry = (struct cel_entry *)cel;
1439
1440	for (i = 0; i < cel_entries; i++) {
1441		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
1442		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
1443
1444		if (!cmd) {
1445			dev_dbg(&cxlm->pdev->dev,
1446				"Opcode 0x%04x unsupported by driver", opcode);
1447			continue;
1448		}
1449
1450		set_bit(cmd->info.id, cxlm->enabled_cmds);
1451	}
1452}
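/*
 * Each CEL entry above is a packed 4 byte (opcode, effect) pair, so a
 * 64 byte log, for example, describes 16 commands. Only the opcode is
 * consumed here; the effect field is currently ignored.
 */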
1453
1454struct cxl_mbox_get_supported_logs {
1455	__le16 entries;
1456	u8 rsvd[6];
1457	struct gsl_entry {
1458		uuid_t uuid;
1459		__le32 size;
1460	} __packed entry[];
1461} __packed;
1462
1463static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
1464{
1465	struct cxl_mbox_get_supported_logs *ret;
1466	int rc;
1467
1468	ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
1469	if (!ret)
1470		return ERR_PTR(-ENOMEM);
1471
1472	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
1473				   0, ret, cxlm->payload_size);
1474	if (rc < 0) {
1475		kvfree(ret);
1476		return ERR_PTR(rc);
1477	}
1478
1479	return ret;
1480}
1481
1482/**
1483 * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
1484 * @cxlm: The device.
1485 *
1486 * Return: 0 if enumerate completed successfully.
1487 *
1488 * CXL devices have optional support for certain commands. This function will
1489 * determine the set of supported commands for the hardware and update the
1490 * enabled_cmds bitmap in the @cxlm.
1491 */
1492static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
1493{
1494	struct cxl_mbox_get_supported_logs *gsl;
1495	struct device *dev = &cxlm->pdev->dev;
1496	struct cxl_mem_command *cmd;
1497	int i, rc;
1498
1499	gsl = cxl_get_gsl(cxlm);
1500	if (IS_ERR(gsl))
1501		return PTR_ERR(gsl);
1502
1503	rc = -ENOENT;
1504	for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
1505		u32 size = le32_to_cpu(gsl->entry[i].size);
1506		uuid_t uuid = gsl->entry[i].uuid;
1507		u8 *log;
1508
1509		dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
1510
1511		if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
1512			continue;
1513
1514		log = kvmalloc(size, GFP_KERNEL);
1515		if (!log) {
1516			rc = -ENOMEM;
1517			goto out;
1518		}
1519
1520		rc = cxl_xfer_log(cxlm, &uuid, size, log);
1521		if (rc) {
1522			kvfree(log);
1523			goto out;
1524		}
1525
1526		cxl_walk_cel(cxlm, size, log);
1527		kvfree(log);
1528
1529		/* In case CEL was bogus, enable some default commands. */
1530		cxl_for_each_cmd(cmd)
1531			if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
1532				set_bit(cmd->info.id, cxlm->enabled_cmds);
1533
1534		/* Found the required CEL */
1535		rc = 0;
1536	}
1537
1538out:
1539	kvfree(gsl);
1540	return rc;
1541}
1542
1543/**
1544 * cxl_mem_identify() - Send the IDENTIFY command to the device.
1545 * @cxlm: The device to identify.
1546 *
1547 * Return: 0 if identify was executed successfully.
1548 *
1549 * This will dispatch the identify command to the device and on success populate
1550 * structures to be exported to sysfs.
1551 */
1552static int cxl_mem_identify(struct cxl_mem *cxlm)
1553{
1554	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
1555	struct cxl_mbox_identify {
1556		char fw_revision[0x10];
1557		__le64 total_capacity;
1558		__le64 volatile_capacity;
1559		__le64 persistent_capacity;
1560		__le64 partition_align;
1561		__le16 info_event_log_size;
1562		__le16 warning_event_log_size;
1563		__le16 failure_event_log_size;
1564		__le16 fatal_event_log_size;
1565		__le32 lsa_size;
1566		u8 poison_list_max_mer[3];
1567		__le16 inject_poison_limit;
1568		u8 poison_caps;
1569		u8 qos_telemetry_caps;
1570	} __packed id;
1571	int rc;
1572
1573	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
1574				   sizeof(id));
1575	if (rc < 0)
1576		return rc;
1577
1578	/*
1579	 * TODO: enumerate DPA map, as 'ram' and 'pmem' do not alias.
1580	 * For now, only the capacity is exported in sysfs
1581	 */
1582	cxlm->ram_range.start = 0;
1583	cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1;
1584
1585	cxlm->pmem_range.start = 0;
1586	cxlm->pmem_range.end =
1587		le64_to_cpu(id.persistent_capacity) * SZ_256M - 1;
1588
1589	cxlm->lsa_size = le32_to_cpu(id.lsa_size);
1590	memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
1591
1592	return 0;
1593}
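/*
 * The capacity fields in the IDENTIFY payload are expressed in multiples
 * of 256MB, hence the SZ_256M scaling above. For example, a
 * volatile_capacity of 16 yields a 4GB ram range of [0, 0xffffffff].
 */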
1594
1595static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1596{
1597	struct cxl_memdev *cxlmd;
1598	struct cxl_mem *cxlm;
1599	int rc;
1600
1601	rc = pcim_enable_device(pdev);
1602	if (rc)
1603		return rc;
1604
1605	cxlm = cxl_mem_create(pdev);
1606	if (IS_ERR(cxlm))
1607		return PTR_ERR(cxlm);
1608
1609	rc = cxl_mem_setup_regs(cxlm);
1610	if (rc)
1611		return rc;
1612
1613	rc = cxl_mem_setup_mailbox(cxlm);
1614	if (rc)
1615		return rc;
1616
1617	rc = cxl_mem_enumerate_cmds(cxlm);
1618	if (rc)
1619		return rc;
1620
1621	rc = cxl_mem_identify(cxlm);
1622	if (rc)
1623		return rc;
1624
1625	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm, &cxl_memdev_fops);
1626	if (IS_ERR(cxlmd))
1627		return PTR_ERR(cxlmd);
1628
1629	if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
1630		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
1631
1632	return rc;
 
1633}
 
1634
1635static const struct pci_device_id cxl_mem_pci_tbl[] = {
1636	/* PCI class code for CXL.mem Type-3 Devices */
1637	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
1638	{ /* terminate list */ },
1639};
1640MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
1641
1642static struct pci_driver cxl_mem_driver = {
1643	.name			= KBUILD_MODNAME,
1644	.id_table		= cxl_mem_pci_tbl,
1645	.probe			= cxl_mem_probe,
1646	.driver	= {
1647		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
1648	},
1649};
1650
1651static __init int cxl_mem_init(void)
1652{
1653	struct dentry *mbox_debugfs;
1654	dev_t devt;
1655	int rc;
1656
1657	/* Double check the anonymous union trickery in struct cxl_regs */
1658	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
1659		     offsetof(struct cxl_regs, device_regs.memdev));
1660
1661	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
1662	if (rc)
1663		return rc;
1664
1665	cxl_mem_major = MAJOR(devt);
1666
1667	rc = pci_register_driver(&cxl_mem_driver);
1668	if (rc) {
1669		unregister_chrdev_region(MKDEV(cxl_mem_major, 0),
1670					 CXL_MEM_MAX_DEVS);
1671		return rc;
1672	}
1673
1674	cxl_debugfs = debugfs_create_dir("cxl", NULL);
1675	mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
1676	debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
1677			    &cxl_raw_allow_all);
1678
1679	return 0;
1680}
1681
1682static __exit void cxl_mem_exit(void)
1683{
1684	debugfs_remove_recursive(cxl_debugfs);
1685	pci_unregister_driver(&cxl_mem_driver);
1686	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
1687}
1688
1689MODULE_LICENSE("GPL v2");
1690module_init(cxl_mem_init);
1691module_exit(cxl_mem_exit);
1692MODULE_IMPORT_NS(CXL);