v6.8
  1// SPDX-License-Identifier: GPL-2.0-only
  2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
  3#include <asm-generic/unaligned.h>
  4#include <linux/io-64-nonatomic-lo-hi.h>
  5#include <linux/moduleparam.h>
  6#include <linux/module.h>
  7#include <linux/delay.h>
  8#include <linux/sizes.h>
  9#include <linux/mutex.h>
 10#include <linux/list.h>
 11#include <linux/pci.h>
 12#include <linux/aer.h>
 13#include <linux/io.h>
 14#include "cxlmem.h"
 15#include "cxlpci.h"
 16#include "cxl.h"
 17#include "pmu.h"
 18
 19/**
 20 * DOC: cxl pci
 21 *
 22 * This implements the PCI exclusive functionality for a CXL device as it is
 23 * defined by the Compute Express Link specification. CXL devices may surface
 24 * certain functionality even if it isn't CXL enabled. While this driver
 25 * focuses on the PCI-specific aspects of a CXL device, it binds to the CXL
 26 * memory device class code, and therefore the implementation of cxl_pci is
 27 * centered on CXL memory devices.
 28 *
 29 * The driver has several responsibilities, mainly:
 30 *  - Create the memX device and register on the CXL bus.
 31 *  - Enumerate the device's register interfaces and map them.
 32 *  - Register an nvdimm bridge device with cxl_core.
 33 *  - Register a CXL mailbox with cxl_core.
 34 */
 35
 36#define cxl_doorbell_busy(cxlds)                                                \
 37	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
 38	 CXLDEV_MBOX_CTRL_DOORBELL)
 39
 40/* CXL 2.0 - 8.2.8.4 */
 41#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
 42
 43/*
 44 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 45 * dictate how long to wait for the mailbox to become ready. The new
 46 * field allows the device to tell software the amount of time to wait
 47 * before mailbox ready. This field per the spec theoretically allows
 48 * for up to 255 seconds. 255 seconds is unreasonably long; it's longer
 49 * than the maximum SATA port link recovery wait. Default to 60 seconds
 50 * until someone builds a CXL device that needs more time in practice.
 51 */
 52static unsigned short mbox_ready_timeout = 60;
 53module_param(mbox_ready_timeout, ushort, 0644);
 54MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
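/*
 * Usage sketch (added for illustration, not in the original file): assuming
 * this file builds into a module named cxl_pci, the timeout can be raised
 * at load time or, thanks to the 0644 permissions, at runtime:
 *
 *   modprobe cxl_pci mbox_ready_timeout=120
 *   echo 120 > /sys/module/cxl_pci/parameters/mbox_ready_timeout
 */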
 55
 56static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 57{
 58	const unsigned long start = jiffies;
 59	unsigned long end = start;
 60
 61	while (cxl_doorbell_busy(cxlds)) {
 62		end = jiffies;
 63
 64		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
 65			/* Check again in case preempted before timeout test */
 66			if (!cxl_doorbell_busy(cxlds))
 67				break;
 68			return -ETIMEDOUT;
 69		}
 70		cpu_relax();
 71	}
 72
 73	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
 74		jiffies_to_msecs(end) - jiffies_to_msecs(start));
 75	return 0;
 76}
 77
 78#define cxl_err(dev, status, msg)                                        \
 79	dev_err_ratelimited(dev, msg ", device state %s%s\n",                  \
 80			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
 81			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 82
 83#define cxl_cmd_err(dev, cmd, status, msg)                               \
 84	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n",    \
 85			    (cmd)->opcode,                                     \
 86			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
 87			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 88
 89/*
 90 * Threaded irq dev_id's must be globally unique.  cxl_dev_id provides a unique
 91 * wrapper object for each irq within the same cxlds.
 92 */
 93struct cxl_dev_id {
 94	struct cxl_dev_state *cxlds;
 95};
 96
 97static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
 98			   irq_handler_t thread_fn)
 99{
100	struct device *dev = cxlds->dev;
101	struct cxl_dev_id *dev_id;
102
103	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
104	if (!dev_id)
105		return -ENOMEM;
106	dev_id->cxlds = cxlds;
107
108	return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
109					 IRQF_SHARED | IRQF_ONESHOT, NULL,
110					 dev_id);
111}
112
113static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
114{
115	u64 reg;
116
117	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
118	return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
119}
120
121static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
122{
123	u64 reg;
124	u16 opcode;
125	struct cxl_dev_id *dev_id = id;
126	struct cxl_dev_state *cxlds = dev_id->cxlds;
127	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
128
129	if (!cxl_mbox_background_complete(cxlds))
130		return IRQ_NONE;
131
132	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
133	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
134	if (opcode == CXL_MBOX_OP_SANITIZE) {
135		mutex_lock(&mds->mbox_mutex);
136		if (mds->security.sanitize_node)
137			mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
138		mutex_unlock(&mds->mbox_mutex);
139	} else {
140		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
141		rcuwait_wake_up(&mds->mbox_wait);
142	}
143
144	return IRQ_HANDLED;
145}
146
147/*
148 * Sanitization operation polling mode.
149 */
150static void cxl_mbox_sanitize_work(struct work_struct *work)
151{
152	struct cxl_memdev_state *mds =
153		container_of(work, typeof(*mds), security.poll_dwork.work);
154	struct cxl_dev_state *cxlds = &mds->cxlds;
155
156	mutex_lock(&mds->mbox_mutex);
157	if (cxl_mbox_background_complete(cxlds)) {
158		mds->security.poll_tmo_secs = 0;
159		if (mds->security.sanitize_node)
160			sysfs_notify_dirent(mds->security.sanitize_node);
161		mds->security.sanitize_active = false;
162
163		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
164	} else {
165		int timeout = mds->security.poll_tmo_secs + 10;
166
167		mds->security.poll_tmo_secs = min(15 * 60, timeout);
168		schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
169	}
170	mutex_unlock(&mds->mbox_mutex);
171}
172
173/**
174 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
175 * @mds: The memory device driver data
176 * @mbox_cmd: Command to send to the memory device.
177 *
178 * Context: Any context. Expects mbox_mutex to be held.
179 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
180 *         Caller should check the return code in @mbox_cmd to make sure it
181 *         succeeded.
182 *
183 * This is a generic form of the CXL mailbox send command thus only using the
184 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
185 * devices, and perhaps other types of CXL devices may have further information
186 * available upon error conditions. Driver facilities wishing to send mailbox
187 * commands should use the wrapper command.
188 *
189 * The CXL spec allows for up to two mailboxes. The intention is for the primary
190 * mailbox to be OS controlled and the secondary mailbox to be used by system
191 * firmware. This allows the OS and firmware to communicate with the device and
192 * not need to coordinate with each other. The driver only uses the primary
193 * mailbox.
194 */
195static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds,
196				   struct cxl_mbox_cmd *mbox_cmd)
197{
198	struct cxl_dev_state *cxlds = &mds->cxlds;
199	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
200	struct device *dev = cxlds->dev;
201	u64 cmd_reg, status_reg;
202	size_t out_len;
203	int rc;
204
205	lockdep_assert_held(&mds->mbox_mutex);
206
207	/*
208	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
209	 *   1. Caller reads MB Control Register to verify doorbell is clear
210	 *   2. Caller writes Command Register
211	 *   3. Caller writes Command Payload Registers if input payload is non-empty
212	 *   4. Caller writes MB Control Register to set doorbell
213	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
214	 *   6. Caller reads MB Status Register to fetch Return code
215	 *   7. If command successful, Caller reads Command Register to get Payload Length
216	 *   8. If output payload is non-empty, host reads Command Payload Registers
217	 *
218	 * Hardware is free to do whatever it wants before the doorbell is rung,
219	 * and isn't allowed to change anything after it clears the doorbell. As
220	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
221	 * also happen in any order (though some orders might not make sense).
222	 */
223
224	/* #1 */
225	if (cxl_doorbell_busy(cxlds)) {
226		u64 md_status =
227			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
228
229		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
230			    "mailbox queue busy");
231		return -EBUSY;
232	}
233
234	/*
235	 * With sanitize polling, hardware might be done and the poller still
 236	 * not be in sync. Ensure no new command comes in until it catches up.
 237	 * Keep the hardware semantics and only allow device health status.
238	 */
239	if (mds->security.poll_tmo_secs > 0) {
240		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
241			return -EBUSY;
242	}
243
244	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
245			     mbox_cmd->opcode);
246	if (mbox_cmd->size_in) {
247		if (WARN_ON(!mbox_cmd->payload_in))
248			return -EINVAL;
249
250		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
251				      mbox_cmd->size_in);
252		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
253	}
254
255	/* #2, #3 */
256	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
257
258	/* #4 */
259	dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
260	writel(CXLDEV_MBOX_CTRL_DOORBELL,
261	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
262
263	/* #5 */
264	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
265	if (rc == -ETIMEDOUT) {
266		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
267
268		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
269		return rc;
270	}
271
272	/* #6 */
273	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
274	mbox_cmd->return_code =
275		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
276
277	/*
278	 * Handle the background command in a synchronous manner.
279	 *
280	 * All other mailbox commands will serialize/queue on the mbox_mutex,
281	 * which we currently hold. Furthermore this also guarantees that
282	 * cxl_mbox_background_complete() checks are safe amongst each other,
283	 * in that no new bg operation can occur in between.
284	 *
285	 * Background operations are timesliced in accordance with the nature
286	 * of the command. In the event of timeout, the mailbox state is
287	 * indeterminate until the next successful command submission and the
288	 * driver can get back in sync with the hardware state.
289	 */
290	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
291		u64 bg_status_reg;
292		int i, timeout;
293
294		/*
295		 * Sanitization is a special case which monopolizes the device
296		 * and cannot be timesliced. Handle asynchronously instead,
297		 * and allow userspace to poll(2) for completion.
298		 */
299		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
300			if (mds->security.sanitize_active)
301				return -EBUSY;
302
303			/* give first timeout a second */
304			timeout = 1;
305			mds->security.poll_tmo_secs = timeout;
306			mds->security.sanitize_active = true;
307			schedule_delayed_work(&mds->security.poll_dwork,
308					      timeout * HZ);
309			dev_dbg(dev, "Sanitization operation started\n");
310			goto success;
311		}
312
313		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
314			mbox_cmd->opcode);
315
316		timeout = mbox_cmd->poll_interval_ms;
317		for (i = 0; i < mbox_cmd->poll_count; i++) {
318			if (rcuwait_wait_event_timeout(&mds->mbox_wait,
319				       cxl_mbox_background_complete(cxlds),
320				       TASK_UNINTERRUPTIBLE,
321				       msecs_to_jiffies(timeout)) > 0)
322				break;
323		}
324
325		if (!cxl_mbox_background_complete(cxlds)) {
326			dev_err(dev, "timeout waiting for background (%d ms)\n",
327				timeout * mbox_cmd->poll_count);
328			return -ETIMEDOUT;
329		}
330
331		bg_status_reg = readq(cxlds->regs.mbox +
332				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
333		mbox_cmd->return_code =
334			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
335				  bg_status_reg);
336		dev_dbg(dev,
337			"Mailbox background operation (0x%04x) completed\n",
338			mbox_cmd->opcode);
339	}
340
341	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
342		dev_dbg(dev, "Mailbox operation had an error: %s\n",
343			cxl_mbox_cmd_rc2str(mbox_cmd));
344		return 0; /* completed but caller must check return_code */
345	}
346
347success:
348	/* #7 */
349	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
350	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
351
352	/* #8 */
353	if (out_len && mbox_cmd->payload_out) {
354		/*
355		 * Sanitize the copy. If hardware misbehaves, out_len per the
356		 * spec can actually be greater than the max allowed size (21
357		 * bits available but spec defined 1M max). The caller also may
358		 * have requested less data than the hardware supplied even
359		 * within spec.
360		 */
361		size_t n;
362
363		n = min3(mbox_cmd->size_out, mds->payload_size, out_len);
364		memcpy_fromio(mbox_cmd->payload_out, payload, n);
365		mbox_cmd->size_out = n;
366	} else {
367		mbox_cmd->size_out = 0;
368	}
369
370	return 0;
371}
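/*
 * Minimal sketch (added for illustration, not in the original file): driver
 * facilities normally reach this path through the cxl_internal_send_cmd()
 * wrapper, as the event-policy helpers later in this file do, rather than
 * calling __cxl_pci_mbox_send_cmd() directly. With health_info being a
 * caller-provided buffer:
 *
 *	struct cxl_mbox_cmd mbox_cmd = {
 *		.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
 *		.payload_out = &health_info,
 *		.size_out = sizeof(health_info),
 *	};
 *	int rc = cxl_internal_send_cmd(mds, &mbox_cmd);
 */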
372
373static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
374			     struct cxl_mbox_cmd *cmd)
375{
376	int rc;
377
378	mutex_lock_io(&mds->mbox_mutex);
379	rc = __cxl_pci_mbox_send_cmd(mds, cmd);
380	mutex_unlock(&mds->mbox_mutex);
381
382	return rc;
383}
384
385static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
386{
387	struct cxl_dev_state *cxlds = &mds->cxlds;
388	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
389	struct device *dev = cxlds->dev;
390	unsigned long timeout;
391	int irq, msgnum;
392	u64 md_status;
393	u32 ctrl;
394
395	timeout = jiffies + mbox_ready_timeout * HZ;
396	do {
397		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
398		if (md_status & CXLMDEV_MBOX_IF_READY)
399			break;
400		if (msleep_interruptible(100))
401			break;
402	} while (!time_after(jiffies, timeout));
403
404	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
405		cxl_err(dev, md_status, "timeout awaiting mailbox ready");
406		return -ETIMEDOUT;
407	}
408
409	/*
410	 * A command may be in flight from a previous driver instance,
411	 * think kexec, do one doorbell wait so that
412	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
413	 * source for future doorbell busy events.
414	 */
415	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
416		cxl_err(dev, md_status, "timeout awaiting mailbox idle");
417		return -ETIMEDOUT;
418	}
419
420	mds->mbox_send = cxl_pci_mbox_send;
421	mds->payload_size =
422		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
423
424	/*
425	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
426	 *
427	 * If the size is too small, mandatory commands will not work and so
428	 * there's no point in going forward. If the size is too large, there's
 429 * no harm in soft limiting it.
430	 */
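	/*
	 * Worked example (comment added for clarity): a Payload Size field of
	 * 8 encodes 1 << 8 = 256 bytes, the smallest size the check below
	 * accepts, while a field of 20 encodes 1 MiB, the spec maximum that
	 * the min_t() clamp enforces.
	 */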
431	mds->payload_size = min_t(size_t, mds->payload_size, SZ_1M);
432	if (mds->payload_size < 256) {
433		dev_err(dev, "Mailbox is too small (%zub)",
434			mds->payload_size);
435		return -ENXIO;
436	}
437
438	dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size);
439
440	rcuwait_init(&mds->mbox_wait);
441	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
442
443	/* background command interrupts are optional */
444	if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail)
445		return 0;
446
447	msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
448	irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
449	if (irq < 0)
450		return 0;
451
452	if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq))
453		return 0;
454
455	dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n");
456	/* enable background command mbox irq support */
457	ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
458	ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
459	writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
460
461	return 0;
462}
463
464/*
465 * Assume that any RCIEP that emits the CXL memory expander class code
466 * is an RCD
467 */
468static bool is_cxl_restricted(struct pci_dev *pdev)
469{
470	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
471}
472
473static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
474				  struct cxl_register_map *map)
475{
476	struct cxl_port *port;
477	struct cxl_dport *dport;
478	resource_size_t component_reg_phys;
479
480	*map = (struct cxl_register_map) {
481		.host = &pdev->dev,
482		.resource = CXL_RESOURCE_NONE,
483	};
484
485	port = cxl_pci_find_port(pdev, &dport);
486	if (!port)
487		return -EPROBE_DEFER;
488
489	component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport);
490
491	put_device(&port->dev);
492
493	if (component_reg_phys == CXL_RESOURCE_NONE)
494		return -ENXIO;
495
496	map->resource = component_reg_phys;
497	map->reg_type = CXL_REGLOC_RBI_COMPONENT;
498	map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE;
499
500	return 0;
501}
502
503static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
504			      struct cxl_register_map *map)
505{
506	int rc;
507
508	rc = cxl_find_regblock(pdev, type, map);
509
510	/*
511	 * If the Register Locator DVSEC does not exist, check if it
512	 * is an RCH and try to extract the Component Registers from
513	 * an RCRB.
514	 */
515	if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev))
516		rc = cxl_rcrb_get_comp_regs(pdev, map);
517
518	if (rc)
519		return rc;
520
521	return cxl_setup_regs(map);
522}
523
524static int cxl_pci_ras_unmask(struct pci_dev *pdev)
525{
526	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
527	void __iomem *addr;
528	u32 orig_val, val, mask;
529	u16 cap;
530	int rc;
531
532	if (!cxlds->regs.ras) {
533		dev_dbg(&pdev->dev, "No RAS registers.\n");
534		return 0;
535	}
536
537	/* BIOS has PCIe AER error control */
538	if (!pcie_aer_is_native(pdev))
539		return 0;
540
541	rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
542	if (rc)
543		return rc;
544
545	if (cap & PCI_EXP_DEVCTL_URRE) {
546		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
547		orig_val = readl(addr);
548
549		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK |
550		       CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
551		val = orig_val & ~mask;
552		writel(val, addr);
553		dev_dbg(&pdev->dev,
554			"Uncorrectable RAS Errors Mask: %#x -> %#x\n",
555			orig_val, val);
556	}
557
558	if (cap & PCI_EXP_DEVCTL_CERE) {
559		addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
560		orig_val = readl(addr);
561		val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
562		writel(val, addr);
563		dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
564			orig_val, val);
565	}
566
567	return 0;
568}
569
570static void free_event_buf(void *buf)
571{
572	kvfree(buf);
573}
574
575/*
576 * There is a single buffer for reading event logs from the mailbox.  All logs
577 * share this buffer protected by the mds->event_log_lock.
578 */
579static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
580{
581	struct cxl_get_event_payload *buf;
582
583	buf = kvmalloc(mds->payload_size, GFP_KERNEL);
584	if (!buf)
585		return -ENOMEM;
586	mds->event.buf = buf;
587
588	return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
589}
590
591static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
592{
593	int nvecs;
594
595	/*
596	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
597	 * not generate INTx messages if that function participates in
598	 * CXL.cache or CXL.mem.
599	 *
600	 * Additionally pci_alloc_irq_vectors() handles calling
 601	 * pci_free_irq_vectors() automatically despite not being a pcim_*
 602	 * helper.  See pci_setup_msi_context().
603	 */
604	nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
605				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
606	if (nvecs < 1) {
607		dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
608		return false;
609	}
610	return true;
611}
612
613static irqreturn_t cxl_event_thread(int irq, void *id)
614{
615	struct cxl_dev_id *dev_id = id;
616	struct cxl_dev_state *cxlds = dev_id->cxlds;
617	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
618	u32 status;
619
620	do {
621		/*
622		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
623		 * ignore the reserved upper 32 bits
624		 */
625		status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
626		/* Ignore logs unknown to the driver */
627		status &= CXLDEV_EVENT_STATUS_ALL;
628		if (!status)
629			break;
630		cxl_mem_get_event_records(mds, status);
631		cond_resched();
632	} while (status);
633
634	return IRQ_HANDLED;
635}
636
637static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
638{
639	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
640	int irq;
641
642	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
643		return -ENXIO;
644
645	irq =  pci_irq_vector(pdev,
646			      FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
647	if (irq < 0)
648		return irq;
649
650	return cxl_request_irq(cxlds, irq, cxl_event_thread);
651}
652
653static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
654				    struct cxl_event_interrupt_policy *policy)
655{
656	struct cxl_mbox_cmd mbox_cmd = {
657		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
658		.payload_out = policy,
659		.size_out = sizeof(*policy),
660	};
661	int rc;
662
663	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
664	if (rc < 0)
665		dev_err(mds->cxlds.dev,
666			"Failed to get event interrupt policy : %d", rc);
667
668	return rc;
669}
670
671static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
672				    struct cxl_event_interrupt_policy *policy)
673{
674	struct cxl_mbox_cmd mbox_cmd;
675	int rc;
676
677	*policy = (struct cxl_event_interrupt_policy) {
678		.info_settings = CXL_INT_MSI_MSIX,
679		.warn_settings = CXL_INT_MSI_MSIX,
680		.failure_settings = CXL_INT_MSI_MSIX,
681		.fatal_settings = CXL_INT_MSI_MSIX,
682	};
683
684	mbox_cmd = (struct cxl_mbox_cmd) {
685		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
686		.payload_in = policy,
687		.size_in = sizeof(*policy),
688	};
689
690	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
691	if (rc < 0) {
692		dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
693			rc);
694		return rc;
695	}
696
697	/* Retrieve final interrupt settings */
698	return cxl_event_get_int_policy(mds, policy);
699}
700
701static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
702{
703	struct cxl_dev_state *cxlds = &mds->cxlds;
704	struct cxl_event_interrupt_policy policy;
705	int rc;
706
707	rc = cxl_event_config_msgnums(mds, &policy);
708	if (rc)
709		return rc;
710
711	rc = cxl_event_req_irq(cxlds, policy.info_settings);
712	if (rc) {
713		dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
714		return rc;
715	}
716
717	rc = cxl_event_req_irq(cxlds, policy.warn_settings);
718	if (rc) {
719		dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
720		return rc;
721	}
722
723	rc = cxl_event_req_irq(cxlds, policy.failure_settings);
724	if (rc) {
725		dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
726		return rc;
727	}
728
729	rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
730	if (rc) {
731		dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
732		return rc;
733	}
734
735	return 0;
736}
737
738static bool cxl_event_int_is_fw(u8 setting)
739{
740	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);
741
742	return mode == CXL_INT_FW;
743}
744
745static int cxl_event_config(struct pci_host_bridge *host_bridge,
746			    struct cxl_memdev_state *mds, bool irq_avail)
747{
748	struct cxl_event_interrupt_policy policy;
749	int rc;
750
751	/*
752	 * When BIOS maintains CXL error reporting control, it will process
753	 * event records.  Only one agent can do so.
754	 */
755	if (!host_bridge->native_cxl_error)
756		return 0;
757
758	if (!irq_avail) {
759		dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n");
760		return 0;
761	}
762
763	rc = cxl_mem_alloc_event_buf(mds);
764	if (rc)
765		return rc;
766
767	rc = cxl_event_get_int_policy(mds, &policy);
768	if (rc)
769		return rc;
770
771	if (cxl_event_int_is_fw(policy.info_settings) ||
772	    cxl_event_int_is_fw(policy.warn_settings) ||
773	    cxl_event_int_is_fw(policy.failure_settings) ||
774	    cxl_event_int_is_fw(policy.fatal_settings)) {
775		dev_err(mds->cxlds.dev,
776			"FW still in control of Event Logs despite _OSC settings\n");
777		return -EBUSY;
778	}
779
780	rc = cxl_event_irqsetup(mds);
781	if (rc)
782		return rc;
783
784	cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
785
786	return 0;
787}
788
789static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
790{
791	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
792	struct cxl_memdev_state *mds;
793	struct cxl_dev_state *cxlds;
794	struct cxl_register_map map;
795	struct cxl_memdev *cxlmd;
796	int i, rc, pmu_count;
797	bool irq_avail;
798
799	/*
800	 * Double check the anonymous union trickery in struct cxl_regs
801	 * FIXME switch to struct_group()
802	 */
803	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
804		     offsetof(struct cxl_regs, device_regs.memdev));
805
806	rc = pcim_enable_device(pdev);
807	if (rc)
808		return rc;
809	pci_set_master(pdev);
810
811	mds = cxl_memdev_state_create(&pdev->dev);
812	if (IS_ERR(mds))
813		return PTR_ERR(mds);
814	cxlds = &mds->cxlds;
815	pci_set_drvdata(pdev, cxlds);
816
817	cxlds->rcd = is_cxl_restricted(pdev);
818	cxlds->serial = pci_get_dsn(pdev);
819	cxlds->cxl_dvsec = pci_find_dvsec_capability(
820		pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
821	if (!cxlds->cxl_dvsec)
822		dev_warn(&pdev->dev,
823			 "Device DVSEC not present, skip CXL.mem init\n");
824
825	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
826	if (rc)
827		return rc;
828
829	rc = cxl_map_device_regs(&map, &cxlds->regs.device_regs);
830	if (rc)
831		return rc;
832
833	/*
834	 * If the component registers can't be found, the cxl_pci driver may
835	 * still be useful for management functions so don't return an error.
836	 */
837	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
838				&cxlds->reg_map);
839	if (rc)
840		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
841	else if (!cxlds->reg_map.component_map.ras.valid)
842		dev_dbg(&pdev->dev, "RAS registers not found\n");
843
844	rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component,
845				    BIT(CXL_CM_CAP_CAP_ID_RAS));
846	if (rc)
847		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
848
849	rc = cxl_await_media_ready(cxlds);
850	if (rc == 0)
851		cxlds->media_ready = true;
852	else
853		dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
854
855	irq_avail = cxl_alloc_irq_vectors(pdev);
856
857	rc = cxl_pci_setup_mailbox(mds, irq_avail);
858	if (rc)
859		return rc;
860
861	rc = cxl_enumerate_cmds(mds);
862	if (rc)
863		return rc;
864
865	rc = cxl_set_timestamp(mds);
866	if (rc)
867		return rc;
868
869	rc = cxl_poison_state_init(mds);
870	if (rc)
871		return rc;
872
873	rc = cxl_dev_state_identify(mds);
874	if (rc)
875		return rc;
876
877	rc = cxl_mem_create_range_info(mds);
878	if (rc)
879		return rc;
880
881	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
882	if (IS_ERR(cxlmd))
883		return PTR_ERR(cxlmd);
884
885	rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
886	if (rc)
887		return rc;
888
889	rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
890	if (rc)
891		return rc;
892
893	pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
894	for (i = 0; i < pmu_count; i++) {
895		struct cxl_pmu_regs pmu_regs;
896
897		rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i);
898		if (rc) {
899			dev_dbg(&pdev->dev, "Could not find PMU regblock\n");
900			break;
901		}
902
903		rc = cxl_map_pmu_regs(&map, &pmu_regs);
904		if (rc) {
905			dev_dbg(&pdev->dev, "Could not map PMU regs\n");
906			break;
907		}
908
909		rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV);
910		if (rc) {
911			dev_dbg(&pdev->dev, "Could not add PMU instance\n");
912			break;
913		}
914	}
915
916	rc = cxl_event_config(host_bridge, mds, irq_avail);
917	if (rc)
918		return rc;
919
920	rc = cxl_pci_ras_unmask(pdev);
921	if (rc)
922		dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
923
924	pci_save_state(pdev);
925
926	return rc;
927}
928
929static const struct pci_device_id cxl_mem_pci_tbl[] = {
930	/* PCI class code for CXL.mem Type-3 Devices */
931	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
932	{ /* terminate list */ },
933};
934MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
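/*
 * Note (added for clarity): PCI_CLASS_MEMORY_CXL (0x0502) shifted left by 8
 * and OR-ed with CXL_MEMORY_PROGIF (0x10) gives class code 0x050210, the CXL
 * memory device (Type 3) class, and the ~0 mask requires an exact match.
 */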
935
936static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
937{
938	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
939	struct cxl_memdev *cxlmd = cxlds->cxlmd;
940	struct device *dev = &cxlmd->dev;
941
942	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
943		 dev_name(dev));
944	pci_restore_state(pdev);
945	if (device_attach(dev) <= 0)
946		return PCI_ERS_RESULT_DISCONNECT;
947	return PCI_ERS_RESULT_RECOVERED;
948}
949
950static void cxl_error_resume(struct pci_dev *pdev)
951{
952	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
953	struct cxl_memdev *cxlmd = cxlds->cxlmd;
954	struct device *dev = &cxlmd->dev;
955
956	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
957		 dev->driver ? "successful" : "failed");
958}
959
960static const struct pci_error_handlers cxl_error_handlers = {
961	.error_detected	= cxl_error_detected,
962	.slot_reset	= cxl_slot_reset,
963	.resume		= cxl_error_resume,
964	.cor_error_detected	= cxl_cor_error_detected,
965};
966
967static struct pci_driver cxl_pci_driver = {
968	.name			= KBUILD_MODNAME,
969	.id_table		= cxl_mem_pci_tbl,
970	.probe			= cxl_pci_probe,
971	.err_handler		= &cxl_error_handlers,
972	.driver	= {
973		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
974	},
975};
976
977module_pci_driver(cxl_pci_driver);
978MODULE_LICENSE("GPL v2");
979MODULE_IMPORT_NS(CXL);
v5.14.15
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
   3#include <uapi/linux/cxl_mem.h>
   4#include <linux/security.h>
   5#include <linux/debugfs.h>
   6#include <linux/module.h>
   7#include <linux/sizes.h>
   8#include <linux/mutex.h>
   9#include <linux/list.h>
  10#include <linux/cdev.h>
  11#include <linux/idr.h>
  12#include <linux/pci.h>
  13#include <linux/io.h>
  14#include <linux/io-64-nonatomic-lo-hi.h>
  15#include "cxlmem.h"
  16#include "pci.h"
  17#include "cxl.h"
  18
  19/**
  20 * DOC: cxl pci
  21 *
  22 * This implements the PCI exclusive functionality for a CXL device as it is
  23 * defined by the Compute Express Link specification. CXL devices may surface
  24 * certain functionality even if it isn't CXL enabled.
  25 *
  26 * The driver has several responsibilities, mainly:
  27 *  - Create the memX device and register on the CXL bus.
  28 *  - Enumerate device's register interface and map them.
  29 *  - Probe the device attributes to establish a sysfs interface.
  30 *  - Provide an IOCTL interface to userspace to communicate with the device for
  31 *    things like firmware update.
  32 */
  33
  34#define cxl_doorbell_busy(cxlm)                                                \
  35	(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
  36	 CXLDEV_MBOX_CTRL_DOORBELL)
  37
  38/* CXL 2.0 - 8.2.8.4 */
  39#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
  40
  41enum opcode {
  42	CXL_MBOX_OP_INVALID		= 0x0000,
  43	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
  44	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
  45	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
  46	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
  47	CXL_MBOX_OP_GET_LOG		= 0x0401,
  48	CXL_MBOX_OP_IDENTIFY		= 0x4000,
  49	CXL_MBOX_OP_GET_PARTITION_INFO	= 0x4100,
  50	CXL_MBOX_OP_SET_PARTITION_INFO	= 0x4101,
  51	CXL_MBOX_OP_GET_LSA		= 0x4102,
  52	CXL_MBOX_OP_SET_LSA		= 0x4103,
  53	CXL_MBOX_OP_GET_HEALTH_INFO	= 0x4200,
  54	CXL_MBOX_OP_GET_ALERT_CONFIG	= 0x4201,
  55	CXL_MBOX_OP_SET_ALERT_CONFIG	= 0x4202,
  56	CXL_MBOX_OP_GET_SHUTDOWN_STATE	= 0x4203,
  57	CXL_MBOX_OP_SET_SHUTDOWN_STATE	= 0x4204,
  58	CXL_MBOX_OP_GET_POISON		= 0x4300,
  59	CXL_MBOX_OP_INJECT_POISON	= 0x4301,
  60	CXL_MBOX_OP_CLEAR_POISON	= 0x4302,
  61	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
  62	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
  63	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
  64	CXL_MBOX_OP_MAX			= 0x10000
  65};
  66
  67/**
  68 * struct mbox_cmd - A command to be submitted to hardware.
  69 * @opcode: (input) The command set and command submitted to hardware.
  70 * @payload_in: (input) Pointer to the input payload.
  71 * @payload_out: (output) Pointer to the output payload. Must be allocated by
  72 *		 the caller.
  73 * @size_in: (input) Number of bytes to load from @payload_in.
  74 * @size_out: (input) Max number of bytes loaded into @payload_out.
  75 *            (output) Number of bytes generated by the device. For fixed size
  76 *            outputs commands this is always expected to be deterministic. For
  77 *            variable sized output commands, it tells the exact number of bytes
  78 *            written.
  79 * @return_code: (output) Error code returned from hardware.
  80 *
  81 * This is the primary mechanism used to send commands to the hardware.
  82 * All the fields except @payload_* correspond exactly to the fields described
  83 * in the Command Register section of CXL 2.0 8.2.8.4.5. @payload_in and
  84 * @payload_out are written to, and read from the Command Payload Registers
  85 * defined in CXL 2.0 8.2.8.4.8.
  86 */
  87struct mbox_cmd {
  88	u16 opcode;
  89	void *payload_in;
  90	void *payload_out;
  91	size_t size_in;
  92	size_t size_out;
  93	u16 return_code;
  94#define CXL_MBOX_SUCCESS 0
  95};
  96
  97static int cxl_mem_major;
  98static DEFINE_IDA(cxl_memdev_ida);
  99static DECLARE_RWSEM(cxl_memdev_rwsem);
 100static struct dentry *cxl_debugfs;
 101static bool cxl_raw_allow_all;
 102
 103enum {
 104	CEL_UUID,
 105	VENDOR_DEBUG_UUID,
 106};
 107
 108/* See CXL 2.0 Table 170. Get Log Input Payload */
 109static const uuid_t log_uuid[] = {
 110	[CEL_UUID] = UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96,
 111			       0xb1, 0x62, 0x3b, 0x3f, 0x17),
 112	[VENDOR_DEBUG_UUID] = UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f,
 113					0xd6, 0x07, 0x19, 0x40, 0x3d, 0x86),
 114};
 115
 116/**
 117 * struct cxl_mem_command - Driver representation of a memory device command
 118 * @info: Command information as it exists for the UAPI
 119 * @opcode: The actual bits used for the mailbox protocol
 120 * @flags: Set of flags affecting driver behavior.
 121 *
 122 *  * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
 123 *    will be enabled by the driver regardless of what hardware may have
 124 *    advertised.
 125 *
 126 * The cxl_mem_command is the driver's internal representation of commands that
 127 * are supported by the driver. Some of these commands may not be supported by
 128 * the hardware. The driver will use @info to validate the fields passed in by
 129 * the user then submit the @opcode to the hardware.
 130 *
 131 * See struct cxl_command_info.
 132 */
 133struct cxl_mem_command {
 134	struct cxl_command_info info;
 135	enum opcode opcode;
 136	u32 flags;
 137#define CXL_CMD_FLAG_NONE 0
 138#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
 139};
 140
 141#define CXL_CMD(_id, sin, sout, _flags)                                        \
 142	[CXL_MEM_COMMAND_ID_##_id] = {                                         \
 143	.info =	{                                                              \
 144			.id = CXL_MEM_COMMAND_ID_##_id,                        \
 145			.size_in = sin,                                        \
 146			.size_out = sout,                                      \
 147		},                                                             \
 148	.opcode = CXL_MBOX_OP_##_id,                                           \
 149	.flags = _flags,                                                       \
 150	}
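/*
 * Expansion example (comment added for clarity):
 * CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE) becomes
 *
 *	[CXL_MEM_COMMAND_ID_IDENTIFY] = {
 *		.info = {
 *			.id = CXL_MEM_COMMAND_ID_IDENTIFY,
 *			.size_in = 0,
 *			.size_out = 0x43,
 *		},
 *		.opcode = CXL_MBOX_OP_IDENTIFY,
 *		.flags = CXL_CMD_FLAG_FORCE_ENABLE,
 *	}
 */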
 151
 152/*
 153 * This table defines the supported mailbox commands for the driver. This table
 154 * is made up of a UAPI structure. Non-negative values as parameters in the
 155 * table will be validated against the user's input. For example, if size_in is
 156 * 0, and the user passed in 1, it is an error.
 157 */
 158static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
 159	CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
 160#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
 161	CXL_CMD(RAW, ~0, ~0, 0),
 162#endif
 163	CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
 164	CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
 165	CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
 166	CXL_CMD(GET_LSA, 0x8, ~0, 0),
 167	CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
 168	CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
 169	CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
 170	CXL_CMD(SET_LSA, ~0, 0, 0),
 171	CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
 172	CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
 173	CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
 174	CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
 175	CXL_CMD(GET_POISON, 0x10, ~0, 0),
 176	CXL_CMD(INJECT_POISON, 0x8, 0, 0),
 177	CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
 178	CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
 179	CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
 180	CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
 181};
 182
 183/*
 184 * Commands that RAW doesn't permit. The rationale for each:
 185 *
 186 * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
 187 * coordination of transaction timeout values at the root bridge level.
 188 *
 189 * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
 190 * and needs to be coordinated with HDM updates.
 191 *
 192 * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
 193 * driver and any writes from userspace invalidates those contents.
 194 *
 195 * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
 196 * to the device after it is marked clean, userspace can not make that
 197 * assertion.
 198 *
 199 * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
 200 * is kept up to date with patrol notifications and error management.
 201 */
 202static u16 cxl_disabled_raw_commands[] = {
 203	CXL_MBOX_OP_ACTIVATE_FW,
 204	CXL_MBOX_OP_SET_PARTITION_INFO,
 205	CXL_MBOX_OP_SET_LSA,
 206	CXL_MBOX_OP_SET_SHUTDOWN_STATE,
 207	CXL_MBOX_OP_SCAN_MEDIA,
 208	CXL_MBOX_OP_GET_SCAN_MEDIA,
 209};
 210
 211/*
 212 * Command sets that RAW doesn't permit. All opcodes in this set are
 213 * disabled because they pass plain text security payloads over the
 214 * user/kernel boundary. This functionality is intended to be wrapped
 215 * behind the keys ABI which allows for encrypted payloads in the UAPI
 216 */
 217static u8 security_command_sets[] = {
 218	0x44, /* Sanitize */
 219	0x45, /* Persistent Memory Data-at-rest Security */
 220	0x46, /* Security Passthrough */
 221};
 222
 223#define cxl_for_each_cmd(cmd)                                                  \
 224	for ((cmd) = &mem_commands[0];                                         \
 225	     ((cmd) - mem_commands) < ARRAY_SIZE(mem_commands); (cmd)++)
 226
 227#define cxl_cmd_count ARRAY_SIZE(mem_commands)
 228
 229static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
 230{
 231	const unsigned long start = jiffies;
 232	unsigned long end = start;
 233
 234	while (cxl_doorbell_busy(cxlm)) {
 235		end = jiffies;
 236
 237		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
 238			/* Check again in case preempted before timeout test */
 239			if (!cxl_doorbell_busy(cxlm))
 240				break;
 241			return -ETIMEDOUT;
 242		}
 243		cpu_relax();
 244	}
 245
 246	dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
 247		jiffies_to_msecs(end) - jiffies_to_msecs(start));
 248	return 0;
 249}
 250
 251static bool cxl_is_security_command(u16 opcode)
 252{
 253	int i;
 254
 255	for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
 256		if (security_command_sets[i] == (opcode >> 8))
 257			return true;
 258	return false;
 259}
 260
 261static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
 262				 struct mbox_cmd *mbox_cmd)
 263{
 264	struct device *dev = &cxlm->pdev->dev;
 265
 266	dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
 267		mbox_cmd->opcode, mbox_cmd->size_in);
 268}
 269
 270/**
 271 * __cxl_mem_mbox_send_cmd() - Execute a mailbox command
 272 * @cxlm: The CXL memory device to communicate with.
 273 * @mbox_cmd: Command to send to the memory device.
 274 *
 275 * Context: Any context. Expects mbox_mutex to be held.
 276 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 277 *         Caller should check the return code in @mbox_cmd to make sure it
 278 *         succeeded.
 279 *
 280 * This is a generic form of the CXL mailbox send command thus only using the
 281 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 282 * devices, and perhaps other types of CXL devices may have further information
 283 * available upon error conditions. Driver facilities wishing to send mailbox
 284 * commands should use the wrapper command.
 285 *
 286 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 287 * mailbox to be OS controlled and the secondary mailbox to be used by system
 288 * firmware. This allows the OS and firmware to communicate with the device and
 289 * not need to coordinate with each other. The driver only uses the primary
 290 * mailbox.
 291 */
 292static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
 293				   struct mbox_cmd *mbox_cmd)
 294{
 295	void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
 296	u64 cmd_reg, status_reg;
 297	size_t out_len;
 298	int rc;
 299
 300	lockdep_assert_held(&cxlm->mbox_mutex);
 301
 302	/*
 303	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
 304	 *   1. Caller reads MB Control Register to verify doorbell is clear
 305	 *   2. Caller writes Command Register
 306	 *   3. Caller writes Command Payload Registers if input payload is non-empty
 307	 *   4. Caller writes MB Control Register to set doorbell
 308	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
 309	 *   6. Caller reads MB Status Register to fetch Return code
 310	 *   7. If command successful, Caller reads Command Register to get Payload Length
 311	 *   8. If output payload is non-empty, host reads Command Payload Registers
 312	 *
 313	 * Hardware is free to do whatever it wants before the doorbell is rung,
 314	 * and isn't allowed to change anything after it clears the doorbell. As
 315	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
 316	 * also happen in any order (though some orders might not make sense).
 317	 */
 318
 319	/* #1 */
 320	if (cxl_doorbell_busy(cxlm)) {
 321		dev_err_ratelimited(&cxlm->pdev->dev,
 322				    "Mailbox re-busy after acquiring\n");
 323		return -EBUSY;
 324	}
 325
 326	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
 327			     mbox_cmd->opcode);
 328	if (mbox_cmd->size_in) {
 329		if (WARN_ON(!mbox_cmd->payload_in))
 330			return -EINVAL;
 331
 332		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
 333				      mbox_cmd->size_in);
 334		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
 335	}
 336
 337	/* #2, #3 */
 338	writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 339
 340	/* #4 */
 341	dev_dbg(&cxlm->pdev->dev, "Sending command\n");
 342	writel(CXLDEV_MBOX_CTRL_DOORBELL,
 343	       cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
 344
 345	/* #5 */
 346	rc = cxl_mem_wait_for_doorbell(cxlm);
 347	if (rc == -ETIMEDOUT) {
 348		cxl_mem_mbox_timeout(cxlm, mbox_cmd);
 349		return rc;
 350	}
 351
 352	/* #6 */
 353	status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
 354	mbox_cmd->return_code =
 355		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
 356
 357	if (mbox_cmd->return_code != 0) {
 358		dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
 359		return 0;
 360	}
 361
 362	/* #7 */
 363	cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 364	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
 365
 366	/* #8 */
 367	if (out_len && mbox_cmd->payload_out) {
 368		/*
 369		 * Sanitize the copy. If hardware misbehaves, out_len per the
 370		 * spec can actually be greater than the max allowed size (21
 371		 * bits available but spec defined 1M max). The caller also may
 372		 * have requested less data than the hardware supplied even
 373		 * within spec.
 374		 */
 375		size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);
 376
 377		memcpy_fromio(mbox_cmd->payload_out, payload, n);
 378		mbox_cmd->size_out = n;
 379	} else {
 380		mbox_cmd->size_out = 0;
 381	}
 382
 383	return 0;
 384}
 385
 386/**
 387 * cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
 388 * @cxlm: The memory device to gain access to.
 389 *
 390 * Context: Any context. Takes the mbox_mutex.
 391 * Return: 0 if exclusive access was acquired.
 392 */
 393static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
 394{
 395	struct device *dev = &cxlm->pdev->dev;
 396	u64 md_status;
 397	int rc;
 398
 399	mutex_lock_io(&cxlm->mbox_mutex);
 400
 401	/*
 402	 * XXX: There is some amount of ambiguity in the 2.0 version of the spec
 403	 * around the mailbox interface ready (8.2.8.5.1.1).  The purpose of the
 404	 * bit is to allow firmware running on the device to notify the driver
 405	 * that it's ready to receive commands. It is unclear if the bit needs
 406	 * to be read for each mailbox transaction, i.e. whether the firmware can
 407	 * switch it on and off as needed. Second, there is no defined timeout
 408	 * for mailbox ready, like there is for the doorbell interface.
 409	 *
 410	 * Assumptions:
 411	 * 1. The firmware might toggle the Mailbox Interface Ready bit, check
 412	 *    it for every command.
 413	 *
 414	 * 2. If the doorbell is clear, the firmware should have first set the
 415	 *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
 416	 *    to be ready is sufficient.
 417	 */
 418	rc = cxl_mem_wait_for_doorbell(cxlm);
 419	if (rc) {
 420		dev_warn(dev, "Mailbox interface not ready\n");
 421		goto out;
 422	}
 423
 424	md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
 425	if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
 426		dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
 427		rc = -EBUSY;
 428		goto out;
 429	}
 430
 431	/*
 432	 * Hardware shouldn't allow a ready status but also have failure bits
 433	 * set. Spit out an error; this should be a bug report.
 434	 */
 435	rc = -EFAULT;
 436	if (md_status & CXLMDEV_DEV_FATAL) {
 437		dev_err(dev, "mbox: reported ready, but fatal\n");
 438		goto out;
 439	}
 440	if (md_status & CXLMDEV_FW_HALT) {
 441		dev_err(dev, "mbox: reported ready, but halted\n");
 442		goto out;
 443	}
 444	if (CXLMDEV_RESET_NEEDED(md_status)) {
 445		dev_err(dev, "mbox: reported ready, but reset needed\n");
 446		goto out;
 447	}
 448
 449	/* with lock held */
 450	return 0;
 451
 452out:
 453	mutex_unlock(&cxlm->mbox_mutex);
 454	return rc;
 455}
 456
 457/**
 458 * cxl_mem_mbox_put() - Release exclusive access to the mailbox.
 459 * @cxlm: The CXL memory device to communicate with.
 460 *
 461 * Context: Any context. Expects mbox_mutex to be held.
 462 */
 463static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
 464{
 465	mutex_unlock(&cxlm->mbox_mutex);
 466}
 467
 468/**
 469 * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
 470 * @cxlm: The CXL memory device to communicate with.
 471 * @cmd: The validated command.
 472 * @in_payload: Pointer to userspace's input payload.
 473 * @out_payload: Pointer to userspace's output payload.
 474 * @size_out: (Input) Max payload size to copy out.
 475 *            (Output) Payload size hardware generated.
 476 * @retval: Hardware generated return code from the operation.
 477 *
 478 * Return:
 479 *  * %0	- Mailbox transaction succeeded. This implies the mailbox
 480 *		  protocol completed successfully not that the operation itself
 481 *		  was successful.
 482 *  * %-ENOMEM  - Couldn't allocate a bounce buffer.
 483 *  * %-EFAULT	- Something happened with copy_to/from_user.
 484 *  * %-EINTR	- Mailbox acquisition interrupted.
 485 *  * %-EXXX	- Transaction level failures.
 486 *
 487 * Creates the appropriate mailbox command and dispatches it on behalf of a
 488 * userspace request. The input and output payloads are copied between
 489 * userspace.
 490 *
 491 * See cxl_send_cmd().
 492 */
 493static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
 494					const struct cxl_mem_command *cmd,
 495					u64 in_payload, u64 out_payload,
 496					s32 *size_out, u32 *retval)
 497{
 498	struct device *dev = &cxlm->pdev->dev;
 499	struct mbox_cmd mbox_cmd = {
 500		.opcode = cmd->opcode,
 501		.size_in = cmd->info.size_in,
 502		.size_out = cmd->info.size_out,
 503	};
 504	int rc;
 505
 506	if (cmd->info.size_out) {
 507		mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
 508		if (!mbox_cmd.payload_out)
 509			return -ENOMEM;
 510	}
 511
 512	if (cmd->info.size_in) {
 513		mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
 514						   cmd->info.size_in);
 515		if (IS_ERR(mbox_cmd.payload_in)) {
 516			kvfree(mbox_cmd.payload_out);
 517			return PTR_ERR(mbox_cmd.payload_in);
 518		}
 519	}
 520
 521	rc = cxl_mem_mbox_get(cxlm);
 522	if (rc)
 523		goto out;
 524
 525	dev_dbg(dev,
 526		"Submitting %s command for user\n"
 527		"\topcode: %x\n"
 528		"\tsize: %ub\n",
 529		cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
 530		cmd->info.size_in);
 531
 532	dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
 533		      "raw command path used\n");
 534
 535	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 536	cxl_mem_mbox_put(cxlm);
 537	if (rc)
 538		goto out;
 539
 540	/*
 541	 * @size_out contains the max size that's allowed to be written back out
 542	 * to userspace. If the hardware generated more output than this, the
 543	 * excess will be ignored.
 544	 */
 545	if (mbox_cmd.size_out) {
 546		dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
 547			      "Invalid return size\n");
 548		if (copy_to_user(u64_to_user_ptr(out_payload),
 549				 mbox_cmd.payload_out, mbox_cmd.size_out)) {
 550			rc = -EFAULT;
 551			goto out;
 552		}
 553	}
 554
 555	*size_out = mbox_cmd.size_out;
 556	*retval = mbox_cmd.return_code;
 557
 558out:
 559	kvfree(mbox_cmd.payload_in);
 560	kvfree(mbox_cmd.payload_out);
 561	return rc;
 562}
 563
 564static bool cxl_mem_raw_command_allowed(u16 opcode)
 565{
 566	int i;
 567
 568	if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
 569		return false;
 570
 571	if (security_locked_down(LOCKDOWN_PCI_ACCESS))
 572		return false;
 573
 574	if (cxl_raw_allow_all)
 575		return true;
 576
 577	if (cxl_is_security_command(opcode))
 578		return false;
 579
 580	for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
 581		if (cxl_disabled_raw_commands[i] == opcode)
 582			return false;
 583
 584	return true;
 585}
 586
 587/**
 588 * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
 589 * @cxlm: &struct cxl_mem device whose mailbox will be used.
 590 * @send_cmd: &struct cxl_send_command copied in from userspace.
 591 * @out_cmd: Sanitized and populated &struct cxl_mem_command.
 592 *
 593 * Return:
 594 *  * %0	- @out_cmd is ready to send.
 595 *  * %-ENOTTY	- Invalid command specified.
 596 *  * %-EINVAL	- Reserved fields or invalid values were used.
 597 *  * %-ENOMEM	- Input or output buffer wasn't sized properly.
 598 *  * %-EPERM	- Attempted to use a protected command.
 599 *
 600 * The result of this command is a fully validated command in @out_cmd that is
 601 * safe to send to the hardware.
 602 *
 603 * See handle_mailbox_cmd_from_user()
 604 */
 605static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
 606				      const struct cxl_send_command *send_cmd,
 607				      struct cxl_mem_command *out_cmd)
 608{
 609	const struct cxl_command_info *info;
 610	struct cxl_mem_command *c;
 611
 612	if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
 613		return -ENOTTY;
 614
 615	/*
 616	 * The user can never specify an input payload larger than what hardware
 617	 * supports, but output can be arbitrarily large (simply write out as
 618	 * much data as the hardware provides).
 619	 */
 620	if (send_cmd->in.size > cxlm->payload_size)
 621		return -EINVAL;
 622
 623	/*
 624	 * Checks are bypassed for raw commands but a WARN/taint will occur
 625	 * later in the callchain
 626	 */
 627	if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
 628		const struct cxl_mem_command temp = {
 629			.info = {
 630				.id = CXL_MEM_COMMAND_ID_RAW,
 631				.flags = 0,
 632				.size_in = send_cmd->in.size,
 633				.size_out = send_cmd->out.size,
 634			},
 635			.opcode = send_cmd->raw.opcode
 636		};
 637
 638		if (send_cmd->raw.rsvd)
 639			return -EINVAL;
 640
 641		/*
 642		 * Unlike supported commands, the output size of RAW commands
 643		 * gets passed along without further checking, so it must be
 644		 * validated here.
 645		 */
 646		if (send_cmd->out.size > cxlm->payload_size)
 647			return -EINVAL;
 648
 649		if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
 650			return -EPERM;
 651
 652		memcpy(out_cmd, &temp, sizeof(temp));
 653
 654		return 0;
 655	}
 656
 657	if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
 658		return -EINVAL;
 659
 660	if (send_cmd->rsvd)
 661		return -EINVAL;
 662
 663	if (send_cmd->in.rsvd || send_cmd->out.rsvd)
 664		return -EINVAL;
 665
 666	/* Convert user's command into the internal representation */
 667	c = &mem_commands[send_cmd->id];
 668	info = &c->info;
 669
 670	/* Check that the command is enabled for hardware */
 671	if (!test_bit(info->id, cxlm->enabled_cmds))
 672		return -ENOTTY;
 673
 674	/* Check the input buffer is the expected size */
 675	if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
 676		return -ENOMEM;
 677
 678	/* Check the output buffer is at least large enough */
 679	if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
 680		return -ENOMEM;
 681
 682	memcpy(out_cmd, c, sizeof(*c));
 683	out_cmd->info.size_in = send_cmd->in.size;
 684	/*
 685	 * XXX: out_cmd->info.size_out will be controlled by the driver, and the
 686	 * specified number of bytes @send_cmd->out.size will be copied back out
 687	 * to userspace.
 688	 */
 689
 690	return 0;
 691}
 692
 693static int cxl_query_cmd(struct cxl_memdev *cxlmd,
 694			 struct cxl_mem_query_commands __user *q)
 695{
 696	struct device *dev = &cxlmd->dev;
 697	struct cxl_mem_command *cmd;
 698	u32 n_commands;
 699	int j = 0;
 700
 701	dev_dbg(dev, "Query IOCTL\n");
 702
 703	if (get_user(n_commands, &q->n_commands))
 704		return -EFAULT;
 705
 706	/* returns the total number if 0 elements are requested. */
 707	if (n_commands == 0)
 708		return put_user(cxl_cmd_count, &q->n_commands);
 709
 710	/*
 711	 * otherwise, return min(n_commands, total commands) cxl_command_info
 712	 * structures.
 713	 */
 714	cxl_for_each_cmd(cmd) {
 715		const struct cxl_command_info *info = &cmd->info;
 716
 717		if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
 718			return -EFAULT;
 719
 720		if (j == n_commands)
 721			break;
 722	}
 723
 724	return 0;
 725}
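/*
 * Userspace sketch (assumption: uapi layout from include/uapi/linux/cxl_mem.h,
 * which is not shown here): command discovery is a two-step
 * CXL_MEM_QUERY_COMMANDS dance, first to learn the count, then to fetch the
 * cxl_command_info array.
 *
 *	struct cxl_mem_query_commands probe = { .n_commands = 0 };
 *	struct cxl_mem_query_commands *q;
 *	int fd = open("/dev/cxl/mem0", O_RDWR);
 *
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, &probe);   // fills probe.n_commands
 *	q = calloc(1, sizeof(*q) +
 *		      probe.n_commands * sizeof(q->commands[0]));
 *	q->n_commands = probe.n_commands;
 *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);        // fills q->commands[]
 */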
 726
 727static int cxl_send_cmd(struct cxl_memdev *cxlmd,
 728			struct cxl_send_command __user *s)
 729{
 730	struct cxl_mem *cxlm = cxlmd->cxlm;
 731	struct device *dev = &cxlmd->dev;
 732	struct cxl_send_command send;
 733	struct cxl_mem_command c;
 734	int rc;
 735
 736	dev_dbg(dev, "Send IOCTL\n");
 737
 738	if (copy_from_user(&send, s, sizeof(send)))
 739		return -EFAULT;
 740
 741	rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
 742	if (rc)
 743		return rc;
 744
 745	/* Prepare to handle a full payload for variable sized output */
 746	if (c.info.size_out < 0)
 747		c.info.size_out = cxlm->payload_size;
 748
 749	rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
 750					  send.out.payload, &send.out.size,
 751					  &send.retval);
 752	if (rc)
 753		return rc;
 754
 755	if (copy_to_user(s, &send, sizeof(send)))
 756		return -EFAULT;
 757
 758	return 0;
 759}
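/*
 * Userspace sketch (assumption: same uapi caveat as the query sketch above):
 * once a command id has been discovered via CXL_MEM_QUERY_COMMANDS,
 * CXL_MEM_SEND_COMMAND drives the mailbox through cxl_send_cmd(). The driver
 * updates out.size and retval on the way back.
 *
 *	struct cxl_send_command send = {
 *		.id = q->commands[n].id,
 *		.out.size = sizeof(buf),
 *		.out.payload = (__u64)(uintptr_t)buf,
 *	};
 *
 *	if (ioctl(fd, CXL_MEM_SEND_COMMAND, &send) == 0)
 *		consume(buf, send.out.size, send.retval); // hypothetical consumer
 */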
 760
 761static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
 762			       unsigned long arg)
 763{
 764	switch (cmd) {
 765	case CXL_MEM_QUERY_COMMANDS:
 766		return cxl_query_cmd(cxlmd, (void __user *)arg);
 767	case CXL_MEM_SEND_COMMAND:
 768		return cxl_send_cmd(cxlmd, (void __user *)arg);
 769	default:
 770		return -ENOTTY;
 771	}
 772}
 773
 774static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
 775			     unsigned long arg)
 776{
 777	struct cxl_memdev *cxlmd = file->private_data;
 778	int rc = -ENXIO;
 779
 780	down_read(&cxl_memdev_rwsem);
 781	if (cxlmd->cxlm)
 782		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
 783	up_read(&cxl_memdev_rwsem);
 784
 785	return rc;
 786}
 787
 788static int cxl_memdev_open(struct inode *inode, struct file *file)
 789{
 790	struct cxl_memdev *cxlmd =
 791		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
 792
 793	get_device(&cxlmd->dev);
 794	file->private_data = cxlmd;
 795
 796	return 0;
 797}
 798
 799static int cxl_memdev_release_file(struct inode *inode, struct file *file)
 800{
 801	struct cxl_memdev *cxlmd =
 802		container_of(inode->i_cdev, typeof(*cxlmd), cdev);
 803
 804	put_device(&cxlmd->dev);
 805
 806	return 0;
 807}
 808
 809static struct cxl_memdev *to_cxl_memdev(struct device *dev)
 810{
 811	return container_of(dev, struct cxl_memdev, dev);
 812}
 813
 814static void cxl_memdev_shutdown(struct device *dev)
 815{
 816	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 817
 818	down_write(&cxl_memdev_rwsem);
 819	cxlmd->cxlm = NULL;
 820	up_write(&cxl_memdev_rwsem);
 821}
 822
 823static const struct cdevm_file_operations cxl_memdev_fops = {
 824	.fops = {
 825		.owner = THIS_MODULE,
 826		.unlocked_ioctl = cxl_memdev_ioctl,
 827		.open = cxl_memdev_open,
 828		.release = cxl_memdev_release_file,
 829		.compat_ioctl = compat_ptr_ioctl,
 830		.llseek = noop_llseek,
 831	},
 832	.shutdown = cxl_memdev_shutdown,
 833};
 834
 835static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
 836{
 837	struct cxl_mem_command *c;
 838
 839	cxl_for_each_cmd(c)
 840		if (c->opcode == opcode)
 841			return c;
 842
 843	return NULL;
 844}
 845
 846/**
 847 * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
 848 * @cxlm: The CXL memory device to communicate with.
 849 * @opcode: Opcode for the mailbox command.
 850 * @in: The input payload for the mailbox command.
 851 * @in_size: The length of the input payload
 852 * @out: Caller allocated buffer for the output.
 853 * @out_size: Expected size of output.
 854 *
 855 * Context: Any context. Will acquire and release mbox_mutex.
 856 * Return:
 857 *  * %0	- Success, @out is populated with the command output.
 858 *  * %-E2BIG	- Payload is too large for hardware.
 859 *  * %-EBUSY	- Couldn't acquire exclusive mailbox access.
 860 *  * %-EFAULT	- Hardware error occurred.
 861 *  * %-ENXIO	- Command completed, but device reported an error.
 862 *  * %-EIO	- Unexpected output size.
 863 *
 864 * A mailbox command may execute successfully even though the device itself
 865 * reports an error. While this distinction can be useful for commands from
 866 * userspace, the kernel can only use the results when both succeed.
 867 *
 868 * See __cxl_mem_mbox_send_cmd()
 869 */
 870static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode,
 871				 void *in, size_t in_size,
 872				 void *out, size_t out_size)
 873{
 874	const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
 875	struct mbox_cmd mbox_cmd = {
 876		.opcode = opcode,
 877		.payload_in = in,
 878		.size_in = in_size,
 879		.size_out = out_size,
 880		.payload_out = out,
 881	};
 882	int rc;
 883
 884	if (out_size > cxlm->payload_size)
 885		return -E2BIG;
 886
 887	rc = cxl_mem_mbox_get(cxlm);
 888	if (rc)
 889		return rc;
 890
 891	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
 892	cxl_mem_mbox_put(cxlm);
 893	if (rc)
 894		return rc;
 895
 896	/* TODO: Map return code to proper kernel style errno */
 897	if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
 898		return -ENXIO;
 899
 900	/*
 901	 * Variable sized commands can't be validated and so it's up to the
 902	 * caller to do that if they wish.
 903	 */
 904	if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
 905		return -EIO;
 906
 907	return 0;
 908}
 909
 910static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
 911{
 912	const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
 913
 914	cxlm->payload_size =
 915		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
 916
 917	/*
 918	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
 919	 *
 920	 * If the size is too small, mandatory commands will not work and so
 921	 * there's no point in going forward. If the size is too large, there's
 922	 * no harm in soft limiting it.
 923	 */
 924	cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
 925	if (cxlm->payload_size < 256) {
 926		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
 927			cxlm->payload_size);
 928		return -ENXIO;
 929	}
 930
 931	dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
 932		cxlm->payload_size);
 933
 934	return 0;
 935}
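/*
 * Worked example: the Payload Size field is a power-of-two exponent, so a
 * field value of 8 yields 1 << 8 = 256 bytes (the minimum accepted above)
 * and a value of 20 yields 1 MiB, which is also the SZ_1M soft cap applied
 * via min_t() so a single mailbox payload never exceeds 1 MiB.
 */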
 936
 937static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev)
 938{
 939	struct device *dev = &pdev->dev;
 940	struct cxl_mem *cxlm;
 941
 942	cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
 943	if (!cxlm) {
 944		dev_err(dev, "No memory available\n");
 945		return ERR_PTR(-ENOMEM);
 946	}
 947
 948	mutex_init(&cxlm->mbox_mutex);
 949	cxlm->pdev = pdev;
 950	cxlm->enabled_cmds =
 951		devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count),
 952				   sizeof(unsigned long),
 953				   GFP_KERNEL | __GFP_ZERO);
 954	if (!cxlm->enabled_cmds) {
 955		dev_err(dev, "No memory available for bitmap\n");
 956		return ERR_PTR(-ENOMEM);
 957	}
 958
 959	return cxlm;
 960}
 961
 962static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm,
 963					  u8 bar, u64 offset)
 964{
 965	struct pci_dev *pdev = cxlm->pdev;
 966	struct device *dev = &pdev->dev;
 967	void __iomem *addr;
 968
 969	/* Basic sanity check that BAR is big enough */
 970	if (pci_resource_len(pdev, bar) < offset) {
 971		dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
 972			&pdev->resource[bar], (unsigned long long)offset);
 973		return IOMEM_ERR_PTR(-ENXIO);
 974	}
 975
 976	addr = pci_iomap(pdev, bar, 0);
 977	if (!addr) {
 978		dev_err(dev, "failed to map registers\n");
 979		return addr;
 980	}
 981
 982	dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %#llx\n",
 983		bar, offset);
 984
 985	return addr;
 986}
 987
 988static void cxl_mem_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base)
 989{
 990	pci_iounmap(cxlm->pdev, base);
 991}
 992
 993static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
 994{
 995	int pos;
 996
 997	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
 998	if (!pos)
 999		return 0;
1000
1001	while (pos) {
1002		u16 vendor, id;
1003
1004		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
1005		pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
1006		if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
1007			return pos;
1008
1009		pos = pci_find_next_ext_capability(pdev, pos,
1010						   PCI_EXT_CAP_ID_DVSEC);
1011	}
1012
1013	return 0;
1014}
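/*
 * Usage note: the only caller in this file is cxl_mem_setup_regs(), e.g.:
 *
 *	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
 *
 * A return of 0 means no DVSEC instance with the CXL vendor id and the
 * requested DVSEC id was found in the device's extended config space.
 */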
1015
1016static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
1017			  struct cxl_register_map *map)
1018{
1019	struct pci_dev *pdev = cxlm->pdev;
1020	struct device *dev = &pdev->dev;
1021	struct cxl_component_reg_map *comp_map;
1022	struct cxl_device_reg_map *dev_map;
1023
1024	switch (map->reg_type) {
1025	case CXL_REGLOC_RBI_COMPONENT:
1026		comp_map = &map->component_map;
1027		cxl_probe_component_regs(dev, base, comp_map);
1028		if (!comp_map->hdm_decoder.valid) {
1029			dev_err(dev, "HDM decoder registers not found\n");
1030			return -ENXIO;
1031		}
1032
1033		dev_dbg(dev, "Set up component registers\n");
1034		break;
1035	case CXL_REGLOC_RBI_MEMDEV:
1036		dev_map = &map->device_map;
1037		cxl_probe_device_regs(dev, base, dev_map);
1038		if (!dev_map->status.valid || !dev_map->mbox.valid ||
1039		    !dev_map->memdev.valid) {
1040			dev_err(dev, "registers not found: %s%s%s\n",
1041				!dev_map->status.valid ? "status " : "",
1042				!dev_map->mbox.valid ? "mbox " : "",
1043				!dev_map->memdev.valid ? "memdev " : "");
1044			return -ENXIO;
1045		}
1046
1047		dev_dbg(dev, "Probing device registers...\n");
1048		break;
1049	default:
1050		break;
1051	}
1052
1053	return 0;
1054}
1055
1056static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
1057{
1058	struct pci_dev *pdev = cxlm->pdev;
1059	struct device *dev = &pdev->dev;
1060
1061	switch (map->reg_type) {
1062	case CXL_REGLOC_RBI_COMPONENT:
1063		cxl_map_component_regs(pdev, &cxlm->regs.component, map);
1064		dev_dbg(dev, "Mapping component registers...\n");
1065		break;
1066	case CXL_REGLOC_RBI_MEMDEV:
1067		cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map);
1068		dev_dbg(dev, "Mapping device registers...\n");
1069		break;
1070	default:
1071		break;
1072	}
1073
1074	return 0;
1075}
1076
1077static void cxl_decode_register_block(u32 reg_lo, u32 reg_hi,
1078				      u8 *bar, u64 *offset, u8 *reg_type)
1079{
1080	*offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
1081	*bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
1082	*reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
1083}
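/*
 * Worked example (assuming the usual Register Locator layout behind the
 * CXL_REGLOC_* masks: BIR in the low bits of reg_lo, the Register Block
 * Identifier in the next byte, and a 64K-aligned offset in bits 31:16):
 *
 *	reg_lo = 0x00010302, reg_hi = 0
 *	  -> *bar      = 2
 *	  -> *reg_type = 3 (CXL_REGLOC_RBI_MEMDEV)
 *	  -> *offset   = 0x10000
 */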
1084
1085/**
1086 * cxl_mem_setup_regs() - Setup necessary MMIO.
1087 * @cxlm: The CXL memory device to communicate with.
1088 *
1089 * Return: 0 if all necessary registers mapped.
1090 *
1091 * A memory device is required by spec to implement a certain set of MMIO
1092 * regions. The purpose of this function is to enumerate and map those
1093 * registers.
1094 */
1095static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
1096{
1097	struct pci_dev *pdev = cxlm->pdev;
1098	struct device *dev = &pdev->dev;
1099	u32 regloc_size, regblocks;
1100	void __iomem *base;
1101	int regloc, i;
1102	struct cxl_register_map *map, *n;
1103	LIST_HEAD(register_maps);
1104	int ret = 0;
1105
1106	regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
1107	if (!regloc) {
1108		dev_err(dev, "register location dvsec not found\n");
1109		return -ENXIO;
1110	}
1111
1112	if (pci_request_mem_regions(pdev, pci_name(pdev)))
1113		return -ENODEV;
1114
1115	/* Get the size of the Register Locator DVSEC */
1116	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
1117	regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
1118
1119	regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
1120	regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
1121
1122	for (i = 0; i < regblocks; i++, regloc += 8) {
1123		u32 reg_lo, reg_hi;
1124		u8 reg_type;
1125		u64 offset;
1126		u8 bar;
1127
1128		map = kzalloc(sizeof(*map), GFP_KERNEL);
1129		if (!map) {
1130			ret = -ENOMEM;
1131			goto free_maps;
1132		}
1133
1134		list_add(&map->list, &register_maps);
1135
1136		pci_read_config_dword(pdev, regloc, &reg_lo);
1137		pci_read_config_dword(pdev, regloc + 4, &reg_hi);
1138
1139		cxl_decode_register_block(reg_lo, reg_hi, &bar, &offset,
1140					  &reg_type);
1141
1142		dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n",
1143			bar, offset, reg_type);
1144
1145		base = cxl_mem_map_regblock(cxlm, bar, offset);
1146		if (IS_ERR_OR_NULL(base)) {
1147			ret = base ? PTR_ERR(base) : -ENOMEM;
1148			goto free_maps;
1149		}
1150
1151		map->barno = bar;
1152		map->block_offset = offset;
1153		map->reg_type = reg_type;
1154
1155		ret = cxl_probe_regs(cxlm, base + offset, map);
1156
1157		/* Always unmap the regblock regardless of probe success */
1158		cxl_mem_unmap_regblock(cxlm, base);
1159
1160		if (ret)
1161			goto free_maps;
1162	}
1163
1164	pci_release_mem_regions(pdev);
1165
1166	list_for_each_entry(map, &register_maps, list) {
1167		ret = cxl_map_regs(cxlm, map);
1168		if (ret)
1169			goto free_maps;
1170	}
1171
1172free_maps:
1173	list_for_each_entry_safe(map, n, &register_maps, list) {
1174		list_del(&map->list);
1175		kfree(map);
1176	}
1177
1178	return ret;
1179}
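/*
 * Note: a CXL.mem class-code device is expected to advertise at least a
 * Component register block and a Device (memdev) register block. Each loop
 * iteration above temporarily maps one advertised block, probes it with
 * cxl_probe_regs(), and unmaps it again; the recorded cxl_register_map
 * entries are then handed to cxl_map_regs() for the long-lived mappings.
 */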
1180
1181static void cxl_memdev_release(struct device *dev)
1182{
1183	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1184
1185	ida_free(&cxl_memdev_ida, cxlmd->id);
1186	kfree(cxlmd);
1187}
1188
1189static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
1190				kgid_t *gid)
1191{
1192	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
1193}
1194
1195static ssize_t firmware_version_show(struct device *dev,
1196				     struct device_attribute *attr, char *buf)
1197{
1198	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1199	struct cxl_mem *cxlm = cxlmd->cxlm;
1200
1201	return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
1202}
1203static DEVICE_ATTR_RO(firmware_version);
1204
1205static ssize_t payload_max_show(struct device *dev,
1206				struct device_attribute *attr, char *buf)
1207{
1208	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1209	struct cxl_mem *cxlm = cxlmd->cxlm;
1210
1211	return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
1212}
1213static DEVICE_ATTR_RO(payload_max);
1214
1215static ssize_t label_storage_size_show(struct device *dev,
1216				struct device_attribute *attr, char *buf)
1217{
1218	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1219	struct cxl_mem *cxlm = cxlmd->cxlm;
1220
1221	return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
1222}
1223static DEVICE_ATTR_RO(label_storage_size);
1224
1225static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
1226			     char *buf)
1227{
1228	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1229	struct cxl_mem *cxlm = cxlmd->cxlm;
1230	unsigned long long len = range_len(&cxlm->ram_range);
1231
1232	return sysfs_emit(buf, "%#llx\n", len);
1233}
1234
1235static struct device_attribute dev_attr_ram_size =
1236	__ATTR(size, 0444, ram_size_show, NULL);
1237
1238static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
1239			      char *buf)
1240{
1241	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
1242	struct cxl_mem *cxlm = cxlmd->cxlm;
1243	unsigned long long len = range_len(&cxlm->pmem_range);
1244
1245	return sysfs_emit(buf, "%#llx\n", len);
1246}
1247
1248static struct device_attribute dev_attr_pmem_size =
1249	__ATTR(size, 0444, pmem_size_show, NULL);
1250
1251static struct attribute *cxl_memdev_attributes[] = {
1252	&dev_attr_firmware_version.attr,
1253	&dev_attr_payload_max.attr,
1254	&dev_attr_label_storage_size.attr,
1255	NULL,
1256};
1257
1258static struct attribute *cxl_memdev_pmem_attributes[] = {
1259	&dev_attr_pmem_size.attr,
1260	NULL,
1261};
1262
1263static struct attribute *cxl_memdev_ram_attributes[] = {
1264	&dev_attr_ram_size.attr,
1265	NULL,
1266};
1267
1268static struct attribute_group cxl_memdev_attribute_group = {
1269	.attrs = cxl_memdev_attributes,
1270};
1271
1272static struct attribute_group cxl_memdev_ram_attribute_group = {
1273	.name = "ram",
1274	.attrs = cxl_memdev_ram_attributes,
1275};
1276
1277static struct attribute_group cxl_memdev_pmem_attribute_group = {
1278	.name = "pmem",
1279	.attrs = cxl_memdev_pmem_attributes,
1280};
1281
1282static const struct attribute_group *cxl_memdev_attribute_groups[] = {
1283	&cxl_memdev_attribute_group,
1284	&cxl_memdev_ram_attribute_group,
1285	&cxl_memdev_pmem_attribute_group,
1286	NULL,
1287};
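/*
 * Resulting sysfs layout for a memdev (sketch, assuming the cxl bus is
 * registered under /sys/bus/cxl):
 *
 *	/sys/bus/cxl/devices/memX/firmware_version
 *	/sys/bus/cxl/devices/memX/payload_max
 *	/sys/bus/cxl/devices/memX/label_storage_size
 *	/sys/bus/cxl/devices/memX/ram/size
 *	/sys/bus/cxl/devices/memX/pmem/size
 */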
1288
1289static const struct device_type cxl_memdev_type = {
1290	.name = "cxl_memdev",
1291	.release = cxl_memdev_release,
1292	.devnode = cxl_memdev_devnode,
1293	.groups = cxl_memdev_attribute_groups,
1294};
1295
1296static void cxl_memdev_unregister(void *_cxlmd)
1297{
1298	struct cxl_memdev *cxlmd = _cxlmd;
1299	struct device *dev = &cxlmd->dev;
1300	struct cdev *cdev = &cxlmd->cdev;
1301	const struct cdevm_file_operations *cdevm_fops;
1302
1303	cdevm_fops = container_of(cdev->ops, typeof(*cdevm_fops), fops);
1304	cdevm_fops->shutdown(dev);
1305
1306	cdev_device_del(&cxlmd->cdev, dev);
1307	put_device(dev);
1308}
1309
1310static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
1311					   const struct file_operations *fops)
1312{
1313	struct pci_dev *pdev = cxlm->pdev;
1314	struct cxl_memdev *cxlmd;
1315	struct device *dev;
1316	struct cdev *cdev;
1317	int rc;
1318
1319	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
1320	if (!cxlmd)
1321		return ERR_PTR(-ENOMEM);
1322
1323	rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
1324	if (rc < 0)
1325		goto err;
1326	cxlmd->id = rc;
1327
1328	dev = &cxlmd->dev;
1329	device_initialize(dev);
1330	dev->parent = &pdev->dev;
1331	dev->bus = &cxl_bus_type;
1332	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
1333	dev->type = &cxl_memdev_type;
1334	device_set_pm_not_required(dev);
1335
1336	cdev = &cxlmd->cdev;
1337	cdev_init(cdev, fops);
1338	return cxlmd;
1339
1340err:
1341	kfree(cxlmd);
1342	return ERR_PTR(rc);
1343}
1344
1345static struct cxl_memdev *
1346devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
1347		    const struct cdevm_file_operations *cdevm_fops)
1348{
1349	struct cxl_memdev *cxlmd;
1350	struct device *dev;
1351	struct cdev *cdev;
1352	int rc;
1353
1354	cxlmd = cxl_memdev_alloc(cxlm, &cdevm_fops->fops);
1355	if (IS_ERR(cxlmd))
1356		return cxlmd;
1357
1358	dev = &cxlmd->dev;
1359	rc = dev_set_name(dev, "mem%d", cxlmd->id);
1360	if (rc)
1361		goto err;
1362
1363	/*
1364	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
1365	 * needed as this is ordered with cdev_add() publishing the device.
1366	 */
1367	cxlmd->cxlm = cxlm;
1368
1369	cdev = &cxlmd->cdev;
1370	rc = cdev_device_add(cdev, dev);
1371	if (rc)
1372		goto err;
1373
1374	rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
1375	if (rc)
1376		return ERR_PTR(rc);
1377	return cxlmd;
1378
1379err:
1380	/*
1381	 * The cdev was briefly live, shutdown any ioctl operations that
1382	 * saw that state.
1383	 */
1384	cdevm_fops->shutdown(dev);
1385	put_device(dev);
1386	return ERR_PTR(rc);
1387}
1388
1389static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
1390{
1391	u32 remaining = size;
1392	u32 offset = 0;
1393
1394	while (remaining) {
1395		u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
1396		struct cxl_mbox_get_log {
1397			uuid_t uuid;
1398			__le32 offset;
1399			__le32 length;
1400		} __packed log = {
1401			.uuid = *uuid,
1402			.offset = cpu_to_le32(offset),
1403			.length = cpu_to_le32(xfer_size)
1404		};
1405		int rc;
1406
1407		rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
1408					   sizeof(log), out, xfer_size);
1409		if (rc < 0)
1410			return rc;
1411
1412		out += xfer_size;
1413		remaining -= xfer_size;
1414		offset += xfer_size;
1415	}
1416
1417	return 0;
1418}
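/*
 * Worked example: with a 1 KiB mailbox payload (cxlm->payload_size) and a
 * 4 KiB log, the loop above issues four GET_LOG commands at offsets 0,
 * 1024, 2048 and 3072, each transferring 1024 bytes into @out.
 */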
1419
1420/**
1421 * cxl_walk_cel() - Walk through the Command Effects Log.
1422 * @cxlm: Device.
1423 * @size: Length of the Command Effects Log.
1424 * @cel: CEL
1425 *
1426 * Iterate over each entry in the CEL and determine if the driver supports the
1427 * command. If so, the command is enabled for the device and can be used later.
1428 */
1429static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
1430{
1431	struct cel_entry {
1432		__le16 opcode;
1433		__le16 effect;
1434	} __packed * cel_entry;
1435	const int cel_entries = size / sizeof(*cel_entry);
1436	int i;
1437
1438	cel_entry = (struct cel_entry *)cel;
1439
1440	for (i = 0; i < cel_entries; i++) {
1441		u16 opcode = le16_to_cpu(cel_entry[i].opcode);
1442		struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
1443
1444		if (!cmd) {
1445			dev_dbg(&cxlm->pdev->dev,
1446				"Opcode 0x%04x unsupported by driver", opcode);
1447			continue;
1448		}
1449
1450		set_bit(cmd->info.id, cxlm->enabled_cmds);
1451	}
1452}
1453
1454struct cxl_mbox_get_supported_logs {
1455	__le16 entries;
1456	u8 rsvd[6];
1457	struct gsl_entry {
1458		uuid_t uuid;
1459		__le32 size;
1460	} __packed entry[];
1461} __packed;
1462
1463static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
1464{
1465	struct cxl_mbox_get_supported_logs *ret;
1466	int rc;
1467
1468	ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
1469	if (!ret)
1470		return ERR_PTR(-ENOMEM);
1471
1472	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
1473				   0, ret, cxlm->payload_size);
1474	if (rc < 0) {
1475		kvfree(ret);
1476		return ERR_PTR(rc);
1477	}
1478
1479	return ret;
1480}
1481
1482/**
1483 * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
1484 * @cxlm: The device.
1485 *
1486 * Return: 0 if enumeration completed successfully.
1487 *
1488 * CXL devices have optional support for certain commands. This function will
1489 * determine the set of supported commands for the hardware and update the
1490 * enabled_cmds bitmap in the @cxlm.
1491 */
1492static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
1493{
1494	struct cxl_mbox_get_supported_logs *gsl;
1495	struct device *dev = &cxlm->pdev->dev;
1496	struct cxl_mem_command *cmd;
1497	int i, rc;
1498
1499	gsl = cxl_get_gsl(cxlm);
1500	if (IS_ERR(gsl))
1501		return PTR_ERR(gsl);
1502
1503	rc = -ENOENT;
1504	for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
1505		u32 size = le32_to_cpu(gsl->entry[i].size);
1506		uuid_t uuid = gsl->entry[i].uuid;
1507		u8 *log;
1508
1509		dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
1510
1511		if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
1512			continue;
1513
1514		log = kvmalloc(size, GFP_KERNEL);
1515		if (!log) {
1516			rc = -ENOMEM;
1517			goto out;
1518		}
1519
1520		rc = cxl_xfer_log(cxlm, &uuid, size, log);
1521		if (rc) {
1522			kvfree(log);
1523			goto out;
1524		}
1525
1526		cxl_walk_cel(cxlm, size, log);
1527		kvfree(log);
1528
1529		/* In case CEL was bogus, enable some default commands. */
1530		cxl_for_each_cmd(cmd)
1531			if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
1532				set_bit(cmd->info.id, cxlm->enabled_cmds);
1533
1534		/* Found the required CEL */
1535		rc = 0;
1536	}
1537
1538out:
1539	kvfree(gsl);
1540	return rc;
1541}
1542
1543/**
1544 * cxl_mem_identify() - Send the IDENTIFY command to the device.
1545 * @cxlm: The device to identify.
1546 *
1547 * Return: 0 if identify was executed successfully.
1548 *
1549 * This will dispatch the identify command to the device and on success populate
1550 * structures to be exported to sysfs.
1551 */
1552static int cxl_mem_identify(struct cxl_mem *cxlm)
1553{
1554	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
1555	struct cxl_mbox_identify {
1556		char fw_revision[0x10];
1557		__le64 total_capacity;
1558		__le64 volatile_capacity;
1559		__le64 persistent_capacity;
1560		__le64 partition_align;
1561		__le16 info_event_log_size;
1562		__le16 warning_event_log_size;
1563		__le16 failure_event_log_size;
1564		__le16 fatal_event_log_size;
1565		__le32 lsa_size;
1566		u8 poison_list_max_mer[3];
1567		__le16 inject_poison_limit;
1568		u8 poison_caps;
1569		u8 qos_telemetry_caps;
1570	} __packed id;
1571	int rc;
1572
1573	rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
1574				   sizeof(id));
1575	if (rc < 0)
1576		return rc;
1577
1578	/*
1579	 * TODO: enumerate DPA map, as 'ram' and 'pmem' do not alias.
1580	 * For now, only the capacity is exported in sysfs
1581	 */
1582	cxlm->ram_range.start = 0;
1583	cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1;
1584
1585	cxlm->pmem_range.start = 0;
1586	cxlm->pmem_range.end =
1587		le64_to_cpu(id.persistent_capacity) * SZ_256M - 1;
1588
1589	cxlm->lsa_size = le32_to_cpu(id.lsa_size);
1590	memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
1591
1592	return 0;
1593}
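/*
 * Worked example: capacities in the IDENTIFY payload are multiples of
 * 256MB (SZ_256M). A volatile_capacity of 16 therefore yields
 * ram_range = [0, 4GB - 1], while a persistent_capacity of 0 leaves
 * pmem_range with end = -1, which range_len() evaluates to zero so
 * cxl_mem_probe() skips the nvdimm bridge registration.
 */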
1594
1595static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1596{
1597	struct cxl_memdev *cxlmd;
1598	struct cxl_mem *cxlm;
1599	int rc;
1600
1601	rc = pcim_enable_device(pdev);
1602	if (rc)
1603		return rc;
1604
1605	cxlm = cxl_mem_create(pdev);
1606	if (IS_ERR(cxlm))
1607		return PTR_ERR(cxlm);
1608
1609	rc = cxl_mem_setup_regs(cxlm);
1610	if (rc)
1611		return rc;
1612
1613	rc = cxl_mem_setup_mailbox(cxlm);
1614	if (rc)
1615		return rc;
1616
1617	rc = cxl_mem_enumerate_cmds(cxlm);
1618	if (rc)
1619		return rc;
1620
1621	rc = cxl_mem_identify(cxlm);
1622	if (rc)
1623		return rc;
1624
1625	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm, &cxl_memdev_fops);
1626	if (IS_ERR(cxlmd))
1627		return PTR_ERR(cxlmd);
1628
1629	if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
1630		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
1631
1632	return rc;
1633}
1634
1635static const struct pci_device_id cxl_mem_pci_tbl[] = {
1636	/* PCI class code for CXL.mem Type-3 Devices */
1637	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
1638	{ /* terminate list */ },
1639};
1640MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
1641
1642static struct pci_driver cxl_mem_driver = {
1643	.name			= KBUILD_MODNAME,
1644	.id_table		= cxl_mem_pci_tbl,
1645	.probe			= cxl_mem_probe,
1646	.driver	= {
1647		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
1648	},
1649};
1650
1651static __init int cxl_mem_init(void)
1652{
1653	struct dentry *mbox_debugfs;
1654	dev_t devt;
1655	int rc;
1656
1657	/* Double check the anonymous union trickery in struct cxl_regs */
1658	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
1659		     offsetof(struct cxl_regs, device_regs.memdev));
1660
1661	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
1662	if (rc)
1663		return rc;
1664
1665	cxl_mem_major = MAJOR(devt);
1666
1667	rc = pci_register_driver(&cxl_mem_driver);
1668	if (rc) {
1669		unregister_chrdev_region(MKDEV(cxl_mem_major, 0),
1670					 CXL_MEM_MAX_DEVS);
1671		return rc;
1672	}
1673
1674	cxl_debugfs = debugfs_create_dir("cxl", NULL);
1675	mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
1676	debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
1677			    &cxl_raw_allow_all);
1678
1679	return 0;
1680}
1681
1682static __exit void cxl_mem_exit(void)
1683{
1684	debugfs_remove_recursive(cxl_debugfs);
1685	pci_unregister_driver(&cxl_mem_driver);
1686	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
1687}
1688
1689MODULE_LICENSE("GPL v2");
1690module_init(cxl_mem_init);
1691module_exit(cxl_mem_exit);
1692MODULE_IMPORT_NS(CXL);