Linux Audio

Check our new training course

Loading...
   1/*
   2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of version 2 of the GNU General Public License as
   6 * published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 */
  13#include <linux/list_sort.h>
  14#include <linux/libnvdimm.h>
  15#include <linux/module.h>
  16#include <linux/mutex.h>
  17#include <linux/ndctl.h>
  18#include <linux/sysfs.h>
  19#include <linux/delay.h>
  20#include <linux/list.h>
  21#include <linux/acpi.h>
  22#include <linux/sort.h>
  23#include <linux/pmem.h>
  24#include <linux/io.h>
  25#include <linux/nd.h>
  26#include <asm/cacheflush.h>
  27#include "nfit.h"
  28
  29/*
  30 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
  31 * irrelevant.
  32 */
  33#include <linux/io-64-nonatomic-hi-lo.h>
  34
  35static bool force_enable_dimms;
  36module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
  37MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
  38
  39static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
  40module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
  41MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
  42
  43/* after three payloads of overflow, it's dead jim */
  44static unsigned int scrub_overflow_abort = 3;
  45module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
  46MODULE_PARM_DESC(scrub_overflow_abort,
  47		"Number of times we overflow ARS results before abort");
  48
  49static bool disable_vendor_specific;
  50module_param(disable_vendor_specific, bool, S_IRUGO);
  51MODULE_PARM_DESC(disable_vendor_specific,
  52		"Limit commands to the publicly specified set\n");
  53
/*
 * Global list head and mutex; both have external linkage (not static).
 * NOTE(review): acpi_desc_lock presumably serializes access to
 * acpi_descs -- confirm against the users outside this section.
 */
LIST_HEAD(acpi_descs);
DEFINE_MUTEX(acpi_desc_lock);

/* Driver-private workqueue pointer; it is not assigned in this section. */
static struct workqueue_struct *nfit_wq;
  58
/*
 * One list per NFIT sub-table type.  The add_*() helpers search these
 * lists for an entry whose contents match a newly parsed sub-table and,
 * on a match, move that entry onto the corresponding acpi_desc list
 * instead of allocating a new one.
 */
struct nfit_table_prev {
	struct list_head spas;		/* searched by add_spa() */
	struct list_head memdevs;	/* searched by add_memdev() */
	struct list_head dcrs;		/* searched by add_dcr() */
	struct list_head bdws;		/* searched by add_bdw() */
	struct list_head idts;		/* searched by add_idt() */
	struct list_head flushes;	/* searched by add_flush() */
};
  67
  68static u8 nfit_uuid[NFIT_UUID_MAX][16];
  69
  70const u8 *to_nfit_uuid(enum nfit_uuids id)
  71{
  72	return nfit_uuid[id];
  73}
  74EXPORT_SYMBOL(to_nfit_uuid);
  75
  76static struct acpi_nfit_desc *to_acpi_nfit_desc(
  77		struct nvdimm_bus_descriptor *nd_desc)
  78{
  79	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
  80}
  81
  82static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
  83{
  84	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
  85
  86	/*
  87	 * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
  88	 * acpi_device.
  89	 */
  90	if (!nd_desc->provider_name
  91			|| strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
  92		return NULL;
  93
  94	return to_acpi_device(acpi_desc->dev);
  95}
  96
  97static int xlat_bus_status(void *buf, unsigned int cmd, u32 status)
  98{
  99	struct nd_cmd_clear_error *clear_err;
 100	struct nd_cmd_ars_status *ars_status;
 101	u16 flags;
 102
 103	switch (cmd) {
 104	case ND_CMD_ARS_CAP:
 105		if ((status & 0xffff) == NFIT_ARS_CAP_NONE)
 106			return -ENOTTY;
 107
 108		/* Command failed */
 109		if (status & 0xffff)
 110			return -EIO;
 111
 112		/* No supported scan types for this range */
 113		flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
 114		if ((status >> 16 & flags) == 0)
 115			return -ENOTTY;
 116		return 0;
 117	case ND_CMD_ARS_START:
 118		/* ARS is in progress */
 119		if ((status & 0xffff) == NFIT_ARS_START_BUSY)
 120			return -EBUSY;
 121
 122		/* Command failed */
 123		if (status & 0xffff)
 124			return -EIO;
 125		return 0;
 126	case ND_CMD_ARS_STATUS:
 127		ars_status = buf;
 128		/* Command failed */
 129		if (status & 0xffff)
 130			return -EIO;
 131		/* Check extended status (Upper two bytes) */
 132		if (status == NFIT_ARS_STATUS_DONE)
 133			return 0;
 134
 135		/* ARS is in progress */
 136		if (status == NFIT_ARS_STATUS_BUSY)
 137			return -EBUSY;
 138
 139		/* No ARS performed for the current boot */
 140		if (status == NFIT_ARS_STATUS_NONE)
 141			return -EAGAIN;
 142
 143		/*
 144		 * ARS interrupted, either we overflowed or some other
 145		 * agent wants the scan to stop.  If we didn't overflow
 146		 * then just continue with the returned results.
 147		 */
 148		if (status == NFIT_ARS_STATUS_INTR) {
 149			if (ars_status->out_length >= 40 && (ars_status->flags
 150						& NFIT_ARS_F_OVERFLOW))
 151				return -ENOSPC;
 152			return 0;
 153		}
 154
 155		/* Unknown status */
 156		if (status >> 16)
 157			return -EIO;
 158		return 0;
 159	case ND_CMD_CLEAR_ERROR:
 160		clear_err = buf;
 161		if (status & 0xffff)
 162			return -EIO;
 163		if (!clear_err->cleared)
 164			return -EIO;
 165		if (clear_err->length > clear_err->cleared)
 166			return clear_err->cleared;
 167		return 0;
 168	default:
 169		break;
 170	}
 171
 172	/* all other non-zero status results in an error */
 173	if (status)
 174		return -EIO;
 175	return 0;
 176}
 177
 178static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
 179		u32 status)
 180{
 181	if (!nvdimm)
 182		return xlat_bus_status(buf, cmd, status);
 183	if (status)
 184		return -EIO;
 185	return 0;
 186}
 187
 188int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 189		unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
 190{
 191	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 192	union acpi_object in_obj, in_buf, *out_obj;
 193	const struct nd_cmd_desc *desc = NULL;
 194	struct device *dev = acpi_desc->dev;
 195	struct nd_cmd_pkg *call_pkg = NULL;
 196	const char *cmd_name, *dimm_name;
 197	unsigned long cmd_mask, dsm_mask;
 198	u32 offset, fw_status = 0;
 199	acpi_handle handle;
 200	unsigned int func;
 201	const u8 *uuid;
 202	int rc, i;
 203
 204	func = cmd;
 205	if (cmd == ND_CMD_CALL) {
 206		call_pkg = buf;
 207		func = call_pkg->nd_command;
 208	}
 209
 210	if (nvdimm) {
 211		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 212		struct acpi_device *adev = nfit_mem->adev;
 213
 214		if (!adev)
 215			return -ENOTTY;
 216		if (call_pkg && nfit_mem->family != call_pkg->nd_family)
 217			return -ENOTTY;
 218
 219		dimm_name = nvdimm_name(nvdimm);
 220		cmd_name = nvdimm_cmd_name(cmd);
 221		cmd_mask = nvdimm_cmd_mask(nvdimm);
 222		dsm_mask = nfit_mem->dsm_mask;
 223		desc = nd_cmd_dimm_desc(cmd);
 224		uuid = to_nfit_uuid(nfit_mem->family);
 225		handle = adev->handle;
 226	} else {
 227		struct acpi_device *adev = to_acpi_dev(acpi_desc);
 228
 229		cmd_name = nvdimm_bus_cmd_name(cmd);
 230		cmd_mask = nd_desc->cmd_mask;
 231		dsm_mask = cmd_mask;
 232		desc = nd_cmd_bus_desc(cmd);
 233		uuid = to_nfit_uuid(NFIT_DEV_BUS);
 234		handle = adev->handle;
 235		dimm_name = "bus";
 236	}
 237
 238	if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
 239		return -ENOTTY;
 240
 241	if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
 242		return -ENOTTY;
 243
 244	in_obj.type = ACPI_TYPE_PACKAGE;
 245	in_obj.package.count = 1;
 246	in_obj.package.elements = &in_buf;
 247	in_buf.type = ACPI_TYPE_BUFFER;
 248	in_buf.buffer.pointer = buf;
 249	in_buf.buffer.length = 0;
 250
 251	/* libnvdimm has already validated the input envelope */
 252	for (i = 0; i < desc->in_num; i++)
 253		in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
 254				i, buf);
 255
 256	if (call_pkg) {
 257		/* skip over package wrapper */
 258		in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
 259		in_buf.buffer.length = call_pkg->nd_size_in;
 260	}
 261
 262	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
 263		dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
 264				__func__, dimm_name, cmd, func,
 265				in_buf.buffer.length);
 266		print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
 267			in_buf.buffer.pointer,
 268			min_t(u32, 256, in_buf.buffer.length), true);
 269	}
 270
 271	out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
 272	if (!out_obj) {
 273		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
 274				cmd_name);
 275		return -EINVAL;
 276	}
 277
 278	if (call_pkg) {
 279		call_pkg->nd_fw_size = out_obj->buffer.length;
 280		memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
 281			out_obj->buffer.pointer,
 282			min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
 283
 284		ACPI_FREE(out_obj);
 285		/*
 286		 * Need to support FW function w/o known size in advance.
 287		 * Caller can determine required size based upon nd_fw_size.
 288		 * If we return an error (like elsewhere) then caller wouldn't
 289		 * be able to rely upon data returned to make calculation.
 290		 */
 291		return 0;
 292	}
 293
 294	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
 295		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
 296				__func__, dimm_name, cmd_name, out_obj->type);
 297		rc = -EINVAL;
 298		goto out;
 299	}
 300
 301	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
 302		dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
 303				dimm_name, cmd_name, out_obj->buffer.length);
 304		print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
 305				4, out_obj->buffer.pointer, min_t(u32, 128,
 306					out_obj->buffer.length), true);
 307	}
 308
 309	for (i = 0, offset = 0; i < desc->out_num; i++) {
 310		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
 311				(u32 *) out_obj->buffer.pointer,
 312				out_obj->buffer.length - offset);
 313
 314		if (offset + out_size > out_obj->buffer.length) {
 315			dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
 316					__func__, dimm_name, cmd_name, i);
 317			break;
 318		}
 319
 320		if (in_buf.buffer.length + offset + out_size > buf_len) {
 321			dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
 322					__func__, dimm_name, cmd_name, i);
 323			rc = -ENXIO;
 324			goto out;
 325		}
 326		memcpy(buf + in_buf.buffer.length + offset,
 327				out_obj->buffer.pointer + offset, out_size);
 328		offset += out_size;
 329	}
 330
 331	/*
 332	 * Set fw_status for all the commands with a known format to be
 333	 * later interpreted by xlat_status().
 334	 */
 335	if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR)
 336			|| (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR)))
 337		fw_status = *(u32 *) out_obj->buffer.pointer;
 338
 339	if (offset + in_buf.buffer.length < buf_len) {
 340		if (i >= 1) {
 341			/*
 342			 * status valid, return the number of bytes left
 343			 * unfilled in the output buffer
 344			 */
 345			rc = buf_len - offset - in_buf.buffer.length;
 346			if (cmd_rc)
 347				*cmd_rc = xlat_status(nvdimm, buf, cmd,
 348						fw_status);
 349		} else {
 350			dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
 351					__func__, dimm_name, cmd_name, buf_len,
 352					offset);
 353			rc = -ENXIO;
 354		}
 355	} else {
 356		rc = 0;
 357		if (cmd_rc)
 358			*cmd_rc = xlat_status(nvdimm, buf, cmd, fw_status);
 359	}
 360
 361 out:
 362	ACPI_FREE(out_obj);
 363
 364	return rc;
 365}
 366EXPORT_SYMBOL_GPL(acpi_nfit_ctl);
 367
 368static const char *spa_type_name(u16 type)
 369{
 370	static const char *to_name[] = {
 371		[NFIT_SPA_VOLATILE] = "volatile",
 372		[NFIT_SPA_PM] = "pmem",
 373		[NFIT_SPA_DCR] = "dimm-control-region",
 374		[NFIT_SPA_BDW] = "block-data-window",
 375		[NFIT_SPA_VDISK] = "volatile-disk",
 376		[NFIT_SPA_VCD] = "volatile-cd",
 377		[NFIT_SPA_PDISK] = "persistent-disk",
 378		[NFIT_SPA_PCD] = "persistent-cd",
 379
 380	};
 381
 382	if (type > NFIT_SPA_PCD)
 383		return "unknown";
 384
 385	return to_name[type];
 386}
 387
 388int nfit_spa_type(struct acpi_nfit_system_address *spa)
 389{
 390	int i;
 391
 392	for (i = 0; i < NFIT_UUID_MAX; i++)
 393		if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
 394			return i;
 395	return -1;
 396}
 397
 398static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 399		struct nfit_table_prev *prev,
 400		struct acpi_nfit_system_address *spa)
 401{
 402	struct device *dev = acpi_desc->dev;
 403	struct nfit_spa *nfit_spa;
 404
 405	if (spa->header.length != sizeof(*spa))
 406		return false;
 407
 408	list_for_each_entry(nfit_spa, &prev->spas, list) {
 409		if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
 410			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
 411			return true;
 412		}
 413	}
 414
 415	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
 416			GFP_KERNEL);
 417	if (!nfit_spa)
 418		return false;
 419	INIT_LIST_HEAD(&nfit_spa->list);
 420	memcpy(nfit_spa->spa, spa, sizeof(*spa));
 421	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
 422	dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
 423			spa->range_index,
 424			spa_type_name(nfit_spa_type(spa)));
 425	return true;
 426}
 427
 428static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 429		struct nfit_table_prev *prev,
 430		struct acpi_nfit_memory_map *memdev)
 431{
 432	struct device *dev = acpi_desc->dev;
 433	struct nfit_memdev *nfit_memdev;
 434
 435	if (memdev->header.length != sizeof(*memdev))
 436		return false;
 437
 438	list_for_each_entry(nfit_memdev, &prev->memdevs, list)
 439		if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
 440			list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
 441			return true;
 442		}
 443
 444	nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
 445			GFP_KERNEL);
 446	if (!nfit_memdev)
 447		return false;
 448	INIT_LIST_HEAD(&nfit_memdev->list);
 449	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 450	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
 451	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
 452			__func__, memdev->device_handle, memdev->range_index,
 453			memdev->region_index);
 454	return true;
 455}
 456
 457/*
 458 * An implementation may provide a truncated control region if no block windows
 459 * are defined.
 460 */
 461static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
 462{
 463	if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
 464				window_size))
 465		return 0;
 466	if (dcr->windows)
 467		return sizeof(*dcr);
 468	return offsetof(struct acpi_nfit_control_region, window_size);
 469}
 470
 471static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
 472		struct nfit_table_prev *prev,
 473		struct acpi_nfit_control_region *dcr)
 474{
 475	struct device *dev = acpi_desc->dev;
 476	struct nfit_dcr *nfit_dcr;
 477
 478	if (!sizeof_dcr(dcr))
 479		return false;
 480
 481	list_for_each_entry(nfit_dcr, &prev->dcrs, list)
 482		if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
 483			list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
 484			return true;
 485		}
 486
 487	nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
 488			GFP_KERNEL);
 489	if (!nfit_dcr)
 490		return false;
 491	INIT_LIST_HEAD(&nfit_dcr->list);
 492	memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
 493	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
 494	dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
 495			dcr->region_index, dcr->windows);
 496	return true;
 497}
 498
 499static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
 500		struct nfit_table_prev *prev,
 501		struct acpi_nfit_data_region *bdw)
 502{
 503	struct device *dev = acpi_desc->dev;
 504	struct nfit_bdw *nfit_bdw;
 505
 506	if (bdw->header.length != sizeof(*bdw))
 507		return false;
 508	list_for_each_entry(nfit_bdw, &prev->bdws, list)
 509		if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
 510			list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
 511			return true;
 512		}
 513
 514	nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
 515			GFP_KERNEL);
 516	if (!nfit_bdw)
 517		return false;
 518	INIT_LIST_HEAD(&nfit_bdw->list);
 519	memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
 520	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
 521	dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
 522			bdw->region_index, bdw->windows);
 523	return true;
 524}
 525
 526static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
 527{
 528	if (idt->header.length < sizeof(*idt))
 529		return 0;
 530	return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
 531}
 532
 533static bool add_idt(struct acpi_nfit_desc *acpi_desc,
 534		struct nfit_table_prev *prev,
 535		struct acpi_nfit_interleave *idt)
 536{
 537	struct device *dev = acpi_desc->dev;
 538	struct nfit_idt *nfit_idt;
 539
 540	if (!sizeof_idt(idt))
 541		return false;
 542
 543	list_for_each_entry(nfit_idt, &prev->idts, list) {
 544		if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
 545			continue;
 546
 547		if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
 548			list_move_tail(&nfit_idt->list, &acpi_desc->idts);
 549			return true;
 550		}
 551	}
 552
 553	nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
 554			GFP_KERNEL);
 555	if (!nfit_idt)
 556		return false;
 557	INIT_LIST_HEAD(&nfit_idt->list);
 558	memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
 559	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
 560	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
 561			idt->interleave_index, idt->line_count);
 562	return true;
 563}
 564
 565static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
 566{
 567	if (flush->header.length < sizeof(*flush))
 568		return 0;
 569	return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1);
 570}
 571
 572static bool add_flush(struct acpi_nfit_desc *acpi_desc,
 573		struct nfit_table_prev *prev,
 574		struct acpi_nfit_flush_address *flush)
 575{
 576	struct device *dev = acpi_desc->dev;
 577	struct nfit_flush *nfit_flush;
 578
 579	if (!sizeof_flush(flush))
 580		return false;
 581
 582	list_for_each_entry(nfit_flush, &prev->flushes, list) {
 583		if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
 584			continue;
 585
 586		if (memcmp(nfit_flush->flush, flush,
 587					sizeof_flush(flush)) == 0) {
 588			list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
 589			return true;
 590		}
 591	}
 592
 593	nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
 594			+ sizeof_flush(flush), GFP_KERNEL);
 595	if (!nfit_flush)
 596		return false;
 597	INIT_LIST_HEAD(&nfit_flush->list);
 598	memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
 599	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
 600	dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
 601			flush->device_handle, flush->hint_count);
 602	return true;
 603}
 604
 605static void *add_table(struct acpi_nfit_desc *acpi_desc,
 606		struct nfit_table_prev *prev, void *table, const void *end)
 607{
 608	struct device *dev = acpi_desc->dev;
 609	struct acpi_nfit_header *hdr;
 610	void *err = ERR_PTR(-ENOMEM);
 611
 612	if (table >= end)
 613		return NULL;
 614
 615	hdr = table;
 616	if (!hdr->length) {
 617		dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
 618			hdr->type);
 619		return NULL;
 620	}
 621
 622	switch (hdr->type) {
 623	case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
 624		if (!add_spa(acpi_desc, prev, table))
 625			return err;
 626		break;
 627	case ACPI_NFIT_TYPE_MEMORY_MAP:
 628		if (!add_memdev(acpi_desc, prev, table))
 629			return err;
 630		break;
 631	case ACPI_NFIT_TYPE_CONTROL_REGION:
 632		if (!add_dcr(acpi_desc, prev, table))
 633			return err;
 634		break;
 635	case ACPI_NFIT_TYPE_DATA_REGION:
 636		if (!add_bdw(acpi_desc, prev, table))
 637			return err;
 638		break;
 639	case ACPI_NFIT_TYPE_INTERLEAVE:
 640		if (!add_idt(acpi_desc, prev, table))
 641			return err;
 642		break;
 643	case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
 644		if (!add_flush(acpi_desc, prev, table))
 645			return err;
 646		break;
 647	case ACPI_NFIT_TYPE_SMBIOS:
 648		dev_dbg(dev, "%s: smbios\n", __func__);
 649		break;
 650	default:
 651		dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
 652		break;
 653	}
 654
 655	return table + hdr->length;
 656}
 657
 658static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
 659		struct nfit_mem *nfit_mem)
 660{
 661	u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
 662	u16 dcr = nfit_mem->dcr->region_index;
 663	struct nfit_spa *nfit_spa;
 664
 665	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
 666		u16 range_index = nfit_spa->spa->range_index;
 667		int type = nfit_spa_type(nfit_spa->spa);
 668		struct nfit_memdev *nfit_memdev;
 669
 670		if (type != NFIT_SPA_BDW)
 671			continue;
 672
 673		list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
 674			if (nfit_memdev->memdev->range_index != range_index)
 675				continue;
 676			if (nfit_memdev->memdev->device_handle != device_handle)
 677				continue;
 678			if (nfit_memdev->memdev->region_index != dcr)
 679				continue;
 680
 681			nfit_mem->spa_bdw = nfit_spa->spa;
 682			return;
 683		}
 684	}
 685
 686	dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
 687			nfit_mem->spa_dcr->range_index);
 688	nfit_mem->bdw = NULL;
 689}
 690
 691static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
 692		struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
 693{
 694	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
 695	struct nfit_memdev *nfit_memdev;
 696	struct nfit_bdw *nfit_bdw;
 697	struct nfit_idt *nfit_idt;
 698	u16 idt_idx, range_index;
 699
 700	list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
 701		if (nfit_bdw->bdw->region_index != dcr)
 702			continue;
 703		nfit_mem->bdw = nfit_bdw->bdw;
 704		break;
 705	}
 706
 707	if (!nfit_mem->bdw)
 708		return;
 709
 710	nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
 711
 712	if (!nfit_mem->spa_bdw)
 713		return;
 714
 715	range_index = nfit_mem->spa_bdw->range_index;
 716	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
 717		if (nfit_memdev->memdev->range_index != range_index ||
 718				nfit_memdev->memdev->region_index != dcr)
 719			continue;
 720		nfit_mem->memdev_bdw = nfit_memdev->memdev;
 721		idt_idx = nfit_memdev->memdev->interleave_index;
 722		list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
 723			if (nfit_idt->idt->interleave_index != idt_idx)
 724				continue;
 725			nfit_mem->idt_bdw = nfit_idt->idt;
 726			break;
 727		}
 728		break;
 729	}
 730}
 731
 732static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
 733		struct acpi_nfit_system_address *spa)
 734{
 735	struct nfit_mem *nfit_mem, *found;
 736	struct nfit_memdev *nfit_memdev;
 737	int type = nfit_spa_type(spa);
 738
 739	switch (type) {
 740	case NFIT_SPA_DCR:
 741	case NFIT_SPA_PM:
 742		break;
 743	default:
 744		return 0;
 745	}
 746
 747	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
 748		struct nfit_flush *nfit_flush;
 749		struct nfit_dcr *nfit_dcr;
 750		u32 device_handle;
 751		u16 dcr;
 752
 753		if (nfit_memdev->memdev->range_index != spa->range_index)
 754			continue;
 755		found = NULL;
 756		dcr = nfit_memdev->memdev->region_index;
 757		device_handle = nfit_memdev->memdev->device_handle;
 758		list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
 759			if (__to_nfit_memdev(nfit_mem)->device_handle
 760					== device_handle) {
 761				found = nfit_mem;
 762				break;
 763			}
 764
 765		if (found)
 766			nfit_mem = found;
 767		else {
 768			nfit_mem = devm_kzalloc(acpi_desc->dev,
 769					sizeof(*nfit_mem), GFP_KERNEL);
 770			if (!nfit_mem)
 771				return -ENOMEM;
 772			INIT_LIST_HEAD(&nfit_mem->list);
 773			nfit_mem->acpi_desc = acpi_desc;
 774			list_add(&nfit_mem->list, &acpi_desc->dimms);
 775		}
 776
 777		list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
 778			if (nfit_dcr->dcr->region_index != dcr)
 779				continue;
 780			/*
 781			 * Record the control region for the dimm.  For
 782			 * the ACPI 6.1 case, where there are separate
 783			 * control regions for the pmem vs blk
 784			 * interfaces, be sure to record the extended
 785			 * blk details.
 786			 */
 787			if (!nfit_mem->dcr)
 788				nfit_mem->dcr = nfit_dcr->dcr;
 789			else if (nfit_mem->dcr->windows == 0
 790					&& nfit_dcr->dcr->windows)
 791				nfit_mem->dcr = nfit_dcr->dcr;
 792			break;
 793		}
 794
 795		list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
 796			struct acpi_nfit_flush_address *flush;
 797			u16 i;
 798
 799			if (nfit_flush->flush->device_handle != device_handle)
 800				continue;
 801			nfit_mem->nfit_flush = nfit_flush;
 802			flush = nfit_flush->flush;
 803			nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev,
 804					flush->hint_count
 805					* sizeof(struct resource), GFP_KERNEL);
 806			if (!nfit_mem->flush_wpq)
 807				return -ENOMEM;
 808			for (i = 0; i < flush->hint_count; i++) {
 809				struct resource *res = &nfit_mem->flush_wpq[i];
 810
 811				res->start = flush->hint_address[i];
 812				res->end = res->start + 8 - 1;
 813			}
 814			break;
 815		}
 816
 817		if (dcr && !nfit_mem->dcr) {
 818			dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
 819					spa->range_index, dcr);
 820			return -ENODEV;
 821		}
 822
 823		if (type == NFIT_SPA_DCR) {
 824			struct nfit_idt *nfit_idt;
 825			u16 idt_idx;
 826
 827			/* multiple dimms may share a SPA when interleaved */
 828			nfit_mem->spa_dcr = spa;
 829			nfit_mem->memdev_dcr = nfit_memdev->memdev;
 830			idt_idx = nfit_memdev->memdev->interleave_index;
 831			list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
 832				if (nfit_idt->idt->interleave_index != idt_idx)
 833					continue;
 834				nfit_mem->idt_dcr = nfit_idt->idt;
 835				break;
 836			}
 837			nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
 838		} else {
 839			/*
 840			 * A single dimm may belong to multiple SPA-PM
 841			 * ranges, record at least one in addition to
 842			 * any SPA-DCR range.
 843			 */
 844			nfit_mem->memdev_pmem = nfit_memdev->memdev;
 845		}
 846	}
 847
 848	return 0;
 849}
 850
 851static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
 852{
 853	struct nfit_mem *a = container_of(_a, typeof(*a), list);
 854	struct nfit_mem *b = container_of(_b, typeof(*b), list);
 855	u32 handleA, handleB;
 856
 857	handleA = __to_nfit_memdev(a)->device_handle;
 858	handleB = __to_nfit_memdev(b)->device_handle;
 859	if (handleA < handleB)
 860		return -1;
 861	else if (handleA > handleB)
 862		return 1;
 863	return 0;
 864}
 865
 866static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
 867{
 868	struct nfit_spa *nfit_spa;
 869
 870	/*
 871	 * For each SPA-DCR or SPA-PMEM address range find its
 872	 * corresponding MEMDEV(s).  From each MEMDEV find the
 873	 * corresponding DCR.  Then, if we're operating on a SPA-DCR,
 874	 * try to find a SPA-BDW and a corresponding BDW that references
 875	 * the DCR.  Throw it all into an nfit_mem object.  Note, that
 876	 * BDWs are optional.
 877	 */
 878	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
 879		int rc;
 880
 881		rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
 882		if (rc)
 883			return rc;
 884	}
 885
 886	list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
 887
 888	return 0;
 889}
 890
 891static ssize_t revision_show(struct device *dev,
 892		struct device_attribute *attr, char *buf)
 893{
 894	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
 895	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
 896	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 897
 898	return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
 899}
 900static DEVICE_ATTR_RO(revision);
 901
 902static ssize_t hw_error_scrub_show(struct device *dev,
 903		struct device_attribute *attr, char *buf)
 904{
 905	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
 906	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
 907	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 908
 909	return sprintf(buf, "%d\n", acpi_desc->scrub_mode);
 910}
 911
 912/*
 913 * The 'hw_error_scrub' attribute can have the following values written to it:
 914 * '0': Switch to the default mode where an exception will only insert
 915 *      the address of the memory error into the poison and badblocks lists.
 916 * '1': Enable a full scrub to happen if an exception for a memory error is
 917 *      received.
 918 */
 919static ssize_t hw_error_scrub_store(struct device *dev,
 920		struct device_attribute *attr, const char *buf, size_t size)
 921{
 922	struct nvdimm_bus_descriptor *nd_desc;
 923	ssize_t rc;
 924	long val;
 925
 926	rc = kstrtol(buf, 0, &val);
 927	if (rc)
 928		return rc;
 929
 930	device_lock(dev);
 931	nd_desc = dev_get_drvdata(dev);
 932	if (nd_desc) {
 933		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 934
 935		switch (val) {
 936		case HW_ERROR_SCRUB_ON:
 937			acpi_desc->scrub_mode = HW_ERROR_SCRUB_ON;
 938			break;
 939		case HW_ERROR_SCRUB_OFF:
 940			acpi_desc->scrub_mode = HW_ERROR_SCRUB_OFF;
 941			break;
 942		default:
 943			rc = -EINVAL;
 944			break;
 945		}
 946	}
 947	device_unlock(dev);
 948	if (rc)
 949		return rc;
 950	return size;
 951}
 952static DEVICE_ATTR_RW(hw_error_scrub);
 953
 954/*
 955 * This shows the number of full Address Range Scrubs that have been
 956 * completed since driver load time. Userspace can wait on this using
 957 * select/poll etc. A '+' at the end indicates an ARS is in progress
 958 */
 959static ssize_t scrub_show(struct device *dev,
 960		struct device_attribute *attr, char *buf)
 961{
 962	struct nvdimm_bus_descriptor *nd_desc;
 963	ssize_t rc = -ENXIO;
 964
 965	device_lock(dev);
 966	nd_desc = dev_get_drvdata(dev);
 967	if (nd_desc) {
 968		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 969
 970		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
 971				(work_busy(&acpi_desc->work)) ? "+\n" : "\n");
 972	}
 973	device_unlock(dev);
 974	return rc;
 975}
 976
 977static ssize_t scrub_store(struct device *dev,
 978		struct device_attribute *attr, const char *buf, size_t size)
 979{
 980	struct nvdimm_bus_descriptor *nd_desc;
 981	ssize_t rc;
 982	long val;
 983
 984	rc = kstrtol(buf, 0, &val);
 985	if (rc)
 986		return rc;
 987	if (val != 1)
 988		return -EINVAL;
 989
 990	device_lock(dev);
 991	nd_desc = dev_get_drvdata(dev);
 992	if (nd_desc) {
 993		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 994
 995		rc = acpi_nfit_ars_rescan(acpi_desc);
 996	}
 997	device_unlock(dev);
 998	if (rc)
 999		return rc;
1000	return size;
1001}
1002static DEVICE_ATTR_RW(scrub);
1003
1004static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
1005{
1006	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1007	const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
1008		| 1 << ND_CMD_ARS_STATUS;
1009
1010	return (nd_desc->cmd_mask & mask) == mask;
1011}
1012
1013static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
1014{
1015	struct device *dev = container_of(kobj, struct device, kobj);
1016	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
1017
1018	if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
1019		return 0;
1020	return a->mode;
1021}
1022
/* Attributes of the "nfit" sysfs group; the list is NULL terminated. */
static struct attribute *acpi_nfit_attributes[] = {
	&dev_attr_revision.attr,
	&dev_attr_scrub.attr,
	&dev_attr_hw_error_scrub.attr,
	NULL,
};

/* The "nfit" group; nfit_visible() decides per-attribute visibility. */
static struct attribute_group acpi_nfit_attribute_group = {
	.name = "nfit",
	.attrs = acpi_nfit_attributes,
	.is_visible = nfit_visible,
};

/* NULL-terminated group list: nvdimm_bus_attribute_group plus "nfit". */
static const struct attribute_group *acpi_nfit_attribute_groups[] = {
	&nvdimm_bus_attribute_group,
	&acpi_nfit_attribute_group,
	NULL,
};
1041
/* Resolve a DIMM device to the memdev table of its nfit_mem provider data. */
static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
{
	struct nfit_mem *nfit_mem = nvdimm_provider_data(to_nvdimm(dev));

	return __to_nfit_memdev(nfit_mem);
}
1049
1050static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
1051{
1052	struct nvdimm *nvdimm = to_nvdimm(dev);
1053	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1054
1055	return nfit_mem->dcr;
1056}
1057
1058static ssize_t handle_show(struct device *dev,
1059		struct device_attribute *attr, char *buf)
1060{
1061	struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
1062
1063	return sprintf(buf, "%#x\n", memdev->device_handle);
1064}
1065static DEVICE_ATTR_RO(handle);
1066
1067static ssize_t phys_id_show(struct device *dev,
1068		struct device_attribute *attr, char *buf)
1069{
1070	struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
1071
1072	return sprintf(buf, "%#x\n", memdev->physical_id);
1073}
1074static DEVICE_ATTR_RO(phys_id);
1075
1076static ssize_t vendor_show(struct device *dev,
1077		struct device_attribute *attr, char *buf)
1078{
1079	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1080
1081	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
1082}
1083static DEVICE_ATTR_RO(vendor);
1084
1085static ssize_t rev_id_show(struct device *dev,
1086		struct device_attribute *attr, char *buf)
1087{
1088	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1089
1090	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
1091}
1092static DEVICE_ATTR_RO(rev_id);
1093
1094static ssize_t device_show(struct device *dev,
1095		struct device_attribute *attr, char *buf)
1096{
1097	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1098
1099	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
1100}
1101static DEVICE_ATTR_RO(device);
1102
1103static ssize_t subsystem_vendor_show(struct device *dev,
1104		struct device_attribute *attr, char *buf)
1105{
1106	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1107
1108	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
1109}
1110static DEVICE_ATTR_RO(subsystem_vendor);
1111
1112static ssize_t subsystem_rev_id_show(struct device *dev,
1113		struct device_attribute *attr, char *buf)
1114{
1115	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1116
1117	return sprintf(buf, "0x%04x\n",
1118			be16_to_cpu(dcr->subsystem_revision_id));
1119}
1120static DEVICE_ATTR_RO(subsystem_rev_id);
1121
1122static ssize_t subsystem_device_show(struct device *dev,
1123		struct device_attribute *attr, char *buf)
1124{
1125	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1126
1127	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
1128}
1129static DEVICE_ATTR_RO(subsystem_device);
1130
1131static int num_nvdimm_formats(struct nvdimm *nvdimm)
1132{
1133	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1134	int formats = 0;
1135
1136	if (nfit_mem->memdev_pmem)
1137		formats++;
1138	if (nfit_mem->memdev_bdw)
1139		formats++;
1140	return formats;
1141}
1142
1143static ssize_t format_show(struct device *dev,
1144		struct device_attribute *attr, char *buf)
1145{
1146	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1147
1148	return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
1149}
1150static DEVICE_ATTR_RO(format);
1151
1152static ssize_t format1_show(struct device *dev,
1153		struct device_attribute *attr, char *buf)
1154{
1155	u32 handle;
1156	ssize_t rc = -ENXIO;
1157	struct nfit_mem *nfit_mem;
1158	struct nfit_memdev *nfit_memdev;
1159	struct acpi_nfit_desc *acpi_desc;
1160	struct nvdimm *nvdimm = to_nvdimm(dev);
1161	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1162
1163	nfit_mem = nvdimm_provider_data(nvdimm);
1164	acpi_desc = nfit_mem->acpi_desc;
1165	handle = to_nfit_memdev(dev)->device_handle;
1166
1167	/* assumes DIMMs have at most 2 published interface codes */
1168	mutex_lock(&acpi_desc->init_mutex);
1169	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
1170		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
1171		struct nfit_dcr *nfit_dcr;
1172
1173		if (memdev->device_handle != handle)
1174			continue;
1175
1176		list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
1177			if (nfit_dcr->dcr->region_index != memdev->region_index)
1178				continue;
1179			if (nfit_dcr->dcr->code == dcr->code)
1180				continue;
1181			rc = sprintf(buf, "0x%04x\n",
1182					le16_to_cpu(nfit_dcr->dcr->code));
1183			break;
1184		}
1185		if (rc != ENXIO)
1186			break;
1187	}
1188	mutex_unlock(&acpi_desc->init_mutex);
1189	return rc;
1190}
1191static DEVICE_ATTR_RO(format1);
1192
1193static ssize_t formats_show(struct device *dev,
1194		struct device_attribute *attr, char *buf)
1195{
1196	struct nvdimm *nvdimm = to_nvdimm(dev);
1197
1198	return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
1199}
1200static DEVICE_ATTR_RO(formats);
1201
1202static ssize_t serial_show(struct device *dev,
1203		struct device_attribute *attr, char *buf)
1204{
1205	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1206
1207	return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
1208}
1209static DEVICE_ATTR_RO(serial);
1210
1211static ssize_t family_show(struct device *dev,
1212		struct device_attribute *attr, char *buf)
1213{
1214	struct nvdimm *nvdimm = to_nvdimm(dev);
1215	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1216
1217	if (nfit_mem->family < 0)
1218		return -ENXIO;
1219	return sprintf(buf, "%d\n", nfit_mem->family);
1220}
1221static DEVICE_ATTR_RO(family);
1222
1223static ssize_t dsm_mask_show(struct device *dev,
1224		struct device_attribute *attr, char *buf)
1225{
1226	struct nvdimm *nvdimm = to_nvdimm(dev);
1227	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1228
1229	if (nfit_mem->family < 0)
1230		return -ENXIO;
1231	return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
1232}
1233static DEVICE_ATTR_RO(dsm_mask);
1234
1235static ssize_t flags_show(struct device *dev,
1236		struct device_attribute *attr, char *buf)
1237{
1238	u16 flags = to_nfit_memdev(dev)->flags;
1239
1240	return sprintf(buf, "%s%s%s%s%s\n",
1241		flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
1242		flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
1243		flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
1244		flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
1245		flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
1246}
1247static DEVICE_ATTR_RO(flags);
1248
1249static ssize_t id_show(struct device *dev,
1250		struct device_attribute *attr, char *buf)
1251{
1252	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1253
1254	if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
1255		return sprintf(buf, "%04x-%02x-%04x-%08x\n",
1256				be16_to_cpu(dcr->vendor_id),
1257				dcr->manufacturing_location,
1258				be16_to_cpu(dcr->manufacturing_date),
1259				be32_to_cpu(dcr->serial_number));
1260	else
1261		return sprintf(buf, "%04x-%08x\n",
1262				be16_to_cpu(dcr->vendor_id),
1263				be32_to_cpu(dcr->serial_number));
1264}
1265static DEVICE_ATTR_RO(id);
1266
1267static struct attribute *acpi_nfit_dimm_attributes[] = {
1268	&dev_attr_handle.attr,
1269	&dev_attr_phys_id.attr,
1270	&dev_attr_vendor.attr,
1271	&dev_attr_device.attr,
1272	&dev_attr_rev_id.attr,
1273	&dev_attr_subsystem_vendor.attr,
1274	&dev_attr_subsystem_device.attr,
1275	&dev_attr_subsystem_rev_id.attr,
1276	&dev_attr_format.attr,
1277	&dev_attr_formats.attr,
1278	&dev_attr_format1.attr,
1279	&dev_attr_serial.attr,
1280	&dev_attr_flags.attr,
1281	&dev_attr_id.attr,
1282	&dev_attr_family.attr,
1283	&dev_attr_dsm_mask.attr,
1284	NULL,
1285};
1286
1287static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
1288		struct attribute *a, int n)
1289{
1290	struct device *dev = container_of(kobj, struct device, kobj);
1291	struct nvdimm *nvdimm = to_nvdimm(dev);
1292
1293	if (!to_nfit_dcr(dev))
1294		return 0;
1295	if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
1296		return 0;
1297	return a->mode;
1298}
1299
1300static struct attribute_group acpi_nfit_dimm_attribute_group = {
1301	.name = "nfit",
1302	.attrs = acpi_nfit_dimm_attributes,
1303	.is_visible = acpi_nfit_dimm_attr_visible,
1304};
1305
1306static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
1307	&nvdimm_attribute_group,
1308	&nd_device_attribute_group,
1309	&acpi_nfit_dimm_attribute_group,
1310	NULL,
1311};
1312
1313static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
1314		u32 device_handle)
1315{
1316	struct nfit_mem *nfit_mem;
1317
1318	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
1319		if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
1320			return nfit_mem->nvdimm;
1321
1322	return NULL;
1323}
1324
1325void __acpi_nvdimm_notify(struct device *dev, u32 event)
1326{
1327	struct nfit_mem *nfit_mem;
1328	struct acpi_nfit_desc *acpi_desc;
1329
1330	dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
1331			event);
1332
1333	if (event != NFIT_NOTIFY_DIMM_HEALTH) {
1334		dev_dbg(dev->parent, "%s: unknown event: %d\n", dev_name(dev),
1335				event);
1336		return;
1337	}
1338
1339	acpi_desc = dev_get_drvdata(dev->parent);
1340	if (!acpi_desc)
1341		return;
1342
1343	/*
1344	 * If we successfully retrieved acpi_desc, then we know nfit_mem data
1345	 * is still valid.
1346	 */
1347	nfit_mem = dev_get_drvdata(dev);
1348	if (nfit_mem && nfit_mem->flags_attr)
1349		sysfs_notify_dirent(nfit_mem->flags_attr);
1350}
1351EXPORT_SYMBOL_GPL(__acpi_nvdimm_notify);
1352
1353static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
1354{
1355	struct acpi_device *adev = data;
1356	struct device *dev = &adev->dev;
1357
1358	device_lock(dev->parent);
1359	__acpi_nvdimm_notify(dev, event);
1360	device_unlock(dev->parent);
1361}
1362
1363static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1364		struct nfit_mem *nfit_mem, u32 device_handle)
1365{
1366	struct acpi_device *adev, *adev_dimm;
1367	struct device *dev = acpi_desc->dev;
1368	unsigned long dsm_mask;
1369	const u8 *uuid;
1370	int i;
1371
1372	/* nfit test assumes 1:1 relationship between commands and dsms */
1373	nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
1374	nfit_mem->family = NVDIMM_FAMILY_INTEL;
1375	adev = to_acpi_dev(acpi_desc);
1376	if (!adev)
1377		return 0;
1378
1379	adev_dimm = acpi_find_child_device(adev, device_handle, false);
1380	nfit_mem->adev = adev_dimm;
1381	if (!adev_dimm) {
1382		dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
1383				device_handle);
1384		return force_enable_dimms ? 0 : -ENODEV;
1385	}
1386
1387	if (ACPI_FAILURE(acpi_install_notify_handler(adev_dimm->handle,
1388		ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify, adev_dimm))) {
1389		dev_err(dev, "%s: notification registration failed\n",
1390				dev_name(&adev_dimm->dev));
1391		return -ENXIO;
1392	}
1393
1394	/*
1395	 * Until standardization materializes we need to consider 4
1396	 * different command sets.  Note, that checking for function0 (bit0)
1397	 * tells us if any commands are reachable through this uuid.
1398	 */
1399	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
1400		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
1401			break;
1402
1403	/* limit the supported commands to those that are publicly documented */
1404	nfit_mem->family = i;
1405	if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
1406		dsm_mask = 0x3fe;
1407		if (disable_vendor_specific)
1408			dsm_mask &= ~(1 << ND_CMD_VENDOR);
1409	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
1410		dsm_mask = 0x1c3c76;
1411	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
1412		dsm_mask = 0x1fe;
1413		if (disable_vendor_specific)
1414			dsm_mask &= ~(1 << 8);
1415	} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
1416		dsm_mask = 0xffffffff;
1417	} else {
1418		dev_dbg(dev, "unknown dimm command family\n");
1419		nfit_mem->family = -1;
1420		/* DSMs are optional, continue loading the driver... */
1421		return 0;
1422	}
1423
1424	uuid = to_nfit_uuid(nfit_mem->family);
1425	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
1426		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
1427			set_bit(i, &nfit_mem->dsm_mask);
1428
1429	return 0;
1430}
1431
1432static void shutdown_dimm_notify(void *data)
1433{
1434	struct acpi_nfit_desc *acpi_desc = data;
1435	struct nfit_mem *nfit_mem;
1436
1437	mutex_lock(&acpi_desc->init_mutex);
1438	/*
1439	 * Clear out the nfit_mem->flags_attr and shut down dimm event
1440	 * notifications.
1441	 */
1442	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
1443		struct acpi_device *adev_dimm = nfit_mem->adev;
1444
1445		if (nfit_mem->flags_attr) {
1446			sysfs_put(nfit_mem->flags_attr);
1447			nfit_mem->flags_attr = NULL;
1448		}
1449		if (adev_dimm)
1450			acpi_remove_notify_handler(adev_dimm->handle,
1451					ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
1452	}
1453	mutex_unlock(&acpi_desc->init_mutex);
1454}
1455
1456static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
1457{
1458	struct nfit_mem *nfit_mem;
1459	int dimm_count = 0, rc;
1460	struct nvdimm *nvdimm;
1461
1462	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
1463		struct acpi_nfit_flush_address *flush;
1464		unsigned long flags = 0, cmd_mask;
1465		u32 device_handle;
1466		u16 mem_flags;
1467
1468		device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
1469		nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
1470		if (nvdimm) {
1471			dimm_count++;
1472			continue;
1473		}
1474
1475		if (nfit_mem->bdw && nfit_mem->memdev_pmem)
1476			flags |= NDD_ALIASING;
1477
1478		mem_flags = __to_nfit_memdev(nfit_mem)->flags;
1479		if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
1480			flags |= NDD_UNARMED;
1481
1482		rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
1483		if (rc)
1484			continue;
1485
1486		/*
1487		 * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
1488		 * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
1489		 * userspace interface.
1490		 */
1491		cmd_mask = 1UL << ND_CMD_CALL;
1492		if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
1493			cmd_mask |= nfit_mem->dsm_mask;
1494
1495		flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
1496			: NULL;
1497		nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
1498				acpi_nfit_dimm_attribute_groups,
1499				flags, cmd_mask, flush ? flush->hint_count : 0,
1500				nfit_mem->flush_wpq);
1501		if (!nvdimm)
1502			return -ENOMEM;
1503
1504		nfit_mem->nvdimm = nvdimm;
1505		dimm_count++;
1506
1507		if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
1508			continue;
1509
1510		dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
1511				nvdimm_name(nvdimm),
1512		  mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
1513		  mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
1514		  mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
1515		  mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
1516
1517	}
1518
1519	rc = nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
1520	if (rc)
1521		return rc;
1522
1523	/*
1524	 * Now that dimms are successfully registered, and async registration
1525	 * is flushed, attempt to enable event notification.
1526	 */
1527	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
1528		struct kernfs_node *nfit_kernfs;
1529
1530		nvdimm = nfit_mem->nvdimm;
1531		nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
1532		if (nfit_kernfs)
1533			nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
1534					"flags");
1535		sysfs_put(nfit_kernfs);
1536		if (!nfit_mem->flags_attr)
1537			dev_warn(acpi_desc->dev, "%s: notifications disabled\n",
1538					nvdimm_name(nvdimm));
1539	}
1540
1541	return devm_add_action_or_reset(acpi_desc->dev, shutdown_dimm_notify,
1542			acpi_desc);
1543}
1544
1545static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
1546{
1547	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1548	const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
1549	struct acpi_device *adev;
1550	int i;
1551
1552	nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
1553	adev = to_acpi_dev(acpi_desc);
1554	if (!adev)
1555		return;
1556
1557	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
1558		if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
1559			set_bit(i, &nd_desc->cmd_mask);
1560}
1561
1562static ssize_t range_index_show(struct device *dev,
1563		struct device_attribute *attr, char *buf)
1564{
1565	struct nd_region *nd_region = to_nd_region(dev);
1566	struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
1567
1568	return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
1569}
1570static DEVICE_ATTR_RO(range_index);
1571
1572static struct attribute *acpi_nfit_region_attributes[] = {
1573	&dev_attr_range_index.attr,
1574	NULL,
1575};
1576
1577static struct attribute_group acpi_nfit_region_attribute_group = {
1578	.name = "nfit",
1579	.attrs = acpi_nfit_region_attributes,
1580};
1581
1582static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
1583	&nd_region_attribute_group,
1584	&nd_mapping_attribute_group,
1585	&nd_device_attribute_group,
1586	&nd_numa_attribute_group,
1587	&acpi_nfit_region_attribute_group,
1588	NULL,
1589};
1590
1591/* enough info to uniquely specify an interleave set */
1592struct nfit_set_info {
1593	struct nfit_set_info_map {
1594		u64 region_offset;
1595		u32 serial_number;
1596		u32 pad;
1597	} mapping[0];
1598};
1599
1600static size_t sizeof_nfit_set_info(int num_mappings)
1601{
1602	return sizeof(struct nfit_set_info)
1603		+ num_mappings * sizeof(struct nfit_set_info_map);
1604}
1605
1606static int cmp_map_compat(const void *m0, const void *m1)
1607{
1608	const struct nfit_set_info_map *map0 = m0;
1609	const struct nfit_set_info_map *map1 = m1;
1610
1611	return memcmp(&map0->region_offset, &map1->region_offset,
1612			sizeof(u64));
1613}
1614
1615static int cmp_map(const void *m0, const void *m1)
1616{
1617	const struct nfit_set_info_map *map0 = m0;
1618	const struct nfit_set_info_map *map1 = m1;
1619
1620	return map0->region_offset - map1->region_offset;
1621}
1622
1623/* Retrieve the nth entry referencing this spa */
1624static struct acpi_nfit_memory_map *memdev_from_spa(
1625		struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
1626{
1627	struct nfit_memdev *nfit_memdev;
1628
1629	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
1630		if (nfit_memdev->memdev->range_index == range_index)
1631			if (n-- == 0)
1632				return nfit_memdev->memdev;
1633	return NULL;
1634}
1635
1636static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
1637		struct nd_region_desc *ndr_desc,
1638		struct acpi_nfit_system_address *spa)
1639{
1640	int i, spa_type = nfit_spa_type(spa);
1641	struct device *dev = acpi_desc->dev;
1642	struct nd_interleave_set *nd_set;
1643	u16 nr = ndr_desc->num_mappings;
1644	struct nfit_set_info *info;
1645
1646	if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
1647		/* pass */;
1648	else
1649		return 0;
1650
1651	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
1652	if (!nd_set)
1653		return -ENOMEM;
1654
1655	info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
1656	if (!info)
1657		return -ENOMEM;
1658	for (i = 0; i < nr; i++) {
1659		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
1660		struct nfit_set_info_map *map = &info->mapping[i];
1661		struct nvdimm *nvdimm = mapping->nvdimm;
1662		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1663		struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
1664				spa->range_index, i);
1665
1666		if (!memdev || !nfit_mem->dcr) {
1667			dev_err(dev, "%s: failed to find DCR\n", __func__);
1668			return -ENODEV;
1669		}
1670
1671		map->region_offset = memdev->region_offset;
1672		map->serial_number = nfit_mem->dcr->serial_number;
1673	}
1674
1675	sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
1676			cmp_map, NULL);
1677	nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
1678
1679	/* support namespaces created with the wrong sort order */
1680	sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
1681			cmp_map_compat, NULL);
1682	nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
1683
1684	ndr_desc->nd_set = nd_set;
1685	devm_kfree(dev, info);
1686
1687	return 0;
1688}
1689
1690static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
1691{
1692	struct acpi_nfit_interleave *idt = mmio->idt;
1693	u32 sub_line_offset, line_index, line_offset;
1694	u64 line_no, table_skip_count, table_offset;
1695
1696	line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
1697	table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
1698	line_offset = idt->line_offset[line_index]
1699		* mmio->line_size;
1700	table_offset = table_skip_count * mmio->table_size;
1701
1702	return mmio->base_offset + line_offset + table_offset + sub_line_offset;
1703}
1704
1705static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
1706{
1707	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
1708	u64 offset = nfit_blk->stat_offset + mmio->size * bw;
1709	const u32 STATUS_MASK = 0x80000037;
1710
1711	if (mmio->num_lines)
1712		offset = to_interleave_offset(offset, mmio);
1713
1714	return readl(mmio->addr.base + offset) & STATUS_MASK;
1715}
1716
1717static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
1718		resource_size_t dpa, unsigned int len, unsigned int write)
1719{
1720	u64 cmd, offset;
1721	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
1722
1723	enum {
1724		BCW_OFFSET_MASK = (1ULL << 48)-1,
1725		BCW_LEN_SHIFT = 48,
1726		BCW_LEN_MASK = (1ULL << 8) - 1,
1727		BCW_CMD_SHIFT = 56,
1728	};
1729
1730	cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
1731	len = len >> L1_CACHE_SHIFT;
1732	cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
1733	cmd |= ((u64) write) << BCW_CMD_SHIFT;
1734
1735	offset = nfit_blk->cmd_offset + mmio->size * bw;
1736	if (mmio->num_lines)
1737		offset = to_interleave_offset(offset, mmio);
1738
1739	writeq(cmd, mmio->addr.base + offset);
1740	nvdimm_flush(nfit_blk->nd_region);
1741
1742	if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
1743		readq(mmio->addr.base + offset);
1744}
1745
1746static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
1747		resource_size_t dpa, void *iobuf, size_t len, int rw,
1748		unsigned int lane)
1749{
1750	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
1751	unsigned int copied = 0;
1752	u64 base_offset;
1753	int rc;
1754
1755	base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
1756		+ lane * mmio->size;
1757	write_blk_ctl(nfit_blk, lane, dpa, len, rw);
1758	while (len) {
1759		unsigned int c;
1760		u64 offset;
1761
1762		if (mmio->num_lines) {
1763			u32 line_offset;
1764
1765			offset = to_interleave_offset(base_offset + copied,
1766					mmio);
1767			div_u64_rem(offset, mmio->line_size, &line_offset);
1768			c = min_t(size_t, len, mmio->line_size - line_offset);
1769		} else {
1770			offset = base_offset + nfit_blk->bdw_offset;
1771			c = len;
1772		}
1773
1774		if (rw)
1775			memcpy_to_pmem(mmio->addr.aperture + offset,
1776					iobuf + copied, c);
1777		else {
1778			if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
1779				mmio_flush_range((void __force *)
1780					mmio->addr.aperture + offset, c);
1781
1782			memcpy_from_pmem(iobuf + copied,
1783					mmio->addr.aperture + offset, c);
1784		}
1785
1786		copied += c;
1787		len -= c;
1788	}
1789
1790	if (rw)
1791		nvdimm_flush(nfit_blk->nd_region);
1792
1793	rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
1794	return rc;
1795}
1796
1797static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
1798		resource_size_t dpa, void *iobuf, u64 len, int rw)
1799{
1800	struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
1801	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
1802	struct nd_region *nd_region = nfit_blk->nd_region;
1803	unsigned int lane, copied = 0;
1804	int rc = 0;
1805
1806	lane = nd_region_acquire_lane(nd_region);
1807	while (len) {
1808		u64 c = min(len, mmio->size);
1809
1810		rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
1811				iobuf + copied, c, rw, lane);
1812		if (rc)
1813			break;
1814
1815		copied += c;
1816		len -= c;
1817	}
1818	nd_region_release_lane(nd_region, lane);
1819
1820	return rc;
1821}
1822
1823static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
1824		struct acpi_nfit_interleave *idt, u16 interleave_ways)
1825{
1826	if (idt) {
1827		mmio->num_lines = idt->line_count;
1828		mmio->line_size = idt->line_size;
1829		if (interleave_ways == 0)
1830			return -ENXIO;
1831		mmio->table_size = mmio->num_lines * interleave_ways
1832			* mmio->line_size;
1833	}
1834
1835	return 0;
1836}
1837
1838static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
1839		struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
1840{
1841	struct nd_cmd_dimm_flags flags;
1842	int rc;
1843
1844	memset(&flags, 0, sizeof(flags));
1845	rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
1846			sizeof(flags), NULL);
1847
1848	if (rc >= 0 && flags.status == 0)
1849		nfit_blk->dimm_flags = flags.flags;
1850	else if (rc == -ENOTTY) {
1851		/* fall back to a conservative default */
1852		nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
1853		rc = 0;
1854	} else
1855		rc = -ENXIO;
1856
1857	return rc;
1858}
1859
1860static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
1861		struct device *dev)
1862{
1863	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1864	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
1865	struct nfit_blk_mmio *mmio;
1866	struct nfit_blk *nfit_blk;
1867	struct nfit_mem *nfit_mem;
1868	struct nvdimm *nvdimm;
1869	int rc;
1870
1871	nvdimm = nd_blk_region_to_dimm(ndbr);
1872	nfit_mem = nvdimm_provider_data(nvdimm);
1873	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
1874		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
1875				nfit_mem ? "" : " nfit_mem",
1876				(nfit_mem && nfit_mem->dcr) ? "" : " dcr",
1877				(nfit_mem && nfit_mem->bdw) ? "" : " bdw");
1878		return -ENXIO;
1879	}
1880
1881	nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
1882	if (!nfit_blk)
1883		return -ENOMEM;
1884	nd_blk_region_set_provider_data(ndbr, nfit_blk);
1885	nfit_blk->nd_region = to_nd_region(dev);
1886
1887	/* map block aperture memory */
1888	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
1889	mmio = &nfit_blk->mmio[BDW];
1890	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
1891                        nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
1892	if (!mmio->addr.base) {
1893		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
1894				nvdimm_name(nvdimm));
1895		return -ENOMEM;
1896	}
1897	mmio->size = nfit_mem->bdw->size;
1898	mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
1899	mmio->idt = nfit_mem->idt_bdw;
1900	mmio->spa = nfit_mem->spa_bdw;
1901	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
1902			nfit_mem->memdev_bdw->interleave_ways);
1903	if (rc) {
1904		dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
1905				__func__, nvdimm_name(nvdimm));
1906		return rc;
1907	}
1908
1909	/* map block control memory */
1910	nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
1911	nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
1912	mmio = &nfit_blk->mmio[DCR];
1913	mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
1914			nfit_mem->spa_dcr->length);
1915	if (!mmio->addr.base) {
1916		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
1917				nvdimm_name(nvdimm));
1918		return -ENOMEM;
1919	}
1920	mmio->size = nfit_mem->dcr->window_size;
1921	mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
1922	mmio->idt = nfit_mem->idt_dcr;
1923	mmio->spa = nfit_mem->spa_dcr;
1924	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
1925			nfit_mem->memdev_dcr->interleave_ways);
1926	if (rc) {
1927		dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
1928				__func__, nvdimm_name(nvdimm));
1929		return rc;
1930	}
1931
1932	rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
1933	if (rc < 0) {
1934		dev_dbg(dev, "%s: %s failed get DIMM flags\n",
1935				__func__, nvdimm_name(nvdimm));
1936		return rc;
1937	}
1938
1939	if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
1940		dev_warn(dev, "unable to guarantee persistence of writes\n");
1941
1942	if (mmio->line_size == 0)
1943		return 0;
1944
1945	if ((u32) nfit_blk->cmd_offset % mmio->line_size
1946			+ 8 > mmio->line_size) {
1947		dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
1948		return -ENXIO;
1949	} else if ((u32) nfit_blk->stat_offset % mmio->line_size
1950			+ 8 > mmio->line_size) {
1951		dev_dbg(dev, "stat_offset crosses interleave boundary\n");
1952		return -ENXIO;
1953	}
1954
1955	return 0;
1956}
1957
1958static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
1959		struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
1960{
1961	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1962	struct acpi_nfit_system_address *spa = nfit_spa->spa;
1963	int cmd_rc, rc;
1964
1965	cmd->address = spa->address;
1966	cmd->length = spa->length;
1967	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
1968			sizeof(*cmd), &cmd_rc);
1969	if (rc < 0)
1970		return rc;
1971	return cmd_rc;
1972}
1973
1974static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
1975{
1976	int rc;
1977	int cmd_rc;
1978	struct nd_cmd_ars_start ars_start;
1979	struct acpi_nfit_system_address *spa = nfit_spa->spa;
1980	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1981
1982	memset(&ars_start, 0, sizeof(ars_start));
1983	ars_start.address = spa->address;
1984	ars_start.length = spa->length;
1985	if (nfit_spa_type(spa) == NFIT_SPA_PM)
1986		ars_start.type = ND_ARS_PERSISTENT;
1987	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
1988		ars_start.type = ND_ARS_VOLATILE;
1989	else
1990		return -ENOTTY;
1991
1992	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
1993			sizeof(ars_start), &cmd_rc);
1994
1995	if (rc < 0)
1996		return rc;
1997	return cmd_rc;
1998}
1999
2000static int ars_continue(struct acpi_nfit_desc *acpi_desc)
2001{
2002	int rc, cmd_rc;
2003	struct nd_cmd_ars_start ars_start;
2004	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
2005	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
2006
2007	memset(&ars_start, 0, sizeof(ars_start));
2008	ars_start.address = ars_status->restart_address;
2009	ars_start.length = ars_status->restart_length;
2010	ars_start.type = ars_status->type;
2011	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
2012			sizeof(ars_start), &cmd_rc);
2013	if (rc < 0)
2014		return rc;
2015	return cmd_rc;
2016}
2017
2018static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
2019{
2020	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
2021	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
2022	int rc, cmd_rc;
2023
2024	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
2025			acpi_desc->ars_status_size, &cmd_rc);
2026	if (rc < 0)
2027		return rc;
2028	return cmd_rc;
2029}
2030
2031static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
2032		struct nd_cmd_ars_status *ars_status)
2033{
2034	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
2035	int rc;
2036	u32 i;
2037
2038	/*
2039	 * First record starts at 44 byte offset from the start of the
2040	 * payload.
2041	 */
2042	if (ars_status->out_length < 44)
2043		return 0;
2044	for (i = 0; i < ars_status->num_records; i++) {
2045		/* only process full records */
2046		if (ars_status->out_length
2047				< 44 + sizeof(struct nd_ars_record) * (i + 1))
2048			break;
2049		rc = nvdimm_bus_add_poison(nvdimm_bus,
2050				ars_status->records[i].err_address,
2051				ars_status->records[i].length);
2052		if (rc)
2053			return rc;
2054	}
2055	if (i < ars_status->num_records)
2056		dev_warn(acpi_desc->dev, "detected truncated ars results\n");
2057
2058	return 0;
2059}
2060
/*
 * devm action callback: @data is the struct resource that
 * acpi_nfit_insert_resource() inserted; take it back out of the tree.
 */
static void acpi_nfit_remove_resource(void *data)
{
	remove_resource(data);
}
2067
2068static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
2069		struct nd_region_desc *ndr_desc)
2070{
2071	struct resource *res, *nd_res = ndr_desc->res;
2072	int is_pmem, ret;
2073
2074	/* No operation if the region is already registered as PMEM */
2075	is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
2076				IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
2077	if (is_pmem == REGION_INTERSECTS)
2078		return 0;
2079
2080	res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
2081	if (!res)
2082		return -ENOMEM;
2083
2084	res->name = "Persistent Memory";
2085	res->start = nd_res->start;
2086	res->end = nd_res->end;
2087	res->flags = IORESOURCE_MEM;
2088	res->desc = IORES_DESC_PERSISTENT_MEMORY;
2089
2090	ret = insert_resource(&iomem_resource, res);
2091	if (ret)
2092		return ret;
2093
2094	ret = devm_add_action_or_reset(acpi_desc->dev,
2095					acpi_nfit_remove_resource,
2096					res);
2097	if (ret)
2098		return ret;
2099
2100	return 0;
2101}
2102
2103static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
2104		struct nd_mapping_desc *mapping, struct nd_region_desc *ndr_desc,
2105		struct acpi_nfit_memory_map *memdev,
2106		struct nfit_spa *nfit_spa)
2107{
2108	struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
2109			memdev->device_handle);
2110	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2111	struct nd_blk_region_desc *ndbr_desc;
2112	struct nfit_mem *nfit_mem;
2113	int blk_valid = 0;
2114
2115	if (!nvdimm) {
2116		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
2117				spa->range_index, memdev->device_handle);
2118		return -ENODEV;
2119	}
2120
2121	mapping->nvdimm = nvdimm;
2122	switch (nfit_spa_type(spa)) {
2123	case NFIT_SPA_PM:
2124	case NFIT_SPA_VOLATILE:
2125		mapping->start = memdev->address;
2126		mapping->size = memdev->region_size;
2127		break;
2128	case NFIT_SPA_DCR:
2129		nfit_mem = nvdimm_provider_data(nvdimm);
2130		if (!nfit_mem || !nfit_mem->bdw) {
2131			dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
2132					spa->range_index, nvdimm_name(nvdimm));
2133		} else {
2134			mapping->size = nfit_mem->bdw->capacity;
2135			mapping->start = nfit_mem->bdw->start_address;
2136			ndr_desc->num_lanes = nfit_mem->bdw->windows;
2137			blk_valid = 1;
2138		}
2139
2140		ndr_desc->mapping = mapping;
2141		ndr_desc->num_mappings = blk_valid;
2142		ndbr_desc = to_blk_region_desc(ndr_desc);
2143		ndbr_desc->enable = acpi_nfit_blk_region_enable;
2144		ndbr_desc->do_io = acpi_desc->blk_do_io;
2145		nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
2146				ndr_desc);
2147		if (!nfit_spa->nd_region)
2148			return -ENOMEM;
2149		break;
2150	}
2151
2152	return 0;
2153}
2154
2155static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
2156{
2157	return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
2158		nfit_spa_type(spa) == NFIT_SPA_VCD   ||
2159		nfit_spa_type(spa) == NFIT_SPA_PDISK ||
2160		nfit_spa_type(spa) == NFIT_SPA_PCD);
2161}
2162
2163static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
2164		struct nfit_spa *nfit_spa)
2165{
2166	static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS];
2167	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2168	struct nd_blk_region_desc ndbr_desc;
2169	struct nd_region_desc *ndr_desc;
2170	struct nfit_memdev *nfit_memdev;
2171	struct nvdimm_bus *nvdimm_bus;
2172	struct resource res;
2173	int count = 0, rc;
2174
2175	if (nfit_spa->nd_region)
2176		return 0;
2177
2178	if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
2179		dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
2180				__func__);
2181		return 0;
2182	}
2183
2184	memset(&res, 0, sizeof(res));
2185	memset(&mappings, 0, sizeof(mappings));
2186	memset(&ndbr_desc, 0, sizeof(ndbr_desc));
2187	res.start = spa->address;
2188	res.end = res.start + spa->length - 1;
2189	ndr_desc = &ndbr_desc.ndr_desc;
2190	ndr_desc->res = &res;
2191	ndr_desc->provider_data = nfit_spa;
2192	ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
2193	if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
2194		ndr_desc->numa_node = acpi_map_pxm_to_online_node(
2195						spa->proximity_domain);
2196	else
2197		ndr_desc->numa_node = NUMA_NO_NODE;
2198
2199	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
2200		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
2201		struct nd_mapping_desc *mapping;
2202
2203		if (memdev->range_index != spa->range_index)
2204			continue;
2205		if (count >= ND_MAX_MAPPINGS) {
2206			dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
2207					spa->range_index, ND_MAX_MAPPINGS);
2208			return -ENXIO;
2209		}
2210		mapping = &mappings[count++];
2211		rc = acpi_nfit_init_mapping(acpi_desc, mapping, ndr_desc,
2212				memdev, nfit_spa);
2213		if (rc)
2214			goto out;
2215	}
2216
2217	ndr_desc->mapping = mappings;
2218	ndr_desc->num_mappings = count;
2219	rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
2220	if (rc)
2221		goto out;
2222
2223	nvdimm_bus = acpi_desc->nvdimm_bus;
2224	if (nfit_spa_type(spa) == NFIT_SPA_PM) {
2225		rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
2226		if (rc) {
2227			dev_warn(acpi_desc->dev,
2228				"failed to insert pmem resource to iomem: %d\n",
2229				rc);
2230			goto out;
2231		}
2232
2233		nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
2234				ndr_desc);
2235		if (!nfit_spa->nd_region)
2236			rc = -ENOMEM;
2237	} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
2238		nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
2239				ndr_desc);
2240		if (!nfit_spa->nd_region)
2241			rc = -ENOMEM;
2242	} else if (nfit_spa_is_virtual(spa)) {
2243		nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
2244				ndr_desc);
2245		if (!nfit_spa->nd_region)
2246			rc = -ENOMEM;
2247	}
2248
2249 out:
2250	if (rc)
2251		dev_err(acpi_desc->dev, "failed to register spa range %d\n",
2252				nfit_spa->spa->range_index);
2253	return rc;
2254}
2255
2256static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
2257		u32 max_ars)
2258{
2259	struct device *dev = acpi_desc->dev;
2260	struct nd_cmd_ars_status *ars_status;
2261
2262	if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
2263		memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
2264		return 0;
2265	}
2266
2267	if (acpi_desc->ars_status)
2268		devm_kfree(dev, acpi_desc->ars_status);
2269	acpi_desc->ars_status = NULL;
2270	ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
2271	if (!ars_status)
2272		return -ENOMEM;
2273	acpi_desc->ars_status = ars_status;
2274	acpi_desc->ars_status_size = max_ars;
2275	return 0;
2276}
2277
2278static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
2279		struct nfit_spa *nfit_spa)
2280{
2281	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2282	int rc;
2283
2284	if (!nfit_spa->max_ars) {
2285		struct nd_cmd_ars_cap ars_cap;
2286
2287		memset(&ars_cap, 0, sizeof(ars_cap));
2288		rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
2289		if (rc < 0)
2290			return rc;
2291		nfit_spa->max_ars = ars_cap.max_ars_out;
2292		nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
2293		/* check that the supported scrub types match the spa type */
2294		if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
2295				((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
2296			return -ENOTTY;
2297		else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
2298				((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
2299			return -ENOTTY;
2300	}
2301
2302	if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
2303		return -ENOMEM;
2304
2305	rc = ars_get_status(acpi_desc);
2306	if (rc < 0 && rc != -ENOSPC)
2307		return rc;
2308
2309	if (ars_status_process_records(acpi_desc, acpi_desc->ars_status))
2310		return -ENOMEM;
2311
2312	return 0;
2313}
2314
2315static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
2316		struct nfit_spa *nfit_spa)
2317{
2318	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2319	unsigned int overflow_retry = scrub_overflow_abort;
2320	u64 init_ars_start = 0, init_ars_len = 0;
2321	struct device *dev = acpi_desc->dev;
2322	unsigned int tmo = scrub_timeout;
2323	int rc;
2324
2325	if (!nfit_spa->ars_required || !nfit_spa->nd_region)
2326		return;
2327
2328	rc = ars_start(acpi_desc, nfit_spa);
2329	/*
2330	 * If we timed out the initial scan we'll still be busy here,
2331	 * and will wait another timeout before giving up permanently.
2332	 */
2333	if (rc < 0 && rc != -EBUSY)
2334		return;
2335
2336	do {
2337		u64 ars_start, ars_len;
2338
2339		if (acpi_desc->cancel)
2340			break;
2341		rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
2342		if (rc == -ENOTTY)
2343			break;
2344		if (rc == -EBUSY && !tmo) {
2345			dev_warn(dev, "range %d ars timeout, aborting\n",
2346					spa->range_index);
2347			break;
2348		}
2349
2350		if (rc == -EBUSY) {
2351			/*
2352			 * Note, entries may be appended to the list
2353			 * while the lock is dropped, but the workqueue
2354			 * being active prevents entries being deleted /
2355			 * freed.
2356			 */
2357			mutex_unlock(&acpi_desc->init_mutex);
2358			ssleep(1);
2359			tmo--;
2360			mutex_lock(&acpi_desc->init_mutex);
2361			continue;
2362		}
2363
2364		/* we got some results, but there are more pending... */
2365		if (rc == -ENOSPC && overflow_retry--) {
2366			if (!init_ars_len) {
2367				init_ars_len = acpi_desc->ars_status->length;
2368				init_ars_start = acpi_desc->ars_status->address;
2369			}
2370			rc = ars_continue(acpi_desc);
2371		}
2372
2373		if (rc < 0) {
2374			dev_warn(dev, "range %d ars continuation failed\n",
2375					spa->range_index);
2376			break;
2377		}
2378
2379		if (init_ars_len) {
2380			ars_start = init_ars_start;
2381			ars_len = init_ars_len;
2382		} else {
2383			ars_start = acpi_desc->ars_status->address;
2384			ars_len = acpi_desc->ars_status->length;
2385		}
2386		dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
2387				spa->range_index, ars_start, ars_len);
2388		/* notify the region about new poison entries */
2389		nvdimm_region_notify(nfit_spa->nd_region,
2390				NVDIMM_REVALIDATE_POISON);
2391		break;
2392	} while (1);
2393}
2394
2395static void acpi_nfit_scrub(struct work_struct *work)
2396{
2397	struct device *dev;
2398	u64 init_scrub_length = 0;
2399	struct nfit_spa *nfit_spa;
2400	u64 init_scrub_address = 0;
2401	bool init_ars_done = false;
2402	struct acpi_nfit_desc *acpi_desc;
2403	unsigned int tmo = scrub_timeout;
2404	unsigned int overflow_retry = scrub_overflow_abort;
2405
2406	acpi_desc = container_of(work, typeof(*acpi_desc), work);
2407	dev = acpi_desc->dev;
2408
2409	/*
2410	 * We scrub in 2 phases.  The first phase waits for any platform
2411	 * firmware initiated scrubs to complete and then we go search for the
2412	 * affected spa regions to mark them scanned.  In the second phase we
2413	 * initiate a directed scrub for every range that was not scrubbed in
2414	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
2415	 * the first phase, but really only care about running phase 2, where
2416	 * regions can be notified of new poison.
2417	 */
2418
2419	/* process platform firmware initiated scrubs */
2420 retry:
2421	mutex_lock(&acpi_desc->init_mutex);
2422	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2423		struct nd_cmd_ars_status *ars_status;
2424		struct acpi_nfit_system_address *spa;
2425		u64 ars_start, ars_len;
2426		int rc;
2427
2428		if (acpi_desc->cancel)
2429			break;
2430
2431		if (nfit_spa->nd_region)
2432			continue;
2433
2434		if (init_ars_done) {
2435			/*
2436			 * No need to re-query, we're now just
2437			 * reconciling all the ranges covered by the
2438			 * initial scrub
2439			 */
2440			rc = 0;
2441		} else
2442			rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
2443
2444		if (rc == -ENOTTY) {
2445			/* no ars capability, just register spa and move on */
2446			acpi_nfit_register_region(acpi_desc, nfit_spa);
2447			continue;
2448		}
2449
2450		if (rc == -EBUSY && !tmo) {
2451			/* fallthrough to directed scrub in phase 2 */
2452			dev_warn(dev, "timeout awaiting ars results, continuing...\n");
2453			break;
2454		} else if (rc == -EBUSY) {
2455			mutex_unlock(&acpi_desc->init_mutex);
2456			ssleep(1);
2457			tmo--;
2458			goto retry;
2459		}
2460
2461		/* we got some results, but there are more pending... */
2462		if (rc == -ENOSPC && overflow_retry--) {
2463			ars_status = acpi_desc->ars_status;
2464			/*
2465			 * Record the original scrub range, so that we
2466			 * can recall all the ranges impacted by the
2467			 * initial scrub.
2468			 */
2469			if (!init_scrub_length) {
2470				init_scrub_length = ars_status->length;
2471				init_scrub_address = ars_status->address;
2472			}
2473			rc = ars_continue(acpi_desc);
2474			if (rc == 0) {
2475				mutex_unlock(&acpi_desc->init_mutex);
2476				goto retry;
2477			}
2478		}
2479
2480		if (rc < 0) {
2481			/*
2482			 * Initial scrub failed, we'll give it one more
2483			 * try below...
2484			 */
2485			break;
2486		}
2487
2488		/* We got some final results, record completed ranges */
2489		ars_status = acpi_desc->ars_status;
2490		if (init_scrub_length) {
2491			ars_start = init_scrub_address;
2492			ars_len = ars_start + init_scrub_length;
2493		} else {
2494			ars_start = ars_status->address;
2495			ars_len = ars_status->length;
2496		}
2497		spa = nfit_spa->spa;
2498
2499		if (!init_ars_done) {
2500			init_ars_done = true;
2501			dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
2502					ars_start, ars_len);
2503		}
2504		if (ars_start <= spa->address && ars_start + ars_len
2505				>= spa->address + spa->length)
2506			acpi_nfit_register_region(acpi_desc, nfit_spa);
2507	}
2508
2509	/*
2510	 * For all the ranges not covered by an initial scrub we still
2511	 * want to see if there are errors, but it's ok to discover them
2512	 * asynchronously.
2513	 */
2514	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2515		/*
2516		 * Flag all the ranges that still need scrubbing, but
2517		 * register them now to make data available.
2518		 */
2519		if (!nfit_spa->nd_region) {
2520			nfit_spa->ars_required = 1;
2521			acpi_nfit_register_region(acpi_desc, nfit_spa);
2522		}
2523	}
2524
2525	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
2526		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
2527	acpi_desc->scrub_count++;
2528	if (acpi_desc->scrub_count_state)
2529		sysfs_notify_dirent(acpi_desc->scrub_count_state);
2530	mutex_unlock(&acpi_desc->init_mutex);
2531}
2532
2533static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
2534{
2535	struct nfit_spa *nfit_spa;
2536	int rc;
2537
2538	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
2539		if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
2540			/* BLK regions don't need to wait for ars results */
2541			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
2542			if (rc)
2543				return rc;
2544		}
2545
2546	queue_work(nfit_wq, &acpi_desc->work);
2547	return 0;
2548}
2549
2550static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
2551		struct nfit_table_prev *prev)
2552{
2553	struct device *dev = acpi_desc->dev;
2554
2555	if (!list_empty(&prev->spas) ||
2556			!list_empty(&prev->memdevs) ||
2557			!list_empty(&prev->dcrs) ||
2558			!list_empty(&prev->bdws) ||
2559			!list_empty(&prev->idts) ||
2560			!list_empty(&prev->flushes)) {
2561		dev_err(dev, "new nfit deletes entries (unsupported)\n");
2562		return -ENXIO;
2563	}
2564	return 0;
2565}
2566
2567static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
2568{
2569	struct device *dev = acpi_desc->dev;
2570	struct kernfs_node *nfit;
2571	struct device *bus_dev;
2572
2573	if (!ars_supported(acpi_desc->nvdimm_bus))
2574		return 0;
2575
2576	bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2577	nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
2578	if (!nfit) {
2579		dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
2580		return -ENODEV;
2581	}
2582	acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
2583	sysfs_put(nfit);
2584	if (!acpi_desc->scrub_count_state) {
2585		dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
2586		return -ENODEV;
2587	}
2588
2589	return 0;
2590}
2591
2592static void acpi_nfit_destruct(void *data)
2593{
2594	struct acpi_nfit_desc *acpi_desc = data;
2595	struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2596
2597	/*
2598	 * Destruct under acpi_desc_lock so that nfit_handle_mce does not
2599	 * race teardown
2600	 */
2601	mutex_lock(&acpi_desc_lock);
2602	acpi_desc->cancel = 1;
2603	/*
2604	 * Bounce the nvdimm bus lock to make sure any in-flight
2605	 * acpi_nfit_ars_rescan() submissions have had a chance to
2606	 * either submit or see ->cancel set.
2607	 */
2608	device_lock(bus_dev);
2609	device_unlock(bus_dev);
2610
2611	flush_workqueue(nfit_wq);
2612	if (acpi_desc->scrub_count_state)
2613		sysfs_put(acpi_desc->scrub_count_state);
2614	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
2615	acpi_desc->nvdimm_bus = NULL;
2616	list_del(&acpi_desc->list);
2617	mutex_unlock(&acpi_desc_lock);
2618}
2619
2620int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
2621{
2622	struct device *dev = acpi_desc->dev;
2623	struct nfit_table_prev prev;
2624	const void *end;
2625	int rc;
2626
2627	if (!acpi_desc->nvdimm_bus) {
2628		acpi_nfit_init_dsms(acpi_desc);
2629
2630		acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
2631				&acpi_desc->nd_desc);
2632		if (!acpi_desc->nvdimm_bus)
2633			return -ENOMEM;
2634
2635		rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
2636				acpi_desc);
2637		if (rc)
2638			return rc;
2639
2640		rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
2641		if (rc)
2642			return rc;
2643
2644		/* register this acpi_desc for mce notifications */
2645		mutex_lock(&acpi_desc_lock);
2646		list_add_tail(&acpi_desc->list, &acpi_descs);
2647		mutex_unlock(&acpi_desc_lock);
2648	}
2649
2650	mutex_lock(&acpi_desc->init_mutex);
2651
2652	INIT_LIST_HEAD(&prev.spas);
2653	INIT_LIST_HEAD(&prev.memdevs);
2654	INIT_LIST_HEAD(&prev.dcrs);
2655	INIT_LIST_HEAD(&prev.bdws);
2656	INIT_LIST_HEAD(&prev.idts);
2657	INIT_LIST_HEAD(&prev.flushes);
2658
2659	list_cut_position(&prev.spas, &acpi_desc->spas,
2660				acpi_desc->spas.prev);
2661	list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
2662				acpi_desc->memdevs.prev);
2663	list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
2664				acpi_desc->dcrs.prev);
2665	list_cut_position(&prev.bdws, &acpi_desc->bdws,
2666				acpi_desc->bdws.prev);
2667	list_cut_position(&prev.idts, &acpi_desc->idts,
2668				acpi_desc->idts.prev);
2669	list_cut_position(&prev.flushes, &acpi_desc->flushes,
2670				acpi_desc->flushes.prev);
2671
2672	end = data + sz;
2673	while (!IS_ERR_OR_NULL(data))
2674		data = add_table(acpi_desc, &prev, data, end);
2675
2676	if (IS_ERR(data)) {
2677		dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
2678				PTR_ERR(data));
2679		rc = PTR_ERR(data);
2680		goto out_unlock;
2681	}
2682
2683	rc = acpi_nfit_check_deletions(acpi_desc, &prev);
2684	if (rc)
2685		goto out_unlock;
2686
2687	rc = nfit_mem_init(acpi_desc);
2688	if (rc)
2689		goto out_unlock;
2690
2691	rc = acpi_nfit_register_dimms(acpi_desc);
2692	if (rc)
2693		goto out_unlock;
2694
2695	rc = acpi_nfit_register_regions(acpi_desc);
2696
2697 out_unlock:
2698	mutex_unlock(&acpi_desc->init_mutex);
2699	return rc;
2700}
2701EXPORT_SYMBOL_GPL(acpi_nfit_init);
2702
/*
 * Marker work item used by acpi_nfit_flush_probe(): when the item runs,
 * flush_probe() completes @cmp.
 */
2703struct acpi_nfit_flush_work {
2704	struct work_struct work;
2705	struct completion cmp;
2706};
2707
2708static void flush_probe(struct work_struct *work)
2709{
2710	struct acpi_nfit_flush_work *flush;
2711
2712	flush = container_of(work, typeof(*flush), work);
2713	complete(&flush->cmp);
2714}
2715
2716static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
2717{
2718	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
2719	struct device *dev = acpi_desc->dev;
2720	struct acpi_nfit_flush_work flush;
2721	int rc;
2722
2723	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
2724	device_lock(dev);
2725	device_unlock(dev);
2726
2727	/*
2728	 * Scrub work could take 10s of seconds, userspace may give up so we
2729	 * need to be interruptible while waiting.
2730	 */
2731	INIT_WORK_ONSTACK(&flush.work, flush_probe);
2732	COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
2733	queue_work(nfit_wq, &flush.work);
2734
2735	rc = wait_for_completion_interruptible(&flush.cmp);
2736	cancel_work_sync(&flush.work);
2737	return rc;
2738}
2739
2740static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
2741		struct nvdimm *nvdimm, unsigned int cmd)
2742{
2743	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
2744
2745	if (nvdimm)
2746		return 0;
2747	if (cmd != ND_CMD_ARS_START)
2748		return 0;
2749
2750	/*
2751	 * The kernel and userspace may race to initiate a scrub, but
2752	 * the scrub thread is prepared to lose that initial race.  It
2753	 * just needs guarantees that any ars it initiates are not
2754	 * interrupted by any intervening start reqeusts from userspace.
2755	 */
2756	if (work_busy(&acpi_desc->work))
2757		return -EBUSY;
2758
2759	return 0;
2760}
2761
2762int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
2763{
2764	struct device *dev = acpi_desc->dev;
2765	struct nfit_spa *nfit_spa;
2766
2767	if (work_busy(&acpi_desc->work))
2768		return -EBUSY;
2769
2770	if (acpi_desc->cancel)
2771		return 0;
2772
2773	mutex_lock(&acpi_desc->init_mutex);
2774	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2775		struct acpi_nfit_system_address *spa = nfit_spa->spa;
2776
2777		if (nfit_spa_type(spa) != NFIT_SPA_PM)
2778			continue;
2779
2780		nfit_spa->ars_required = 1;
2781	}
2782	queue_work(nfit_wq, &acpi_desc->work);
2783	dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
2784	mutex_unlock(&acpi_desc->init_mutex);
2785
2786	return 0;
2787}
2788
2789void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
2790{
2791	struct nvdimm_bus_descriptor *nd_desc;
2792
2793	dev_set_drvdata(dev, acpi_desc);
2794	acpi_desc->dev = dev;
2795	acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
2796	nd_desc = &acpi_desc->nd_desc;
2797	nd_desc->provider_name = "ACPI.NFIT";
2798	nd_desc->module = THIS_MODULE;
2799	nd_desc->ndctl = acpi_nfit_ctl;
2800	nd_desc->flush_probe = acpi_nfit_flush_probe;
2801	nd_desc->clear_to_send = acpi_nfit_clear_to_send;
2802	nd_desc->attr_groups = acpi_nfit_attribute_groups;
2803
2804	INIT_LIST_HEAD(&acpi_desc->spas);
2805	INIT_LIST_HEAD(&acpi_desc->dcrs);
2806	INIT_LIST_HEAD(&acpi_desc->bdws);
2807	INIT_LIST_HEAD(&acpi_desc->idts);
2808	INIT_LIST_HEAD(&acpi_desc->flushes);
2809	INIT_LIST_HEAD(&acpi_desc->memdevs);
2810	INIT_LIST_HEAD(&acpi_desc->dimms);
2811	INIT_LIST_HEAD(&acpi_desc->list);
2812	mutex_init(&acpi_desc->init_mutex);
2813	INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
2814}
2815EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
2816
2817static int acpi_nfit_add(struct acpi_device *adev)
2818{
2819	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
2820	struct acpi_nfit_desc *acpi_desc;
2821	struct device *dev = &adev->dev;
2822	struct acpi_table_header *tbl;
2823	acpi_status status = AE_OK;
2824	acpi_size sz;
2825	int rc = 0;
2826
2827	status = acpi_get_table(ACPI_SIG_NFIT, 0, &tbl);
2828	if (ACPI_FAILURE(status)) {
2829		/* This is ok, we could have an nvdimm hotplugged later */
2830		dev_dbg(dev, "failed to find NFIT at startup\n");
2831		return 0;
2832	}
2833	sz = tbl->length;
2834
2835	acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
2836	if (!acpi_desc)
2837		return -ENOMEM;
2838	acpi_nfit_desc_init(acpi_desc, &adev->dev);
2839
2840	/* Save the acpi header for exporting the revision via sysfs */
2841	acpi_desc->acpi_header = *tbl;
2842
2843	/* Evaluate _FIT and override with that if present */
2844	status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
2845	if (ACPI_SUCCESS(status) && buf.length > 0) {
2846		union acpi_object *obj = buf.pointer;
2847
2848		if (obj->type == ACPI_TYPE_BUFFER)
2849			rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
2850					obj->buffer.length);
2851		else
2852			dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
2853				 __func__, (int) obj->type);
2854		kfree(buf.pointer);
2855	} else
2856		/* skip over the lead-in header table */
2857		rc = acpi_nfit_init(acpi_desc, (void *) tbl
2858				+ sizeof(struct acpi_table_nfit),
2859				sz - sizeof(struct acpi_table_nfit));
2860	return rc;
2861}
2862
/*
 * ACPI .remove callback.  Nothing to do here: teardown happens in
 * acpi_nfit_destruct(), which acpi_nfit_init() registers as a devm action.
 */
static int acpi_nfit_remove(struct acpi_device *adev)
{
	return 0;
}
2868
2869void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
2870{
2871	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
2872	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
2873	union acpi_object *obj;
2874	acpi_status status;
2875	int ret;
2876
2877	dev_dbg(dev, "%s: event: %d\n", __func__, event);
2878
2879	if (event != NFIT_NOTIFY_UPDATE)
2880		return;
2881
2882	if (!dev->driver) {
2883		/* dev->driver may be null if we're being removed */
2884		dev_dbg(dev, "%s: no driver found for dev\n", __func__);
2885		return;
2886	}
2887
2888	if (!acpi_desc) {
2889		acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
2890		if (!acpi_desc)
2891			return;
2892		acpi_nfit_desc_init(acpi_desc, dev);
2893	} else {
2894		/*
2895		 * Finish previous registration before considering new
2896		 * regions.
2897		 */
2898		flush_workqueue(nfit_wq);
2899	}
2900
2901	/* Evaluate _FIT */
2902	status = acpi_evaluate_object(handle, "_FIT", NULL, &buf);
2903	if (ACPI_FAILURE(status)) {
2904		dev_err(dev, "failed to evaluate _FIT\n");
2905		return;
2906	}
2907
2908	obj = buf.pointer;
2909	if (obj->type == ACPI_TYPE_BUFFER) {
2910		ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
2911				obj->buffer.length);
2912		if (ret)
2913			dev_err(dev, "failed to merge updated NFIT\n");
2914	} else
2915		dev_err(dev, "Invalid _FIT\n");
2916	kfree(buf.pointer);
2917}
2918EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
2919
2920static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
2921{
2922	device_lock(&adev->dev);
2923	__acpi_nfit_notify(&adev->dev, adev->handle, event);
2924	device_unlock(&adev->dev);
2925}
2926
/*
 * ACPI device IDs this driver matches ("ACPI0012"); the trailing
 * empty-string entry ends the table.
 */
2927static const struct acpi_device_id acpi_nfit_ids[] = {
2928	{ "ACPI0012", 0 },
2929	{ "", 0 },
2930};
2931MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
2932
/*
 * ACPI driver definition, registered in nfit_init() and unregistered in
 * nfit_exit(); binds the add / remove / notify callbacks to the IDs in
 * acpi_nfit_ids.
 */
2933static struct acpi_driver acpi_nfit_driver = {
2934	.name = KBUILD_MODNAME,
2935	.ids = acpi_nfit_ids,
2936	.ops = {
2937		.add = acpi_nfit_add,
2938		.remove = acpi_nfit_remove,
2939		.notify = acpi_nfit_notify,
2940	},
2941};
2942
2943static __init int nfit_init(void)
2944{
2945	BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
2946	BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
2947	BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
2948	BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
2949	BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
2950	BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
2951	BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
2952
2953	acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
2954	acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
2955	acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
2956	acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
2957	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
2958	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
2959	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
2960	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
2961	acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
2962	acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
2963	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
2964	acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
2965	acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
2966
2967	nfit_wq = create_singlethread_workqueue("nfit");
2968	if (!nfit_wq)
2969		return -ENOMEM;
2970
2971	nfit_mce_register();
2972
2973	return acpi_bus_register_driver(&acpi_nfit_driver);
2974}
2975
2976static __exit void nfit_exit(void)
2977{
2978	nfit_mce_unregister();
2979	acpi_bus_unregister_driver(&acpi_nfit_driver);
2980	destroy_workqueue(nfit_wq);
2981	WARN_ON(!list_empty(&acpi_descs));
2982}
2983
/* Module entry / exit points and module metadata. */
2984module_init(nfit_init);
2985module_exit(nfit_exit);
2986MODULE_LICENSE("GPL v2");
2987MODULE_AUTHOR("Intel Corporation");
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
   4 */
   5#include <linux/list_sort.h>
   6#include <linux/libnvdimm.h>
   7#include <linux/module.h>
   8#include <linux/nospec.h>
   9#include <linux/mutex.h>
  10#include <linux/ndctl.h>
  11#include <linux/sysfs.h>
  12#include <linux/delay.h>
  13#include <linux/list.h>
  14#include <linux/acpi.h>
  15#include <linux/sort.h>
  16#include <linux/io.h>
  17#include <linux/nd.h>
  18#include <asm/cacheflush.h>
  19#include <acpi/nfit.h>
  20#include "intel.h"
  21#include "nfit.h"
  22
  23/*
  24 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
  25 * irrelevant.
  26 */
  27#include <linux/io-64-nonatomic-hi-lo.h>
  28
  /*
   * Module parameters.  The third module_param() argument is the sysfs
   * permission mask: S_IRUGO / 0444 entries are read-only, the
   * S_IRUGO|S_IWUSR and 0644 entries are additionally writable by the
   * owner.
   */
  29static bool force_enable_dimms;
  30module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
  31MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
  32
  33static bool disable_vendor_specific;
  34module_param(disable_vendor_specific, bool, S_IRUGO);
  35MODULE_PARM_DESC(disable_vendor_specific,
  36		"Limit commands to the publicly specified set");
  37
  38static unsigned long override_dsm_mask;
  39module_param(override_dsm_mask, ulong, S_IRUGO);
  40MODULE_PARM_DESC(override_dsm_mask, "Bitmask of allowed NVDIMM DSM functions");
  41
  /* defaults to -1 */
  42static int default_dsm_family = -1;
  43module_param(default_dsm_family, int, S_IRUGO);
  44MODULE_PARM_DESC(default_dsm_family,
  45		"Try this DSM type first when identifying NVDIMM family");
  46
  47static bool no_init_ars;
  48module_param(no_init_ars, bool, 0644);
  49MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
  50
  51static bool force_labels;
  52module_param(force_labels, bool, 0444);
  53MODULE_PARM_DESC(force_labels, "Opt-in to labels despite missing methods");
  54
  /*
   * Non-static list head and mutex.  NOTE(review): presumably the list of
   * registered acpi_nfit_desc instances and the lock guarding it -- confirm
   * against the users of acpi_descs.
   */
  55LIST_HEAD(acpi_descs);
  56DEFINE_MUTEX(acpi_desc_lock);
  57
  /* NOTE(review): presumably the driver's private workqueue -- confirm where it is created */
  58static struct workqueue_struct *nfit_wq;
  59
  /*
   * One list head per NFIT sub-table kind.  NOTE(review): presumably holds
   * the entries known from a previous parse while a new table is merged --
   * confirm against acpi_nfit_init().
   */
  60struct nfit_table_prev {
  61	struct list_head spas;
  62	struct list_head memdevs;
  63	struct list_head dcrs;
  64	struct list_head bdws;
  65	struct list_head idts;
  66	struct list_head flushes;
  67};
  68
  /* GUID table indexed by enum nfit_uuids; read through to_nfit_uuid(). */
  69static guid_t nfit_uuid[NFIT_UUID_MAX];
  70
  71const guid_t *to_nfit_uuid(enum nfit_uuids id)
  72{
  73	return &nfit_uuid[id];
  74}
  75EXPORT_SYMBOL(to_nfit_uuid);
  76
  77static const guid_t *to_nfit_bus_uuid(int family)
  78{
  79	if (WARN_ONCE(family == NVDIMM_BUS_FAMILY_NFIT,
  80			"only secondary bus families can be translated\n"))
  81		return NULL;
  82	/*
  83	 * The index of bus UUIDs starts immediately following the last
  84	 * NVDIMM/leaf family.
  85	 */
  86	return to_nfit_uuid(family + NVDIMM_FAMILY_MAX);
  87}
  88
  89static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
  90{
  91	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
  92
  93	/*
  94	 * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
  95	 * acpi_device.
  96	 */
  97	if (!nd_desc->provider_name
  98			|| strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
  99		return NULL;
 100
 101	return to_acpi_device(acpi_desc->dev);
 102}
 103
 104static int xlat_bus_status(void *buf, unsigned int cmd, u32 status)
 105{
 106	struct nd_cmd_clear_error *clear_err;
 107	struct nd_cmd_ars_status *ars_status;
 108	u16 flags;
 109
 110	switch (cmd) {
 111	case ND_CMD_ARS_CAP:
 112		if ((status & 0xffff) == NFIT_ARS_CAP_NONE)
 113			return -ENOTTY;
 114
 115		/* Command failed */
 116		if (status & 0xffff)
 117			return -EIO;
 118
 119		/* No supported scan types for this range */
 120		flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
 121		if ((status >> 16 & flags) == 0)
 122			return -ENOTTY;
 123		return 0;
 124	case ND_CMD_ARS_START:
 125		/* ARS is in progress */
 126		if ((status & 0xffff) == NFIT_ARS_START_BUSY)
 127			return -EBUSY;
 128
 129		/* Command failed */
 130		if (status & 0xffff)
 131			return -EIO;
 132		return 0;
 133	case ND_CMD_ARS_STATUS:
 134		ars_status = buf;
 135		/* Command failed */
 136		if (status & 0xffff)
 137			return -EIO;
 138		/* Check extended status (Upper two bytes) */
 139		if (status == NFIT_ARS_STATUS_DONE)
 140			return 0;
 141
 142		/* ARS is in progress */
 143		if (status == NFIT_ARS_STATUS_BUSY)
 144			return -EBUSY;
 145
 146		/* No ARS performed for the current boot */
 147		if (status == NFIT_ARS_STATUS_NONE)
 148			return -EAGAIN;
 149
 150		/*
 151		 * ARS interrupted, either we overflowed or some other
 152		 * agent wants the scan to stop.  If we didn't overflow
 153		 * then just continue with the returned results.
 154		 */
 155		if (status == NFIT_ARS_STATUS_INTR) {
 156			if (ars_status->out_length >= 40 && (ars_status->flags
 157						& NFIT_ARS_F_OVERFLOW))
 158				return -ENOSPC;
 159			return 0;
 160		}
 161
 162		/* Unknown status */
 163		if (status >> 16)
 164			return -EIO;
 165		return 0;
 166	case ND_CMD_CLEAR_ERROR:
 167		clear_err = buf;
 168		if (status & 0xffff)
 169			return -EIO;
 170		if (!clear_err->cleared)
 171			return -EIO;
 172		if (clear_err->length > clear_err->cleared)
 173			return clear_err->cleared;
 174		return 0;
 175	default:
 176		break;
 177	}
 178
 179	/* all other non-zero status results in an error */
 180	if (status)
 181		return -EIO;
 182	return 0;
 183}
 184
 185#define ACPI_LABELS_LOCKED 3
 186
 187static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
 188		u32 status)
 189{
 190	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 191
 192	switch (cmd) {
 193	case ND_CMD_GET_CONFIG_SIZE:
 194		/*
 195		 * In the _LSI, _LSR, _LSW case the locked status is
 196		 * communicated via the read/write commands
 197		 */
 198		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags))
 199			break;
 200
 201		if (status >> 16 & ND_CONFIG_LOCKED)
 202			return -EACCES;
 203		break;
 204	case ND_CMD_GET_CONFIG_DATA:
 205		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
 206				&& status == ACPI_LABELS_LOCKED)
 207			return -EACCES;
 208		break;
 209	case ND_CMD_SET_CONFIG_DATA:
 210		if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags)
 211				&& status == ACPI_LABELS_LOCKED)
 212			return -EACCES;
 213		break;
 214	default:
 215		break;
 216	}
 217
 218	/* all other non-zero status results in an error */
 219	if (status)
 220		return -EIO;
 221	return 0;
 222}
 223
 224static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
 225		u32 status)
 226{
 227	if (!nvdimm)
 228		return xlat_bus_status(buf, cmd, status);
 229	return xlat_nvdimm_status(nvdimm, buf, cmd, status);
 230}
 231
 232/* convert _LS{I,R} packages to the buffer object acpi_nfit_ctl expects */
 233static union acpi_object *pkg_to_buf(union acpi_object *pkg)
 234{
 235	int i;
 236	void *dst;
 237	size_t size = 0;
 238	union acpi_object *buf = NULL;
 239
 240	if (pkg->type != ACPI_TYPE_PACKAGE) {
 241		WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n",
 242				pkg->type);
 243		goto err;
 244	}
 245
 246	for (i = 0; i < pkg->package.count; i++) {
 247		union acpi_object *obj = &pkg->package.elements[i];
 248
 249		if (obj->type == ACPI_TYPE_INTEGER)
 250			size += 4;
 251		else if (obj->type == ACPI_TYPE_BUFFER)
 252			size += obj->buffer.length;
 253		else {
 254			WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n",
 255					obj->type);
 256			goto err;
 257		}
 258	}
 259
 260	buf = ACPI_ALLOCATE(sizeof(*buf) + size);
 261	if (!buf)
 262		goto err;
 263
 264	dst = buf + 1;
 265	buf->type = ACPI_TYPE_BUFFER;
 266	buf->buffer.length = size;
 267	buf->buffer.pointer = dst;
 268	for (i = 0; i < pkg->package.count; i++) {
 269		union acpi_object *obj = &pkg->package.elements[i];
 270
 271		if (obj->type == ACPI_TYPE_INTEGER) {
 272			memcpy(dst, &obj->integer.value, 4);
 273			dst += 4;
 274		} else if (obj->type == ACPI_TYPE_BUFFER) {
 275			memcpy(dst, obj->buffer.pointer, obj->buffer.length);
 276			dst += obj->buffer.length;
 277		}
 278	}
 279err:
 280	ACPI_FREE(pkg);
 281	return buf;
 282}
 283
 284static union acpi_object *int_to_buf(union acpi_object *integer)
 285{
 286	union acpi_object *buf = NULL;
 287	void *dst = NULL;
 288
 289	if (integer->type != ACPI_TYPE_INTEGER) {
 290		WARN_ONCE(1, "BIOS bug, unexpected element type: %d\n",
 291				integer->type);
 292		goto err;
 293	}
 294
 295	buf = ACPI_ALLOCATE(sizeof(*buf) + 4);
 296	if (!buf)
 297		goto err;
 298
 299	dst = buf + 1;
 300	buf->type = ACPI_TYPE_BUFFER;
 301	buf->buffer.length = 4;
 302	buf->buffer.pointer = dst;
 303	memcpy(dst, &integer->integer.value, 4);
 304err:
 305	ACPI_FREE(integer);
 306	return buf;
 307}
 308
 309static union acpi_object *acpi_label_write(acpi_handle handle, u32 offset,
 310		u32 len, void *data)
 311{
 312	acpi_status rc;
 313	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
 314	struct acpi_object_list input = {
 315		.count = 3,
 316		.pointer = (union acpi_object []) {
 317			[0] = {
 318				.integer.type = ACPI_TYPE_INTEGER,
 319				.integer.value = offset,
 320			},
 321			[1] = {
 322				.integer.type = ACPI_TYPE_INTEGER,
 323				.integer.value = len,
 324			},
 325			[2] = {
 326				.buffer.type = ACPI_TYPE_BUFFER,
 327				.buffer.pointer = data,
 328				.buffer.length = len,
 329			},
 330		},
 331	};
 332
 333	rc = acpi_evaluate_object(handle, "_LSW", &input, &buf);
 334	if (ACPI_FAILURE(rc))
 335		return NULL;
 336	return int_to_buf(buf.pointer);
 337}
 338
 339static union acpi_object *acpi_label_read(acpi_handle handle, u32 offset,
 340		u32 len)
 341{
 342	acpi_status rc;
 343	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
 344	struct acpi_object_list input = {
 345		.count = 2,
 346		.pointer = (union acpi_object []) {
 347			[0] = {
 348				.integer.type = ACPI_TYPE_INTEGER,
 349				.integer.value = offset,
 350			},
 351			[1] = {
 352				.integer.type = ACPI_TYPE_INTEGER,
 353				.integer.value = len,
 354			},
 355		},
 356	};
 357
 358	rc = acpi_evaluate_object(handle, "_LSR", &input, &buf);
 359	if (ACPI_FAILURE(rc))
 360		return NULL;
 361	return pkg_to_buf(buf.pointer);
 362}
 363
 364static union acpi_object *acpi_label_info(acpi_handle handle)
 365{
 366	acpi_status rc;
 367	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
 368
 369	rc = acpi_evaluate_object(handle, "_LSI", NULL, &buf);
 370	if (ACPI_FAILURE(rc))
 371		return NULL;
 372	return pkg_to_buf(buf.pointer);
 373}
 374
 375static u8 nfit_dsm_revid(unsigned family, unsigned func)
 376{
 377	static const u8 revid_table[NVDIMM_FAMILY_MAX+1][NVDIMM_CMD_MAX+1] = {
 378		[NVDIMM_FAMILY_INTEL] = {
 379			[NVDIMM_INTEL_GET_MODES ...
 380				NVDIMM_INTEL_FW_ACTIVATE_ARM] = 2,
 381		},
 382	};
 383	u8 id;
 384
 385	if (family > NVDIMM_FAMILY_MAX)
 386		return 0;
 387	if (func > NVDIMM_CMD_MAX)
 388		return 0;
 389	id = revid_table[family][func];
 390	if (id == 0)
 391		return 1; /* default */
 392	return id;
 393}
 394
 395static bool payload_dumpable(struct nvdimm *nvdimm, unsigned int func)
 396{
 397	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 398
 399	if (nfit_mem && nfit_mem->family == NVDIMM_FAMILY_INTEL
 400			&& func >= NVDIMM_INTEL_GET_SECURITY_STATE
 401			&& func <= NVDIMM_INTEL_MASTER_SECURE_ERASE)
 402		return IS_ENABLED(CONFIG_NFIT_SECURITY_DEBUG);
 403	return true;
 404}
 405
 406static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd,
 407		struct nd_cmd_pkg *call_pkg, int *family)
 408{
 409	if (call_pkg) {
 410		int i;
 411
 412		if (nfit_mem && nfit_mem->family != call_pkg->nd_family)
 413			return -ENOTTY;
 414
 415		for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
 416			if (call_pkg->nd_reserved2[i])
 417				return -EINVAL;
 418		*family = call_pkg->nd_family;
 419		return call_pkg->nd_command;
 420	}
 421
 422	/* In the !call_pkg case, bus commands == bus functions */
 423	if (!nfit_mem)
 424		return cmd;
 425
 426	/* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */
 427	if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
 428		return cmd;
 429
 430	/*
 431	 * Force function number validation to fail since 0 is never
 432	 * published as a valid function in dsm_mask.
 433	 */
 434	return 0;
 435}
 436
 437int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 438		unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
 439{
 440	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 441	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 442	union acpi_object in_obj, in_buf, *out_obj;
 443	const struct nd_cmd_desc *desc = NULL;
 444	struct device *dev = acpi_desc->dev;
 445	struct nd_cmd_pkg *call_pkg = NULL;
 446	const char *cmd_name, *dimm_name;
 447	unsigned long cmd_mask, dsm_mask;
 448	u32 offset, fw_status = 0;
 449	acpi_handle handle;
 450	const guid_t *guid;
 451	int func, rc, i;
 452	int family = 0;
 453
 454	if (cmd_rc)
 455		*cmd_rc = -EINVAL;
 456
 457	if (cmd == ND_CMD_CALL)
 458		call_pkg = buf;
 459	func = cmd_to_func(nfit_mem, cmd, call_pkg, &family);
 460	if (func < 0)
 461		return func;
 462
 463	if (nvdimm) {
 464		struct acpi_device *adev = nfit_mem->adev;
 465
 466		if (!adev)
 467			return -ENOTTY;
 468
 469		dimm_name = nvdimm_name(nvdimm);
 470		cmd_name = nvdimm_cmd_name(cmd);
 471		cmd_mask = nvdimm_cmd_mask(nvdimm);
 472		dsm_mask = nfit_mem->dsm_mask;
 473		desc = nd_cmd_dimm_desc(cmd);
 474		guid = to_nfit_uuid(nfit_mem->family);
 475		handle = adev->handle;
 476	} else {
 477		struct acpi_device *adev = to_acpi_dev(acpi_desc);
 478
 479		cmd_name = nvdimm_bus_cmd_name(cmd);
 480		cmd_mask = nd_desc->cmd_mask;
 481		if (cmd == ND_CMD_CALL && call_pkg->nd_family) {
 482			family = call_pkg->nd_family;
 483			if (family > NVDIMM_BUS_FAMILY_MAX ||
 484			    !test_bit(family, &nd_desc->bus_family_mask))
 485				return -EINVAL;
 486			family = array_index_nospec(family,
 487						    NVDIMM_BUS_FAMILY_MAX + 1);
 488			dsm_mask = acpi_desc->family_dsm_mask[family];
 489			guid = to_nfit_bus_uuid(family);
 490		} else {
 491			dsm_mask = acpi_desc->bus_dsm_mask;
 492			guid = to_nfit_uuid(NFIT_DEV_BUS);
 493		}
 494		desc = nd_cmd_bus_desc(cmd);
 495		handle = adev->handle;
 496		dimm_name = "bus";
 497	}
 498
 499	if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
 500		return -ENOTTY;
 501
 502	/*
 503	 * Check for a valid command.  For ND_CMD_CALL, we also have to
 504	 * make sure that the DSM function is supported.
 505	 */
 506	if (cmd == ND_CMD_CALL &&
 507	    (func > NVDIMM_CMD_MAX || !test_bit(func, &dsm_mask)))
 508		return -ENOTTY;
 509	else if (!test_bit(cmd, &cmd_mask))
 510		return -ENOTTY;
 511
 512	in_obj.type = ACPI_TYPE_PACKAGE;
 513	in_obj.package.count = 1;
 514	in_obj.package.elements = &in_buf;
 515	in_buf.type = ACPI_TYPE_BUFFER;
 516	in_buf.buffer.pointer = buf;
 517	in_buf.buffer.length = 0;
 518
 519	/* libnvdimm has already validated the input envelope */
 520	for (i = 0; i < desc->in_num; i++)
 521		in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
 522				i, buf);
 523
 524	if (call_pkg) {
 525		/* skip over package wrapper */
 526		in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
 527		in_buf.buffer.length = call_pkg->nd_size_in;
 528	}
 529
 530	dev_dbg(dev, "%s cmd: %d: family: %d func: %d input length: %d\n",
 531		dimm_name, cmd, family, func, in_buf.buffer.length);
 532	if (payload_dumpable(nvdimm, func))
 533		print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
 534				in_buf.buffer.pointer,
 535				min_t(u32, 256, in_buf.buffer.length), true);
 536
 537	/* call the BIOS, prefer the named methods over _DSM if available */
 538	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE
 539			&& test_bit(NFIT_MEM_LSR, &nfit_mem->flags))
 540		out_obj = acpi_label_info(handle);
 541	else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA
 542			&& test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
 543		struct nd_cmd_get_config_data_hdr *p = buf;
 544
 545		out_obj = acpi_label_read(handle, p->in_offset, p->in_length);
 546	} else if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA
 547			&& test_bit(NFIT_MEM_LSW, &nfit_mem->flags)) {
 548		struct nd_cmd_set_config_hdr *p = buf;
 549
 550		out_obj = acpi_label_write(handle, p->in_offset, p->in_length,
 551				p->in_buf);
 552	} else {
 553		u8 revid;
 554
 555		if (nvdimm)
 556			revid = nfit_dsm_revid(nfit_mem->family, func);
 557		else
 558			revid = 1;
 559		out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj);
 560	}
 561
 562	if (!out_obj) {
 563		dev_dbg(dev, "%s _DSM failed cmd: %s\n", dimm_name, cmd_name);
 564		return -EINVAL;
 565	}
 566
 567	if (out_obj->type != ACPI_TYPE_BUFFER) {
 568		dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
 569				dimm_name, cmd_name, out_obj->type);
 570		rc = -EINVAL;
 571		goto out;
 572	}
 573
 574	dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
 575			cmd_name, out_obj->buffer.length);
 576	print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
 577			out_obj->buffer.pointer,
 578			min_t(u32, 128, out_obj->buffer.length), true);
 579
 580	if (call_pkg) {
 581		call_pkg->nd_fw_size = out_obj->buffer.length;
 582		memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
 583			out_obj->buffer.pointer,
 584			min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
 585
 586		ACPI_FREE(out_obj);
 587		/*
 588		 * Need to support FW function w/o known size in advance.
 589		 * Caller can determine required size based upon nd_fw_size.
 590		 * If we return an error (like elsewhere) then caller wouldn't
 591		 * be able to rely upon data returned to make calculation.
 592		 */
 593		if (cmd_rc)
 594			*cmd_rc = 0;
 595		return 0;
 596	}
 597
 598	for (i = 0, offset = 0; i < desc->out_num; i++) {
 599		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
 600				(u32 *) out_obj->buffer.pointer,
 601				out_obj->buffer.length - offset);
 602
 603		if (offset + out_size > out_obj->buffer.length) {
 604			dev_dbg(dev, "%s output object underflow cmd: %s field: %d\n",
 605					dimm_name, cmd_name, i);
 606			break;
 607		}
 608
 609		if (in_buf.buffer.length + offset + out_size > buf_len) {
 610			dev_dbg(dev, "%s output overrun cmd: %s field: %d\n",
 611					dimm_name, cmd_name, i);
 612			rc = -ENXIO;
 613			goto out;
 614		}
 615		memcpy(buf + in_buf.buffer.length + offset,
 616				out_obj->buffer.pointer + offset, out_size);
 617		offset += out_size;
 618	}
 619
 620	/*
 621	 * Set fw_status for all the commands with a known format to be
 622	 * later interpreted by xlat_status().
 623	 */
 624	if (i >= 1 && ((!nvdimm && cmd >= ND_CMD_ARS_CAP
 625					&& cmd <= ND_CMD_CLEAR_ERROR)
 626				|| (nvdimm && cmd >= ND_CMD_SMART
 627					&& cmd <= ND_CMD_VENDOR)))
 628		fw_status = *(u32 *) out_obj->buffer.pointer;
 629
 630	if (offset + in_buf.buffer.length < buf_len) {
 631		if (i >= 1) {
 632			/*
 633			 * status valid, return the number of bytes left
 634			 * unfilled in the output buffer
 635			 */
 636			rc = buf_len - offset - in_buf.buffer.length;
 637			if (cmd_rc)
 638				*cmd_rc = xlat_status(nvdimm, buf, cmd,
 639						fw_status);
 640		} else {
 641			dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
 642					__func__, dimm_name, cmd_name, buf_len,
 643					offset);
 644			rc = -ENXIO;
 645		}
 646	} else {
 647		rc = 0;
 648		if (cmd_rc)
 649			*cmd_rc = xlat_status(nvdimm, buf, cmd, fw_status);
 650	}
 651
 652 out:
 653	ACPI_FREE(out_obj);
 654
 655	return rc;
 656}
 657EXPORT_SYMBOL_GPL(acpi_nfit_ctl);
 658
 659static const char *spa_type_name(u16 type)
 660{
 661	static const char *to_name[] = {
 662		[NFIT_SPA_VOLATILE] = "volatile",
 663		[NFIT_SPA_PM] = "pmem",
 664		[NFIT_SPA_DCR] = "dimm-control-region",
 665		[NFIT_SPA_BDW] = "block-data-window",
 666		[NFIT_SPA_VDISK] = "volatile-disk",
 667		[NFIT_SPA_VCD] = "volatile-cd",
 668		[NFIT_SPA_PDISK] = "persistent-disk",
 669		[NFIT_SPA_PCD] = "persistent-cd",
 670
 671	};
 672
 673	if (type > NFIT_SPA_PCD)
 674		return "unknown";
 675
 676	return to_name[type];
 677}
 678
 679int nfit_spa_type(struct acpi_nfit_system_address *spa)
 680{
 681	guid_t guid;
 682	int i;
 683
 684	import_guid(&guid, spa->range_guid);
 685	for (i = 0; i < NFIT_UUID_MAX; i++)
 686		if (guid_equal(to_nfit_uuid(i), &guid))
 687			return i;
 688	return -1;
 689}
 690
 691static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
 692{
 693	if (spa->flags & ACPI_NFIT_LOCATION_COOKIE_VALID)
 694		return sizeof(*spa);
 695	return sizeof(*spa) - 8;
 696}
 697
 698static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 699		struct nfit_table_prev *prev,
 700		struct acpi_nfit_system_address *spa)
 701{
 702	struct device *dev = acpi_desc->dev;
 703	struct nfit_spa *nfit_spa;
 704
 705	if (spa->header.length != sizeof_spa(spa))
 706		return false;
 707
 708	list_for_each_entry(nfit_spa, &prev->spas, list) {
 709		if (memcmp(nfit_spa->spa, spa, sizeof_spa(spa)) == 0) {
 710			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
 711			return true;
 712		}
 713	}
 714
 715	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof_spa(spa),
 716			GFP_KERNEL);
 717	if (!nfit_spa)
 718		return false;
 719	INIT_LIST_HEAD(&nfit_spa->list);
 720	memcpy(nfit_spa->spa, spa, sizeof_spa(spa));
 721	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
 722	dev_dbg(dev, "spa index: %d type: %s\n",
 723			spa->range_index,
 724			spa_type_name(nfit_spa_type(spa)));
 725	return true;
 726}
 727
 728static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 729		struct nfit_table_prev *prev,
 730		struct acpi_nfit_memory_map *memdev)
 731{
 732	struct device *dev = acpi_desc->dev;
 733	struct nfit_memdev *nfit_memdev;
 734
 735	if (memdev->header.length != sizeof(*memdev))
 736		return false;
 737
 738	list_for_each_entry(nfit_memdev, &prev->memdevs, list)
 739		if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
 740			list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
 741			return true;
 742		}
 743
 744	nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
 745			GFP_KERNEL);
 746	if (!nfit_memdev)
 747		return false;
 748	INIT_LIST_HEAD(&nfit_memdev->list);
 749	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 750	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
 751	dev_dbg(dev, "memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
 752			memdev->device_handle, memdev->range_index,
 753			memdev->region_index, memdev->flags);
 754	return true;
 755}
 756
 757int nfit_get_smbios_id(u32 device_handle, u16 *flags)
 758{
 759	struct acpi_nfit_memory_map *memdev;
 760	struct acpi_nfit_desc *acpi_desc;
 761	struct nfit_mem *nfit_mem;
 762	u16 physical_id;
 763
 764	mutex_lock(&acpi_desc_lock);
 765	list_for_each_entry(acpi_desc, &acpi_descs, list) {
 766		mutex_lock(&acpi_desc->init_mutex);
 767		list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
 768			memdev = __to_nfit_memdev(nfit_mem);
 769			if (memdev->device_handle == device_handle) {
 770				*flags = memdev->flags;
 771				physical_id = memdev->physical_id;
 772				mutex_unlock(&acpi_desc->init_mutex);
 773				mutex_unlock(&acpi_desc_lock);
 774				return physical_id;
 775			}
 776		}
 777		mutex_unlock(&acpi_desc->init_mutex);
 778	}
 779	mutex_unlock(&acpi_desc_lock);
 780
 781	return -ENODEV;
 782}
 783EXPORT_SYMBOL_GPL(nfit_get_smbios_id);
 784
 785/*
 786 * An implementation may provide a truncated control region if no block windows
 787 * are defined.
 788 */
 789static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
 790{
 791	if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
 792				window_size))
 793		return 0;
 794	if (dcr->windows)
 795		return sizeof(*dcr);
 796	return offsetof(struct acpi_nfit_control_region, window_size);
 797}
 798
 799static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
 800		struct nfit_table_prev *prev,
 801		struct acpi_nfit_control_region *dcr)
 802{
 803	struct device *dev = acpi_desc->dev;
 804	struct nfit_dcr *nfit_dcr;
 805
 806	if (!sizeof_dcr(dcr))
 807		return false;
 808
 809	list_for_each_entry(nfit_dcr, &prev->dcrs, list)
 810		if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
 811			list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
 812			return true;
 813		}
 814
 815	nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
 816			GFP_KERNEL);
 817	if (!nfit_dcr)
 818		return false;
 819	INIT_LIST_HEAD(&nfit_dcr->list);
 820	memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
 821	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
 822	dev_dbg(dev, "dcr index: %d windows: %d\n",
 823			dcr->region_index, dcr->windows);
 824	return true;
 825}
 826
 827static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
 828		struct nfit_table_prev *prev,
 829		struct acpi_nfit_data_region *bdw)
 830{
 831	struct device *dev = acpi_desc->dev;
 832	struct nfit_bdw *nfit_bdw;
 833
 834	if (bdw->header.length != sizeof(*bdw))
 835		return false;
 836	list_for_each_entry(nfit_bdw, &prev->bdws, list)
 837		if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
 838			list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
 839			return true;
 840		}
 841
 842	nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
 843			GFP_KERNEL);
 844	if (!nfit_bdw)
 845		return false;
 846	INIT_LIST_HEAD(&nfit_bdw->list);
 847	memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
 848	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
 849	dev_dbg(dev, "bdw dcr: %d windows: %d\n",
 850			bdw->region_index, bdw->windows);
 851	return true;
 852}
 853
 854static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
 855{
 856	if (idt->header.length < sizeof(*idt))
 857		return 0;
 858	return sizeof(*idt) + sizeof(u32) * idt->line_count;
 859}
 860
 861static bool add_idt(struct acpi_nfit_desc *acpi_desc,
 862		struct nfit_table_prev *prev,
 863		struct acpi_nfit_interleave *idt)
 864{
 865	struct device *dev = acpi_desc->dev;
 866	struct nfit_idt *nfit_idt;
 867
 868	if (!sizeof_idt(idt))
 869		return false;
 870
 871	list_for_each_entry(nfit_idt, &prev->idts, list) {
 872		if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
 873			continue;
 874
 875		if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
 876			list_move_tail(&nfit_idt->list, &acpi_desc->idts);
 877			return true;
 878		}
 879	}
 880
 881	nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
 882			GFP_KERNEL);
 883	if (!nfit_idt)
 884		return false;
 885	INIT_LIST_HEAD(&nfit_idt->list);
 886	memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
 887	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
 888	dev_dbg(dev, "idt index: %d num_lines: %d\n",
 889			idt->interleave_index, idt->line_count);
 890	return true;
 891}
 892
 893static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
 894{
 895	if (flush->header.length < sizeof(*flush))
 896		return 0;
 897	return struct_size(flush, hint_address, flush->hint_count);
 898}
 899
 900static bool add_flush(struct acpi_nfit_desc *acpi_desc,
 901		struct nfit_table_prev *prev,
 902		struct acpi_nfit_flush_address *flush)
 903{
 904	struct device *dev = acpi_desc->dev;
 905	struct nfit_flush *nfit_flush;
 906
 907	if (!sizeof_flush(flush))
 908		return false;
 909
 910	list_for_each_entry(nfit_flush, &prev->flushes, list) {
 911		if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
 912			continue;
 913
 914		if (memcmp(nfit_flush->flush, flush,
 915					sizeof_flush(flush)) == 0) {
 916			list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
 917			return true;
 918		}
 919	}
 920
 921	nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
 922			+ sizeof_flush(flush), GFP_KERNEL);
 923	if (!nfit_flush)
 924		return false;
 925	INIT_LIST_HEAD(&nfit_flush->list);
 926	memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
 927	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
 928	dev_dbg(dev, "nfit_flush handle: %d hint_count: %d\n",
 929			flush->device_handle, flush->hint_count);
 930	return true;
 931}
 932
 933static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
 934		struct acpi_nfit_capabilities *pcap)
 935{
 936	struct device *dev = acpi_desc->dev;
 937	u32 mask;
 938
 939	mask = (1 << (pcap->highest_capability + 1)) - 1;
 940	acpi_desc->platform_cap = pcap->capabilities & mask;
 941	dev_dbg(dev, "cap: %#x\n", acpi_desc->platform_cap);
 942	return true;
 943}
 944
 945static void *add_table(struct acpi_nfit_desc *acpi_desc,
 946		struct nfit_table_prev *prev, void *table, const void *end)
 947{
 948	struct device *dev = acpi_desc->dev;
 949	struct acpi_nfit_header *hdr;
 950	void *err = ERR_PTR(-ENOMEM);
 951
 952	if (table >= end)
 953		return NULL;
 954
 955	hdr = table;
 956	if (!hdr->length) {
 957		dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
 958			hdr->type);
 959		return NULL;
 960	}
 961
 962	switch (hdr->type) {
 963	case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
 964		if (!add_spa(acpi_desc, prev, table))
 965			return err;
 966		break;
 967	case ACPI_NFIT_TYPE_MEMORY_MAP:
 968		if (!add_memdev(acpi_desc, prev, table))
 969			return err;
 970		break;
 971	case ACPI_NFIT_TYPE_CONTROL_REGION:
 972		if (!add_dcr(acpi_desc, prev, table))
 973			return err;
 974		break;
 975	case ACPI_NFIT_TYPE_DATA_REGION:
 976		if (!add_bdw(acpi_desc, prev, table))
 977			return err;
 978		break;
 979	case ACPI_NFIT_TYPE_INTERLEAVE:
 980		if (!add_idt(acpi_desc, prev, table))
 981			return err;
 982		break;
 983	case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
 984		if (!add_flush(acpi_desc, prev, table))
 985			return err;
 986		break;
 987	case ACPI_NFIT_TYPE_SMBIOS:
 988		dev_dbg(dev, "smbios\n");
 989		break;
 990	case ACPI_NFIT_TYPE_CAPABILITIES:
 991		if (!add_platform_cap(acpi_desc, table))
 992			return err;
 993		break;
 994	default:
 995		dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
 996		break;
 997	}
 998
 999	return table + hdr->length;
1000}
1001
1002static int __nfit_mem_init(struct acpi_nfit_desc *acpi_desc,
1003		struct acpi_nfit_system_address *spa)
1004{
1005	struct nfit_mem *nfit_mem, *found;
1006	struct nfit_memdev *nfit_memdev;
1007	int type = spa ? nfit_spa_type(spa) : 0;
1008
1009	switch (type) {
1010	case NFIT_SPA_DCR:
1011	case NFIT_SPA_PM:
1012		break;
1013	default:
1014		if (spa)
1015			return 0;
1016	}
1017
1018	/*
1019	 * This loop runs in two modes, when a dimm is mapped the loop
1020	 * adds memdev associations to an existing dimm, or creates a
1021	 * dimm. In the unmapped dimm case this loop sweeps for memdev
1022	 * instances with an invalid / zero range_index and adds those
1023	 * dimms without spa associations.
1024	 */
1025	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
1026		struct nfit_flush *nfit_flush;
1027		struct nfit_dcr *nfit_dcr;
1028		u32 device_handle;
1029		u16 dcr;
1030
1031		if (spa && nfit_memdev->memdev->range_index != spa->range_index)
1032			continue;
1033		if (!spa && nfit_memdev->memdev->range_index)
1034			continue;
1035		found = NULL;
1036		dcr = nfit_memdev->memdev->region_index;
1037		device_handle = nfit_memdev->memdev->device_handle;
1038		list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
1039			if (__to_nfit_memdev(nfit_mem)->device_handle
1040					== device_handle) {
1041				found = nfit_mem;
1042				break;
1043			}
1044
1045		if (found)
1046			nfit_mem = found;
1047		else {
1048			nfit_mem = devm_kzalloc(acpi_desc->dev,
1049					sizeof(*nfit_mem), GFP_KERNEL);
1050			if (!nfit_mem)
1051				return -ENOMEM;
1052			INIT_LIST_HEAD(&nfit_mem->list);
1053			nfit_mem->acpi_desc = acpi_desc;
1054			list_add(&nfit_mem->list, &acpi_desc->dimms);
1055		}
1056
1057		list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
1058			if (nfit_dcr->dcr->region_index != dcr)
1059				continue;
1060			/*
1061			 * Record the control region for the dimm.  For
1062			 * the ACPI 6.1 case, where there are separate
1063			 * control regions for the pmem vs blk
1064			 * interfaces, be sure to record the extended
1065			 * blk details.
1066			 */
1067			if (!nfit_mem->dcr)
1068				nfit_mem->dcr = nfit_dcr->dcr;
1069			else if (nfit_mem->dcr->windows == 0
1070					&& nfit_dcr->dcr->windows)
1071				nfit_mem->dcr = nfit_dcr->dcr;
1072			break;
1073		}
1074
1075		list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
1076			struct acpi_nfit_flush_address *flush;
1077			u16 i;
1078
1079			if (nfit_flush->flush->device_handle != device_handle)
1080				continue;
1081			nfit_mem->nfit_flush = nfit_flush;
1082			flush = nfit_flush->flush;
1083			nfit_mem->flush_wpq = devm_kcalloc(acpi_desc->dev,
1084					flush->hint_count,
1085					sizeof(struct resource),
1086					GFP_KERNEL);
1087			if (!nfit_mem->flush_wpq)
1088				return -ENOMEM;
1089			for (i = 0; i < flush->hint_count; i++) {
1090				struct resource *res = &nfit_mem->flush_wpq[i];
1091
1092				res->start = flush->hint_address[i];
1093				res->end = res->start + 8 - 1;
1094			}
1095			break;
1096		}
1097
1098		if (dcr && !nfit_mem->dcr) {
1099			dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
1100					spa->range_index, dcr);
1101			return -ENODEV;
1102		}
1103
1104		if (type == NFIT_SPA_DCR) {
1105			struct nfit_idt *nfit_idt;
1106			u16 idt_idx;
1107
1108			/* multiple dimms may share a SPA when interleaved */
1109			nfit_mem->spa_dcr = spa;
1110			nfit_mem->memdev_dcr = nfit_memdev->memdev;
1111			idt_idx = nfit_memdev->memdev->interleave_index;
1112			list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
1113				if (nfit_idt->idt->interleave_index != idt_idx)
1114					continue;
1115				nfit_mem->idt_dcr = nfit_idt->idt;
1116				break;
1117			}
1118		} else if (type == NFIT_SPA_PM) {
1119			/*
1120			 * A single dimm may belong to multiple SPA-PM
1121			 * ranges, record at least one in addition to
1122			 * any SPA-DCR range.
1123			 */
1124			nfit_mem->memdev_pmem = nfit_memdev->memdev;
1125		} else
1126			nfit_mem->memdev_dcr = nfit_memdev->memdev;
1127	}
1128
1129	return 0;
1130}
1131
1132static int nfit_mem_cmp(void *priv, const struct list_head *_a,
1133		const struct list_head *_b)
1134{
1135	struct nfit_mem *a = container_of(_a, typeof(*a), list);
1136	struct nfit_mem *b = container_of(_b, typeof(*b), list);
1137	u32 handleA, handleB;
1138
1139	handleA = __to_nfit_memdev(a)->device_handle;
1140	handleB = __to_nfit_memdev(b)->device_handle;
1141	if (handleA < handleB)
1142		return -1;
1143	else if (handleA > handleB)
1144		return 1;
1145	return 0;
1146}
1147
1148static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
1149{
1150	struct nfit_spa *nfit_spa;
1151	int rc;
1152
1153
1154	/*
1155	 * For each SPA-DCR or SPA-PMEM address range find its
1156	 * corresponding MEMDEV(s).  From each MEMDEV find the
1157	 * corresponding DCR.  Then, if we're operating on a SPA-DCR,
1158	 * try to find a SPA-BDW and a corresponding BDW that references
1159	 * the DCR.  Throw it all into an nfit_mem object.  Note, that
1160	 * BDWs are optional.
1161	 */
1162	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
1163		rc = __nfit_mem_init(acpi_desc, nfit_spa->spa);
1164		if (rc)
1165			return rc;
1166	}
1167
1168	/*
1169	 * If a DIMM has failed to be mapped into SPA there will be no
1170	 * SPA entries above. Find and register all the unmapped DIMMs
1171	 * for reporting and recovery purposes.
1172	 */
1173	rc = __nfit_mem_init(acpi_desc, NULL);
1174	if (rc)
1175		return rc;
1176
1177	list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
1178
1179	return 0;
1180}
1181
1182static ssize_t bus_dsm_mask_show(struct device *dev,
1183		struct device_attribute *attr, char *buf)
1184{
1185	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
1186	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1187	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
1188
1189	return sysfs_emit(buf, "%#lx\n", acpi_desc->bus_dsm_mask);
1190}
1191static struct device_attribute dev_attr_bus_dsm_mask =
1192		__ATTR(dsm_mask, 0444, bus_dsm_mask_show, NULL);
1193
1194static ssize_t revision_show(struct device *dev,
1195		struct device_attribute *attr, char *buf)
1196{
1197	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
1198	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1199	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
1200
1201	return sysfs_emit(buf, "%d\n", acpi_desc->acpi_header.revision);
1202}
1203static DEVICE_ATTR_RO(revision);
1204
1205static ssize_t hw_error_scrub_show(struct device *dev,
1206		struct device_attribute *attr, char *buf)
1207{
1208	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
1209	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1210	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
1211
1212	return sysfs_emit(buf, "%d\n", acpi_desc->scrub_mode);
1213}
1214
1215/*
1216 * The 'hw_error_scrub' attribute can have the following values written to it:
1217 * '0': Switch to the default mode where an exception will only insert
1218 *      the address of the memory error into the poison and badblocks lists.
1219 * '1': Enable a full scrub to happen if an exception for a memory error is
1220 *      received.
1221 */
1222static ssize_t hw_error_scrub_store(struct device *dev,
1223		struct device_attribute *attr, const char *buf, size_t size)
1224{
1225	struct nvdimm_bus_descriptor *nd_desc;
1226	ssize_t rc;
1227	long val;
1228
1229	rc = kstrtol(buf, 0, &val);
1230	if (rc)
1231		return rc;
1232
1233	device_lock(dev);
1234	nd_desc = dev_get_drvdata(dev);
1235	if (nd_desc) {
1236		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
1237
1238		switch (val) {
1239		case HW_ERROR_SCRUB_ON:
1240			acpi_desc->scrub_mode = HW_ERROR_SCRUB_ON;
1241			break;
1242		case HW_ERROR_SCRUB_OFF:
1243			acpi_desc->scrub_mode = HW_ERROR_SCRUB_OFF;
1244			break;
1245		default:
1246			rc = -EINVAL;
1247			break;
1248		}
1249	}
1250	device_unlock(dev);
1251	if (rc)
1252		return rc;
1253	return size;
1254}
1255static DEVICE_ATTR_RW(hw_error_scrub);
1256
1257/*
1258 * This shows the number of full Address Range Scrubs that have been
1259 * completed since driver load time. Userspace can wait on this using
1260 * select/poll etc. A '+' at the end indicates an ARS is in progress
1261 */
1262static ssize_t scrub_show(struct device *dev,
1263		struct device_attribute *attr, char *buf)
1264{
1265	struct nvdimm_bus_descriptor *nd_desc;
1266	struct acpi_nfit_desc *acpi_desc;
1267	ssize_t rc = -ENXIO;
1268	bool busy;
1269
1270	device_lock(dev);
1271	nd_desc = dev_get_drvdata(dev);
1272	if (!nd_desc) {
1273		device_unlock(dev);
1274		return rc;
1275	}
1276	acpi_desc = to_acpi_desc(nd_desc);
1277
1278	mutex_lock(&acpi_desc->init_mutex);
1279	busy = test_bit(ARS_BUSY, &acpi_desc->scrub_flags)
1280		&& !test_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
1281	rc = sysfs_emit(buf, "%d%s", acpi_desc->scrub_count, busy ? "+\n" : "\n");
1282	/* Allow an admin to poll the busy state at a higher rate */
1283	if (busy && capable(CAP_SYS_RAWIO) && !test_and_set_bit(ARS_POLL,
1284				&acpi_desc->scrub_flags)) {
1285		acpi_desc->scrub_tmo = 1;
1286		mod_delayed_work(nfit_wq, &acpi_desc->dwork, HZ);
1287	}
1288
1289	mutex_unlock(&acpi_desc->init_mutex);
1290	device_unlock(dev);
1291	return rc;
1292}
1293
1294static ssize_t scrub_store(struct device *dev,
1295		struct device_attribute *attr, const char *buf, size_t size)
1296{
1297	struct nvdimm_bus_descriptor *nd_desc;
1298	ssize_t rc;
1299	long val;
1300
1301	rc = kstrtol(buf, 0, &val);
1302	if (rc)
1303		return rc;
1304	if (val != 1)
1305		return -EINVAL;
1306
1307	device_lock(dev);
1308	nd_desc = dev_get_drvdata(dev);
1309	if (nd_desc) {
1310		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
1311
1312		rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
1313	}
1314	device_unlock(dev);
1315	if (rc)
1316		return rc;
1317	return size;
1318}
1319static DEVICE_ATTR_RW(scrub);
1320
1321static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
1322{
1323	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1324	const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
1325		| 1 << ND_CMD_ARS_STATUS;
1326
1327	return (nd_desc->cmd_mask & mask) == mask;
1328}
1329
1330static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
1331{
1332	struct device *dev = kobj_to_dev(kobj);
1333	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
1334
1335	if (a == &dev_attr_scrub.attr)
1336		return ars_supported(nvdimm_bus) ? a->mode : 0;
1337
1338	if (a == &dev_attr_firmware_activate_noidle.attr)
1339		return intel_fwa_supported(nvdimm_bus) ? a->mode : 0;
1340
1341	return a->mode;
1342}
1343
1344static struct attribute *acpi_nfit_attributes[] = {
1345	&dev_attr_revision.attr,
1346	&dev_attr_scrub.attr,
1347	&dev_attr_hw_error_scrub.attr,
1348	&dev_attr_bus_dsm_mask.attr,
1349	&dev_attr_firmware_activate_noidle.attr,
1350	NULL,
1351};
1352
1353static const struct attribute_group acpi_nfit_attribute_group = {
1354	.name = "nfit",
1355	.attrs = acpi_nfit_attributes,
1356	.is_visible = nfit_visible,
1357};
1358
1359static const struct attribute_group *acpi_nfit_attribute_groups[] = {
1360	&acpi_nfit_attribute_group,
1361	NULL,
1362};
1363
/* Map an nvdimm device to the memdev selected by __to_nfit_memdev(). */
static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
{
	struct nfit_mem *nfit_mem = nvdimm_provider_data(to_nvdimm(dev));

	return __to_nfit_memdev(nfit_mem);
}
1371
1372static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
1373{
1374	struct nvdimm *nvdimm = to_nvdimm(dev);
1375	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1376
1377	return nfit_mem->dcr;
1378}
1379
1380static ssize_t handle_show(struct device *dev,
1381		struct device_attribute *attr, char *buf)
1382{
1383	struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
1384
1385	return sysfs_emit(buf, "%#x\n", memdev->device_handle);
1386}
1387static DEVICE_ATTR_RO(handle);
1388
1389static ssize_t phys_id_show(struct device *dev,
1390		struct device_attribute *attr, char *buf)
1391{
1392	struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
1393
1394	return sysfs_emit(buf, "%#x\n", memdev->physical_id);
1395}
1396static DEVICE_ATTR_RO(phys_id);
1397
1398static ssize_t vendor_show(struct device *dev,
1399		struct device_attribute *attr, char *buf)
1400{
1401	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1402
1403	return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
1404}
1405static DEVICE_ATTR_RO(vendor);
1406
1407static ssize_t rev_id_show(struct device *dev,
1408		struct device_attribute *attr, char *buf)
1409{
1410	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1411
1412	return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
1413}
1414static DEVICE_ATTR_RO(rev_id);
1415
1416static ssize_t device_show(struct device *dev,
1417		struct device_attribute *attr, char *buf)
1418{
1419	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1420
1421	return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
1422}
1423static DEVICE_ATTR_RO(device);
1424
1425static ssize_t subsystem_vendor_show(struct device *dev,
1426		struct device_attribute *attr, char *buf)
1427{
1428	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1429
1430	return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
1431}
1432static DEVICE_ATTR_RO(subsystem_vendor);
1433
1434static ssize_t subsystem_rev_id_show(struct device *dev,
1435		struct device_attribute *attr, char *buf)
1436{
1437	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1438
1439	return sysfs_emit(buf, "0x%04x\n",
1440			be16_to_cpu(dcr->subsystem_revision_id));
1441}
1442static DEVICE_ATTR_RO(subsystem_rev_id);
1443
1444static ssize_t subsystem_device_show(struct device *dev,
1445		struct device_attribute *attr, char *buf)
1446{
1447	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1448
1449	return sysfs_emit(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
1450}
1451static DEVICE_ATTR_RO(subsystem_device);
1452
1453static int num_nvdimm_formats(struct nvdimm *nvdimm)
1454{
1455	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1456	int formats = 0;
1457
1458	if (nfit_mem->memdev_pmem)
1459		formats++;
1460	return formats;
1461}
1462
1463static ssize_t format_show(struct device *dev,
1464		struct device_attribute *attr, char *buf)
1465{
1466	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1467
1468	return sysfs_emit(buf, "0x%04x\n", le16_to_cpu(dcr->code));
1469}
1470static DEVICE_ATTR_RO(format);
1471
1472static ssize_t format1_show(struct device *dev,
1473		struct device_attribute *attr, char *buf)
1474{
1475	u32 handle;
1476	ssize_t rc = -ENXIO;
1477	struct nfit_mem *nfit_mem;
1478	struct nfit_memdev *nfit_memdev;
1479	struct acpi_nfit_desc *acpi_desc;
1480	struct nvdimm *nvdimm = to_nvdimm(dev);
1481	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1482
1483	nfit_mem = nvdimm_provider_data(nvdimm);
1484	acpi_desc = nfit_mem->acpi_desc;
1485	handle = to_nfit_memdev(dev)->device_handle;
1486
1487	/* assumes DIMMs have at most 2 published interface codes */
1488	mutex_lock(&acpi_desc->init_mutex);
1489	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
1490		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
1491		struct nfit_dcr *nfit_dcr;
1492
1493		if (memdev->device_handle != handle)
1494			continue;
1495
1496		list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
1497			if (nfit_dcr->dcr->region_index != memdev->region_index)
1498				continue;
1499			if (nfit_dcr->dcr->code == dcr->code)
1500				continue;
1501			rc = sysfs_emit(buf, "0x%04x\n",
1502					le16_to_cpu(nfit_dcr->dcr->code));
1503			break;
1504		}
1505		if (rc != -ENXIO)
1506			break;
1507	}
1508	mutex_unlock(&acpi_desc->init_mutex);
1509	return rc;
1510}
1511static DEVICE_ATTR_RO(format1);
1512
1513static ssize_t formats_show(struct device *dev,
1514		struct device_attribute *attr, char *buf)
1515{
1516	struct nvdimm *nvdimm = to_nvdimm(dev);
1517
1518	return sysfs_emit(buf, "%d\n", num_nvdimm_formats(nvdimm));
1519}
1520static DEVICE_ATTR_RO(formats);
1521
1522static ssize_t serial_show(struct device *dev,
1523		struct device_attribute *attr, char *buf)
1524{
1525	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1526
1527	return sysfs_emit(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
1528}
1529static DEVICE_ATTR_RO(serial);
1530
1531static ssize_t family_show(struct device *dev,
1532		struct device_attribute *attr, char *buf)
1533{
1534	struct nvdimm *nvdimm = to_nvdimm(dev);
1535	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1536
1537	if (nfit_mem->family < 0)
1538		return -ENXIO;
1539	return sysfs_emit(buf, "%d\n", nfit_mem->family);
1540}
1541static DEVICE_ATTR_RO(family);
1542
1543static ssize_t dsm_mask_show(struct device *dev,
1544		struct device_attribute *attr, char *buf)
1545{
1546	struct nvdimm *nvdimm = to_nvdimm(dev);
1547	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1548
1549	if (nfit_mem->family < 0)
1550		return -ENXIO;
1551	return sysfs_emit(buf, "%#lx\n", nfit_mem->dsm_mask);
1552}
1553static DEVICE_ATTR_RO(dsm_mask);
1554
1555static ssize_t flags_show(struct device *dev,
1556		struct device_attribute *attr, char *buf)
1557{
1558	struct nvdimm *nvdimm = to_nvdimm(dev);
1559	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1560	u16 flags = __to_nfit_memdev(nfit_mem)->flags;
1561
1562	if (test_bit(NFIT_MEM_DIRTY, &nfit_mem->flags))
1563		flags |= ACPI_NFIT_MEM_FLUSH_FAILED;
1564
1565	return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
1566		flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
1567		flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
1568		flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
1569		flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
1570		flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "",
1571		flags & ACPI_NFIT_MEM_MAP_FAILED ? "map_fail " : "",
1572		flags & ACPI_NFIT_MEM_HEALTH_ENABLED ? "smart_notify " : "");
1573}
1574static DEVICE_ATTR_RO(flags);
1575
1576static ssize_t id_show(struct device *dev,
1577		struct device_attribute *attr, char *buf)
1578{
1579	struct nvdimm *nvdimm = to_nvdimm(dev);
1580	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1581
1582	return sysfs_emit(buf, "%s\n", nfit_mem->id);
1583}
1584static DEVICE_ATTR_RO(id);
1585
1586static ssize_t dirty_shutdown_show(struct device *dev,
1587		struct device_attribute *attr, char *buf)
1588{
1589	struct nvdimm *nvdimm = to_nvdimm(dev);
1590	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1591
1592	return sysfs_emit(buf, "%d\n", nfit_mem->dirty_shutdown);
1593}
1594static DEVICE_ATTR_RO(dirty_shutdown);
1595
1596static struct attribute *acpi_nfit_dimm_attributes[] = {
1597	&dev_attr_handle.attr,
1598	&dev_attr_phys_id.attr,
1599	&dev_attr_vendor.attr,
1600	&dev_attr_device.attr,
1601	&dev_attr_rev_id.attr,
1602	&dev_attr_subsystem_vendor.attr,
1603	&dev_attr_subsystem_device.attr,
1604	&dev_attr_subsystem_rev_id.attr,
1605	&dev_attr_format.attr,
1606	&dev_attr_formats.attr,
1607	&dev_attr_format1.attr,
1608	&dev_attr_serial.attr,
1609	&dev_attr_flags.attr,
1610	&dev_attr_id.attr,
1611	&dev_attr_family.attr,
1612	&dev_attr_dsm_mask.attr,
1613	&dev_attr_dirty_shutdown.attr,
1614	NULL,
1615};
1616
1617static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
1618		struct attribute *a, int n)
1619{
1620	struct device *dev = kobj_to_dev(kobj);
1621	struct nvdimm *nvdimm = to_nvdimm(dev);
1622	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1623
1624	if (!to_nfit_dcr(dev)) {
1625		/* Without a dcr only the memdev attributes can be surfaced */
1626		if (a == &dev_attr_handle.attr || a == &dev_attr_phys_id.attr
1627				|| a == &dev_attr_flags.attr
1628				|| a == &dev_attr_family.attr
1629				|| a == &dev_attr_dsm_mask.attr)
1630			return a->mode;
1631		return 0;
1632	}
1633
1634	if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
1635		return 0;
1636
1637	if (!test_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags)
1638			&& a == &dev_attr_dirty_shutdown.attr)
1639		return 0;
1640
1641	return a->mode;
1642}
1643
1644static const struct attribute_group acpi_nfit_dimm_attribute_group = {
1645	.name = "nfit",
1646	.attrs = acpi_nfit_dimm_attributes,
1647	.is_visible = acpi_nfit_dimm_attr_visible,
1648};
1649
1650static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
1651	&acpi_nfit_dimm_attribute_group,
1652	NULL,
1653};
1654
1655static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
1656		u32 device_handle)
1657{
1658	struct nfit_mem *nfit_mem;
1659
1660	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
1661		if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
1662			return nfit_mem->nvdimm;
1663
1664	return NULL;
1665}
1666
1667void __acpi_nvdimm_notify(struct device *dev, u32 event)
1668{
1669	struct nfit_mem *nfit_mem;
1670	struct acpi_nfit_desc *acpi_desc;
1671
1672	dev_dbg(dev->parent, "%s: event: %d\n", dev_name(dev),
1673			event);
1674
1675	if (event != NFIT_NOTIFY_DIMM_HEALTH) {
1676		dev_dbg(dev->parent, "%s: unknown event: %d\n", dev_name(dev),
1677				event);
1678		return;
1679	}
1680
1681	acpi_desc = dev_get_drvdata(dev->parent);
1682	if (!acpi_desc)
1683		return;
1684
1685	/*
1686	 * If we successfully retrieved acpi_desc, then we know nfit_mem data
1687	 * is still valid.
1688	 */
1689	nfit_mem = dev_get_drvdata(dev);
1690	if (nfit_mem && nfit_mem->flags_attr)
1691		sysfs_notify_dirent(nfit_mem->flags_attr);
1692}
1693EXPORT_SYMBOL_GPL(__acpi_nvdimm_notify);
1694
1695static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
1696{
1697	struct acpi_device *adev = data;
1698	struct device *dev = &adev->dev;
1699
1700	device_lock(dev->parent);
1701	__acpi_nvdimm_notify(dev, event);
1702	device_unlock(dev->parent);
1703}
1704
1705static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
1706{
1707	acpi_handle handle;
1708	acpi_status status;
1709
1710	status = acpi_get_handle(adev->handle, method, &handle);
1711
1712	if (ACPI_SUCCESS(status))
1713		return true;
1714	return false;
1715}
1716
1717__weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
1718{
1719	struct device *dev = &nfit_mem->adev->dev;
1720	struct nd_intel_smart smart = { 0 };
1721	union acpi_object in_buf = {
1722		.buffer.type = ACPI_TYPE_BUFFER,
1723		.buffer.length = 0,
1724	};
1725	union acpi_object in_obj = {
1726		.package.type = ACPI_TYPE_PACKAGE,
1727		.package.count = 1,
1728		.package.elements = &in_buf,
1729	};
1730	const u8 func = ND_INTEL_SMART;
1731	const guid_t *guid = to_nfit_uuid(nfit_mem->family);
1732	u8 revid = nfit_dsm_revid(nfit_mem->family, func);
1733	struct acpi_device *adev = nfit_mem->adev;
1734	acpi_handle handle = adev->handle;
1735	union acpi_object *out_obj;
1736
1737	if ((nfit_mem->dsm_mask & (1 << func)) == 0)
1738		return;
1739
1740	out_obj = acpi_evaluate_dsm_typed(handle, guid, revid, func, &in_obj, ACPI_TYPE_BUFFER);
1741	if (!out_obj || out_obj->buffer.length < sizeof(smart)) {
1742		dev_dbg(dev->parent, "%s: failed to retrieve initial health\n",
1743				dev_name(dev));
1744		ACPI_FREE(out_obj);
1745		return;
1746	}
1747	memcpy(&smart, out_obj->buffer.pointer, sizeof(smart));
1748	ACPI_FREE(out_obj);
1749
1750	if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) {
1751		if (smart.shutdown_state)
1752			set_bit(NFIT_MEM_DIRTY, &nfit_mem->flags);
1753	}
1754
1755	if (smart.flags & ND_INTEL_SMART_SHUTDOWN_COUNT_VALID) {
1756		set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
1757		nfit_mem->dirty_shutdown = smart.shutdown_count;
1758	}
1759}
1760
1761static void populate_shutdown_status(struct nfit_mem *nfit_mem)
1762{
1763	/*
1764	 * For DIMMs that provide a dynamic facility to retrieve a
1765	 * dirty-shutdown status and/or a dirty-shutdown count, cache
1766	 * these values in nfit_mem.
1767	 */
1768	if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
1769		nfit_intel_shutdown_status(nfit_mem);
1770}
1771
1772static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1773		struct nfit_mem *nfit_mem, u32 device_handle)
1774{
1775	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1776	struct acpi_device *adev, *adev_dimm;
1777	struct device *dev = acpi_desc->dev;
1778	unsigned long dsm_mask, label_mask;
1779	const guid_t *guid;
1780	int i;
1781	int family = -1;
1782	struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
1783
1784	/* nfit test assumes 1:1 relationship between commands and dsms */
1785	nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
1786	nfit_mem->family = NVDIMM_FAMILY_INTEL;
1787	set_bit(NVDIMM_FAMILY_INTEL, &nd_desc->dimm_family_mask);
1788
1789	if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
1790		sprintf(nfit_mem->id, "%04x-%02x-%04x-%08x",
1791				be16_to_cpu(dcr->vendor_id),
1792				dcr->manufacturing_location,
1793				be16_to_cpu(dcr->manufacturing_date),
1794				be32_to_cpu(dcr->serial_number));
1795	else
1796		sprintf(nfit_mem->id, "%04x-%08x",
1797				be16_to_cpu(dcr->vendor_id),
1798				be32_to_cpu(dcr->serial_number));
1799
1800	adev = to_acpi_dev(acpi_desc);
1801	if (!adev) {
1802		/* unit test case */
1803		populate_shutdown_status(nfit_mem);
1804		return 0;
1805	}
1806
1807	adev_dimm = acpi_find_child_device(adev, device_handle, false);
1808	nfit_mem->adev = adev_dimm;
1809	if (!adev_dimm) {
1810		dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
1811				device_handle);
1812		return force_enable_dimms ? 0 : -ENODEV;
1813	}
1814
1815	if (ACPI_FAILURE(acpi_install_notify_handler(adev_dimm->handle,
1816		ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify, adev_dimm))) {
1817		dev_err(dev, "%s: notification registration failed\n",
1818				dev_name(&adev_dimm->dev));
1819		return -ENXIO;
1820	}
1821	/*
1822	 * Record nfit_mem for the notification path to track back to
1823	 * the nfit sysfs attributes for this dimm device object.
1824	 */
1825	dev_set_drvdata(&adev_dimm->dev, nfit_mem);
1826
1827	/*
1828	 * There are 4 "legacy" NVDIMM command sets
1829	 * (NVDIMM_FAMILY_{INTEL,MSFT,HPE1,HPE2}) that were created before
1830	 * an EFI working group was established to constrain this
1831	 * proliferation. The nfit driver probes for the supported command
1832	 * set by GUID. Note, if you're a platform developer looking to add
1833	 * a new command set to this probe, consider using an existing set,
1834	 * or otherwise seek approval to publish the command set at
1835	 * http://www.uefi.org/RFIC_LIST.
1836	 *
1837	 * Note, that checking for function0 (bit0) tells us if any commands
1838	 * are reachable through this GUID.
1839	 */
1840	clear_bit(NVDIMM_FAMILY_INTEL, &nd_desc->dimm_family_mask);
1841	for (i = 0; i <= NVDIMM_FAMILY_MAX; i++)
1842		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) {
1843			set_bit(i, &nd_desc->dimm_family_mask);
1844			if (family < 0 || i == default_dsm_family)
1845				family = i;
1846		}
1847
1848	/* limit the supported commands to those that are publicly documented */
1849	nfit_mem->family = family;
1850	if (override_dsm_mask && !disable_vendor_specific)
1851		dsm_mask = override_dsm_mask;
1852	else if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
1853		dsm_mask = NVDIMM_INTEL_CMDMASK;
1854		if (disable_vendor_specific)
1855			dsm_mask &= ~(1 << ND_CMD_VENDOR);
1856	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
1857		dsm_mask = 0x1c3c76;
1858	} else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
1859		dsm_mask = 0x1fe;
1860		if (disable_vendor_specific)
1861			dsm_mask &= ~(1 << 8);
1862	} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
1863		dsm_mask = 0xffffffff;
1864	} else if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) {
1865		dsm_mask = 0x1f;
1866	} else {
1867		dev_dbg(dev, "unknown dimm command family\n");
1868		nfit_mem->family = -1;
1869		/* DSMs are optional, continue loading the driver... */
1870		return 0;
1871	}
1872
1873	/*
1874	 * Function 0 is the command interrogation function, don't
1875	 * export it to potential userspace use, and enable it to be
1876	 * used as an error value in acpi_nfit_ctl().
1877	 */
1878	dsm_mask &= ~1UL;
1879
1880	guid = to_nfit_uuid(nfit_mem->family);
1881	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
1882		if (acpi_check_dsm(adev_dimm->handle, guid,
1883					nfit_dsm_revid(nfit_mem->family, i),
1884					1ULL << i))
1885			set_bit(i, &nfit_mem->dsm_mask);
1886
1887	/*
1888	 * Prefer the NVDIMM_FAMILY_INTEL label read commands if present
1889	 * due to their better semantics handling locked capacity.
1890	 */
1891	label_mask = 1 << ND_CMD_GET_CONFIG_SIZE | 1 << ND_CMD_GET_CONFIG_DATA
1892		| 1 << ND_CMD_SET_CONFIG_DATA;
1893	if (family == NVDIMM_FAMILY_INTEL
1894			&& (dsm_mask & label_mask) == label_mask)
1895		/* skip _LS{I,R,W} enabling */;
1896	else {
1897		if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
1898				&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
1899			dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
1900			set_bit(NFIT_MEM_LSR, &nfit_mem->flags);
1901		}
1902
1903		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)
1904				&& acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
1905			dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
1906			set_bit(NFIT_MEM_LSW, &nfit_mem->flags);
1907		}
1908
1909		/*
1910		 * Quirk read-only label configurations to preserve
1911		 * access to label-less namespaces by default.
1912		 */
1913		if (!test_bit(NFIT_MEM_LSW, &nfit_mem->flags)
1914				&& !force_labels) {
1915			dev_dbg(dev, "%s: No _LSW, disable labels\n",
1916					dev_name(&adev_dimm->dev));
1917			clear_bit(NFIT_MEM_LSR, &nfit_mem->flags);
1918		} else
1919			dev_dbg(dev, "%s: Force enable labels\n",
1920					dev_name(&adev_dimm->dev));
1921	}
1922
1923	populate_shutdown_status(nfit_mem);
1924
1925	return 0;
1926}
1927
1928static void shutdown_dimm_notify(void *data)
1929{
1930	struct acpi_nfit_desc *acpi_desc = data;
1931	struct nfit_mem *nfit_mem;
1932
1933	mutex_lock(&acpi_desc->init_mutex);
1934	/*
1935	 * Clear out the nfit_mem->flags_attr and shut down dimm event
1936	 * notifications.
1937	 */
1938	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
1939		struct acpi_device *adev_dimm = nfit_mem->adev;
1940
1941		if (nfit_mem->flags_attr) {
1942			sysfs_put(nfit_mem->flags_attr);
1943			nfit_mem->flags_attr = NULL;
1944		}
1945		if (adev_dimm) {
1946			acpi_remove_notify_handler(adev_dimm->handle,
1947					ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
1948			dev_set_drvdata(&adev_dimm->dev, NULL);
1949		}
1950	}
1951	mutex_unlock(&acpi_desc->init_mutex);
1952}
1953
1954static const struct nvdimm_security_ops *acpi_nfit_get_security_ops(int family)
1955{
1956	switch (family) {
1957	case NVDIMM_FAMILY_INTEL:
1958		return intel_security_ops;
1959	default:
1960		return NULL;
1961	}
1962}
1963
1964static const struct nvdimm_fw_ops *acpi_nfit_get_fw_ops(
1965		struct nfit_mem *nfit_mem)
1966{
1967	unsigned long mask;
1968	struct acpi_nfit_desc *acpi_desc = nfit_mem->acpi_desc;
1969	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1970
1971	if (!nd_desc->fw_ops)
1972		return NULL;
1973
1974	if (nfit_mem->family != NVDIMM_FAMILY_INTEL)
1975		return NULL;
1976
1977	mask = nfit_mem->dsm_mask & NVDIMM_INTEL_FW_ACTIVATE_CMDMASK;
1978	if (mask != NVDIMM_INTEL_FW_ACTIVATE_CMDMASK)
1979		return NULL;
1980
1981	return intel_fw_ops;
1982}
1983
1984static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
1985{
1986	struct nfit_mem *nfit_mem;
1987	int dimm_count = 0, rc;
1988	struct nvdimm *nvdimm;
1989
1990	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
1991		struct acpi_nfit_flush_address *flush;
1992		unsigned long flags = 0, cmd_mask;
1993		struct nfit_memdev *nfit_memdev;
1994		u32 device_handle;
1995		u16 mem_flags;
1996
1997		device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
1998		nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
1999		if (nvdimm) {
2000			dimm_count++;
2001			continue;
2002		}
2003
2004		/* collate flags across all memdevs for this dimm */
2005		list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
2006			struct acpi_nfit_memory_map *dimm_memdev;
2007
2008			dimm_memdev = __to_nfit_memdev(nfit_mem);
2009			if (dimm_memdev->device_handle
2010					!= nfit_memdev->memdev->device_handle)
2011				continue;
2012			dimm_memdev->flags |= nfit_memdev->memdev->flags;
2013		}
2014
2015		mem_flags = __to_nfit_memdev(nfit_mem)->flags;
2016		if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
2017			set_bit(NDD_UNARMED, &flags);
2018
2019		rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
2020		if (rc)
2021			continue;
2022
2023		/*
2024		 * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
2025		 * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
2026		 * userspace interface.
2027		 */
2028		cmd_mask = 1UL << ND_CMD_CALL;
2029		if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
2030			/*
2031			 * These commands have a 1:1 correspondence
2032			 * between DSM payload and libnvdimm ioctl
2033			 * payload format.
2034			 */
2035			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
2036		}
2037
2038		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
2039			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
2040			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
2041		}
2042		if (test_bit(NFIT_MEM_LSW, &nfit_mem->flags))
2043			set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
2044
2045		flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
2046			: NULL;
2047		nvdimm = __nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
2048				acpi_nfit_dimm_attribute_groups,
2049				flags, cmd_mask, flush ? flush->hint_count : 0,
2050				nfit_mem->flush_wpq, &nfit_mem->id[0],
2051				acpi_nfit_get_security_ops(nfit_mem->family),
2052				acpi_nfit_get_fw_ops(nfit_mem));
2053		if (!nvdimm)
2054			return -ENOMEM;
2055
2056		nfit_mem->nvdimm = nvdimm;
2057		dimm_count++;
2058
2059		if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
2060			continue;
2061
2062		dev_err(acpi_desc->dev, "Error found in NVDIMM %s flags:%s%s%s%s%s\n",
2063				nvdimm_name(nvdimm),
2064		  mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
2065		  mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
2066		  mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
2067		  mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "",
2068		  mem_flags & ACPI_NFIT_MEM_MAP_FAILED ? " map_fail" : "");
2069
2070	}
2071
2072	rc = nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
2073	if (rc)
2074		return rc;
2075
2076	/*
2077	 * Now that dimms are successfully registered, and async registration
2078	 * is flushed, attempt to enable event notification.
2079	 */
2080	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
2081		struct kernfs_node *nfit_kernfs;
2082
2083		nvdimm = nfit_mem->nvdimm;
2084		if (!nvdimm)
2085			continue;
2086
2087		nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
2088		if (nfit_kernfs)
2089			nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
2090					"flags");
2091		sysfs_put(nfit_kernfs);
2092		if (!nfit_mem->flags_attr)
2093			dev_warn(acpi_desc->dev, "%s: notifications disabled\n",
2094					nvdimm_name(nvdimm));
2095	}
2096
2097	return devm_add_action_or_reset(acpi_desc->dev, shutdown_dimm_notify,
2098			acpi_desc);
2099}
2100
2101/*
2102 * These constants are private because there are no kernel consumers of
2103 * these commands.
2104 */
2105enum nfit_aux_cmds {
2106	NFIT_CMD_TRANSLATE_SPA = 5,
2107	NFIT_CMD_ARS_INJECT_SET = 7,
2108	NFIT_CMD_ARS_INJECT_CLEAR = 8,
2109	NFIT_CMD_ARS_INJECT_GET = 9,
2110};
2111
2112static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
2113{
2114	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
2115	const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
2116	unsigned long dsm_mask, *mask;
2117	struct acpi_device *adev;
2118	int i;
2119
2120	set_bit(ND_CMD_CALL, &nd_desc->cmd_mask);
2121	set_bit(NVDIMM_BUS_FAMILY_NFIT, &nd_desc->bus_family_mask);
2122
2123	/* enable nfit_test to inject bus command emulation */
2124	if (acpi_desc->bus_cmd_force_en) {
2125		nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
2126		mask = &nd_desc->bus_family_mask;
2127		if (acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]) {
2128			set_bit(NVDIMM_BUS_FAMILY_INTEL, mask);
2129			nd_desc->fw_ops = intel_bus_fw_ops;
2130		}
2131	}
2132
2133	adev = to_acpi_dev(acpi_desc);
2134	if (!adev)
2135		return;
2136
2137	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
2138		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
2139			set_bit(i, &nd_desc->cmd_mask);
2140
2141	dsm_mask =
2142		(1 << ND_CMD_ARS_CAP) |
2143		(1 << ND_CMD_ARS_START) |
2144		(1 << ND_CMD_ARS_STATUS) |
2145		(1 << ND_CMD_CLEAR_ERROR) |
2146		(1 << NFIT_CMD_TRANSLATE_SPA) |
2147		(1 << NFIT_CMD_ARS_INJECT_SET) |
2148		(1 << NFIT_CMD_ARS_INJECT_CLEAR) |
2149		(1 << NFIT_CMD_ARS_INJECT_GET);
2150	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
2151		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
2152			set_bit(i, &acpi_desc->bus_dsm_mask);
2153
2154	/* Enumerate allowed NVDIMM_BUS_FAMILY_INTEL commands */
2155	dsm_mask = NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK;
2156	guid = to_nfit_bus_uuid(NVDIMM_BUS_FAMILY_INTEL);
2157	mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL];
2158	for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
2159		if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
2160			set_bit(i, mask);
2161
2162	if (*mask == dsm_mask) {
2163		set_bit(NVDIMM_BUS_FAMILY_INTEL, &nd_desc->bus_family_mask);
2164		nd_desc->fw_ops = intel_bus_fw_ops;
2165	}
2166}
2167
2168static ssize_t range_index_show(struct device *dev,
2169		struct device_attribute *attr, char *buf)
2170{
2171	struct nd_region *nd_region = to_nd_region(dev);
2172	struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
2173
2174	return sysfs_emit(buf, "%d\n", nfit_spa->spa->range_index);
2175}
2176static DEVICE_ATTR_RO(range_index);
2177
/* NULL-terminated list of the NFIT-specific region attributes. */
2178static struct attribute *acpi_nfit_region_attributes[] = {
2179	&dev_attr_range_index.attr,
2180	NULL,
2181};
2182
/* Attribute group named "nfit" wrapping acpi_nfit_region_attributes. */
2183static const struct attribute_group acpi_nfit_region_attribute_group = {
2184	.name = "nfit",
2185	.attrs = acpi_nfit_region_attributes,
2186};
2187
/*
 * NULL-terminated group list; acpi_nfit_register_region() installs it as
 * ndr_desc->attr_groups for every region it creates.
 */
2188static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
2189	&acpi_nfit_region_attribute_group,
2190	NULL,
2191};
2192
2193/*
 * Enough info to uniquely specify an interleave set.
 *
 * acpi_nfit_init_interleave_set() fills one entry per mapping from the
 * memdev region offset and the DCR serial number, sorts the array and
 * feeds the raw bytes to nd_fletcher64() to derive cookie1 / altcookie.
 * Because the bytes are checksummed, the layout must not change; 'pad'
 * is never written there and stays zero from kcalloc().
 */
2194struct nfit_set_info {
2195	u64 region_offset;
2196	u32 serial_number;
2197	u32 pad;
2198};
2199
/*
 * Extended per-mapping identity used for the "v1.2 namespaces" cookie
 * (cookie2) in acpi_nfit_init_interleave_set().  In addition to the
 * fields of struct nfit_set_info it carries the DCR vendor id and
 * manufacturing date/location.  The array is checksummed as raw bytes
 * by nd_fletcher64(), so the layout must not change; 'reserved' is left
 * zeroed by kcalloc().
 */
2200struct nfit_set_info2 {
2201	u64 region_offset;
2202	u32 serial_number;
2203	u16 vendor_id;
2204	u16 manufacturing_date;
2205	u8 manufacturing_location;
2206	u8 reserved[31];
2207};
2208
2209static int cmp_map_compat(const void *m0, const void *m1)
2210{
2211	const struct nfit_set_info *map0 = m0;
2212	const struct nfit_set_info *map1 = m1;
2213
2214	return memcmp(&map0->region_offset, &map1->region_offset,
2215			sizeof(u64));
2216}
2217
2218static int cmp_map(const void *m0, const void *m1)
2219{
2220	const struct nfit_set_info *map0 = m0;
2221	const struct nfit_set_info *map1 = m1;
2222
2223	if (map0->region_offset < map1->region_offset)
2224		return -1;
2225	else if (map0->region_offset > map1->region_offset)
2226		return 1;
2227	return 0;
2228}
2229
2230static int cmp_map2(const void *m0, const void *m1)
2231{
2232	const struct nfit_set_info2 *map0 = m0;
2233	const struct nfit_set_info2 *map1 = m1;
2234
2235	if (map0->region_offset < map1->region_offset)
2236		return -1;
2237	else if (map0->region_offset > map1->region_offset)
2238		return 1;
2239	return 0;
2240}
2241
2242/* Retrieve the nth entry referencing this spa */
2243static struct acpi_nfit_memory_map *memdev_from_spa(
2244		struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
2245{
2246	struct nfit_memdev *nfit_memdev;
2247
2248	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
2249		if (nfit_memdev->memdev->range_index == range_index)
2250			if (n-- == 0)
2251				return nfit_memdev->memdev;
2252	return NULL;
2253}
2254
2255static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
2256		struct nd_region_desc *ndr_desc,
2257		struct acpi_nfit_system_address *spa)
2258{
2259	u16 nr = ndr_desc->num_mappings;
2260	struct nfit_set_info2 *info2 __free(kfree) =
2261		kcalloc(nr, sizeof(*info2), GFP_KERNEL);
2262	struct nfit_set_info *info __free(kfree) =
2263		kcalloc(nr, sizeof(*info), GFP_KERNEL);
2264	struct device *dev = acpi_desc->dev;
2265	struct nd_interleave_set *nd_set;
2266	int i;
2267
2268	if (!info || !info2)
2269		return -ENOMEM;
2270
2271	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
2272	if (!nd_set)
2273		return -ENOMEM;
2274	import_guid(&nd_set->type_guid, spa->range_guid);
2275
2276	for (i = 0; i < nr; i++) {
2277		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
2278		struct nvdimm *nvdimm = mapping->nvdimm;
2279		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
2280		struct nfit_set_info *map = &info[i];
2281		struct nfit_set_info2 *map2 = &info2[i];
2282		struct acpi_nfit_memory_map *memdev =
2283			memdev_from_spa(acpi_desc, spa->range_index, i);
2284		struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
2285
2286		if (!memdev || !nfit_mem->dcr) {
2287			dev_err(dev, "%s: failed to find DCR\n", __func__);
2288			return -ENODEV;
2289		}
2290
2291		map->region_offset = memdev->region_offset;
2292		map->serial_number = dcr->serial_number;
2293
2294		map2->region_offset = memdev->region_offset;
2295		map2->serial_number = dcr->serial_number;
2296		map2->vendor_id = dcr->vendor_id;
2297		map2->manufacturing_date = dcr->manufacturing_date;
2298		map2->manufacturing_location = dcr->manufacturing_location;
2299	}
2300
2301	/* v1.1 namespaces */
2302	sort(info, nr, sizeof(*info), cmp_map, NULL);
2303	nd_set->cookie1 = nd_fletcher64(info, sizeof(*info) * nr, 0);
2304
2305	/* v1.2 namespaces */
2306	sort(info2, nr, sizeof(*info2), cmp_map2, NULL);
2307	nd_set->cookie2 = nd_fletcher64(info2, sizeof(*info2) * nr, 0);
2308
2309	/* support v1.1 namespaces created with the wrong sort order */
2310	sort(info, nr, sizeof(*info), cmp_map_compat, NULL);
2311	nd_set->altcookie = nd_fletcher64(info, sizeof(*info) * nr, 0);
2312
2313	/* record the result of the sort for the mapping position */
2314	for (i = 0; i < nr; i++) {
2315		struct nfit_set_info2 *map2 = &info2[i];
2316		int j;
2317
2318		for (j = 0; j < nr; j++) {
2319			struct nd_mapping_desc *mapping = &ndr_desc->mapping[j];
2320			struct nvdimm *nvdimm = mapping->nvdimm;
2321			struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
2322			struct acpi_nfit_control_region *dcr = nfit_mem->dcr;
2323
2324			if (map2->serial_number == dcr->serial_number &&
2325			    map2->vendor_id == dcr->vendor_id &&
2326			    map2->manufacturing_date == dcr->manufacturing_date &&
2327			    map2->manufacturing_location
2328				    == dcr->manufacturing_location) {
2329				mapping->position = i;
2330				break;
2331			}
2332		}
2333	}
2334
2335	ndr_desc->nd_set = nd_set;
2336
2337	return 0;
2338}
2339
2340static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
2341		struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
2342{
2343	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
2344	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2345	int cmd_rc, rc;
2346
2347	cmd->address = spa->address;
2348	cmd->length = spa->length;
2349	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
2350			sizeof(*cmd), &cmd_rc);
2351	if (rc < 0)
2352		return rc;
2353	return cmd_rc;
2354}
2355
2356static int ars_start(struct acpi_nfit_desc *acpi_desc,
2357		struct nfit_spa *nfit_spa, enum nfit_ars_state req_type)
2358{
2359	int rc;
2360	int cmd_rc;
2361	struct nd_cmd_ars_start ars_start;
2362	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2363	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
2364
2365	memset(&ars_start, 0, sizeof(ars_start));
2366	ars_start.address = spa->address;
2367	ars_start.length = spa->length;
2368	if (req_type == ARS_REQ_SHORT)
2369		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
2370	if (nfit_spa_type(spa) == NFIT_SPA_PM)
2371		ars_start.type = ND_ARS_PERSISTENT;
2372	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
2373		ars_start.type = ND_ARS_VOLATILE;
2374	else
2375		return -ENOTTY;
2376
2377	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
2378			sizeof(ars_start), &cmd_rc);
2379
2380	if (rc < 0)
2381		return rc;
2382	if (cmd_rc < 0)
2383		return cmd_rc;
2384	set_bit(ARS_VALID, &acpi_desc->scrub_flags);
2385	return 0;
2386}
2387
2388static int ars_continue(struct acpi_nfit_desc *acpi_desc)
2389{
2390	int rc, cmd_rc;
2391	struct nd_cmd_ars_start ars_start;
2392	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
2393	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
2394
2395	ars_start = (struct nd_cmd_ars_start) {
2396		.address = ars_status->restart_address,
2397		.length = ars_status->restart_length,
2398		.type = ars_status->type,
2399	};
2400	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
2401			sizeof(ars_start), &cmd_rc);
2402	if (rc < 0)
2403		return rc;
2404	return cmd_rc;
2405}
2406
2407static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
2408{
2409	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
2410	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
2411	int rc, cmd_rc;
2412
2413	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
2414			acpi_desc->max_ars, &cmd_rc);
2415	if (rc < 0)
2416		return rc;
2417	return cmd_rc;
2418}
2419
2420static void ars_complete(struct acpi_nfit_desc *acpi_desc,
2421		struct nfit_spa *nfit_spa)
2422{
2423	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
2424	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2425	struct nd_region *nd_region = nfit_spa->nd_region;
2426	struct device *dev;
2427
2428	lockdep_assert_held(&acpi_desc->init_mutex);
2429	/*
2430	 * Only advance the ARS state for ARS runs initiated by the
2431	 * kernel, ignore ARS results from BIOS initiated runs for scrub
2432	 * completion tracking.
2433	 */
2434	if (acpi_desc->scrub_spa != nfit_spa)
2435		return;
2436
2437	if ((ars_status->address >= spa->address && ars_status->address
2438				< spa->address + spa->length)
2439			|| (ars_status->address < spa->address)) {
2440		/*
2441		 * Assume that if a scrub starts at an offset from the
2442		 * start of nfit_spa that we are in the continuation
2443		 * case.
2444		 *
2445		 * Otherwise, if the scrub covers the spa range, mark
2446		 * any pending request complete.
2447		 */
2448		if (ars_status->address + ars_status->length
2449				>= spa->address + spa->length)
2450				/* complete */;
2451		else
2452			return;
2453	} else
2454		return;
2455
2456	acpi_desc->scrub_spa = NULL;
2457	if (nd_region) {
2458		dev = nd_region_dev(nd_region);
2459		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
2460	} else
2461		dev = acpi_desc->dev;
2462	dev_dbg(dev, "ARS: range %d complete\n", spa->range_index);
2463}
2464
2465static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
2466{
2467	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
2468	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
2469	int rc;
2470	u32 i;
2471
2472	/*
2473	 * First record starts at 44 byte offset from the start of the
2474	 * payload.
2475	 */
2476	if (ars_status->out_length < 44)
2477		return 0;
2478
2479	/*
2480	 * Ignore potentially stale results that are only refreshed
2481	 * after a start-ARS event.
2482	 */
2483	if (!test_and_clear_bit(ARS_VALID, &acpi_desc->scrub_flags)) {
2484		dev_dbg(acpi_desc->dev, "skip %d stale records\n",
2485				ars_status->num_records);
2486		return 0;
2487	}
2488
2489	for (i = 0; i < ars_status->num_records; i++) {
2490		/* only process full records */
2491		if (ars_status->out_length
2492				< 44 + sizeof(struct nd_ars_record) * (i + 1))
2493			break;
2494		rc = nvdimm_bus_add_badrange(nvdimm_bus,
2495				ars_status->records[i].err_address,
2496				ars_status->records[i].length);
2497		if (rc)
2498			return rc;
2499	}
2500	if (i < ars_status->num_records)
2501		dev_warn(acpi_desc->dev, "detected truncated ars results\n");
2502
2503	return 0;
2504}
2505
/* devm action: undo the insert_resource() done by acpi_nfit_insert_resource() */
static void acpi_nfit_remove_resource(void *data)
{
	remove_resource((struct resource *)data);
}
2512
2513static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
2514		struct nd_region_desc *ndr_desc)
2515{
2516	struct resource *res, *nd_res = ndr_desc->res;
2517	int is_pmem, ret;
2518
2519	/* No operation if the region is already registered as PMEM */
2520	is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
2521				IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
2522	if (is_pmem == REGION_INTERSECTS)
2523		return 0;
2524
2525	res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
2526	if (!res)
2527		return -ENOMEM;
2528
2529	res->name = "Persistent Memory";
2530	res->start = nd_res->start;
2531	res->end = nd_res->end;
2532	res->flags = IORESOURCE_MEM;
2533	res->desc = IORES_DESC_PERSISTENT_MEMORY;
2534
2535	ret = insert_resource(&iomem_resource, res);
2536	if (ret)
2537		return ret;
2538
2539	ret = devm_add_action_or_reset(acpi_desc->dev,
2540					acpi_nfit_remove_resource,
2541					res);
2542	if (ret)
2543		return ret;
2544
2545	return 0;
2546}
2547
2548static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
2549		struct nd_mapping_desc *mapping, struct nd_region_desc *ndr_desc,
2550		struct acpi_nfit_memory_map *memdev,
2551		struct nfit_spa *nfit_spa)
2552{
2553	struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
2554			memdev->device_handle);
2555	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2556
2557	if (!nvdimm) {
2558		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
2559				spa->range_index, memdev->device_handle);
2560		return -ENODEV;
2561	}
2562
2563	mapping->nvdimm = nvdimm;
2564	switch (nfit_spa_type(spa)) {
2565	case NFIT_SPA_PM:
2566	case NFIT_SPA_VOLATILE:
2567		mapping->start = memdev->address;
2568		mapping->size = memdev->region_size;
2569		break;
2570	}
2571
2572	return 0;
2573}
2574
2575static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
2576{
2577	return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
2578		nfit_spa_type(spa) == NFIT_SPA_VCD   ||
2579		nfit_spa_type(spa) == NFIT_SPA_PDISK ||
2580		nfit_spa_type(spa) == NFIT_SPA_PCD);
2581}
2582
2583static bool nfit_spa_is_volatile(struct acpi_nfit_system_address *spa)
2584{
2585	return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
2586		nfit_spa_type(spa) == NFIT_SPA_VCD   ||
2587		nfit_spa_type(spa) == NFIT_SPA_VOLATILE);
2588}
2589
2590static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
2591		struct nfit_spa *nfit_spa)
2592{
2593	static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS];
2594	struct acpi_nfit_system_address *spa = nfit_spa->spa;
2595	struct nd_region_desc *ndr_desc, _ndr_desc;
2596	struct nfit_memdev *nfit_memdev;
2597	struct nvdimm_bus *nvdimm_bus;
2598	struct resource res;
2599	int count = 0, rc;
2600
2601	if (nfit_spa->nd_region)
2602		return 0;
2603
2604	if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
2605		dev_dbg(acpi_desc->dev, "detected invalid spa index\n");
2606		return 0;
2607	}
2608
2609	memset(&res, 0, sizeof(res));
2610	memset(&mappings, 0, sizeof(mappings));
2611	memset(&_ndr_desc, 0, sizeof(_ndr_desc));
2612	res.start = spa->address;
2613	res.end = res.start + spa->length - 1;
2614	ndr_desc = &_ndr_desc;
2615	ndr_desc->res = &res;
2616	ndr_desc->provider_data = nfit_spa;
2617	ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
2618	if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) {
2619		ndr_desc->numa_node = pxm_to_online_node(spa->proximity_domain);
2620		ndr_desc->target_node = pxm_to_node(spa->proximity_domain);
2621	} else {
2622		ndr_desc->numa_node = NUMA_NO_NODE;
2623		ndr_desc->target_node = NUMA_NO_NODE;
2624	}
2625
2626	/* Fallback to address based numa information if node lookup failed */
2627	if (ndr_desc->numa_node == NUMA_NO_NODE) {
2628		ndr_desc->numa_node = memory_add_physaddr_to_nid(spa->address);
2629		dev_info(acpi_desc->dev, "changing numa node from %d to %d for nfit region [%pa-%pa]",
2630			NUMA_NO_NODE, ndr_desc->numa_node, &res.start, &res.end);
2631	}
2632	if (ndr_desc->target_node == NUMA_NO_NODE) {
2633		ndr_desc->target_node = phys_to_target_node(spa->address);
2634		dev_info(acpi_desc->dev, "changing target node from %d to %d for nfit region [%pa-%pa]",
2635			NUMA_NO_NODE, ndr_desc->numa_node, &res.start, &res.end);
2636	}
2637
2638	/*
2639	 * Persistence domain bits are hierarchical, if
2640	 * ACPI_NFIT_CAPABILITY_CACHE_FLUSH is set then
2641	 * ACPI_NFIT_CAPABILITY_MEM_FLUSH is implied.
2642	 */
2643	if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH)
2644		set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags);
2645	else if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH)
2646		set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags);
2647
2648	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
2649		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
2650		struct nd_mapping_desc *mapping;
2651
2652		/* range index 0 == unmapped in SPA or invalid-SPA */
2653		if (memdev->range_index == 0 || spa->range_index == 0)
2654			continue;
2655		if (memdev->range_index != spa->range_index)
2656			continue;
2657		if (count >= ND_MAX_MAPPINGS) {
2658			dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
2659					spa->range_index, ND_MAX_MAPPINGS);
2660			return -ENXIO;
2661		}
2662		mapping = &mappings[count++];
2663		rc = acpi_nfit_init_mapping(acpi_desc, mapping, ndr_desc,
2664				memdev, nfit_spa);
2665		if (rc)
2666			goto out;
2667	}
2668
2669	ndr_desc->mapping = mappings;
2670	ndr_desc->num_mappings = count;
2671	rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
2672	if (rc)
2673		goto out;
2674
2675	nvdimm_bus = acpi_desc->nvdimm_bus;
2676	if (nfit_spa_type(spa) == NFIT_SPA_PM) {
2677		rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
2678		if (rc) {
2679			dev_warn(acpi_desc->dev,
2680				"failed to insert pmem resource to iomem: %d\n",
2681				rc);
2682			goto out;
2683		}
2684
2685		nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
2686				ndr_desc);
2687		if (!nfit_spa->nd_region)
2688			rc = -ENOMEM;
2689	} else if (nfit_spa_is_volatile(spa)) {
2690		nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
2691				ndr_desc);
2692		if (!nfit_spa->nd_region)
2693			rc = -ENOMEM;
2694	} else if (nfit_spa_is_virtual(spa)) {
2695		nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
2696				ndr_desc);
2697		if (!nfit_spa->nd_region)
2698			rc = -ENOMEM;
2699	}
2700
2701 out:
2702	if (rc)
2703		dev_err(acpi_desc->dev, "failed to register spa range %d\n",
2704				nfit_spa->spa->range_index);
2705	return rc;
2706}
2707
2708static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc)
2709{
2710	struct device *dev = acpi_desc->dev;
2711	struct nd_cmd_ars_status *ars_status;
2712
2713	if (acpi_desc->ars_status) {
2714		memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
2715		return 0;
2716	}
2717
2718	ars_status = devm_kzalloc(dev, acpi_desc->max_ars, GFP_KERNEL);
2719	if (!ars_status)
2720		return -ENOMEM;
2721	acpi_desc->ars_status = ars_status;
2722	return 0;
2723}
2724
2725static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
2726{
2727	int rc;
2728
2729	if (ars_status_alloc(acpi_desc))
2730		return -ENOMEM;
2731
2732	rc = ars_get_status(acpi_desc);
2733
2734	if (rc < 0 && rc != -ENOSPC)
2735		return rc;
2736
2737	if (ars_status_process_records(acpi_desc))
2738		dev_err(acpi_desc->dev, "Failed to process ARS records\n");
2739
2740	return rc;
2741}
2742
2743static int ars_register(struct acpi_nfit_desc *acpi_desc,
2744		struct nfit_spa *nfit_spa)
2745{
2746	int rc;
2747
2748	if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
2749		return acpi_nfit_register_region(acpi_desc, nfit_spa);
2750
2751	set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
2752	if (!no_init_ars)
2753		set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
2754
2755	switch (acpi_nfit_query_poison(acpi_desc)) {
2756	case 0:
2757	case -ENOSPC:
2758	case -EAGAIN:
2759		rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
2760		/* shouldn't happen, try again later */
2761		if (rc == -EBUSY)
2762			break;
2763		if (rc) {
2764			set_bit(ARS_FAILED, &nfit_spa->ars_state);
2765			break;
2766		}
2767		clear_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
2768		rc = acpi_nfit_query_poison(acpi_desc);
2769		if (rc)
2770			break;
2771		acpi_desc->scrub_spa = nfit_spa;
2772		ars_complete(acpi_desc, nfit_spa);
2773		/*
2774		 * If ars_complete() says we didn't complete the
2775		 * short scrub, we'll try again with a long
2776		 * request.
2777		 */
2778		acpi_desc->scrub_spa = NULL;
2779		break;
2780	case -EBUSY:
2781	case -ENOMEM:
2782		/*
2783		 * BIOS was using ARS, wait for it to complete (or
2784		 * resources to become available) and then perform our
2785		 * own scrubs.
2786		 */
2787		break;
2788	default:
2789		set_bit(ARS_FAILED, &nfit_spa->ars_state);
2790		break;
2791	}
2792
2793	return acpi_nfit_register_region(acpi_desc, nfit_spa);
2794}
2795
2796static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
2797{
2798	struct nfit_spa *nfit_spa;
2799
2800	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2801		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
2802			continue;
2803		ars_complete(acpi_desc, nfit_spa);
2804	}
2805}
2806
2807static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
2808		int query_rc)
2809{
2810	unsigned int tmo = acpi_desc->scrub_tmo;
2811	struct device *dev = acpi_desc->dev;
2812	struct nfit_spa *nfit_spa;
2813
2814	lockdep_assert_held(&acpi_desc->init_mutex);
2815
2816	if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags))
2817		return 0;
2818
2819	if (query_rc == -EBUSY) {
2820		dev_dbg(dev, "ARS: ARS busy\n");
2821		return min(30U * 60U, tmo * 2);
2822	}
2823	if (query_rc == -ENOSPC) {
2824		dev_dbg(dev, "ARS: ARS continue\n");
2825		ars_continue(acpi_desc);
2826		return 1;
2827	}
2828	if (query_rc && query_rc != -EAGAIN) {
2829		unsigned long long addr, end;
2830
2831		addr = acpi_desc->ars_status->address;
2832		end = addr + acpi_desc->ars_status->length;
2833		dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
2834				query_rc);
2835	}
2836
2837	ars_complete_all(acpi_desc);
2838	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2839		enum nfit_ars_state req_type;
2840		int rc;
2841
2842		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
2843			continue;
2844
2845		/* prefer short ARS requests first */
2846		if (test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state))
2847			req_type = ARS_REQ_SHORT;
2848		else if (test_bit(ARS_REQ_LONG, &nfit_spa->ars_state))
2849			req_type = ARS_REQ_LONG;
2850		else
2851			continue;
2852		rc = ars_start(acpi_desc, nfit_spa, req_type);
2853
2854		dev = nd_region_dev(nfit_spa->nd_region);
2855		dev_dbg(dev, "ARS: range %d ARS start %s (%d)\n",
2856				nfit_spa->spa->range_index,
2857				req_type == ARS_REQ_SHORT ? "short" : "long",
2858				rc);
2859		/*
2860		 * Hmm, we raced someone else starting ARS? Try again in
2861		 * a bit.
2862		 */
2863		if (rc == -EBUSY)
2864			return 1;
2865		if (rc == 0) {
2866			dev_WARN_ONCE(dev, acpi_desc->scrub_spa,
2867					"scrub start while range %d active\n",
2868					acpi_desc->scrub_spa->spa->range_index);
2869			clear_bit(req_type, &nfit_spa->ars_state);
2870			acpi_desc->scrub_spa = nfit_spa;
2871			/*
2872			 * Consider this spa last for future scrub
2873			 * requests
2874			 */
2875			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
2876			return 1;
2877		}
2878
2879		dev_err(dev, "ARS: range %d ARS failed (%d)\n",
2880				nfit_spa->spa->range_index, rc);
2881		set_bit(ARS_FAILED, &nfit_spa->ars_state);
2882	}
2883	return 0;
2884}
2885
2886static void __sched_ars(struct acpi_nfit_desc *acpi_desc, unsigned int tmo)
2887{
2888	lockdep_assert_held(&acpi_desc->init_mutex);
2889
2890	set_bit(ARS_BUSY, &acpi_desc->scrub_flags);
2891	/* note this should only be set from within the workqueue */
2892	if (tmo)
2893		acpi_desc->scrub_tmo = tmo;
2894	queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
2895}
2896
/* Queue the scrub work with no delay, leaving scrub_tmo unchanged. */
static void sched_ars(struct acpi_nfit_desc *acpi_desc)
{
	const unsigned int no_delay = 0;

	__sched_ars(acpi_desc, no_delay);
}
2901
2902static void notify_ars_done(struct acpi_nfit_desc *acpi_desc)
2903{
2904	lockdep_assert_held(&acpi_desc->init_mutex);
2905
2906	clear_bit(ARS_BUSY, &acpi_desc->scrub_flags);
2907	acpi_desc->scrub_count++;
2908	if (acpi_desc->scrub_count_state)
2909		sysfs_notify_dirent(acpi_desc->scrub_count_state);
2910}
2911
2912static void acpi_nfit_scrub(struct work_struct *work)
2913{
2914	struct acpi_nfit_desc *acpi_desc;
2915	unsigned int tmo;
2916	int query_rc;
2917
2918	acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
2919	mutex_lock(&acpi_desc->init_mutex);
2920	query_rc = acpi_nfit_query_poison(acpi_desc);
2921	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
2922	if (tmo)
2923		__sched_ars(acpi_desc, tmo);
2924	else
2925		notify_ars_done(acpi_desc);
2926	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
2927	clear_bit(ARS_POLL, &acpi_desc->scrub_flags);
2928	mutex_unlock(&acpi_desc->init_mutex);
2929}
2930
2931static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
2932		struct nfit_spa *nfit_spa)
2933{
2934	int type = nfit_spa_type(nfit_spa->spa);
2935	struct nd_cmd_ars_cap ars_cap;
2936	int rc;
2937
2938	set_bit(ARS_FAILED, &nfit_spa->ars_state);
2939	memset(&ars_cap, 0, sizeof(ars_cap));
2940	rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
2941	if (rc < 0)
2942		return;
2943	/* check that the supported scrub types match the spa type */
2944	if (type == NFIT_SPA_VOLATILE && ((ars_cap.status >> 16)
2945				& ND_ARS_VOLATILE) == 0)
2946		return;
2947	if (type == NFIT_SPA_PM && ((ars_cap.status >> 16)
2948				& ND_ARS_PERSISTENT) == 0)
2949		return;
2950
2951	nfit_spa->max_ars = ars_cap.max_ars_out;
2952	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
2953	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
2954	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
2955}
2956
2957static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
2958{
2959	struct nfit_spa *nfit_spa;
2960	int rc, do_sched_ars = 0;
2961
2962	set_bit(ARS_VALID, &acpi_desc->scrub_flags);
2963	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2964		switch (nfit_spa_type(nfit_spa->spa)) {
2965		case NFIT_SPA_VOLATILE:
2966		case NFIT_SPA_PM:
2967			acpi_nfit_init_ars(acpi_desc, nfit_spa);
2968			break;
2969		}
2970	}
2971
2972	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2973		switch (nfit_spa_type(nfit_spa->spa)) {
2974		case NFIT_SPA_VOLATILE:
2975		case NFIT_SPA_PM:
2976			/* register regions and kick off initial ARS run */
2977			rc = ars_register(acpi_desc, nfit_spa);
2978			if (rc)
2979				return rc;
2980
2981			/*
2982			 * Kick off background ARS if at least one
2983			 * region successfully registered ARS
2984			 */
2985			if (!test_bit(ARS_FAILED, &nfit_spa->ars_state))
2986				do_sched_ars++;
2987			break;
2988		case NFIT_SPA_BDW:
2989			/* nothing to register */
2990			break;
2991		case NFIT_SPA_DCR:
2992		case NFIT_SPA_VDISK:
2993		case NFIT_SPA_VCD:
2994		case NFIT_SPA_PDISK:
2995		case NFIT_SPA_PCD:
2996			/* register known regions that don't support ARS */
2997			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
2998			if (rc)
2999				return rc;
3000			break;
3001		default:
3002			/* don't register unknown regions */
3003			break;
3004		}
3005	}
3006
3007	if (do_sched_ars)
3008		sched_ars(acpi_desc);
3009	return 0;
3010}
3011
3012static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
3013		struct nfit_table_prev *prev)
3014{
3015	struct device *dev = acpi_desc->dev;
3016
3017	if (!list_empty(&prev->spas) ||
3018			!list_empty(&prev->memdevs) ||
3019			!list_empty(&prev->dcrs) ||
3020			!list_empty(&prev->bdws) ||
3021			!list_empty(&prev->idts) ||
3022			!list_empty(&prev->flushes)) {
3023		dev_err(dev, "new nfit deletes entries (unsupported)\n");
3024		return -ENXIO;
3025	}
3026	return 0;
3027}
3028
3029static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
3030{
3031	struct device *dev = acpi_desc->dev;
3032	struct kernfs_node *nfit;
3033	struct device *bus_dev;
3034
3035	if (!ars_supported(acpi_desc->nvdimm_bus))
3036		return 0;
3037
3038	bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
3039	nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
3040	if (!nfit) {
3041		dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
3042		return -ENODEV;
3043	}
3044	acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
3045	sysfs_put(nfit);
3046	if (!acpi_desc->scrub_count_state) {
3047		dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
3048		return -ENODEV;
3049	}
3050
3051	return 0;
3052}
3053
3054static void acpi_nfit_unregister(void *data)
3055{
3056	struct acpi_nfit_desc *acpi_desc = data;
3057
3058	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
3059}
3060
3061int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
3062{
3063	struct device *dev = acpi_desc->dev;
3064	struct nfit_table_prev prev;
3065	const void *end;
3066	int rc;
3067
3068	if (!acpi_desc->nvdimm_bus) {
3069		acpi_nfit_init_dsms(acpi_desc);
3070
3071		acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
3072				&acpi_desc->nd_desc);
3073		if (!acpi_desc->nvdimm_bus)
3074			return -ENOMEM;
3075
3076		rc = devm_add_action_or_reset(dev, acpi_nfit_unregister,
3077				acpi_desc);
3078		if (rc)
3079			return rc;
3080
3081		rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
3082		if (rc)
3083			return rc;
3084
3085		/* register this acpi_desc for mce notifications */
3086		mutex_lock(&acpi_desc_lock);
3087		list_add_tail(&acpi_desc->list, &acpi_descs);
3088		mutex_unlock(&acpi_desc_lock);
3089	}
3090
3091	mutex_lock(&acpi_desc->init_mutex);
3092
3093	INIT_LIST_HEAD(&prev.spas);
3094	INIT_LIST_HEAD(&prev.memdevs);
3095	INIT_LIST_HEAD(&prev.dcrs);
3096	INIT_LIST_HEAD(&prev.bdws);
3097	INIT_LIST_HEAD(&prev.idts);
3098	INIT_LIST_HEAD(&prev.flushes);
3099
3100	list_cut_position(&prev.spas, &acpi_desc->spas,
3101				acpi_desc->spas.prev);
3102	list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
3103				acpi_desc->memdevs.prev);
3104	list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
3105				acpi_desc->dcrs.prev);
3106	list_cut_position(&prev.bdws, &acpi_desc->bdws,
3107				acpi_desc->bdws.prev);
3108	list_cut_position(&prev.idts, &acpi_desc->idts,
3109				acpi_desc->idts.prev);
3110	list_cut_position(&prev.flushes, &acpi_desc->flushes,
3111				acpi_desc->flushes.prev);
3112
3113	end = data + sz;
3114	while (!IS_ERR_OR_NULL(data))
3115		data = add_table(acpi_desc, &prev, data, end);
3116
3117	if (IS_ERR(data)) {
3118		dev_dbg(dev, "nfit table parsing error: %ld\n",	PTR_ERR(data));
3119		rc = PTR_ERR(data);
3120		goto out_unlock;
3121	}
3122
3123	rc = acpi_nfit_check_deletions(acpi_desc, &prev);
3124	if (rc)
3125		goto out_unlock;
3126
3127	rc = nfit_mem_init(acpi_desc);
3128	if (rc)
3129		goto out_unlock;
3130
3131	rc = acpi_nfit_register_dimms(acpi_desc);
3132	if (rc)
3133		goto out_unlock;
3134
3135	rc = acpi_nfit_register_regions(acpi_desc);
3136
3137 out_unlock:
3138	mutex_unlock(&acpi_desc->init_mutex);
3139	return rc;
3140}
3141EXPORT_SYMBOL_GPL(acpi_nfit_init);
3142
3143static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
3144{
3145	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
3146	struct device *dev = acpi_desc->dev;
3147
3148	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
3149	device_lock(dev);
3150	device_unlock(dev);
3151
3152	/* Bounce the init_mutex to complete initial registration */
3153	mutex_lock(&acpi_desc->init_mutex);
3154	mutex_unlock(&acpi_desc->init_mutex);
3155
3156	return 0;
3157}
3158
3159static int __acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
3160		struct nvdimm *nvdimm, unsigned int cmd)
3161{
3162	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
3163
3164	if (nvdimm)
3165		return 0;
3166	if (cmd != ND_CMD_ARS_START)
3167		return 0;
3168
3169	/*
3170	 * The kernel and userspace may race to initiate a scrub, but
3171	 * the scrub thread is prepared to lose that initial race.  It
3172	 * just needs guarantees that any ARS it initiates are not
3173	 * interrupted by any intervening start requests from userspace.
3174	 */
3175	if (work_busy(&acpi_desc->dwork.work))
3176		return -EBUSY;
3177
3178	return 0;
3179}
3180
3181/*
3182 * Prevent security and firmware activate commands from being issued via
3183 * ioctl.
3184 */
3185static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
3186		struct nvdimm *nvdimm, unsigned int cmd, void *buf)
3187{
3188	struct nd_cmd_pkg *call_pkg = buf;
3189	unsigned int func;
3190
3191	if (nvdimm && cmd == ND_CMD_CALL &&
3192			call_pkg->nd_family == NVDIMM_FAMILY_INTEL) {
3193		func = call_pkg->nd_command;
3194		if (func > NVDIMM_CMD_MAX ||
3195		    (1 << func) & NVDIMM_INTEL_DENY_CMDMASK)
3196			return -EOPNOTSUPP;
3197	}
3198
3199	/* block all non-nfit bus commands */
3200	if (!nvdimm && cmd == ND_CMD_CALL &&
3201			call_pkg->nd_family != NVDIMM_BUS_FAMILY_NFIT)
3202		return -EOPNOTSUPP;
3203
3204	return __acpi_nfit_clear_to_send(nd_desc, nvdimm, cmd);
3205}
3206
3207int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
3208		enum nfit_ars_state req_type)
3209{
3210	struct device *dev = acpi_desc->dev;
3211	int scheduled = 0, busy = 0;
3212	struct nfit_spa *nfit_spa;
3213
3214	mutex_lock(&acpi_desc->init_mutex);
3215	if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) {
3216		mutex_unlock(&acpi_desc->init_mutex);
3217		return 0;
3218	}
3219
3220	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
3221		int type = nfit_spa_type(nfit_spa->spa);
3222
3223		if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
3224			continue;
3225		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
3226			continue;
3227
3228		if (test_and_set_bit(req_type, &nfit_spa->ars_state))
3229			busy++;
3230		else
3231			scheduled++;
3232	}
3233	if (scheduled) {
3234		sched_ars(acpi_desc);
3235		dev_dbg(dev, "ars_scan triggered\n");
3236	}
3237	mutex_unlock(&acpi_desc->init_mutex);
3238
3239	if (scheduled)
3240		return 0;
3241	if (busy)
3242		return -EBUSY;
3243	return -ENOTTY;
3244}
3245
3246void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
3247{
3248	struct nvdimm_bus_descriptor *nd_desc;
3249
3250	dev_set_drvdata(dev, acpi_desc);
3251	acpi_desc->dev = dev;
3252	nd_desc = &acpi_desc->nd_desc;
3253	nd_desc->provider_name = "ACPI.NFIT";
3254	nd_desc->module = THIS_MODULE;
3255	nd_desc->ndctl = acpi_nfit_ctl;
3256	nd_desc->flush_probe = acpi_nfit_flush_probe;
3257	nd_desc->clear_to_send = acpi_nfit_clear_to_send;
3258	nd_desc->attr_groups = acpi_nfit_attribute_groups;
3259
3260	INIT_LIST_HEAD(&acpi_desc->spas);
3261	INIT_LIST_HEAD(&acpi_desc->dcrs);
3262	INIT_LIST_HEAD(&acpi_desc->bdws);
3263	INIT_LIST_HEAD(&acpi_desc->idts);
3264	INIT_LIST_HEAD(&acpi_desc->flushes);
3265	INIT_LIST_HEAD(&acpi_desc->memdevs);
3266	INIT_LIST_HEAD(&acpi_desc->dimms);
3267	INIT_LIST_HEAD(&acpi_desc->list);
3268	mutex_init(&acpi_desc->init_mutex);
3269	acpi_desc->scrub_tmo = 1;
3270	INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
3271}
3272EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
3273
/* devm action: release the table reference taken with acpi_get_table() */
static void acpi_nfit_put_table(void *table)
{
	struct acpi_table_header *tbl = table;

	acpi_put_table(tbl);
}
3278
3279static void acpi_nfit_notify(acpi_handle handle, u32 event, void *data)
3280{
3281	struct acpi_device *adev = data;
3282
3283	device_lock(&adev->dev);
3284	__acpi_nfit_notify(&adev->dev, handle, event);
3285	device_unlock(&adev->dev);
3286}
3287
3288static void acpi_nfit_remove_notify_handler(void *data)
3289{
3290	struct acpi_device *adev = data;
3291
3292	acpi_dev_remove_notify_handler(adev, ACPI_DEVICE_NOTIFY,
3293				       acpi_nfit_notify);
3294}
3295
3296void acpi_nfit_shutdown(void *data)
3297{
3298	struct acpi_nfit_desc *acpi_desc = data;
3299	struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
3300
3301	/*
3302	 * Destruct under acpi_desc_lock so that nfit_handle_mce does not
3303	 * race teardown
3304	 */
3305	mutex_lock(&acpi_desc_lock);
3306	list_del(&acpi_desc->list);
3307	mutex_unlock(&acpi_desc_lock);
3308
3309	mutex_lock(&acpi_desc->init_mutex);
3310	set_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
3311	mutex_unlock(&acpi_desc->init_mutex);
3312	cancel_delayed_work_sync(&acpi_desc->dwork);
3313
3314	/*
3315	 * Bounce the nvdimm bus lock to make sure any in-flight
3316	 * acpi_nfit_ars_rescan() submissions have had a chance to
3317	 * either submit or see ->cancel set.
3318	 */
3319	device_lock(bus_dev);
3320	device_unlock(bus_dev);
3321
3322	flush_workqueue(nfit_wq);
3323}
3324EXPORT_SYMBOL_GPL(acpi_nfit_shutdown);
3325
3326static int acpi_nfit_add(struct acpi_device *adev)
3327{
3328	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
3329	struct acpi_nfit_desc *acpi_desc;
3330	struct device *dev = &adev->dev;
3331	struct acpi_table_header *tbl;
3332	acpi_status status = AE_OK;
3333	acpi_size sz;
3334	int rc = 0;
3335
3336	rc = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY,
3337					     acpi_nfit_notify, adev);
3338	if (rc)
3339		return rc;
3340
3341	rc = devm_add_action_or_reset(dev, acpi_nfit_remove_notify_handler,
3342					adev);
3343	if (rc)
3344		return rc;
3345
3346	status = acpi_get_table(ACPI_SIG_NFIT, 0, &tbl);
3347	if (ACPI_FAILURE(status)) {
3348		/* The NVDIMM root device allows OS to trigger enumeration of
3349		 * NVDIMMs through NFIT at boot time and re-enumeration at
3350		 * root level via the _FIT method during runtime.
3351		 * This is ok to return 0 here, we could have an nvdimm
3352		 * hotplugged later and evaluate _FIT method which returns
3353		 * data in the format of a series of NFIT Structures.
3354		 */
3355		dev_dbg(dev, "failed to find NFIT at startup\n");
3356		return 0;
3357	}
3358
3359	rc = devm_add_action_or_reset(dev, acpi_nfit_put_table, tbl);
3360	if (rc)
3361		return rc;
3362	sz = tbl->length;
3363
3364	acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
3365	if (!acpi_desc)
3366		return -ENOMEM;
3367	acpi_nfit_desc_init(acpi_desc, &adev->dev);
3368
3369	/* Save the acpi header for exporting the revision via sysfs */
3370	acpi_desc->acpi_header = *tbl;
3371
3372	/* Evaluate _FIT and override with that if present */
3373	status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
3374	if (ACPI_SUCCESS(status) && buf.length > 0) {
3375		union acpi_object *obj = buf.pointer;
3376
3377		if (obj->type == ACPI_TYPE_BUFFER)
3378			rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
3379					obj->buffer.length);
3380		else
3381			dev_dbg(dev, "invalid type %d, ignoring _FIT\n",
3382				(int) obj->type);
3383		kfree(buf.pointer);
3384	} else
3385		/* skip over the lead-in header table */
3386		rc = acpi_nfit_init(acpi_desc, (void *) tbl
3387				+ sizeof(struct acpi_table_nfit),
3388				sz - sizeof(struct acpi_table_nfit));
3389
3390	if (rc)
3391		return rc;
3392
3393	return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc);
3394}
3395
3396static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
3397{
3398	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
3399	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
3400	union acpi_object *obj;
3401	acpi_status status;
3402	int ret;
3403
3404	if (!dev->driver) {
3405		/* dev->driver may be null if we're being removed */
3406		dev_dbg(dev, "no driver found for dev\n");
3407		return;
3408	}
3409
3410	if (!acpi_desc) {
3411		acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
3412		if (!acpi_desc)
3413			return;
3414		acpi_nfit_desc_init(acpi_desc, dev);
3415	} else {
3416		/*
3417		 * Finish previous registration before considering new
3418		 * regions.
3419		 */
3420		flush_workqueue(nfit_wq);
3421	}
3422
3423	/* Evaluate _FIT */
3424	status = acpi_evaluate_object(handle, "_FIT", NULL, &buf);
3425	if (ACPI_FAILURE(status)) {
3426		dev_err(dev, "failed to evaluate _FIT\n");
3427		return;
3428	}
3429
3430	obj = buf.pointer;
3431	if (obj->type == ACPI_TYPE_BUFFER) {
3432		ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
3433				obj->buffer.length);
3434		if (ret)
3435			dev_err(dev, "failed to merge updated NFIT\n");
3436	} else
3437		dev_err(dev, "Invalid _FIT\n");
3438	kfree(buf.pointer);
3439}
3440
3441static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
3442{
3443	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
3444
3445	if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON)
3446		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
3447	else
3448		acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_SHORT);
3449}
3450
3451void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
3452{
3453	dev_dbg(dev, "event: 0x%x\n", event);
3454
3455	switch (event) {
3456	case NFIT_NOTIFY_UPDATE:
3457		return acpi_nfit_update_notify(dev, handle);
3458	case NFIT_NOTIFY_UC_MEMORY_ERROR:
3459		return acpi_nfit_uc_error_notify(dev, handle);
3460	default:
3461		return;
3462	}
3463}
3464EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
3465
3466static const struct acpi_device_id acpi_nfit_ids[] = {
3467	{ "ACPI0012", 0 },
3468	{ "", 0 },
3469};
3470MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
3471
3472static struct acpi_driver acpi_nfit_driver = {
3473	.name = KBUILD_MODNAME,
3474	.ids = acpi_nfit_ids,
3475	.ops = {
3476		.add = acpi_nfit_add,
3477	},
3478};
3479
3480static __init int nfit_init(void)
3481{
3482	int ret;
3483
3484	BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
3485	BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 64);
3486	BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
3487	BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 16);
3488	BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 8);
3489	BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
3490	BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
3491	BUILD_BUG_ON(sizeof(struct acpi_nfit_capabilities) != 16);
3492
3493	guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
3494	guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
3495	guid_parse(UUID_CONTROL_REGION, &nfit_uuid[NFIT_SPA_DCR]);
3496	guid_parse(UUID_DATA_REGION, &nfit_uuid[NFIT_SPA_BDW]);
3497	guid_parse(UUID_VOLATILE_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_VDISK]);
3498	guid_parse(UUID_VOLATILE_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_VCD]);
3499	guid_parse(UUID_PERSISTENT_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_PDISK]);
3500	guid_parse(UUID_PERSISTENT_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_PCD]);
3501	guid_parse(UUID_NFIT_BUS, &nfit_uuid[NFIT_DEV_BUS]);
3502	guid_parse(UUID_NFIT_DIMM, &nfit_uuid[NFIT_DEV_DIMM]);
3503	guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
3504	guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
3505	guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
3506	guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]);
3507	guid_parse(UUID_INTEL_BUS, &nfit_uuid[NFIT_BUS_INTEL]);
3508
3509	nfit_wq = create_singlethread_workqueue("nfit");
3510	if (!nfit_wq)
3511		return -ENOMEM;
3512
3513	nfit_mce_register();
3514	ret = acpi_bus_register_driver(&acpi_nfit_driver);
3515	if (ret) {
3516		nfit_mce_unregister();
3517		destroy_workqueue(nfit_wq);
3518	}
3519
3520	return ret;
3521
3522}
3523
3524static __exit void nfit_exit(void)
3525{
3526	nfit_mce_unregister();
3527	acpi_bus_unregister_driver(&acpi_nfit_driver);
3528	destroy_workqueue(nfit_wq);
3529	WARN_ON(!list_empty(&acpi_descs));
3530}
3531
3532module_init(nfit_init);
3533module_exit(nfit_exit);
3534MODULE_LICENSE("GPL v2");
3535MODULE_AUTHOR("Intel Corporation");