Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
   4 */
   5#include <linux/scatterlist.h>
   6#include <linux/memregion.h>
   7#include <linux/highmem.h>
   8#include <linux/sched.h>
   9#include <linux/slab.h>
  10#include <linux/hash.h>
  11#include <linux/sort.h>
  12#include <linux/io.h>
  13#include <linux/nd.h>
  14#include "nd-core.h"
  15#include "nd.h"
  16
  17/*
  18 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
  19 * irrelevant.
  20 */
  21#include <linux/io-64-nonatomic-hi-lo.h>
  22
/*
 * Per-CPU integer named flush_idx.
 * NOTE(review): no user of this variable is visible in this part of the
 * file; presumably the flush path further down consumes it -- confirm.
 */
static DEFINE_PER_CPU(int, flush_idx);
  24
  25static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
  26		struct nd_region_data *ndrd)
  27{
  28	int i, j;
  29
  30	dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
  31			nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
  32	for (i = 0; i < (1 << ndrd->hints_shift); i++) {
  33		struct resource *res = &nvdimm->flush_wpq[i];
  34		unsigned long pfn = PHYS_PFN(res->start);
  35		void __iomem *flush_page;
  36
  37		/* check if flush hints share a page */
  38		for (j = 0; j < i; j++) {
  39			struct resource *res_j = &nvdimm->flush_wpq[j];
  40			unsigned long pfn_j = PHYS_PFN(res_j->start);
  41
  42			if (pfn == pfn_j)
  43				break;
  44		}
  45
  46		if (j < i)
  47			flush_page = (void __iomem *) ((unsigned long)
  48					ndrd_get_flush_wpq(ndrd, dimm, j)
  49					& PAGE_MASK);
  50		else
  51			flush_page = devm_nvdimm_ioremap(dev,
  52					PFN_PHYS(pfn), PAGE_SIZE);
  53		if (!flush_page)
  54			return -ENXIO;
  55		ndrd_set_flush_wpq(ndrd, dimm, i, flush_page
  56				+ (res->start & ~PAGE_MASK));
  57	}
  58
  59	return 0;
  60}
  61
  62static int nd_region_invalidate_memregion(struct nd_region *nd_region)
  63{
  64	int i, incoherent = 0;
  65
  66	for (i = 0; i < nd_region->ndr_mappings; i++) {
  67		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
  68		struct nvdimm *nvdimm = nd_mapping->nvdimm;
  69
  70		if (test_bit(NDD_INCOHERENT, &nvdimm->flags)) {
  71			incoherent++;
  72			break;
  73		}
  74	}
  75
  76	if (!incoherent)
  77		return 0;
  78
  79	if (!cpu_cache_has_invalidate_memregion()) {
  80		if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) {
  81			dev_warn(
  82				&nd_region->dev,
  83				"Bypassing cpu_cache_invalidate_memergion() for testing!\n");
  84			goto out;
  85		} else {
  86			dev_err(&nd_region->dev,
  87				"Failed to synchronize CPU cache state\n");
  88			return -ENXIO;
  89		}
  90	}
  91
  92	cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
  93out:
  94	for (i = 0; i < nd_region->ndr_mappings; i++) {
  95		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
  96		struct nvdimm *nvdimm = nd_mapping->nvdimm;
  97
  98		clear_bit(NDD_INCOHERENT, &nvdimm->flags);
  99	}
 100
 101	return 0;
 102}
 103
 104int nd_region_activate(struct nd_region *nd_region)
 105{
 106	int i, j, rc, num_flush = 0;
 107	struct nd_region_data *ndrd;
 108	struct device *dev = &nd_region->dev;
 109	size_t flush_data_size = sizeof(void *);
 110
 111	nvdimm_bus_lock(&nd_region->dev);
 112	for (i = 0; i < nd_region->ndr_mappings; i++) {
 113		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 114		struct nvdimm *nvdimm = nd_mapping->nvdimm;
 115
 116		if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
 117			nvdimm_bus_unlock(&nd_region->dev);
 118			return -EBUSY;
 119		}
 120
 121		/* at least one null hint slot per-dimm for the "no-hint" case */
 122		flush_data_size += sizeof(void *);
 123		num_flush = min_not_zero(num_flush, nvdimm->num_flush);
 124		if (!nvdimm->num_flush)
 125			continue;
 126		flush_data_size += nvdimm->num_flush * sizeof(void *);
 127	}
 128	nvdimm_bus_unlock(&nd_region->dev);
 129
 130	rc = nd_region_invalidate_memregion(nd_region);
 131	if (rc)
 132		return rc;
 133
 134	ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
 135	if (!ndrd)
 136		return -ENOMEM;
 137	dev_set_drvdata(dev, ndrd);
 138
 139	if (!num_flush)
 140		return 0;
 141
 142	ndrd->hints_shift = ilog2(num_flush);
 143	for (i = 0; i < nd_region->ndr_mappings; i++) {
 144		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 145		struct nvdimm *nvdimm = nd_mapping->nvdimm;
 146		int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
 147
 148		if (rc)
 149			return rc;
 150	}
 151
 152	/*
 153	 * Clear out entries that are duplicates. This should prevent the
 154	 * extra flushings.
 155	 */
 156	for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
 157		/* ignore if NULL already */
 158		if (!ndrd_get_flush_wpq(ndrd, i, 0))
 159			continue;
 160
 161		for (j = i + 1; j < nd_region->ndr_mappings; j++)
 162			if (ndrd_get_flush_wpq(ndrd, i, 0) ==
 163			    ndrd_get_flush_wpq(ndrd, j, 0))
 164				ndrd_set_flush_wpq(ndrd, j, 0, NULL);
 165	}
 166
 167	return 0;
 168}
 169
 170static void nd_region_release(struct device *dev)
 171{
 172	struct nd_region *nd_region = to_nd_region(dev);
 173	u16 i;
 174
 175	for (i = 0; i < nd_region->ndr_mappings; i++) {
 176		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 177		struct nvdimm *nvdimm = nd_mapping->nvdimm;
 178
 179		put_device(&nvdimm->dev);
 180	}
 181	free_percpu(nd_region->lane);
 182	if (!test_bit(ND_REGION_CXL, &nd_region->flags))
 183		memregion_free(nd_region->id);
 184	kfree(nd_region);
 185}
 186
 187struct nd_region *to_nd_region(struct device *dev)
 188{
 189	struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
 190
 191	WARN_ON(dev->type->release != nd_region_release);
 192	return nd_region;
 193}
 194EXPORT_SYMBOL_GPL(to_nd_region);
 195
 196struct device *nd_region_dev(struct nd_region *nd_region)
 197{
 198	if (!nd_region)
 199		return NULL;
 200	return &nd_region->dev;
 201}
 202EXPORT_SYMBOL_GPL(nd_region_dev);
 203
 204void *nd_region_provider_data(struct nd_region *nd_region)
 205{
 206	return nd_region->provider_data;
 207}
 208EXPORT_SYMBOL_GPL(nd_region_provider_data);
 209
 210/**
 211 * nd_region_to_nstype() - region to an integer namespace type
 212 * @nd_region: region-device to interrogate
 213 *
 214 * This is the 'nstype' attribute of a region as well, an input to the
 215 * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match
 216 * namespace devices with namespace drivers.
 217 */
 218int nd_region_to_nstype(struct nd_region *nd_region)
 219{
 220	if (is_memory(&nd_region->dev)) {
 221		u16 i, label;
 222
 223		for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
 224			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 225			struct nvdimm *nvdimm = nd_mapping->nvdimm;
 226
 227			if (test_bit(NDD_LABELING, &nvdimm->flags))
 228				label++;
 229		}
 230		if (label)
 231			return ND_DEVICE_NAMESPACE_PMEM;
 232		else
 233			return ND_DEVICE_NAMESPACE_IO;
 234	}
 235
 236	return 0;
 237}
 238EXPORT_SYMBOL(nd_region_to_nstype);
 239
 240static unsigned long long region_size(struct nd_region *nd_region)
 241{
 242	if (is_memory(&nd_region->dev)) {
 243		return nd_region->ndr_size;
 244	} else if (nd_region->ndr_mappings == 1) {
 245		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
 246
 247		return nd_mapping->size;
 248	}
 249
 250	return 0;
 251}
 252
 253static ssize_t size_show(struct device *dev,
 254		struct device_attribute *attr, char *buf)
 255{
 256	struct nd_region *nd_region = to_nd_region(dev);
 257
 258	return sprintf(buf, "%llu\n", region_size(nd_region));
 259}
 260static DEVICE_ATTR_RO(size);
 261
 262static ssize_t deep_flush_show(struct device *dev,
 263		struct device_attribute *attr, char *buf)
 264{
 265	struct nd_region *nd_region = to_nd_region(dev);
 266
 267	/*
 268	 * NOTE: in the nvdimm_has_flush() error case this attribute is
 269	 * not visible.
 270	 */
 271	return sprintf(buf, "%d\n", nvdimm_has_flush(nd_region));
 272}
 273
 274static ssize_t deep_flush_store(struct device *dev, struct device_attribute *attr,
 275		const char *buf, size_t len)
 276{
 277	bool flush;
 278	int rc = strtobool(buf, &flush);
 279	struct nd_region *nd_region = to_nd_region(dev);
 280
 281	if (rc)
 282		return rc;
 283	if (!flush)
 284		return -EINVAL;
 285	rc = nvdimm_flush(nd_region, NULL);
 286	if (rc)
 287		return rc;
 288
 289	return len;
 290}
 291static DEVICE_ATTR_RW(deep_flush);
 292
 293static ssize_t mappings_show(struct device *dev,
 294		struct device_attribute *attr, char *buf)
 295{
 296	struct nd_region *nd_region = to_nd_region(dev);
 297
 298	return sprintf(buf, "%d\n", nd_region->ndr_mappings);
 299}
 300static DEVICE_ATTR_RO(mappings);
 301
 302static ssize_t nstype_show(struct device *dev,
 303		struct device_attribute *attr, char *buf)
 304{
 305	struct nd_region *nd_region = to_nd_region(dev);
 306
 307	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
 308}
 309static DEVICE_ATTR_RO(nstype);
 310
 311static ssize_t set_cookie_show(struct device *dev,
 312		struct device_attribute *attr, char *buf)
 313{
 314	struct nd_region *nd_region = to_nd_region(dev);
 315	struct nd_interleave_set *nd_set = nd_region->nd_set;
 316	ssize_t rc = 0;
 317
 318	if (is_memory(dev) && nd_set)
 319		/* pass, should be precluded by region_visible */;
 320	else
 321		return -ENXIO;
 322
 323	/*
 324	 * The cookie to show depends on which specification of the
 325	 * labels we are using. If there are not labels then default to
 326	 * the v1.1 namespace label cookie definition. To read all this
 327	 * data we need to wait for probing to settle.
 328	 */
 329	device_lock(dev);
 330	nvdimm_bus_lock(dev);
 331	wait_nvdimm_bus_probe_idle(dev);
 332	if (nd_region->ndr_mappings) {
 333		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
 334		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 335
 336		if (ndd) {
 337			struct nd_namespace_index *nsindex;
 338
 339			nsindex = to_namespace_index(ndd, ndd->ns_current);
 340			rc = sprintf(buf, "%#llx\n",
 341					nd_region_interleave_set_cookie(nd_region,
 342						nsindex));
 343		}
 344	}
 345	nvdimm_bus_unlock(dev);
 346	device_unlock(dev);
 347
 348	if (rc)
 349		return rc;
 350	return sprintf(buf, "%#llx\n", nd_set->cookie1);
 351}
 352static DEVICE_ATTR_RO(set_cookie);
 353
 354resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
 355{
 356	resource_size_t available;
 357	int i;
 358
 359	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
 360
 361	available = 0;
 362	for (i = 0; i < nd_region->ndr_mappings; i++) {
 363		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 364		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 365
 366		/* if a dimm is disabled the available capacity is zero */
 367		if (!ndd)
 368			return 0;
 369
 370		available += nd_pmem_available_dpa(nd_region, nd_mapping);
 371	}
 372
 373	return available;
 374}
 375
 376resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region)
 377{
 378	resource_size_t avail = 0;
 379	int i;
 380
 381	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
 382	for (i = 0; i < nd_region->ndr_mappings; i++) {
 383		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 384
 385		avail = min_not_zero(avail, nd_pmem_max_contiguous_dpa(
 386						    nd_region, nd_mapping));
 387	}
 388	return avail * nd_region->ndr_mappings;
 389}
 390
 391static ssize_t available_size_show(struct device *dev,
 392		struct device_attribute *attr, char *buf)
 393{
 394	struct nd_region *nd_region = to_nd_region(dev);
 395	unsigned long long available = 0;
 396
 397	/*
 398	 * Flush in-flight updates and grab a snapshot of the available
 399	 * size.  Of course, this value is potentially invalidated the
 400	 * memory nvdimm_bus_lock() is dropped, but that's userspace's
 401	 * problem to not race itself.
 402	 */
 403	device_lock(dev);
 404	nvdimm_bus_lock(dev);
 405	wait_nvdimm_bus_probe_idle(dev);
 406	available = nd_region_available_dpa(nd_region);
 407	nvdimm_bus_unlock(dev);
 408	device_unlock(dev);
 409
 410	return sprintf(buf, "%llu\n", available);
 411}
 412static DEVICE_ATTR_RO(available_size);
 413
 414static ssize_t max_available_extent_show(struct device *dev,
 415		struct device_attribute *attr, char *buf)
 416{
 417	struct nd_region *nd_region = to_nd_region(dev);
 418	unsigned long long available = 0;
 419
 420	device_lock(dev);
 421	nvdimm_bus_lock(dev);
 422	wait_nvdimm_bus_probe_idle(dev);
 423	available = nd_region_allocatable_dpa(nd_region);
 424	nvdimm_bus_unlock(dev);
 425	device_unlock(dev);
 426
 427	return sprintf(buf, "%llu\n", available);
 428}
 429static DEVICE_ATTR_RO(max_available_extent);
 430
 431static ssize_t init_namespaces_show(struct device *dev,
 432		struct device_attribute *attr, char *buf)
 433{
 434	struct nd_region_data *ndrd = dev_get_drvdata(dev);
 435	ssize_t rc;
 436
 437	nvdimm_bus_lock(dev);
 438	if (ndrd)
 439		rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
 440	else
 441		rc = -ENXIO;
 442	nvdimm_bus_unlock(dev);
 443
 444	return rc;
 445}
 446static DEVICE_ATTR_RO(init_namespaces);
 447
 448static ssize_t namespace_seed_show(struct device *dev,
 449		struct device_attribute *attr, char *buf)
 450{
 451	struct nd_region *nd_region = to_nd_region(dev);
 452	ssize_t rc;
 453
 454	nvdimm_bus_lock(dev);
 455	if (nd_region->ns_seed)
 456		rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
 457	else
 458		rc = sprintf(buf, "\n");
 459	nvdimm_bus_unlock(dev);
 460	return rc;
 461}
 462static DEVICE_ATTR_RO(namespace_seed);
 463
 464static ssize_t btt_seed_show(struct device *dev,
 465		struct device_attribute *attr, char *buf)
 466{
 467	struct nd_region *nd_region = to_nd_region(dev);
 468	ssize_t rc;
 469
 470	nvdimm_bus_lock(dev);
 471	if (nd_region->btt_seed)
 472		rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed));
 473	else
 474		rc = sprintf(buf, "\n");
 475	nvdimm_bus_unlock(dev);
 476
 477	return rc;
 478}
 479static DEVICE_ATTR_RO(btt_seed);
 480
 481static ssize_t pfn_seed_show(struct device *dev,
 482		struct device_attribute *attr, char *buf)
 483{
 484	struct nd_region *nd_region = to_nd_region(dev);
 485	ssize_t rc;
 486
 487	nvdimm_bus_lock(dev);
 488	if (nd_region->pfn_seed)
 489		rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed));
 490	else
 491		rc = sprintf(buf, "\n");
 492	nvdimm_bus_unlock(dev);
 493
 494	return rc;
 495}
 496static DEVICE_ATTR_RO(pfn_seed);
 497
 498static ssize_t dax_seed_show(struct device *dev,
 499		struct device_attribute *attr, char *buf)
 500{
 501	struct nd_region *nd_region = to_nd_region(dev);
 502	ssize_t rc;
 503
 504	nvdimm_bus_lock(dev);
 505	if (nd_region->dax_seed)
 506		rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed));
 507	else
 508		rc = sprintf(buf, "\n");
 509	nvdimm_bus_unlock(dev);
 510
 511	return rc;
 512}
 513static DEVICE_ATTR_RO(dax_seed);
 514
 515static ssize_t read_only_show(struct device *dev,
 516		struct device_attribute *attr, char *buf)
 517{
 518	struct nd_region *nd_region = to_nd_region(dev);
 519
 520	return sprintf(buf, "%d\n", nd_region->ro);
 521}
 522
 523static int revalidate_read_only(struct device *dev, void *data)
 524{
 525	nd_device_notify(dev, NVDIMM_REVALIDATE_REGION);
 526	return 0;
 527}
 528
 529static ssize_t read_only_store(struct device *dev,
 530		struct device_attribute *attr, const char *buf, size_t len)
 531{
 532	bool ro;
 533	int rc = strtobool(buf, &ro);
 534	struct nd_region *nd_region = to_nd_region(dev);
 535
 536	if (rc)
 537		return rc;
 538
 539	nd_region->ro = ro;
 540	device_for_each_child(dev, NULL, revalidate_read_only);
 541	return len;
 542}
 543static DEVICE_ATTR_RW(read_only);
 544
 545static ssize_t align_show(struct device *dev,
 546		struct device_attribute *attr, char *buf)
 547{
 548	struct nd_region *nd_region = to_nd_region(dev);
 549
 550	return sprintf(buf, "%#lx\n", nd_region->align);
 551}
 552
 553static ssize_t align_store(struct device *dev,
 554		struct device_attribute *attr, const char *buf, size_t len)
 555{
 556	struct nd_region *nd_region = to_nd_region(dev);
 557	unsigned long val, dpa;
 558	u32 mappings, remainder;
 559	int rc;
 560
 561	rc = kstrtoul(buf, 0, &val);
 562	if (rc)
 563		return rc;
 564
 565	/*
 566	 * Ensure space-align is evenly divisible by the region
 567	 * interleave-width because the kernel typically has no facility
 568	 * to determine which DIMM(s), dimm-physical-addresses, would
 569	 * contribute to the tail capacity in system-physical-address
 570	 * space for the namespace.
 571	 */
 572	mappings = max_t(u32, 1, nd_region->ndr_mappings);
 573	dpa = div_u64_rem(val, mappings, &remainder);
 574	if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
 575			|| val > region_size(nd_region) || remainder)
 576		return -EINVAL;
 577
 578	/*
 579	 * Given that space allocation consults this value multiple
 580	 * times ensure it does not change for the duration of the
 581	 * allocation.
 582	 */
 583	nvdimm_bus_lock(dev);
 584	nd_region->align = val;
 585	nvdimm_bus_unlock(dev);
 586
 587	return len;
 588}
 589static DEVICE_ATTR_RW(align);
 590
 591static ssize_t region_badblocks_show(struct device *dev,
 592		struct device_attribute *attr, char *buf)
 593{
 594	struct nd_region *nd_region = to_nd_region(dev);
 595	ssize_t rc;
 596
 597	device_lock(dev);
 598	if (dev->driver)
 599		rc = badblocks_show(&nd_region->bb, buf, 0);
 600	else
 601		rc = -ENXIO;
 602	device_unlock(dev);
 603
 604	return rc;
 605}
 606static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);
 607
 608static ssize_t resource_show(struct device *dev,
 609		struct device_attribute *attr, char *buf)
 610{
 611	struct nd_region *nd_region = to_nd_region(dev);
 612
 613	return sprintf(buf, "%#llx\n", nd_region->ndr_start);
 614}
 615static DEVICE_ATTR_ADMIN_RO(resource);
 616
 617static ssize_t persistence_domain_show(struct device *dev,
 618		struct device_attribute *attr, char *buf)
 619{
 620	struct nd_region *nd_region = to_nd_region(dev);
 621
 622	if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags))
 623		return sprintf(buf, "cpu_cache\n");
 624	else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags))
 625		return sprintf(buf, "memory_controller\n");
 626	else
 627		return sprintf(buf, "\n");
 628}
 629static DEVICE_ATTR_RO(persistence_domain);
 630
/*
 * NULL-terminated list of the region sysfs attributes defined above.  The
 * list is published through nd_region_attribute_group, whose is_visible
 * callback (region_visible()) decides per attribute whether and with which
 * mode it appears.
 */
static struct attribute *nd_region_attributes[] = {
	&dev_attr_size.attr,
	&dev_attr_align.attr,
	&dev_attr_nstype.attr,
	&dev_attr_mappings.attr,
	&dev_attr_btt_seed.attr,
	&dev_attr_pfn_seed.attr,
	&dev_attr_dax_seed.attr,
	&dev_attr_deep_flush.attr,
	&dev_attr_read_only.attr,
	&dev_attr_set_cookie.attr,
	&dev_attr_available_size.attr,
	&dev_attr_max_available_extent.attr,
	&dev_attr_namespace_seed.attr,
	&dev_attr_init_namespaces.attr,
	&dev_attr_badblocks.attr,
	&dev_attr_resource.attr,
	&dev_attr_persistence_domain.attr,
	NULL,
};
 651
 652static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
 653{
 654	struct device *dev = container_of(kobj, typeof(*dev), kobj);
 655	struct nd_region *nd_region = to_nd_region(dev);
 656	struct nd_interleave_set *nd_set = nd_region->nd_set;
 657	int type = nd_region_to_nstype(nd_region);
 658
 659	if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr)
 660		return 0;
 661
 662	if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
 663		return 0;
 664
 665	if (!is_memory(dev) && a == &dev_attr_badblocks.attr)
 666		return 0;
 667
 668	if (a == &dev_attr_resource.attr && !is_memory(dev))
 669		return 0;
 670
 671	if (a == &dev_attr_deep_flush.attr) {
 672		int has_flush = nvdimm_has_flush(nd_region);
 673
 674		if (has_flush == 1)
 675			return a->mode;
 676		else if (has_flush == 0)
 677			return 0444;
 678		else
 679			return 0;
 680	}
 681
 682	if (a == &dev_attr_persistence_domain.attr) {
 683		if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE)
 684					| BIT(ND_REGION_PERSIST_MEMCTRL))) == 0)
 685			return 0;
 686		return a->mode;
 687	}
 688
 689	if (a == &dev_attr_align.attr)
 690		return a->mode;
 691
 692	if (a != &dev_attr_set_cookie.attr
 693			&& a != &dev_attr_available_size.attr)
 694		return a->mode;
 695
 696	if (type == ND_DEVICE_NAMESPACE_PMEM &&
 697	    a == &dev_attr_available_size.attr)
 698		return a->mode;
 699	else if (is_memory(dev) && nd_set)
 700		return a->mode;
 701
 702	return 0;
 703}
 704
 705static ssize_t mappingN(struct device *dev, char *buf, int n)
 706{
 707	struct nd_region *nd_region = to_nd_region(dev);
 708	struct nd_mapping *nd_mapping;
 709	struct nvdimm *nvdimm;
 710
 711	if (n >= nd_region->ndr_mappings)
 712		return -ENXIO;
 713	nd_mapping = &nd_region->mapping[n];
 714	nvdimm = nd_mapping->nvdimm;
 715
 716	return sprintf(buf, "%s,%llu,%llu,%d\n", dev_name(&nvdimm->dev),
 717			nd_mapping->start, nd_mapping->size,
 718			nd_mapping->position);
 719}
 720
/*
 * REGION_MAPPING(idx) defines mapping<idx>_show(), which forwards to
 * mappingN() with the index @idx, and the matching read-only device
 * attribute dev_attr_mapping<idx>.
 */
#define REGION_MAPPING(idx) \
static ssize_t mapping##idx##_show(struct device *dev,		\
		struct device_attribute *attr, char *buf)	\
{								\
	return mappingN(dev, buf, idx);				\
}								\
static DEVICE_ATTR_RO(mapping##idx)

/*
 * 32 should be enough for a while, even in the presence of socket
 * interleave a 32-way interleave set is a degenerate case.
 */
REGION_MAPPING(0);
REGION_MAPPING(1);
REGION_MAPPING(2);
REGION_MAPPING(3);
REGION_MAPPING(4);
REGION_MAPPING(5);
REGION_MAPPING(6);
REGION_MAPPING(7);
REGION_MAPPING(8);
REGION_MAPPING(9);
REGION_MAPPING(10);
REGION_MAPPING(11);
REGION_MAPPING(12);
REGION_MAPPING(13);
REGION_MAPPING(14);
REGION_MAPPING(15);
REGION_MAPPING(16);
REGION_MAPPING(17);
REGION_MAPPING(18);
REGION_MAPPING(19);
REGION_MAPPING(20);
REGION_MAPPING(21);
REGION_MAPPING(22);
REGION_MAPPING(23);
REGION_MAPPING(24);
REGION_MAPPING(25);
REGION_MAPPING(26);
REGION_MAPPING(27);
REGION_MAPPING(28);
REGION_MAPPING(29);
REGION_MAPPING(30);
REGION_MAPPING(31);
 765
 766static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n)
 767{
 768	struct device *dev = container_of(kobj, struct device, kobj);
 769	struct nd_region *nd_region = to_nd_region(dev);
 770
 771	if (n < nd_region->ndr_mappings)
 772		return a->mode;
 773	return 0;
 774}
 775
/*
 * NULL-terminated list of the mapping0..mapping31 attributes, in index
 * order.  mapping_visible() compares an attribute's position in this list
 * against the region's mapping count, so the order must match the index.
 */
static struct attribute *mapping_attributes[] = {
	&dev_attr_mapping0.attr,
	&dev_attr_mapping1.attr,
	&dev_attr_mapping2.attr,
	&dev_attr_mapping3.attr,
	&dev_attr_mapping4.attr,
	&dev_attr_mapping5.attr,
	&dev_attr_mapping6.attr,
	&dev_attr_mapping7.attr,
	&dev_attr_mapping8.attr,
	&dev_attr_mapping9.attr,
	&dev_attr_mapping10.attr,
	&dev_attr_mapping11.attr,
	&dev_attr_mapping12.attr,
	&dev_attr_mapping13.attr,
	&dev_attr_mapping14.attr,
	&dev_attr_mapping15.attr,
	&dev_attr_mapping16.attr,
	&dev_attr_mapping17.attr,
	&dev_attr_mapping18.attr,
	&dev_attr_mapping19.attr,
	&dev_attr_mapping20.attr,
	&dev_attr_mapping21.attr,
	&dev_attr_mapping22.attr,
	&dev_attr_mapping23.attr,
	&dev_attr_mapping24.attr,
	&dev_attr_mapping25.attr,
	&dev_attr_mapping26.attr,
	&dev_attr_mapping27.attr,
	&dev_attr_mapping28.attr,
	&dev_attr_mapping29.attr,
	&dev_attr_mapping30.attr,
	&dev_attr_mapping31.attr,
	NULL,
};
 811
/* Attribute group for the mapping<N> attributes, filtered by mapping_visible(). */
static const struct attribute_group nd_mapping_attribute_group = {
	.is_visible = mapping_visible,
	.attrs = mapping_attributes,
};
 816
/* Attribute group for the region attributes, filtered by region_visible(). */
static const struct attribute_group nd_region_attribute_group = {
	.attrs = nd_region_attributes,
	.is_visible = region_visible,
};
 821
/*
 * NULL-terminated list of attribute groups shared by both region device
 * types below.  nd_device_attribute_group and nd_numa_attribute_group are
 * defined outside this file.
 */
static const struct attribute_group *nd_region_attribute_groups[] = {
	&nd_device_attribute_group,
	&nd_region_attribute_group,
	&nd_numa_attribute_group,
	&nd_mapping_attribute_group,
	NULL,
};
 829
/*
 * Device type of regions created by nvdimm_pmem_region_create(); tested by
 * is_nd_pmem().
 */
static const struct device_type nd_pmem_device_type = {
	.name = "nd_pmem",
	.release = nd_region_release,
	.groups = nd_region_attribute_groups,
};
 835
/*
 * Device type of regions created by nvdimm_volatile_region_create(); tested
 * by is_nd_volatile().
 */
static const struct device_type nd_volatile_device_type = {
	.name = "nd_volatile",
	.release = nd_region_release,
	.groups = nd_region_attribute_groups,
};
 841
 842bool is_nd_pmem(struct device *dev)
 843{
 844	return dev ? dev->type == &nd_pmem_device_type : false;
 845}
 846
 847bool is_nd_volatile(struct device *dev)
 848{
 849	return dev ? dev->type == &nd_volatile_device_type : false;
 850}
 851
 852u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
 853		struct nd_namespace_index *nsindex)
 854{
 855	struct nd_interleave_set *nd_set = nd_region->nd_set;
 856
 857	if (!nd_set)
 858		return 0;
 859
 860	if (nsindex && __le16_to_cpu(nsindex->major) == 1
 861			&& __le16_to_cpu(nsindex->minor) == 1)
 862		return nd_set->cookie1;
 863	return nd_set->cookie2;
 864}
 865
 866u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
 867{
 868	struct nd_interleave_set *nd_set = nd_region->nd_set;
 869
 870	if (nd_set)
 871		return nd_set->altcookie;
 872	return 0;
 873}
 874
 875void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
 876{
 877	struct nd_label_ent *label_ent, *e;
 878
 879	lockdep_assert_held(&nd_mapping->lock);
 880	list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
 881		list_del(&label_ent->list);
 882		kfree(label_ent);
 883	}
 884}
 885
 886/*
 887 * When a namespace is activated create new seeds for the next
 888 * namespace, or namespace-personality to be configured.
 889 */
 890void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
 891{
 892	nvdimm_bus_lock(dev);
 893	if (nd_region->ns_seed == dev) {
 894		nd_region_create_ns_seed(nd_region);
 895	} else if (is_nd_btt(dev)) {
 896		struct nd_btt *nd_btt = to_nd_btt(dev);
 897
 898		if (nd_region->btt_seed == dev)
 899			nd_region_create_btt_seed(nd_region);
 900		if (nd_region->ns_seed == &nd_btt->ndns->dev)
 901			nd_region_create_ns_seed(nd_region);
 902	} else if (is_nd_pfn(dev)) {
 903		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
 904
 905		if (nd_region->pfn_seed == dev)
 906			nd_region_create_pfn_seed(nd_region);
 907		if (nd_region->ns_seed == &nd_pfn->ndns->dev)
 908			nd_region_create_ns_seed(nd_region);
 909	} else if (is_nd_dax(dev)) {
 910		struct nd_dax *nd_dax = to_nd_dax(dev);
 911
 912		if (nd_region->dax_seed == dev)
 913			nd_region_create_dax_seed(nd_region);
 914		if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
 915			nd_region_create_ns_seed(nd_region);
 916	}
 917	nvdimm_bus_unlock(dev);
 918}
 919
 920/**
 921 * nd_region_acquire_lane - allocate and lock a lane
 922 * @nd_region: region id and number of lanes possible
 923 *
 924 * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
 925 * We optimize for the common case where there are 256 lanes, one
 926 * per-cpu.  For larger systems we need to lock to share lanes.  For now
 927 * this implementation assumes the cost of maintaining an allocator for
 928 * free lanes is on the order of the lock hold time, so it implements a
 929 * static lane = cpu % num_lanes mapping.
 930 *
 931 * In the case of a BTT instance on top of a BLK namespace a lane may be
 932 * acquired recursively.  We lock on the first instance.
 933 *
 934 * In the case of a BTT instance on top of PMEM, we only acquire a lane
 935 * for the BTT metadata updates.
 936 */
 937unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
 938{
 939	unsigned int cpu, lane;
 940
 941	cpu = get_cpu();
 942	if (nd_region->num_lanes < nr_cpu_ids) {
 943		struct nd_percpu_lane *ndl_lock, *ndl_count;
 944
 945		lane = cpu % nd_region->num_lanes;
 946		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
 947		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
 948		if (ndl_count->count++ == 0)
 949			spin_lock(&ndl_lock->lock);
 950	} else
 951		lane = cpu;
 952
 953	return lane;
 954}
 955EXPORT_SYMBOL(nd_region_acquire_lane);
 956
 957void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
 958{
 959	if (nd_region->num_lanes < nr_cpu_ids) {
 960		unsigned int cpu = get_cpu();
 961		struct nd_percpu_lane *ndl_lock, *ndl_count;
 962
 963		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
 964		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
 965		if (--ndl_count->count == 0)
 966			spin_unlock(&ndl_lock->lock);
 967		put_cpu();
 968	}
 969	put_cpu();
 970}
 971EXPORT_SYMBOL(nd_region_release_lane);
 972
/*
 * PowerPC requires this alignment for memremap_pages(). All other archs
 * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
 *
 * default_align() uses this value as the starting alignment for regions
 * that are at least this large.
 */
#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
 978
 979static unsigned long default_align(struct nd_region *nd_region)
 980{
 981	unsigned long align;
 982	u32 remainder;
 983	int mappings;
 984
 985	align = MEMREMAP_COMPAT_ALIGN_MAX;
 986	if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX)
 987		align = PAGE_SIZE;
 988
 989	mappings = max_t(u16, 1, nd_region->ndr_mappings);
 990	div_u64_rem(align, mappings, &remainder);
 991	if (remainder)
 992		align *= mappings;
 993
 994	return align;
 995}
 996
/* Lockdep class that nd_region_create() assigns to each region's device mutex. */
static struct lock_class_key nvdimm_region_key;
 998
 999static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
1000		struct nd_region_desc *ndr_desc,
1001		const struct device_type *dev_type, const char *caller)
1002{
1003	struct nd_region *nd_region;
1004	struct device *dev;
1005	unsigned int i;
1006	int ro = 0;
1007
1008	for (i = 0; i < ndr_desc->num_mappings; i++) {
1009		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
1010		struct nvdimm *nvdimm = mapping->nvdimm;
1011
1012		if ((mapping->start | mapping->size) % PAGE_SIZE) {
1013			dev_err(&nvdimm_bus->dev,
1014				"%s: %s mapping%d is not %ld aligned\n",
1015				caller, dev_name(&nvdimm->dev), i, PAGE_SIZE);
1016			return NULL;
1017		}
1018
1019		if (test_bit(NDD_UNARMED, &nvdimm->flags))
1020			ro = 1;
1021
1022	}
1023
1024	nd_region =
1025		kzalloc(struct_size(nd_region, mapping, ndr_desc->num_mappings),
1026			GFP_KERNEL);
1027
1028	if (!nd_region)
1029		return NULL;
1030	/* CXL pre-assigns memregion ids before creating nvdimm regions */
1031	if (test_bit(ND_REGION_CXL, &ndr_desc->flags)) {
1032		nd_region->id = ndr_desc->memregion;
1033	} else {
1034		nd_region->id = memregion_alloc(GFP_KERNEL);
1035		if (nd_region->id < 0)
1036			goto err_id;
1037	}
1038
1039	nd_region->lane = alloc_percpu(struct nd_percpu_lane);
1040	if (!nd_region->lane)
1041		goto err_percpu;
1042
1043        for (i = 0; i < nr_cpu_ids; i++) {
1044		struct nd_percpu_lane *ndl;
1045
1046		ndl = per_cpu_ptr(nd_region->lane, i);
1047		spin_lock_init(&ndl->lock);
1048		ndl->count = 0;
1049	}
1050
1051	for (i = 0; i < ndr_desc->num_mappings; i++) {
1052		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
1053		struct nvdimm *nvdimm = mapping->nvdimm;
1054
1055		nd_region->mapping[i].nvdimm = nvdimm;
1056		nd_region->mapping[i].start = mapping->start;
1057		nd_region->mapping[i].size = mapping->size;
1058		nd_region->mapping[i].position = mapping->position;
1059		INIT_LIST_HEAD(&nd_region->mapping[i].labels);
1060		mutex_init(&nd_region->mapping[i].lock);
1061
1062		get_device(&nvdimm->dev);
1063	}
1064	nd_region->ndr_mappings = ndr_desc->num_mappings;
1065	nd_region->provider_data = ndr_desc->provider_data;
1066	nd_region->nd_set = ndr_desc->nd_set;
1067	nd_region->num_lanes = ndr_desc->num_lanes;
1068	nd_region->flags = ndr_desc->flags;
1069	nd_region->ro = ro;
1070	nd_region->numa_node = ndr_desc->numa_node;
1071	nd_region->target_node = ndr_desc->target_node;
1072	ida_init(&nd_region->ns_ida);
1073	ida_init(&nd_region->btt_ida);
1074	ida_init(&nd_region->pfn_ida);
1075	ida_init(&nd_region->dax_ida);
1076	dev = &nd_region->dev;
1077	dev_set_name(dev, "region%d", nd_region->id);
1078	dev->parent = &nvdimm_bus->dev;
1079	dev->type = dev_type;
1080	dev->groups = ndr_desc->attr_groups;
1081	dev->of_node = ndr_desc->of_node;
1082	nd_region->ndr_size = resource_size(ndr_desc->res);
1083	nd_region->ndr_start = ndr_desc->res->start;
1084	nd_region->align = default_align(nd_region);
1085	if (ndr_desc->flush)
1086		nd_region->flush = ndr_desc->flush;
1087	else
1088		nd_region->flush = NULL;
1089
1090	device_initialize(dev);
1091	lockdep_set_class(&dev->mutex, &nvdimm_region_key);
1092	nd_device_register(dev);
1093
1094	return nd_region;
1095
1096err_percpu:
1097	if (!test_bit(ND_REGION_CXL, &ndr_desc->flags))
1098		memregion_free(nd_region->id);
1099err_id:
1100	kfree(nd_region);
1101	return NULL;
1102}
1103
1104struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
1105		struct nd_region_desc *ndr_desc)
1106{
1107	ndr_desc->num_lanes = ND_MAX_LANES;
1108	return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type,
1109			__func__);
1110}
1111EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);
1112
1113struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
1114		struct nd_region_desc *ndr_desc)
1115{
1116	ndr_desc->num_lanes = ND_MAX_LANES;
1117	return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type,
1118			__func__);
1119}
1120EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
1121
1122void nvdimm_region_delete(struct nd_region *nd_region)
1123{
1124	if (nd_region)
1125		nd_device_unregister(&nd_region->dev, ND_SYNC);
1126}
1127EXPORT_SYMBOL_GPL(nvdimm_region_delete);
1128
1129int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
1130{
1131	int rc = 0;
1132
1133	if (!nd_region->flush)
1134		rc = generic_nvdimm_flush(nd_region);
1135	else {
1136		if (nd_region->flush(nd_region, bio))
1137			rc = -EIO;
1138	}
1139
1140	return rc;
1141}
1142/**
1143 * generic_nvdimm_flush() - flush any posted write queues between the cpu and pmem media
1144 * @nd_region: interleaved pmem region
1145 */
1146int generic_nvdimm_flush(struct nd_region *nd_region)
1147{
1148	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
1149	int i, idx;
1150
1151	/*
1152	 * Try to encourage some diversity in flush hint addresses
1153	 * across cpus assuming a limited number of flush hints.
1154	 */
1155	idx = this_cpu_read(flush_idx);
1156	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
1157
1158	/*
1159	 * The pmem_wmb() is needed to 'sfence' all
1160	 * previous writes such that they are architecturally visible for
1161	 * the platform buffer flush. Note that we've already arranged for pmem
1162	 * writes to avoid the cache via memcpy_flushcache().  The final
1163	 * wmb() ensures ordering for the NVDIMM flush write.
1164	 */
1165	pmem_wmb();
1166	for (i = 0; i < nd_region->ndr_mappings; i++)
1167		if (ndrd_get_flush_wpq(ndrd, i, 0))
1168			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
1169	wmb();
1170
1171	return 0;
1172}
1173EXPORT_SYMBOL_GPL(nvdimm_flush);
1174
1175/**
1176 * nvdimm_has_flush - determine write flushing requirements
1177 * @nd_region: interleaved pmem region
1178 *
1179 * Returns 1 if writes require flushing
1180 * Returns 0 if writes do not require flushing
1181 * Returns -ENXIO if flushing capability can not be determined
1182 */
1183int nvdimm_has_flush(struct nd_region *nd_region)
1184{
1185	int i;
1186
1187	/* no nvdimm or pmem api == flushing capability unknown */
1188	if (nd_region->ndr_mappings == 0
1189			|| !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
1190		return -ENXIO;
1191
1192	/* Test if an explicit flush function is defined */
1193	if (test_bit(ND_REGION_ASYNC, &nd_region->flags) && nd_region->flush)
1194		return 1;
1195
1196	/* Test if any flush hints for the region are available */
1197	for (i = 0; i < nd_region->ndr_mappings; i++) {
1198		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1199		struct nvdimm *nvdimm = nd_mapping->nvdimm;
1200
1201		/* flush hints present / available */
1202		if (nvdimm->num_flush)
1203			return 1;
1204	}
1205
1206	/*
1207	 * The platform defines dimm devices without hints nor explicit flush,
1208	 * assume platform persistence mechanism like ADR
1209	 */
1210	return 0;
1211}
1212EXPORT_SYMBOL_GPL(nvdimm_has_flush);
1213
1214int nvdimm_has_cache(struct nd_region *nd_region)
1215{
1216	return is_nd_pmem(&nd_region->dev) &&
1217		!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
1218}
1219EXPORT_SYMBOL_GPL(nvdimm_has_cache);
1220
1221bool is_nvdimm_sync(struct nd_region *nd_region)
1222{
1223	if (is_nd_volatile(&nd_region->dev))
1224		return true;
1225
1226	return is_nd_pmem(&nd_region->dev) &&
1227		!test_bit(ND_REGION_ASYNC, &nd_region->flags);
1228}
1229EXPORT_SYMBOL_GPL(is_nvdimm_sync);
1230
1231struct conflict_context {
1232	struct nd_region *nd_region;
1233	resource_size_t start, size;
1234};
1235
1236static int region_conflict(struct device *dev, void *data)
1237{
1238	struct nd_region *nd_region;
1239	struct conflict_context *ctx = data;
1240	resource_size_t res_end, region_end, region_start;
1241
1242	if (!is_memory(dev))
1243		return 0;
1244
1245	nd_region = to_nd_region(dev);
1246	if (nd_region == ctx->nd_region)
1247		return 0;
1248
1249	res_end = ctx->start + ctx->size;
1250	region_start = nd_region->ndr_start;
1251	region_end = region_start + nd_region->ndr_size;
1252	if (ctx->start >= region_start && ctx->start < region_end)
1253		return -EBUSY;
1254	if (res_end > region_start && res_end <= region_end)
1255		return -EBUSY;
1256	return 0;
1257}
1258
1259int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
1260		resource_size_t size)
1261{
1262	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
1263	struct conflict_context ctx = {
1264		.nd_region = nd_region,
1265		.start = start,
1266		.size = size,
1267	};
1268
1269	return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
1270}
1271
1272MODULE_IMPORT_NS(DEVMEM);