Linux Audio

Check our new training course

Yocto / OpenEmbedded training

Mar 24-27, 2025, special US time zones
Register
Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
   4 */
#include <linux/scatterlist.h>
#include <linux/memregion.h>
#include <linux/highmem.h>
#include <linux/kstrtox.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
  17
  18/*
  19 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
  20 * irrelevant.
  21 */
  22#include <linux/io-64-nonatomic-hi-lo.h>
  23
/*
 * Per-CPU integer counter. It is not referenced in the visible part of
 * this file.
 * NOTE(review): presumably consumed by generic_nvdimm_flush() to pick a
 * flush hint -- confirm against that function.
 */
static DEFINE_PER_CPU(int, flush_idx);
  25
  26static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
  27		struct nd_region_data *ndrd)
  28{
  29	int i, j;
  30
  31	dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
  32			nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
  33	for (i = 0; i < (1 << ndrd->hints_shift); i++) {
  34		struct resource *res = &nvdimm->flush_wpq[i];
  35		unsigned long pfn = PHYS_PFN(res->start);
  36		void __iomem *flush_page;
  37
  38		/* check if flush hints share a page */
  39		for (j = 0; j < i; j++) {
  40			struct resource *res_j = &nvdimm->flush_wpq[j];
  41			unsigned long pfn_j = PHYS_PFN(res_j->start);
  42
  43			if (pfn == pfn_j)
  44				break;
  45		}
  46
  47		if (j < i)
  48			flush_page = (void __iomem *) ((unsigned long)
  49					ndrd_get_flush_wpq(ndrd, dimm, j)
  50					& PAGE_MASK);
  51		else
  52			flush_page = devm_nvdimm_ioremap(dev,
  53					PFN_PHYS(pfn), PAGE_SIZE);
  54		if (!flush_page)
  55			return -ENXIO;
  56		ndrd_set_flush_wpq(ndrd, dimm, i, flush_page
  57				+ (res->start & ~PAGE_MASK));
  58	}
  59
  60	return 0;
  61}
  62
  63static int nd_region_invalidate_memregion(struct nd_region *nd_region)
  64{
  65	int i, incoherent = 0;
  66
  67	for (i = 0; i < nd_region->ndr_mappings; i++) {
  68		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
  69		struct nvdimm *nvdimm = nd_mapping->nvdimm;
  70
  71		if (test_bit(NDD_INCOHERENT, &nvdimm->flags)) {
  72			incoherent++;
  73			break;
  74		}
  75	}
  76
  77	if (!incoherent)
  78		return 0;
  79
  80	if (!cpu_cache_has_invalidate_memregion()) {
  81		if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) {
  82			dev_warn(
  83				&nd_region->dev,
  84				"Bypassing cpu_cache_invalidate_memergion() for testing!\n");
  85			goto out;
  86		} else {
  87			dev_err(&nd_region->dev,
  88				"Failed to synchronize CPU cache state\n");
  89			return -ENXIO;
  90		}
  91	}
  92
  93	cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
  94out:
  95	for (i = 0; i < nd_region->ndr_mappings; i++) {
  96		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
  97		struct nvdimm *nvdimm = nd_mapping->nvdimm;
  98
  99		clear_bit(NDD_INCOHERENT, &nvdimm->flags);
 100	}
 101
 102	return 0;
 103}
 104
 105int nd_region_activate(struct nd_region *nd_region)
 106{
 107	int i, j, rc, num_flush = 0;
 108	struct nd_region_data *ndrd;
 109	struct device *dev = &nd_region->dev;
 110	size_t flush_data_size = sizeof(void *);
 111
 112	nvdimm_bus_lock(&nd_region->dev);
 113	for (i = 0; i < nd_region->ndr_mappings; i++) {
 114		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 115		struct nvdimm *nvdimm = nd_mapping->nvdimm;
 116
 117		if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
 118			nvdimm_bus_unlock(&nd_region->dev);
 119			return -EBUSY;
 120		}
 121
 122		/* at least one null hint slot per-dimm for the "no-hint" case */
 123		flush_data_size += sizeof(void *);
 124		num_flush = min_not_zero(num_flush, nvdimm->num_flush);
 125		if (!nvdimm->num_flush)
 126			continue;
 127		flush_data_size += nvdimm->num_flush * sizeof(void *);
 128	}
 129	nvdimm_bus_unlock(&nd_region->dev);
 130
 131	rc = nd_region_invalidate_memregion(nd_region);
 132	if (rc)
 133		return rc;
 134
 135	ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
 136	if (!ndrd)
 137		return -ENOMEM;
 138	dev_set_drvdata(dev, ndrd);
 139
 140	if (!num_flush)
 141		return 0;
 142
 143	ndrd->hints_shift = ilog2(num_flush);
 144	for (i = 0; i < nd_region->ndr_mappings; i++) {
 145		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 146		struct nvdimm *nvdimm = nd_mapping->nvdimm;
 147		int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
 148
 149		if (rc)
 150			return rc;
 151	}
 152
 153	/*
 154	 * Clear out entries that are duplicates. This should prevent the
 155	 * extra flushings.
 156	 */
 157	for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
 158		/* ignore if NULL already */
 159		if (!ndrd_get_flush_wpq(ndrd, i, 0))
 160			continue;
 161
 162		for (j = i + 1; j < nd_region->ndr_mappings; j++)
 163			if (ndrd_get_flush_wpq(ndrd, i, 0) ==
 164			    ndrd_get_flush_wpq(ndrd, j, 0))
 165				ndrd_set_flush_wpq(ndrd, j, 0, NULL);
 166	}
 167
 168	return 0;
 169}
 170
 171static void nd_region_release(struct device *dev)
 172{
 173	struct nd_region *nd_region = to_nd_region(dev);
 174	u16 i;
 175
 176	for (i = 0; i < nd_region->ndr_mappings; i++) {
 177		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 178		struct nvdimm *nvdimm = nd_mapping->nvdimm;
 179
 180		put_device(&nvdimm->dev);
 181	}
 182	free_percpu(nd_region->lane);
 183	if (!test_bit(ND_REGION_CXL, &nd_region->flags))
 184		memregion_free(nd_region->id);
 185	kfree(nd_region);
 186}
 187
 188struct nd_region *to_nd_region(struct device *dev)
 189{
 190	struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
 191
 192	WARN_ON(dev->type->release != nd_region_release);
 193	return nd_region;
 194}
 195EXPORT_SYMBOL_GPL(to_nd_region);
 196
 197struct device *nd_region_dev(struct nd_region *nd_region)
 198{
 199	if (!nd_region)
 200		return NULL;
 201	return &nd_region->dev;
 202}
 203EXPORT_SYMBOL_GPL(nd_region_dev);
 204
 205void *nd_region_provider_data(struct nd_region *nd_region)
 206{
 207	return nd_region->provider_data;
 208}
 209EXPORT_SYMBOL_GPL(nd_region_provider_data);
 210
 211/**
 212 * nd_region_to_nstype() - region to an integer namespace type
 213 * @nd_region: region-device to interrogate
 214 *
 215 * This is the 'nstype' attribute of a region as well, an input to the
 216 * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match
 217 * namespace devices with namespace drivers.
 218 */
 219int nd_region_to_nstype(struct nd_region *nd_region)
 220{
 221	if (is_memory(&nd_region->dev)) {
 222		u16 i, label;
 223
 224		for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
 225			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 226			struct nvdimm *nvdimm = nd_mapping->nvdimm;
 227
 228			if (test_bit(NDD_LABELING, &nvdimm->flags))
 229				label++;
 230		}
 231		if (label)
 232			return ND_DEVICE_NAMESPACE_PMEM;
 233		else
 234			return ND_DEVICE_NAMESPACE_IO;
 235	}
 236
 237	return 0;
 238}
 239EXPORT_SYMBOL(nd_region_to_nstype);
 240
 241static unsigned long long region_size(struct nd_region *nd_region)
 242{
 243	if (is_memory(&nd_region->dev)) {
 244		return nd_region->ndr_size;
 245	} else if (nd_region->ndr_mappings == 1) {
 246		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
 247
 248		return nd_mapping->size;
 249	}
 250
 251	return 0;
 252}
 253
 254static ssize_t size_show(struct device *dev,
 255		struct device_attribute *attr, char *buf)
 256{
 257	struct nd_region *nd_region = to_nd_region(dev);
 258
 259	return sprintf(buf, "%llu\n", region_size(nd_region));
 260}
 261static DEVICE_ATTR_RO(size);
 262
 263static ssize_t deep_flush_show(struct device *dev,
 264		struct device_attribute *attr, char *buf)
 265{
 266	struct nd_region *nd_region = to_nd_region(dev);
 267
 268	/*
 269	 * NOTE: in the nvdimm_has_flush() error case this attribute is
 270	 * not visible.
 271	 */
 272	return sprintf(buf, "%d\n", nvdimm_has_flush(nd_region));
 273}
 274
 275static ssize_t deep_flush_store(struct device *dev, struct device_attribute *attr,
 276		const char *buf, size_t len)
 277{
 278	bool flush;
 279	int rc = kstrtobool(buf, &flush);
 280	struct nd_region *nd_region = to_nd_region(dev);
 281
 282	if (rc)
 283		return rc;
 284	if (!flush)
 285		return -EINVAL;
 286	rc = nvdimm_flush(nd_region, NULL);
 287	if (rc)
 288		return rc;
 289
 290	return len;
 291}
 292static DEVICE_ATTR_RW(deep_flush);
 293
 294static ssize_t mappings_show(struct device *dev,
 295		struct device_attribute *attr, char *buf)
 296{
 297	struct nd_region *nd_region = to_nd_region(dev);
 298
 299	return sprintf(buf, "%d\n", nd_region->ndr_mappings);
 300}
 301static DEVICE_ATTR_RO(mappings);
 302
 303static ssize_t nstype_show(struct device *dev,
 304		struct device_attribute *attr, char *buf)
 305{
 306	struct nd_region *nd_region = to_nd_region(dev);
 307
 308	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
 309}
 310static DEVICE_ATTR_RO(nstype);
 311
 312static ssize_t set_cookie_show(struct device *dev,
 313		struct device_attribute *attr, char *buf)
 314{
 315	struct nd_region *nd_region = to_nd_region(dev);
 316	struct nd_interleave_set *nd_set = nd_region->nd_set;
 317	ssize_t rc = 0;
 318
 319	if (is_memory(dev) && nd_set)
 320		/* pass, should be precluded by region_visible */;
 321	else
 322		return -ENXIO;
 323
 324	/*
 325	 * The cookie to show depends on which specification of the
 326	 * labels we are using. If there are not labels then default to
 327	 * the v1.1 namespace label cookie definition. To read all this
 328	 * data we need to wait for probing to settle.
 329	 */
 330	device_lock(dev);
 331	nvdimm_bus_lock(dev);
 332	wait_nvdimm_bus_probe_idle(dev);
 333	if (nd_region->ndr_mappings) {
 334		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
 335		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 336
 337		if (ndd) {
 338			struct nd_namespace_index *nsindex;
 339
 340			nsindex = to_namespace_index(ndd, ndd->ns_current);
 341			rc = sprintf(buf, "%#llx\n",
 342					nd_region_interleave_set_cookie(nd_region,
 343						nsindex));
 344		}
 345	}
 346	nvdimm_bus_unlock(dev);
 347	device_unlock(dev);
 348
 349	if (rc)
 350		return rc;
 351	return sprintf(buf, "%#llx\n", nd_set->cookie1);
 352}
 353static DEVICE_ATTR_RO(set_cookie);
 354
 355resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
 356{
 357	resource_size_t available;
 358	int i;
 359
 360	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
 361
 362	available = 0;
 363	for (i = 0; i < nd_region->ndr_mappings; i++) {
 364		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 365		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 366
 367		/* if a dimm is disabled the available capacity is zero */
 368		if (!ndd)
 369			return 0;
 370
 371		available += nd_pmem_available_dpa(nd_region, nd_mapping);
 372	}
 373
 374	return available;
 375}
 376
 377resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region)
 378{
 379	resource_size_t avail = 0;
 380	int i;
 381
 382	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
 383	for (i = 0; i < nd_region->ndr_mappings; i++) {
 384		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 385
 386		avail = min_not_zero(avail, nd_pmem_max_contiguous_dpa(
 387						    nd_region, nd_mapping));
 388	}
 389	return avail * nd_region->ndr_mappings;
 390}
 391
 392static ssize_t available_size_show(struct device *dev,
 393		struct device_attribute *attr, char *buf)
 394{
 395	struct nd_region *nd_region = to_nd_region(dev);
 396	unsigned long long available = 0;
 397
 398	/*
 399	 * Flush in-flight updates and grab a snapshot of the available
 400	 * size.  Of course, this value is potentially invalidated the
 401	 * memory nvdimm_bus_lock() is dropped, but that's userspace's
 402	 * problem to not race itself.
 403	 */
 404	device_lock(dev);
 405	nvdimm_bus_lock(dev);
 406	wait_nvdimm_bus_probe_idle(dev);
 407	available = nd_region_available_dpa(nd_region);
 408	nvdimm_bus_unlock(dev);
 409	device_unlock(dev);
 410
 411	return sprintf(buf, "%llu\n", available);
 412}
 413static DEVICE_ATTR_RO(available_size);
 414
 415static ssize_t max_available_extent_show(struct device *dev,
 416		struct device_attribute *attr, char *buf)
 417{
 418	struct nd_region *nd_region = to_nd_region(dev);
 419	unsigned long long available = 0;
 420
 421	device_lock(dev);
 422	nvdimm_bus_lock(dev);
 423	wait_nvdimm_bus_probe_idle(dev);
 424	available = nd_region_allocatable_dpa(nd_region);
 425	nvdimm_bus_unlock(dev);
 426	device_unlock(dev);
 427
 428	return sprintf(buf, "%llu\n", available);
 429}
 430static DEVICE_ATTR_RO(max_available_extent);
 431
 432static ssize_t init_namespaces_show(struct device *dev,
 433		struct device_attribute *attr, char *buf)
 434{
 435	struct nd_region_data *ndrd = dev_get_drvdata(dev);
 436	ssize_t rc;
 437
 438	nvdimm_bus_lock(dev);
 439	if (ndrd)
 440		rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
 441	else
 442		rc = -ENXIO;
 443	nvdimm_bus_unlock(dev);
 444
 445	return rc;
 446}
 447static DEVICE_ATTR_RO(init_namespaces);
 448
 449static ssize_t namespace_seed_show(struct device *dev,
 450		struct device_attribute *attr, char *buf)
 451{
 452	struct nd_region *nd_region = to_nd_region(dev);
 453	ssize_t rc;
 454
 455	nvdimm_bus_lock(dev);
 456	if (nd_region->ns_seed)
 457		rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
 458	else
 459		rc = sprintf(buf, "\n");
 460	nvdimm_bus_unlock(dev);
 461	return rc;
 462}
 463static DEVICE_ATTR_RO(namespace_seed);
 464
 465static ssize_t btt_seed_show(struct device *dev,
 466		struct device_attribute *attr, char *buf)
 467{
 468	struct nd_region *nd_region = to_nd_region(dev);
 469	ssize_t rc;
 470
 471	nvdimm_bus_lock(dev);
 472	if (nd_region->btt_seed)
 473		rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed));
 474	else
 475		rc = sprintf(buf, "\n");
 476	nvdimm_bus_unlock(dev);
 477
 478	return rc;
 479}
 480static DEVICE_ATTR_RO(btt_seed);
 481
 482static ssize_t pfn_seed_show(struct device *dev,
 483		struct device_attribute *attr, char *buf)
 484{
 485	struct nd_region *nd_region = to_nd_region(dev);
 486	ssize_t rc;
 487
 488	nvdimm_bus_lock(dev);
 489	if (nd_region->pfn_seed)
 490		rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed));
 491	else
 492		rc = sprintf(buf, "\n");
 493	nvdimm_bus_unlock(dev);
 494
 495	return rc;
 496}
 497static DEVICE_ATTR_RO(pfn_seed);
 498
 499static ssize_t dax_seed_show(struct device *dev,
 500		struct device_attribute *attr, char *buf)
 501{
 502	struct nd_region *nd_region = to_nd_region(dev);
 503	ssize_t rc;
 504
 505	nvdimm_bus_lock(dev);
 506	if (nd_region->dax_seed)
 507		rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed));
 508	else
 509		rc = sprintf(buf, "\n");
 510	nvdimm_bus_unlock(dev);
 511
 512	return rc;
 513}
 514static DEVICE_ATTR_RO(dax_seed);
 515
 516static ssize_t read_only_show(struct device *dev,
 517		struct device_attribute *attr, char *buf)
 518{
 519	struct nd_region *nd_region = to_nd_region(dev);
 520
 521	return sprintf(buf, "%d\n", nd_region->ro);
 522}
 523
/*
 * device_for_each_child() callback: send NVDIMM_REVALIDATE_REGION to
 * @dev. @data is unused. Always returns 0, so iteration over the
 * children is never cut short.
 */
static int revalidate_read_only(struct device *dev, void *data)
{
	nd_device_notify(dev, NVDIMM_REVALIDATE_REGION);
	return 0;
}
 529
 530static ssize_t read_only_store(struct device *dev,
 531		struct device_attribute *attr, const char *buf, size_t len)
 532{
 533	bool ro;
 534	int rc = kstrtobool(buf, &ro);
 535	struct nd_region *nd_region = to_nd_region(dev);
 536
 537	if (rc)
 538		return rc;
 539
 540	nd_region->ro = ro;
 541	device_for_each_child(dev, NULL, revalidate_read_only);
 542	return len;
 543}
 544static DEVICE_ATTR_RW(read_only);
 545
 546static ssize_t align_show(struct device *dev,
 547		struct device_attribute *attr, char *buf)
 548{
 549	struct nd_region *nd_region = to_nd_region(dev);
 550
 551	return sprintf(buf, "%#lx\n", nd_region->align);
 552}
 553
 554static ssize_t align_store(struct device *dev,
 555		struct device_attribute *attr, const char *buf, size_t len)
 556{
 557	struct nd_region *nd_region = to_nd_region(dev);
 558	unsigned long val, dpa;
 559	u32 mappings, remainder;
 560	int rc;
 561
 562	rc = kstrtoul(buf, 0, &val);
 563	if (rc)
 564		return rc;
 565
 566	/*
 567	 * Ensure space-align is evenly divisible by the region
 568	 * interleave-width because the kernel typically has no facility
 569	 * to determine which DIMM(s), dimm-physical-addresses, would
 570	 * contribute to the tail capacity in system-physical-address
 571	 * space for the namespace.
 572	 */
 573	mappings = max_t(u32, 1, nd_region->ndr_mappings);
 574	dpa = div_u64_rem(val, mappings, &remainder);
 575	if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
 576			|| val > region_size(nd_region) || remainder)
 577		return -EINVAL;
 578
 579	/*
 580	 * Given that space allocation consults this value multiple
 581	 * times ensure it does not change for the duration of the
 582	 * allocation.
 583	 */
 584	nvdimm_bus_lock(dev);
 585	nd_region->align = val;
 586	nvdimm_bus_unlock(dev);
 587
 588	return len;
 589}
 590static DEVICE_ATTR_RW(align);
 591
 592static ssize_t region_badblocks_show(struct device *dev,
 593		struct device_attribute *attr, char *buf)
 594{
 595	struct nd_region *nd_region = to_nd_region(dev);
 596	ssize_t rc;
 597
 598	device_lock(dev);
 599	if (dev->driver)
 600		rc = badblocks_show(&nd_region->bb, buf, 0);
 601	else
 602		rc = -ENXIO;
 603	device_unlock(dev);
 604
 605	return rc;
 606}
 607static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);
 608
 609static ssize_t resource_show(struct device *dev,
 610		struct device_attribute *attr, char *buf)
 611{
 612	struct nd_region *nd_region = to_nd_region(dev);
 613
 614	return sprintf(buf, "%#llx\n", nd_region->ndr_start);
 615}
 616static DEVICE_ATTR_ADMIN_RO(resource);
 617
 618static ssize_t persistence_domain_show(struct device *dev,
 619		struct device_attribute *attr, char *buf)
 620{
 621	struct nd_region *nd_region = to_nd_region(dev);
 622
 623	if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags))
 624		return sprintf(buf, "cpu_cache\n");
 625	else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags))
 626		return sprintf(buf, "memory_controller\n");
 627	else
 628		return sprintf(buf, "\n");
 629}
 630static DEVICE_ATTR_RO(persistence_domain);
 631
/*
 * NULL-terminated list of the region device attributes defined above.
 * It is the .attrs member of nd_region_attribute_group; per-attribute
 * visibility is decided by region_visible().
 */
static struct attribute *nd_region_attributes[] = {
	&dev_attr_size.attr,
	&dev_attr_align.attr,
	&dev_attr_nstype.attr,
	&dev_attr_mappings.attr,
	&dev_attr_btt_seed.attr,
	&dev_attr_pfn_seed.attr,
	&dev_attr_dax_seed.attr,
	&dev_attr_deep_flush.attr,
	&dev_attr_read_only.attr,
	&dev_attr_set_cookie.attr,
	&dev_attr_available_size.attr,
	&dev_attr_max_available_extent.attr,
	&dev_attr_namespace_seed.attr,
	&dev_attr_init_namespaces.attr,
	&dev_attr_badblocks.attr,
	&dev_attr_resource.attr,
	&dev_attr_persistence_domain.attr,
	NULL,
};
 652
 653static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
 654{
 655	struct device *dev = container_of(kobj, typeof(*dev), kobj);
 656	struct nd_region *nd_region = to_nd_region(dev);
 657	struct nd_interleave_set *nd_set = nd_region->nd_set;
 658	int type = nd_region_to_nstype(nd_region);
 659
 660	if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr)
 661		return 0;
 662
 663	if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
 664		return 0;
 665
 666	if (!is_memory(dev) && a == &dev_attr_badblocks.attr)
 667		return 0;
 668
 669	if (a == &dev_attr_resource.attr && !is_memory(dev))
 670		return 0;
 671
 672	if (a == &dev_attr_deep_flush.attr) {
 673		int has_flush = nvdimm_has_flush(nd_region);
 674
 675		if (has_flush == 1)
 676			return a->mode;
 677		else if (has_flush == 0)
 678			return 0444;
 679		else
 680			return 0;
 681	}
 682
 683	if (a == &dev_attr_persistence_domain.attr) {
 684		if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE)
 685					| BIT(ND_REGION_PERSIST_MEMCTRL))) == 0)
 686			return 0;
 687		return a->mode;
 688	}
 689
 690	if (a == &dev_attr_align.attr)
 691		return a->mode;
 692
 693	if (a != &dev_attr_set_cookie.attr
 694			&& a != &dev_attr_available_size.attr)
 695		return a->mode;
 696
 697	if (type == ND_DEVICE_NAMESPACE_PMEM &&
 698	    a == &dev_attr_available_size.attr)
 699		return a->mode;
 700	else if (is_memory(dev) && nd_set)
 701		return a->mode;
 702
 703	return 0;
 704}
 705
 706static ssize_t mappingN(struct device *dev, char *buf, int n)
 707{
 708	struct nd_region *nd_region = to_nd_region(dev);
 709	struct nd_mapping *nd_mapping;
 710	struct nvdimm *nvdimm;
 711
 712	if (n >= nd_region->ndr_mappings)
 713		return -ENXIO;
 714	nd_mapping = &nd_region->mapping[n];
 715	nvdimm = nd_mapping->nvdimm;
 716
 717	return sprintf(buf, "%s,%llu,%llu,%d\n", dev_name(&nvdimm->dev),
 718			nd_mapping->start, nd_mapping->size,
 719			nd_mapping->position);
 720}
 721
/*
 * REGION_MAPPING(idx) defines mapping<idx>_show(), a thin wrapper around
 * mappingN(dev, buf, idx), plus the matching read-only device attribute
 * dev_attr_mapping<idx>.
 */
#define REGION_MAPPING(idx) \
static ssize_t mapping##idx##_show(struct device *dev,		\
		struct device_attribute *attr, char *buf)	\
{								\
	return mappingN(dev, buf, idx);				\
}								\
static DEVICE_ATTR_RO(mapping##idx)

/*
 * 32 should be enough for a while, even in the presence of socket
 * interleave a 32-way interleave set is a degenerate case.
 */
REGION_MAPPING(0);
REGION_MAPPING(1);
REGION_MAPPING(2);
REGION_MAPPING(3);
REGION_MAPPING(4);
REGION_MAPPING(5);
REGION_MAPPING(6);
REGION_MAPPING(7);
REGION_MAPPING(8);
REGION_MAPPING(9);
REGION_MAPPING(10);
REGION_MAPPING(11);
REGION_MAPPING(12);
REGION_MAPPING(13);
REGION_MAPPING(14);
REGION_MAPPING(15);
REGION_MAPPING(16);
REGION_MAPPING(17);
REGION_MAPPING(18);
REGION_MAPPING(19);
REGION_MAPPING(20);
REGION_MAPPING(21);
REGION_MAPPING(22);
REGION_MAPPING(23);
REGION_MAPPING(24);
REGION_MAPPING(25);
REGION_MAPPING(26);
REGION_MAPPING(27);
REGION_MAPPING(28);
REGION_MAPPING(29);
REGION_MAPPING(30);
REGION_MAPPING(31);
 766
 767static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n)
 768{
 769	struct device *dev = container_of(kobj, struct device, kobj);
 770	struct nd_region *nd_region = to_nd_region(dev);
 771
 772	if (n < nd_region->ndr_mappings)
 773		return a->mode;
 774	return 0;
 775}
 776
/*
 * NULL-terminated list of the mapping0..mapping31 attributes, in index
 * order. mapping_visible() compares the array index it is handed against
 * the region's mapping count, so entry N must stay at position N.
 */
static struct attribute *mapping_attributes[] = {
	&dev_attr_mapping0.attr,
	&dev_attr_mapping1.attr,
	&dev_attr_mapping2.attr,
	&dev_attr_mapping3.attr,
	&dev_attr_mapping4.attr,
	&dev_attr_mapping5.attr,
	&dev_attr_mapping6.attr,
	&dev_attr_mapping7.attr,
	&dev_attr_mapping8.attr,
	&dev_attr_mapping9.attr,
	&dev_attr_mapping10.attr,
	&dev_attr_mapping11.attr,
	&dev_attr_mapping12.attr,
	&dev_attr_mapping13.attr,
	&dev_attr_mapping14.attr,
	&dev_attr_mapping15.attr,
	&dev_attr_mapping16.attr,
	&dev_attr_mapping17.attr,
	&dev_attr_mapping18.attr,
	&dev_attr_mapping19.attr,
	&dev_attr_mapping20.attr,
	&dev_attr_mapping21.attr,
	&dev_attr_mapping22.attr,
	&dev_attr_mapping23.attr,
	&dev_attr_mapping24.attr,
	&dev_attr_mapping25.attr,
	&dev_attr_mapping26.attr,
	&dev_attr_mapping27.attr,
	&dev_attr_mapping28.attr,
	&dev_attr_mapping29.attr,
	&dev_attr_mapping30.attr,
	&dev_attr_mapping31.attr,
	NULL,
};
 812
/* mapping<N> attributes, filtered by mapping_visible() */
static const struct attribute_group nd_mapping_attribute_group = {
	.is_visible = mapping_visible,
	.attrs = mapping_attributes,
};

/* region attributes, filtered by region_visible() */
static const struct attribute_group nd_region_attribute_group = {
	.attrs = nd_region_attributes,
	.is_visible = region_visible,
};

/*
 * NULL-terminated list of attribute groups shared by both region device
 * types below. nd_device_attribute_group and nd_numa_attribute_group are
 * defined outside this file section.
 */
static const struct attribute_group *nd_region_attribute_groups[] = {
	&nd_device_attribute_group,
	&nd_region_attribute_group,
	&nd_numa_attribute_group,
	&nd_mapping_attribute_group,
	NULL,
};
 830
/*
 * Device type used by nvdimm_pmem_region_create(); is_nd_pmem() matches
 * devices against it. Shares release callback and attribute groups with
 * the volatile type.
 */
static const struct device_type nd_pmem_device_type = {
	.name = "nd_pmem",
	.release = nd_region_release,
	.groups = nd_region_attribute_groups,
};

/*
 * Device type used by nvdimm_volatile_region_create(); is_nd_volatile()
 * matches devices against it.
 */
static const struct device_type nd_volatile_device_type = {
	.name = "nd_volatile",
	.release = nd_region_release,
	.groups = nd_region_attribute_groups,
};
 842
 843bool is_nd_pmem(const struct device *dev)
 844{
 845	return dev ? dev->type == &nd_pmem_device_type : false;
 846}
 847
 848bool is_nd_volatile(const struct device *dev)
 849{
 850	return dev ? dev->type == &nd_volatile_device_type : false;
 851}
 852
 853u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
 854		struct nd_namespace_index *nsindex)
 855{
 856	struct nd_interleave_set *nd_set = nd_region->nd_set;
 857
 858	if (!nd_set)
 859		return 0;
 860
 861	if (nsindex && __le16_to_cpu(nsindex->major) == 1
 862			&& __le16_to_cpu(nsindex->minor) == 1)
 863		return nd_set->cookie1;
 864	return nd_set->cookie2;
 865}
 866
 867u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
 868{
 869	struct nd_interleave_set *nd_set = nd_region->nd_set;
 870
 871	if (nd_set)
 872		return nd_set->altcookie;
 873	return 0;
 874}
 875
 876void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
 877{
 878	struct nd_label_ent *label_ent, *e;
 879
 880	lockdep_assert_held(&nd_mapping->lock);
 881	list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
 882		list_del(&label_ent->list);
 883		kfree(label_ent);
 884	}
 885}
 886
 887/*
 888 * When a namespace is activated create new seeds for the next
 889 * namespace, or namespace-personality to be configured.
 890 */
 891void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
 892{
 893	nvdimm_bus_lock(dev);
 894	if (nd_region->ns_seed == dev) {
 895		nd_region_create_ns_seed(nd_region);
 896	} else if (is_nd_btt(dev)) {
 897		struct nd_btt *nd_btt = to_nd_btt(dev);
 898
 899		if (nd_region->btt_seed == dev)
 900			nd_region_create_btt_seed(nd_region);
 901		if (nd_region->ns_seed == &nd_btt->ndns->dev)
 902			nd_region_create_ns_seed(nd_region);
 903	} else if (is_nd_pfn(dev)) {
 904		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
 905
 906		if (nd_region->pfn_seed == dev)
 907			nd_region_create_pfn_seed(nd_region);
 908		if (nd_region->ns_seed == &nd_pfn->ndns->dev)
 909			nd_region_create_ns_seed(nd_region);
 910	} else if (is_nd_dax(dev)) {
 911		struct nd_dax *nd_dax = to_nd_dax(dev);
 912
 913		if (nd_region->dax_seed == dev)
 914			nd_region_create_dax_seed(nd_region);
 915		if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
 916			nd_region_create_ns_seed(nd_region);
 917	}
 918	nvdimm_bus_unlock(dev);
 919}
 920
 921/**
 922 * nd_region_acquire_lane - allocate and lock a lane
 923 * @nd_region: region id and number of lanes possible
 924 *
 925 * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
 926 * We optimize for the common case where there are 256 lanes, one
 927 * per-cpu.  For larger systems we need to lock to share lanes.  For now
 928 * this implementation assumes the cost of maintaining an allocator for
 929 * free lanes is on the order of the lock hold time, so it implements a
 930 * static lane = cpu % num_lanes mapping.
 931 *
 932 * In the case of a BTT instance on top of a BLK namespace a lane may be
 933 * acquired recursively.  We lock on the first instance.
 934 *
 935 * In the case of a BTT instance on top of PMEM, we only acquire a lane
 936 * for the BTT metadata updates.
 937 */
 938unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
 939{
 940	unsigned int cpu, lane;
 941
 942	migrate_disable();
 943	cpu = smp_processor_id();
 944	if (nd_region->num_lanes < nr_cpu_ids) {
 945		struct nd_percpu_lane *ndl_lock, *ndl_count;
 946
 947		lane = cpu % nd_region->num_lanes;
 948		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
 949		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
 950		if (ndl_count->count++ == 0)
 951			spin_lock(&ndl_lock->lock);
 952	} else
 953		lane = cpu;
 954
 955	return lane;
 956}
 957EXPORT_SYMBOL(nd_region_acquire_lane);
 958
 959void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
 960{
 961	if (nd_region->num_lanes < nr_cpu_ids) {
 962		unsigned int cpu = smp_processor_id();
 963		struct nd_percpu_lane *ndl_lock, *ndl_count;
 964
 965		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
 966		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
 967		if (--ndl_count->count == 0)
 968			spin_unlock(&ndl_lock->lock);
 969	}
 970	migrate_enable();
 971}
 972EXPORT_SYMBOL(nd_region_release_lane);
 973
 974/*
 975 * PowerPC requires this alignment for memremap_pages(). All other archs
 976 * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
 977 */
 978#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
 979
 980static unsigned long default_align(struct nd_region *nd_region)
 981{
 982	unsigned long align;
 983	u32 remainder;
 984	int mappings;
 985
 986	align = MEMREMAP_COMPAT_ALIGN_MAX;
 987	if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX)
 988		align = PAGE_SIZE;
 989
 990	mappings = max_t(u16, 1, nd_region->ndr_mappings);
 991	div_u64_rem(align, mappings, &remainder);
 992	if (remainder)
 993		align *= mappings;
 994
 995	return align;
 996}
 997
/* lockdep class assigned to each region device's mutex in nd_region_create() */
static struct lock_class_key nvdimm_region_key;
 999
1000static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
1001		struct nd_region_desc *ndr_desc,
1002		const struct device_type *dev_type, const char *caller)
1003{
1004	struct nd_region *nd_region;
1005	struct device *dev;
1006	unsigned int i;
1007	int ro = 0;
1008
1009	for (i = 0; i < ndr_desc->num_mappings; i++) {
1010		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
1011		struct nvdimm *nvdimm = mapping->nvdimm;
1012
1013		if ((mapping->start | mapping->size) % PAGE_SIZE) {
1014			dev_err(&nvdimm_bus->dev,
1015				"%s: %s mapping%d is not %ld aligned\n",
1016				caller, dev_name(&nvdimm->dev), i, PAGE_SIZE);
1017			return NULL;
1018		}
1019
1020		if (test_bit(NDD_UNARMED, &nvdimm->flags))
1021			ro = 1;
1022
1023	}
1024
1025	nd_region =
1026		kzalloc(struct_size(nd_region, mapping, ndr_desc->num_mappings),
1027			GFP_KERNEL);
1028
1029	if (!nd_region)
1030		return NULL;
1031	nd_region->ndr_mappings = ndr_desc->num_mappings;
1032	/* CXL pre-assigns memregion ids before creating nvdimm regions */
1033	if (test_bit(ND_REGION_CXL, &ndr_desc->flags)) {
1034		nd_region->id = ndr_desc->memregion;
1035	} else {
1036		nd_region->id = memregion_alloc(GFP_KERNEL);
1037		if (nd_region->id < 0)
1038			goto err_id;
1039	}
1040
1041	nd_region->lane = alloc_percpu(struct nd_percpu_lane);
1042	if (!nd_region->lane)
1043		goto err_percpu;
1044
1045        for (i = 0; i < nr_cpu_ids; i++) {
1046		struct nd_percpu_lane *ndl;
1047
1048		ndl = per_cpu_ptr(nd_region->lane, i);
1049		spin_lock_init(&ndl->lock);
1050		ndl->count = 0;
1051	}
1052
1053	for (i = 0; i < ndr_desc->num_mappings; i++) {
1054		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
1055		struct nvdimm *nvdimm = mapping->nvdimm;
1056
1057		nd_region->mapping[i].nvdimm = nvdimm;
1058		nd_region->mapping[i].start = mapping->start;
1059		nd_region->mapping[i].size = mapping->size;
1060		nd_region->mapping[i].position = mapping->position;
1061		INIT_LIST_HEAD(&nd_region->mapping[i].labels);
1062		mutex_init(&nd_region->mapping[i].lock);
1063
1064		get_device(&nvdimm->dev);
1065	}
1066	nd_region->provider_data = ndr_desc->provider_data;
1067	nd_region->nd_set = ndr_desc->nd_set;
1068	nd_region->num_lanes = ndr_desc->num_lanes;
1069	nd_region->flags = ndr_desc->flags;
1070	nd_region->ro = ro;
1071	nd_region->numa_node = ndr_desc->numa_node;
1072	nd_region->target_node = ndr_desc->target_node;
1073	ida_init(&nd_region->ns_ida);
1074	ida_init(&nd_region->btt_ida);
1075	ida_init(&nd_region->pfn_ida);
1076	ida_init(&nd_region->dax_ida);
1077	dev = &nd_region->dev;
1078	dev_set_name(dev, "region%d", nd_region->id);
1079	dev->parent = &nvdimm_bus->dev;
1080	dev->type = dev_type;
1081	dev->groups = ndr_desc->attr_groups;
1082	dev->of_node = ndr_desc->of_node;
1083	nd_region->ndr_size = resource_size(ndr_desc->res);
1084	nd_region->ndr_start = ndr_desc->res->start;
1085	nd_region->align = default_align(nd_region);
1086	if (ndr_desc->flush)
1087		nd_region->flush = ndr_desc->flush;
1088	else
1089		nd_region->flush = NULL;
1090
1091	device_initialize(dev);
1092	lockdep_set_class(&dev->mutex, &nvdimm_region_key);
1093	nd_device_register(dev);
1094
1095	return nd_region;
1096
1097err_percpu:
1098	if (!test_bit(ND_REGION_CXL, &ndr_desc->flags))
1099		memregion_free(nd_region->id);
1100err_id:
1101	kfree(nd_region);
1102	return NULL;
1103}
1104
1105struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
1106		struct nd_region_desc *ndr_desc)
1107{
1108	ndr_desc->num_lanes = ND_MAX_LANES;
1109	return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type,
1110			__func__);
1111}
1112EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);
1113
1114struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
1115		struct nd_region_desc *ndr_desc)
1116{
1117	ndr_desc->num_lanes = ND_MAX_LANES;
1118	return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type,
1119			__func__);
1120}
1121EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
1122
1123void nvdimm_region_delete(struct nd_region *nd_region)
1124{
1125	if (nd_region)
1126		nd_device_unregister(&nd_region->dev, ND_SYNC);
1127}
1128EXPORT_SYMBOL_GPL(nvdimm_region_delete);
1129
1130int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
1131{
1132	int rc = 0;
1133
1134	if (!nd_region->flush)
1135		rc = generic_nvdimm_flush(nd_region);
1136	else {
1137		if (nd_region->flush(nd_region, bio))
1138			rc = -EIO;
1139	}
1140
1141	return rc;
1142}
1143/**
1144 * generic_nvdimm_flush() - flush any posted write queues between the cpu and pmem media
1145 * @nd_region: interleaved pmem region
1146 */
1147int generic_nvdimm_flush(struct nd_region *nd_region)
1148{
1149	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
1150	int i, idx;
1151
1152	/*
1153	 * Try to encourage some diversity in flush hint addresses
1154	 * across cpus assuming a limited number of flush hints.
1155	 */
1156	idx = this_cpu_read(flush_idx);
1157	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
1158
1159	/*
1160	 * The pmem_wmb() is needed to 'sfence' all
1161	 * previous writes such that they are architecturally visible for
1162	 * the platform buffer flush. Note that we've already arranged for pmem
1163	 * writes to avoid the cache via memcpy_flushcache().  The final
1164	 * wmb() ensures ordering for the NVDIMM flush write.
1165	 */
1166	pmem_wmb();
1167	for (i = 0; i < nd_region->ndr_mappings; i++)
1168		if (ndrd_get_flush_wpq(ndrd, i, 0))
1169			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
1170	wmb();
1171
1172	return 0;
1173}
1174EXPORT_SYMBOL_GPL(nvdimm_flush);
1175
1176/**
1177 * nvdimm_has_flush - determine write flushing requirements
1178 * @nd_region: interleaved pmem region
1179 *
1180 * Returns 1 if writes require flushing
1181 * Returns 0 if writes do not require flushing
1182 * Returns -ENXIO if flushing capability can not be determined
1183 */
1184int nvdimm_has_flush(struct nd_region *nd_region)
1185{
1186	int i;
1187
1188	/* no nvdimm or pmem api == flushing capability unknown */
1189	if (nd_region->ndr_mappings == 0
1190			|| !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
1191		return -ENXIO;
1192
1193	/* Test if an explicit flush function is defined */
1194	if (test_bit(ND_REGION_ASYNC, &nd_region->flags) && nd_region->flush)
1195		return 1;
1196
1197	/* Test if any flush hints for the region are available */
1198	for (i = 0; i < nd_region->ndr_mappings; i++) {
1199		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1200		struct nvdimm *nvdimm = nd_mapping->nvdimm;
1201
1202		/* flush hints present / available */
1203		if (nvdimm->num_flush)
1204			return 1;
1205	}
1206
1207	/*
1208	 * The platform defines dimm devices without hints nor explicit flush,
1209	 * assume platform persistence mechanism like ADR
1210	 */
1211	return 0;
1212}
1213EXPORT_SYMBOL_GPL(nvdimm_has_flush);
1214
1215int nvdimm_has_cache(struct nd_region *nd_region)
1216{
1217	return is_nd_pmem(&nd_region->dev) &&
1218		!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
1219}
1220EXPORT_SYMBOL_GPL(nvdimm_has_cache);
1221
1222bool is_nvdimm_sync(struct nd_region *nd_region)
1223{
1224	if (is_nd_volatile(&nd_region->dev))
1225		return true;
1226
1227	return is_nd_pmem(&nd_region->dev) &&
1228		!test_bit(ND_REGION_ASYNC, &nd_region->flags);
1229}
1230EXPORT_SYMBOL_GPL(is_nvdimm_sync);
1231
1232struct conflict_context {
1233	struct nd_region *nd_region;
1234	resource_size_t start, size;
1235};
1236
1237static int region_conflict(struct device *dev, void *data)
1238{
1239	struct nd_region *nd_region;
1240	struct conflict_context *ctx = data;
1241	resource_size_t res_end, region_end, region_start;
1242
1243	if (!is_memory(dev))
1244		return 0;
1245
1246	nd_region = to_nd_region(dev);
1247	if (nd_region == ctx->nd_region)
1248		return 0;
1249
1250	res_end = ctx->start + ctx->size;
1251	region_start = nd_region->ndr_start;
1252	region_end = region_start + nd_region->ndr_size;
1253	if (ctx->start >= region_start && ctx->start < region_end)
1254		return -EBUSY;
1255	if (res_end > region_start && res_end <= region_end)
1256		return -EBUSY;
1257	return 0;
1258}
1259
1260int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
1261		resource_size_t size)
1262{
1263	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
1264	struct conflict_context ctx = {
1265		.nd_region = nd_region,
1266		.start = start,
1267		.size = size,
1268	};
1269
1270	return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
1271}
1272
1273MODULE_IMPORT_NS(DEVMEM);