Linux Audio

Check our new training course

Loading...
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
   3#include <linux/memremap.h>
   4#include <linux/device.h>
   5#include <linux/mutex.h>
   6#include <linux/list.h>
   7#include <linux/slab.h>
   8#include <linux/dax.h>
   9#include <linux/io.h>
  10#include "dax-private.h"
  11#include "bus.h"
  12
/*
 * Protects the per-driver id lists: __dax_match_id() asserts it is held
 * and do_id_store() takes it around list additions and removals.
 */
static DEFINE_MUTEX(dax_bus_lock);
  14
 
 
 
 
 
 
 
 
 
 
 
 
/* Size, in bytes, of the buffers that hold a "daxN.M" device name. */
#define DAX_NAME_LEN 30
/* One device name recorded on a dax driver's id list. */
struct dax_id {
	struct list_head list;		/* linked on dax_device_driver->ids */
	char dev_name[DAX_NAME_LEN];	/* compared with sysfs_streq() */
};
  20
  21static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
  22{
  23	/*
  24	 * We only ever expect to handle device-dax instances, i.e. the
  25	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
  26	 */
  27	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
  28}
  29
  30static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
  31{
  32	return container_of(drv, struct dax_device_driver, drv);
  33}
  34
  35static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
  36		const char *dev_name)
  37{
  38	struct dax_id *dax_id;
  39
  40	lockdep_assert_held(&dax_bus_lock);
  41
  42	list_for_each_entry(dax_id, &dax_drv->ids, list)
  43		if (sysfs_streq(dax_id->dev_name, dev_name))
  44			return dax_id;
  45	return NULL;
  46}
  47
  48static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
  49{
  50	int match;
  51
  52	mutex_lock(&dax_bus_lock);
  53	match = !!__dax_match_id(dax_drv, dev_name(dev));
  54	mutex_unlock(&dax_bus_lock);
  55
  56	return match;
  57}
  58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/* Operation selected by the new_id / remove_id driver attributes. */
enum id_action {
	ID_REMOVE,	/* drop a matching entry from the driver's id list */
	ID_ADD,		/* add an entry, then call driver_attach() */
};
  63
  64static ssize_t do_id_store(struct device_driver *drv, const char *buf,
  65		size_t count, enum id_action action)
  66{
  67	struct dax_device_driver *dax_drv = to_dax_drv(drv);
  68	unsigned int region_id, id;
  69	char devname[DAX_NAME_LEN];
  70	struct dax_id *dax_id;
  71	ssize_t rc = count;
  72	int fields;
  73
  74	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
  75	if (fields != 2)
  76		return -EINVAL;
  77	sprintf(devname, "dax%d.%d", region_id, id);
  78	if (!sysfs_streq(buf, devname))
  79		return -EINVAL;
  80
  81	mutex_lock(&dax_bus_lock);
  82	dax_id = __dax_match_id(dax_drv, buf);
  83	if (!dax_id) {
  84		if (action == ID_ADD) {
  85			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
  86			if (dax_id) {
  87				strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
  88				list_add(&dax_id->list, &dax_drv->ids);
  89			} else
  90				rc = -ENOMEM;
  91		}
  92	} else if (action == ID_REMOVE) {
  93		list_del(&dax_id->list);
  94		kfree(dax_id);
  95	}
  96	mutex_unlock(&dax_bus_lock);
  97
  98	if (rc < 0)
  99		return rc;
 100	if (action == ID_ADD)
 101		rc = driver_attach(drv);
 102	if (rc)
 103		return rc;
 104	return count;
 105}
 106
/* Write handler of the "new_id" driver attribute: do_id_store() with ID_ADD. */
static ssize_t new_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);
 113
/*
 * Write handler of the "remove_id" driver attribute: do_id_store() with
 * ID_REMOVE.
 */
static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);
 120
/* Driver attributes referenced by dax_bus_type.drv_groups. */
static struct attribute *dax_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dax_drv);
 127
/* Forward declaration: dax_bus_type refers to it before the definition. */
static int dax_bus_match(struct device *dev, struct device_driver *drv);
 129
/*
 * Static dax regions are regions created by an external subsystem
 * (e.g. nvdimm) where a single range is assigned. Their boundaries are
 * defined by the external subsystem and are usually limited to one
 * physical memory range. For example, for PMEM it is usually defined by
 * NVDIMM Namespace boundaries (i.e. a single contiguous range).
 *
 * On dynamic dax regions, the assigned region can be partitioned by dax core
 * into multiple subdivisions. A subdivision is represented by one
 * /dev/daxN.M device composed of one or more potentially discontiguous ranges.
 *
 * When allocating a dax region, drivers must set whether it's static
 * (IORESOURCE_DAX_STATIC).  On static dax devices, the @pgmap is pre-assigned
 * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
 * devices it is NULL but afterwards allocated by dax core on device ->probe().
 * Care is needed to make sure that dynamic dax devices are torn down with a
 * cleared @pgmap field (see kill_dev_dax()).
 */
 148static bool is_static(struct dax_region *dax_region)
 149{
 150	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
 151}
 152
/* Exported helper: true when @dev_dax belongs to a static dax region. */
bool static_dev_dax(struct dev_dax *dev_dax)
{
	return is_static(dev_dax->region);
}
EXPORT_SYMBOL_GPL(static_dev_dax);
 158
 159static u64 dev_dax_size(struct dev_dax *dev_dax)
 160{
 161	u64 size = 0;
 162	int i;
 163
 164	device_lock_assert(&dev_dax->dev);
 165
 166	for (i = 0; i < dev_dax->nr_range; i++)
 167		size += range_len(&dev_dax->ranges[i].range);
 168
 169	return size;
 170}
 171
 172static int dax_bus_probe(struct device *dev)
 173{
 174	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
 175	struct dev_dax *dev_dax = to_dev_dax(dev);
 176	struct dax_region *dax_region = dev_dax->region;
 177	int rc;
 
 
 
 
 
 
 
 178
 179	if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
 180		return -ENXIO;
 181
 182	rc = dax_drv->probe(dev_dax);
 183
 184	if (rc || is_static(dax_region))
 185		return rc;
 186
 187	/*
 188	 * Track new seed creation only after successful probe of the
 189	 * previous seed.
 190	 */
 191	if (dax_region->seed == dev)
 192		dax_region->seed = NULL;
 193
 194	return 0;
 195}
 196
 197static void dax_bus_remove(struct device *dev)
 198{
 199	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
 200	struct dev_dax *dev_dax = to_dev_dax(dev);
 201
 202	if (dax_drv->remove)
 203		dax_drv->remove(dev_dax);
 204}
 205
/* The "dax" bus: ties the callbacks and driver attributes defined in this file together. */
static struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};
 214
 215static int dax_bus_match(struct device *dev, struct device_driver *drv)
 216{
 217	struct dax_device_driver *dax_drv = to_dax_drv(drv);
 218
 219	/*
 220	 * All but the 'device-dax' driver, which has 'match_always'
 221	 * set, requires an exact id match.
 222	 */
 223	if (dax_drv->match_always)
 224		return 1;
 225
 226	return dax_match_id(dax_drv, dev);
 227}
 228
 229/*
 230 * Rely on the fact that drvdata is set before the attributes are
 231 * registered, and that the attributes are unregistered before drvdata
 232 * is cleared to assume that drvdata is always valid.
 233 */
 234static ssize_t id_show(struct device *dev,
 235		struct device_attribute *attr, char *buf)
 236{
 237	struct dax_region *dax_region = dev_get_drvdata(dev);
 238
 239	return sprintf(buf, "%d\n", dax_region->id);
 240}
 241static DEVICE_ATTR_RO(id);
 242
 243static ssize_t region_size_show(struct device *dev,
 244		struct device_attribute *attr, char *buf)
 245{
 246	struct dax_region *dax_region = dev_get_drvdata(dev);
 247
 248	return sprintf(buf, "%llu\n", (unsigned long long)
 249			resource_size(&dax_region->res));
 250}
 251static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
 252		region_size_show, NULL);
 253
 254static ssize_t region_align_show(struct device *dev,
 255		struct device_attribute *attr, char *buf)
 256{
 257	struct dax_region *dax_region = dev_get_drvdata(dev);
 258
 259	return sprintf(buf, "%u\n", dax_region->align);
 260}
 261static struct device_attribute dev_attr_region_align =
 262		__ATTR(align, 0400, region_align_show, NULL);
 263
/* Iterate @res over the child resources of @dax_region's resource via ->sibling. */
#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)
 266
 267static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
 268{
 269	resource_size_t size = resource_size(&dax_region->res);
 270	struct resource *res;
 271
 272	device_lock_assert(dax_region->dev);
 273
 274	for_each_dax_region_resource(dax_region, res)
 275		size -= resource_size(res);
 276	return size;
 277}
 278
 279static ssize_t available_size_show(struct device *dev,
 280		struct device_attribute *attr, char *buf)
 281{
 282	struct dax_region *dax_region = dev_get_drvdata(dev);
 283	unsigned long long size;
 
 284
 285	device_lock(dev);
 
 
 286	size = dax_region_avail_size(dax_region);
 287	device_unlock(dev);
 288
 289	return sprintf(buf, "%llu\n", size);
 290}
 291static DEVICE_ATTR_RO(available_size);
 292
 293static ssize_t seed_show(struct device *dev,
 294		struct device_attribute *attr, char *buf)
 295{
 296	struct dax_region *dax_region = dev_get_drvdata(dev);
 297	struct device *seed;
 298	ssize_t rc;
 299
 300	if (is_static(dax_region))
 301		return -EINVAL;
 302
 303	device_lock(dev);
 
 
 304	seed = dax_region->seed;
 305	rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
 306	device_unlock(dev);
 307
 308	return rc;
 309}
 310static DEVICE_ATTR_RO(seed);
 311
 312static ssize_t create_show(struct device *dev,
 313		struct device_attribute *attr, char *buf)
 314{
 315	struct dax_region *dax_region = dev_get_drvdata(dev);
 316	struct device *youngest;
 317	ssize_t rc;
 318
 319	if (is_static(dax_region))
 320		return -EINVAL;
 321
 322	device_lock(dev);
 
 
 323	youngest = dax_region->youngest;
 324	rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
 325	device_unlock(dev);
 326
 327	return rc;
 328}
 329
 
 
 330static ssize_t create_store(struct device *dev, struct device_attribute *attr,
 331		const char *buf, size_t len)
 332{
 333	struct dax_region *dax_region = dev_get_drvdata(dev);
 334	unsigned long long avail;
 335	ssize_t rc;
 336	int val;
 337
 338	if (is_static(dax_region))
 339		return -EINVAL;
 340
 341	rc = kstrtoint(buf, 0, &val);
 342	if (rc)
 343		return rc;
 344	if (val != 1)
 345		return -EINVAL;
 346
 347	device_lock(dev);
 
 
 348	avail = dax_region_avail_size(dax_region);
 349	if (avail == 0)
 350		rc = -ENOSPC;
 351	else {
 352		struct dev_dax_data data = {
 353			.dax_region = dax_region,
 354			.size = 0,
 355			.id = -1,
 
 356		};
 357		struct dev_dax *dev_dax = devm_create_dev_dax(&data);
 358
 359		if (IS_ERR(dev_dax))
 360			rc = PTR_ERR(dev_dax);
 361		else {
 362			/*
 363			 * In support of crafting multiple new devices
 364			 * simultaneously multiple seeds can be created,
 365			 * but only the first one that has not been
 366			 * successfully bound is tracked as the region
 367			 * seed.
 368			 */
 369			if (!dax_region->seed)
 370				dax_region->seed = &dev_dax->dev;
 371			dax_region->youngest = &dev_dax->dev;
 372			rc = len;
 373		}
 374	}
 375	device_unlock(dev);
 376
 377	return rc;
 378}
 379static DEVICE_ATTR_RW(create);
 380
 381void kill_dev_dax(struct dev_dax *dev_dax)
 382{
 383	struct dax_device *dax_dev = dev_dax->dax_dev;
 384	struct inode *inode = dax_inode(dax_dev);
 385
 386	kill_dax(dax_dev);
 387	unmap_mapping_range(inode->i_mapping, 0, 0, 1);
 388
 389	/*
 390	 * Dynamic dax region have the pgmap allocated via dev_kzalloc()
 391	 * and thus freed by devm. Clear the pgmap to not have stale pgmap
 392	 * ranges on probe() from previous reconfigurations of region devices.
 393	 */
 394	if (!static_dev_dax(dev_dax))
 395		dev_dax->pgmap = NULL;
 396}
 397EXPORT_SYMBOL_GPL(kill_dev_dax);
 398
 399static void trim_dev_dax_range(struct dev_dax *dev_dax)
 400{
 401	int i = dev_dax->nr_range - 1;
 402	struct range *range = &dev_dax->ranges[i].range;
 403	struct dax_region *dax_region = dev_dax->region;
 404
 405	device_lock_assert(dax_region->dev);
 406	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
 407		(unsigned long long)range->start,
 408		(unsigned long long)range->end);
 409
 410	__release_region(&dax_region->res, range->start, range_len(range));
 411	if (--dev_dax->nr_range == 0) {
 412		kfree(dev_dax->ranges);
 413		dev_dax->ranges = NULL;
 414	}
 415}
 416
 417static void free_dev_dax_ranges(struct dev_dax *dev_dax)
 418{
 419	while (dev_dax->nr_range)
 420		trim_dev_dax_range(dev_dax);
 421}
 422
/*
 * Tear down a dev_dax instance; @dev is its struct device.
 *
 * Used as a devm action (delete_store() releases it with
 * devm_release_action()). The dax_device is killed and the ranges are
 * returned to the region before the device is deleted, and the final
 * put_device() drops a reference on @dev.
 */
static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	kill_dev_dax(dev_dax);
	free_dev_dax_ranges(dev_dax);
	device_del(dev);
	put_device(dev);
}
 434
 435/* a return value >= 0 indicates this invocation invalidated the id */
 436static int __free_dev_dax_id(struct dev_dax *dev_dax)
 437{
 438	struct dax_region *dax_region = dev_dax->region;
 439	struct device *dev = &dev_dax->dev;
 440	int rc = dev_dax->id;
 441
 442	device_lock_assert(dev);
 443
 444	if (is_static(dax_region) || dev_dax->id < 0)
 445		return -1;
 
 446	ida_free(&dax_region->ida, dev_dax->id);
 
 447	dev_dax->id = -1;
 448	return rc;
 449}
 450
 451static int free_dev_dax_id(struct dev_dax *dev_dax)
 452{
 453	struct device *dev = &dev_dax->dev;
 454	int rc;
 455
 456	device_lock(dev);
 
 
 457	rc = __free_dev_dax_id(dev_dax);
 458	device_unlock(dev);
 459	return rc;
 460}
 461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Write handler of the region "delete" attribute.
 *
 * @buf names a child device of the region. The device is only deleted
 * when it has no driver bound, has zero size, and has an id greater
 * than zero.
 *
 * Returns @len on success, -EINVAL for static regions, -ENXIO when no
 * child with that name exists, and -EBUSY when the device is bound,
 * still has capacity, or may not be deleted because of its id.
 */
static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	/* a found child is handed back to put_device() at the end */
	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	/* lock order: region device first, then the child */
	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			/* do not leave region bookkeeping pointing at the victim */
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);
 511
 512static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
 513		int n)
 514{
 515	struct device *dev = container_of(kobj, struct device, kobj);
 516	struct dax_region *dax_region = dev_get_drvdata(dev);
 517
 518	if (is_static(dax_region))
 519		if (a == &dev_attr_available_size.attr
 520				|| a == &dev_attr_create.attr
 521				|| a == &dev_attr_seed.attr
 522				|| a == &dev_attr_delete.attr)
 523			return 0;
 524	return a->mode;
 525}
 526
/* Attributes of the "dax_region" group; visibility is filtered by dax_region_visible(). */
static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};
 537
/* The named "dax_region" attribute group. */
static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};

/* NULL-terminated group list passed to sysfs_create_groups() / sysfs_remove_groups(). */
static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};
 548
 549static void dax_region_free(struct kref *kref)
 550{
 551	struct dax_region *dax_region;
 552
 553	dax_region = container_of(kref, struct dax_region, kref);
 554	kfree(dax_region);
 555}
 556
/* Drop one reference on @dax_region; the last put frees it via dax_region_free(). */
void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}
EXPORT_SYMBOL_GPL(dax_region_put);
 562
 563static void dax_region_unregister(void *region)
 564{
 565	struct dax_region *dax_region = region;
 566
 567	sysfs_remove_groups(&dax_region->dev->kobj,
 568			dax_region_attribute_groups);
 569	dax_region_put(dax_region);
 570}
 571
 572struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 573		struct range *range, int target_node, unsigned int align,
 574		unsigned long flags)
 575{
 576	struct dax_region *dax_region;
 577
 578	/*
 579	 * The DAX core assumes that it can store its private data in
 580	 * parent->driver_data. This WARN is a reminder / safeguard for
 581	 * developers of device-dax drivers.
 582	 */
 583	if (dev_get_drvdata(parent)) {
 584		dev_WARN(parent, "dax core failed to setup private data\n");
 585		return NULL;
 586	}
 587
 588	if (!IS_ALIGNED(range->start, align)
 589			|| !IS_ALIGNED(range_len(range), align))
 590		return NULL;
 591
 592	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
 593	if (!dax_region)
 594		return NULL;
 595
 596	dev_set_drvdata(parent, dax_region);
 597	kref_init(&dax_region->kref);
 598	dax_region->id = region_id;
 599	dax_region->align = align;
 600	dax_region->dev = parent;
 601	dax_region->target_node = target_node;
 602	ida_init(&dax_region->ida);
 603	dax_region->res = (struct resource) {
 604		.start = range->start,
 605		.end = range->end,
 606		.flags = IORESOURCE_MEM | flags,
 607	};
 608
 609	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
 610		kfree(dax_region);
 611		return NULL;
 612	}
 613
 614	kref_get(&dax_region->kref);
 615	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
 616		return NULL;
 617	return dax_region;
 618}
 619EXPORT_SYMBOL_GPL(alloc_dax_region);
 620
 621static void dax_mapping_release(struct device *dev)
 622{
 623	struct dax_mapping *mapping = to_dax_mapping(dev);
 624	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
 
 625
 626	ida_free(&dev_dax->ida, mapping->id);
 627	kfree(mapping);
 
 628}
 629
/*
 * devm action that removes a mapping device; @data is its struct device.
 *
 * The region device lock must be held. The mapping is detached from its
 * range (range_id set to -1, which get_dax_range() treats as "gone")
 * before the device is deleted and a reference on it is dropped.
 */
static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	struct dax_region *dax_region = dev_dax->region;

	dev_dbg(dev, "%s\n", __func__);

	device_lock_assert(dax_region->dev);

	/* break the range <-> mapping association in both directions */
	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_del(dev);
	put_device(dev);
}
 647
 648static struct dev_dax_range *get_dax_range(struct device *dev)
 649{
 650	struct dax_mapping *mapping = to_dax_mapping(dev);
 651	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
 652	struct dax_region *dax_region = dev_dax->region;
 653
 654	device_lock(dax_region->dev);
 
 
 655	if (mapping->range_id < 0) {
 656		device_unlock(dax_region->dev);
 657		return NULL;
 658	}
 659
 660	return &dev_dax->ranges[mapping->range_id];
 661}
 662
 663static void put_dax_range(struct dev_dax_range *dax_range)
 664{
 665	struct dax_mapping *mapping = dax_range->mapping;
 666	struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
 667	struct dax_region *dax_region = dev_dax->region;
 668
 669	device_unlock(dax_region->dev);
 670}
 671
 672static ssize_t start_show(struct device *dev,
 673		struct device_attribute *attr, char *buf)
 674{
 675	struct dev_dax_range *dax_range;
 676	ssize_t rc;
 677
 678	dax_range = get_dax_range(dev);
 679	if (!dax_range)
 680		return -ENXIO;
 681	rc = sprintf(buf, "%#llx\n", dax_range->range.start);
 682	put_dax_range(dax_range);
 683
 684	return rc;
 685}
 686static DEVICE_ATTR(start, 0400, start_show, NULL);
 687
 688static ssize_t end_show(struct device *dev,
 689		struct device_attribute *attr, char *buf)
 690{
 691	struct dev_dax_range *dax_range;
 692	ssize_t rc;
 693
 694	dax_range = get_dax_range(dev);
 695	if (!dax_range)
 696		return -ENXIO;
 697	rc = sprintf(buf, "%#llx\n", dax_range->range.end);
 698	put_dax_range(dax_range);
 699
 700	return rc;
 701}
 702static DEVICE_ATTR(end, 0400, end_show, NULL);
 703
 704static ssize_t pgoff_show(struct device *dev,
 705		struct device_attribute *attr, char *buf)
 706{
 707	struct dev_dax_range *dax_range;
 708	ssize_t rc;
 709
 710	dax_range = get_dax_range(dev);
 711	if (!dax_range)
 712		return -ENXIO;
 713	rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
 714	put_dax_range(dax_range);
 715
 716	return rc;
 717}
 718static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);
 719
/* Attributes exposed by every mapping device. */
static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};

/* Unnamed group holding the mapping attributes. */
static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

/* NULL-terminated group list referenced by dax_mapping_type. */
static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};

/* Device type of mapping devices: supplies ->release() and the sysfs groups. */
static struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};
 740
 741static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
 742{
 743	struct dax_region *dax_region = dev_dax->region;
 744	struct dax_mapping *mapping;
 745	struct device *dev;
 746	int rc;
 747
 748	device_lock_assert(dax_region->dev);
 749
 750	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
 751				"region disabled\n"))
 752		return -ENXIO;
 753
 754	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
 755	if (!mapping)
 756		return -ENOMEM;
 757	mapping->range_id = range_id;
 758	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
 759	if (mapping->id < 0) {
 760		kfree(mapping);
 761		return -ENOMEM;
 762	}
 763	dev_dax->ranges[range_id].mapping = mapping;
 764	dev = &mapping->dev;
 765	device_initialize(dev);
 766	dev->parent = &dev_dax->dev;
 
 767	dev->type = &dax_mapping_type;
 768	dev_set_name(dev, "mapping%d", mapping->id);
 769	rc = device_add(dev);
 770	if (rc) {
 771		put_device(dev);
 772		return rc;
 773	}
 774
 775	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
 776			dev);
 777	if (rc)
 778		return rc;
 779	return 0;
 780}
 781
 782static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
 783		resource_size_t size)
 784{
 785	struct dax_region *dax_region = dev_dax->region;
 786	struct resource *res = &dax_region->res;
 787	struct device *dev = &dev_dax->dev;
 788	struct dev_dax_range *ranges;
 789	unsigned long pgoff = 0;
 790	struct resource *alloc;
 791	int i, rc;
 792
 793	device_lock_assert(dax_region->dev);
 794
 795	/* handle the seed alloc special case */
 796	if (!size) {
 797		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
 798					"0-size allocation must be first\n"))
 799			return -EBUSY;
 800		/* nr_range == 0 is elsewhere special cased as 0-size device */
 801		return 0;
 802	}
 803
 804	alloc = __request_region(res, start, size, dev_name(dev), 0);
 805	if (!alloc)
 806		return -ENOMEM;
 807
 808	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
 809			* (dev_dax->nr_range + 1), GFP_KERNEL);
 810	if (!ranges) {
 811		__release_region(res, alloc->start, resource_size(alloc));
 812		return -ENOMEM;
 813	}
 814
 815	for (i = 0; i < dev_dax->nr_range; i++)
 816		pgoff += PHYS_PFN(range_len(&ranges[i].range));
 817	dev_dax->ranges = ranges;
 818	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
 819		.pgoff = pgoff,
 820		.range = {
 821			.start = alloc->start,
 822			.end = alloc->end,
 823		},
 824	};
 825
 826	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
 827			&alloc->start, &alloc->end);
 828	/*
 829	 * A dev_dax instance must be registered before mapping device
 830	 * children can be added. Defer to devm_create_dev_dax() to add
 831	 * the initial mapping device.
 832	 */
 833	if (!device_is_registered(&dev_dax->dev))
 834		return 0;
 835
 836	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
 837	if (rc)
 838		trim_dev_dax_range(dev_dax);
 839
 840	return rc;
 841}
 842
 843static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
 844{
 845	int last_range = dev_dax->nr_range - 1;
 846	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
 847	struct dax_region *dax_region = dev_dax->region;
 848	bool is_shrink = resource_size(res) > size;
 849	struct range *range = &dax_range->range;
 850	struct device *dev = &dev_dax->dev;
 851	int rc;
 852
 853	device_lock_assert(dax_region->dev);
 854
 855	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
 856		return -EINVAL;
 857
 858	rc = adjust_resource(res, range->start, size);
 859	if (rc)
 860		return rc;
 861
 862	*range = (struct range) {
 863		.start = range->start,
 864		.end = range->start + size - 1,
 865	};
 866
 867	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
 868			last_range, (unsigned long long) range->start,
 869			(unsigned long long) range->end);
 870
 871	return 0;
 872}
 873
 874static ssize_t size_show(struct device *dev,
 875		struct device_attribute *attr, char *buf)
 876{
 877	struct dev_dax *dev_dax = to_dev_dax(dev);
 878	unsigned long long size;
 
 879
 880	device_lock(dev);
 
 
 881	size = dev_dax_size(dev_dax);
 882	device_unlock(dev);
 883
 884	return sprintf(buf, "%llu\n", size);
 885}
 886
 887static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
 888{
 889	/*
 890	 * The minimum mapping granularity for a device instance is a
 891	 * single subsection, unless the arch says otherwise.
 892	 */
 893	return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
 894}
 895
/*
 * Shrink @dev_dax down to @size bytes by giving capacity back from the
 * end of its range list.
 *
 * Whole ranges are removed (mapping device unregistered, range trimmed)
 * while the remaining amount to shrink covers them; a final partial
 * shrink adjusts the size of the then-last range. Returns 0 on success,
 * -ENXIO when the resource backing a partially shrunk range cannot be
 * found, or the error from adjust_dev_dax_range().
 */
static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	/* walk the ranges from last to first */
	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		/* the whole range goes away */
		if (shrink >= range_len(range)) {
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		/* partial shrink: find the region resource backing this range */
		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		/* only the last range may be resized in place */
		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}
 935
 936/*
 937 * Only allow adjustments that preserve the relative pgoff of existing
 938 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 939 */
 940static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
 941{
 942	struct dev_dax_range *last;
 943	int i;
 944
 945	if (dev_dax->nr_range == 0)
 946		return false;
 947	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
 948		return false;
 949	last = &dev_dax->ranges[dev_dax->nr_range - 1];
 950	if (last->range.start != res->start || last->range.end != res->end)
 951		return false;
 952	for (i = 0; i < dev_dax->nr_range - 1; i++) {
 953		struct dev_dax_range *dax_range = &dev_dax->ranges[i];
 954
 955		if (dax_range->pgoff > last->pgoff)
 956			return false;
 957	}
 958
 959	return true;
 960}
 961
/*
 * Resize @dev_dax to @size bytes within @dax_region.
 *
 * Shrinking is delegated to dev_dax_shrink(). Growing repeatedly looks
 * for free space in the region (before the first child resource, between
 * children, or after the last one) and either extends the device's last
 * range in place or allocates a new range, until the requested amount
 * has been added.
 *
 * Returns 0 on success (including when the size is unchanged), -EBUSY
 * when a driver is bound to the device, -ENOSPC when the region lacks
 * free capacity, -ENXIO when the amount to add is misaligned, or the
 * error from the shrink / adjust / allocate helpers.
 */
static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource.
	 */
retry:
	first = region_res->child;
	/* empty region: allocate everything from its start */
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	/* stays -ENOSPC when the walk below finds no usable gap */
	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		/* grow the device's last range in place when that is allowed */
		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}
	if (rc)
		return rc;
	/* one gap was consumed; rescan the region for the remainder */
	to_alloc -= alloc;
	if (to_alloc)
		goto retry;
	return 0;
}
1034
1035static ssize_t size_store(struct device *dev, struct device_attribute *attr,
1036		const char *buf, size_t len)
1037{
1038	ssize_t rc;
1039	unsigned long long val;
1040	struct dev_dax *dev_dax = to_dev_dax(dev);
1041	struct dax_region *dax_region = dev_dax->region;
1042
1043	rc = kstrtoull(buf, 0, &val);
1044	if (rc)
1045		return rc;
1046
1047	if (!alloc_is_aligned(dev_dax, val)) {
1048		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
1049		return -EINVAL;
1050	}
1051
1052	device_lock(dax_region->dev);
 
 
1053	if (!dax_region->dev->driver) {
1054		device_unlock(dax_region->dev);
1055		return -ENXIO;
1056	}
1057	device_lock(dev);
 
 
 
1058	rc = dev_dax_resize(dax_region, dev_dax, val);
1059	device_unlock(dev);
1060	device_unlock(dax_region->dev);
1061
1062	return rc == 0 ? len : rc;
 
 
 
 
 
 
 
1063}
1064static DEVICE_ATTR_RW(size);
1065
1066static ssize_t range_parse(const char *opt, size_t len, struct range *range)
1067{
1068	unsigned long long addr = 0;
1069	char *start, *end, *str;
1070	ssize_t rc = -EINVAL;
1071
1072	str = kstrdup(opt, GFP_KERNEL);
1073	if (!str)
1074		return rc;
1075
1076	end = str;
1077	start = strsep(&end, "-");
1078	if (!start || !end)
1079		goto err;
1080
1081	rc = kstrtoull(start, 16, &addr);
1082	if (rc)
1083		goto err;
1084	range->start = addr;
1085
1086	rc = kstrtoull(end, 16, &addr);
1087	if (rc)
1088		goto err;
1089	range->end = addr;
1090
1091err:
1092	kfree(str);
1093	return rc;
1094}
1095
1096static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
1097		const char *buf, size_t len)
1098{
1099	struct dev_dax *dev_dax = to_dev_dax(dev);
1100	struct dax_region *dax_region = dev_dax->region;
1101	size_t to_alloc;
1102	struct range r;
1103	ssize_t rc;
1104
1105	rc = range_parse(buf, len, &r);
1106	if (rc)
1107		return rc;
1108
1109	rc = -ENXIO;
1110	device_lock(dax_region->dev);
 
1111	if (!dax_region->dev->driver) {
1112		device_unlock(dax_region->dev);
 
 
 
 
 
1113		return rc;
1114	}
1115	device_lock(dev);
1116
1117	to_alloc = range_len(&r);
1118	if (alloc_is_aligned(dev_dax, to_alloc))
1119		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
1120	device_unlock(dev);
1121	device_unlock(dax_region->dev);
1122
1123	return rc == 0 ? len : rc;
1124}
1125static DEVICE_ATTR_WO(mapping);
1126
1127static ssize_t align_show(struct device *dev,
1128		struct device_attribute *attr, char *buf)
1129{
1130	struct dev_dax *dev_dax = to_dev_dax(dev);
1131
1132	return sprintf(buf, "%d\n", dev_dax->align);
1133}
1134
1135static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
1136{
1137	struct device *dev = &dev_dax->dev;
1138	int i;
1139
1140	for (i = 0; i < dev_dax->nr_range; i++) {
1141		size_t len = range_len(&dev_dax->ranges[i].range);
1142
1143		if (!alloc_is_aligned(dev_dax, len)) {
1144			dev_dbg(dev, "%s: align %u invalid for range %d\n",
1145				__func__, dev_dax->align, i);
1146			return -EINVAL;
1147		}
1148	}
1149
1150	return 0;
1151}
1152
1153static ssize_t align_store(struct device *dev, struct device_attribute *attr,
1154		const char *buf, size_t len)
1155{
1156	struct dev_dax *dev_dax = to_dev_dax(dev);
1157	struct dax_region *dax_region = dev_dax->region;
1158	unsigned long val, align_save;
1159	ssize_t rc;
1160
1161	rc = kstrtoul(buf, 0, &val);
1162	if (rc)
1163		return -ENXIO;
1164
1165	if (!dax_align_valid(val))
1166		return -EINVAL;
1167
1168	device_lock(dax_region->dev);
 
 
1169	if (!dax_region->dev->driver) {
1170		device_unlock(dax_region->dev);
1171		return -ENXIO;
1172	}
1173
1174	device_lock(dev);
 
 
 
 
1175	if (dev->driver) {
1176		rc = -EBUSY;
1177		goto out_unlock;
1178	}
1179
1180	align_save = dev_dax->align;
1181	dev_dax->align = val;
1182	rc = dev_dax_validate_align(dev_dax);
1183	if (rc)
1184		dev_dax->align = align_save;
1185out_unlock:
1186	device_unlock(dev);
1187	device_unlock(dax_region->dev);
1188	return rc == 0 ? len : rc;
1189}
1190static DEVICE_ATTR_RW(align);
1191
1192static int dev_dax_target_node(struct dev_dax *dev_dax)
1193{
1194	struct dax_region *dax_region = dev_dax->region;
1195
1196	return dax_region->target_node;
1197}
1198
1199static ssize_t target_node_show(struct device *dev,
1200		struct device_attribute *attr, char *buf)
1201{
1202	struct dev_dax *dev_dax = to_dev_dax(dev);
1203
1204	return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
1205}
1206static DEVICE_ATTR_RO(target_node);
1207
1208static ssize_t resource_show(struct device *dev,
1209		struct device_attribute *attr, char *buf)
1210{
1211	struct dev_dax *dev_dax = to_dev_dax(dev);
1212	struct dax_region *dax_region = dev_dax->region;
1213	unsigned long long start;
1214
1215	if (dev_dax->nr_range < 1)
1216		start = dax_region->res.start;
1217	else
1218		start = dev_dax->ranges[0].range.start;
1219
1220	return sprintf(buf, "%#llx\n", start);
1221}
1222static DEVICE_ATTR(resource, 0400, resource_show, NULL);
1223
1224static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1225		char *buf)
1226{
1227	/*
1228	 * We only ever expect to handle device-dax instances, i.e. the
1229	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
1230	 */
1231	return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
1232}
1233static DEVICE_ATTR_RO(modalias);
1234
1235static ssize_t numa_node_show(struct device *dev,
1236		struct device_attribute *attr, char *buf)
1237{
1238	return sprintf(buf, "%d\n", dev_to_node(dev));
1239}
1240static DEVICE_ATTR_RO(numa_node);
1241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1242static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
1243{
1244	struct device *dev = container_of(kobj, struct device, kobj);
1245	struct dev_dax *dev_dax = to_dev_dax(dev);
1246	struct dax_region *dax_region = dev_dax->region;
1247
1248	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
1249		return 0;
1250	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
1251		return 0;
1252	if (a == &dev_attr_mapping.attr && is_static(dax_region))
1253		return 0;
1254	if ((a == &dev_attr_align.attr ||
1255	     a == &dev_attr_size.attr) && is_static(dax_region))
1256		return 0444;
1257	return a->mode;
1258}
1259
1260static struct attribute *dev_dax_attributes[] = {
1261	&dev_attr_modalias.attr,
1262	&dev_attr_size.attr,
1263	&dev_attr_mapping.attr,
1264	&dev_attr_target_node.attr,
1265	&dev_attr_align.attr,
1266	&dev_attr_resource.attr,
1267	&dev_attr_numa_node.attr,
 
1268	NULL,
1269};
1270
1271static const struct attribute_group dev_dax_attribute_group = {
1272	.attrs = dev_dax_attributes,
1273	.is_visible = dev_dax_visible,
1274};
1275
1276static const struct attribute_group *dax_attribute_groups[] = {
1277	&dev_dax_attribute_group,
1278	NULL,
1279};
1280
1281static void dev_dax_release(struct device *dev)
1282{
1283	struct dev_dax *dev_dax = to_dev_dax(dev);
1284	struct dax_region *dax_region = dev_dax->region;
1285	struct dax_device *dax_dev = dev_dax->dax_dev;
1286
1287	put_dax(dax_dev);
1288	free_dev_dax_id(dev_dax);
1289	dax_region_put(dax_region);
1290	kfree(dev_dax->pgmap);
1291	kfree(dev_dax);
1292}
1293
1294static const struct device_type dev_dax_type = {
1295	.release = dev_dax_release,
1296	.groups = dax_attribute_groups,
1297};
1298
1299struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
1300{
1301	struct dax_region *dax_region = data->dax_region;
1302	struct device *parent = dax_region->dev;
1303	struct dax_device *dax_dev;
1304	struct dev_dax *dev_dax;
1305	struct inode *inode;
1306	struct device *dev;
1307	int rc;
1308
1309	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
1310	if (!dev_dax)
1311		return ERR_PTR(-ENOMEM);
1312
 
1313	if (is_static(dax_region)) {
1314		if (dev_WARN_ONCE(parent, data->id < 0,
1315				"dynamic id specified to static region\n")) {
1316			rc = -EINVAL;
1317			goto err_id;
1318		}
1319
1320		dev_dax->id = data->id;
1321	} else {
1322		if (dev_WARN_ONCE(parent, data->id >= 0,
1323				"static id specified to dynamic region\n")) {
1324			rc = -EINVAL;
1325			goto err_id;
1326		}
1327
1328		rc = ida_alloc(&dax_region->ida, GFP_KERNEL);
1329		if (rc < 0)
1330			goto err_id;
1331		dev_dax->id = rc;
1332	}
1333
1334	dev_dax->region = dax_region;
1335	dev = &dev_dax->dev;
1336	device_initialize(dev);
1337	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
1338
1339	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
1340	if (rc)
1341		goto err_range;
1342
1343	if (data->pgmap) {
1344		dev_WARN_ONCE(parent, !is_static(dax_region),
1345			"custom dev_pagemap requires a static dax_region\n");
1346
1347		dev_dax->pgmap = kmemdup(data->pgmap,
1348				sizeof(struct dev_pagemap), GFP_KERNEL);
1349		if (!dev_dax->pgmap) {
1350			rc = -ENOMEM;
1351			goto err_pgmap;
1352		}
1353	}
1354
1355	/*
1356	 * No dax_operations since there is no access to this device outside of
1357	 * mmap of the resulting character device.
1358	 */
1359	dax_dev = alloc_dax(dev_dax, NULL);
1360	if (IS_ERR(dax_dev)) {
1361		rc = PTR_ERR(dax_dev);
1362		goto err_alloc_dax;
1363	}
1364	set_dax_synchronous(dax_dev);
1365	set_dax_nocache(dax_dev);
1366	set_dax_nomc(dax_dev);
1367
1368	/* a device_dax instance is dead while the driver is not attached */
1369	kill_dax(dax_dev);
1370
1371	dev_dax->dax_dev = dax_dev;
1372	dev_dax->target_node = dax_region->target_node;
1373	dev_dax->align = dax_region->align;
1374	ida_init(&dev_dax->ida);
1375	kref_get(&dax_region->kref);
 
1376
1377	inode = dax_inode(dax_dev);
1378	dev->devt = inode->i_rdev;
1379	dev->bus = &dax_bus_type;
1380	dev->parent = parent;
1381	dev->type = &dev_dax_type;
1382
1383	rc = device_add(dev);
1384	if (rc) {
1385		kill_dev_dax(dev_dax);
1386		put_device(dev);
1387		return ERR_PTR(rc);
1388	}
1389
1390	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
1391	if (rc)
1392		return ERR_PTR(rc);
1393
1394	/* register mapping device for the initial allocation range */
1395	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
1396		rc = devm_register_dax_mapping(dev_dax, 0);
1397		if (rc)
1398			return ERR_PTR(rc);
1399	}
1400
1401	return dev_dax;
1402
1403err_alloc_dax:
1404	kfree(dev_dax->pgmap);
1405err_pgmap:
1406	free_dev_dax_ranges(dev_dax);
1407err_range:
1408	free_dev_dax_id(dev_dax);
1409err_id:
1410	kfree(dev_dax);
1411
1412	return ERR_PTR(rc);
1413}
1414EXPORT_SYMBOL_GPL(devm_create_dev_dax);
1415
/*
 * Number of registered drivers with ->match_always set; updated under
 * dax_bus_lock and never allowed to exceed one.
 */
static int match_always_count;
 
 
 
 
 
 
 
 
 
 
1417
1418int __dax_driver_register(struct dax_device_driver *dax_drv,
1419		struct module *module, const char *mod_name)
1420{
1421	struct device_driver *drv = &dax_drv->drv;
1422	int rc = 0;
1423
1424	/*
1425	 * dax_bus_probe() calls dax_drv->probe() unconditionally.
1426	 * So better be safe than sorry and ensure it is provided.
1427	 */
1428	if (!dax_drv->probe)
1429		return -EINVAL;
1430
1431	INIT_LIST_HEAD(&dax_drv->ids);
1432	drv->owner = module;
1433	drv->name = mod_name;
1434	drv->mod_name = mod_name;
1435	drv->bus = &dax_bus_type;
1436
1437	/* there can only be one default driver */
1438	mutex_lock(&dax_bus_lock);
1439	match_always_count += dax_drv->match_always;
1440	if (match_always_count > 1) {
1441		match_always_count--;
1442		WARN_ON(1);
1443		rc = -EINVAL;
1444	}
1445	mutex_unlock(&dax_bus_lock);
1446	if (rc)
1447		return rc;
1448
1449	rc = driver_register(drv);
1450	if (rc && dax_drv->match_always) {
1451		mutex_lock(&dax_bus_lock);
1452		match_always_count -= dax_drv->match_always;
1453		mutex_unlock(&dax_bus_lock);
1454	}
1455
1456	return rc;
1457}
1458EXPORT_SYMBOL_GPL(__dax_driver_register);
1459
1460void dax_driver_unregister(struct dax_device_driver *dax_drv)
1461{
1462	struct device_driver *drv = &dax_drv->drv;
1463	struct dax_id *dax_id, *_id;
1464
1465	mutex_lock(&dax_bus_lock);
1466	match_always_count -= dax_drv->match_always;
1467	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
1468		list_del(&dax_id->list);
1469		kfree(dax_id);
1470	}
1471	mutex_unlock(&dax_bus_lock);
1472	driver_unregister(drv);
1473}
1474EXPORT_SYMBOL_GPL(dax_driver_unregister);
1475
1476int __init dax_bus_init(void)
1477{
1478	return bus_register(&dax_bus_type);
1479}
1480
1481void __exit dax_bus_exit(void)
1482{
1483	bus_unregister(&dax_bus_type);
1484}
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
   3#include <linux/memremap.h>
   4#include <linux/device.h>
   5#include <linux/mutex.h>
   6#include <linux/list.h>
   7#include <linux/slab.h>
   8#include <linux/dax.h>
   9#include <linux/io.h>
  10#include "dax-private.h"
  11#include "bus.h"
  12
  13static DEFINE_MUTEX(dax_bus_lock);
  14
  15/*
  16 * All changes to the dax region configuration occur with this lock held
  17 * for write.
  18 */
  19DECLARE_RWSEM(dax_region_rwsem);
  20
  21/*
  22 * All changes to the dax device configuration occur with this lock held
  23 * for write.
  24 */
  25DECLARE_RWSEM(dax_dev_rwsem);
  26
  27#define DAX_NAME_LEN 30
  28struct dax_id {
  29	struct list_head list;
  30	char dev_name[DAX_NAME_LEN];
  31};
  32
  33static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
  34{
  35	/*
  36	 * We only ever expect to handle device-dax instances, i.e. the
  37	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
  38	 */
  39	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
  40}
  41
  42#define to_dax_drv(__drv)	container_of_const(__drv, struct dax_device_driver, drv)
 
 
 
  43
  44static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv,
  45		const char *dev_name)
  46{
  47	struct dax_id *dax_id;
  48
  49	lockdep_assert_held(&dax_bus_lock);
  50
  51	list_for_each_entry(dax_id, &dax_drv->ids, list)
  52		if (sysfs_streq(dax_id->dev_name, dev_name))
  53			return dax_id;
  54	return NULL;
  55}
  56
  57static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev)
  58{
  59	int match;
  60
  61	mutex_lock(&dax_bus_lock);
  62	match = !!__dax_match_id(dax_drv, dev_name(dev));
  63	mutex_unlock(&dax_bus_lock);
  64
  65	return match;
  66}
  67
  68static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev)
  69{
  70	enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
  71	struct dev_dax *dev_dax = to_dev_dax(dev);
  72
  73	if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
  74		type = DAXDRV_KMEM_TYPE;
  75
  76	if (dax_drv->type == type)
  77		return 1;
  78
  79	/* default to device mode if dax_kmem is disabled */
  80	if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
  81	    !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
  82		return 1;
  83
  84	return 0;
  85}
  86
  87enum id_action {
  88	ID_REMOVE,
  89	ID_ADD,
  90};
  91
  92static ssize_t do_id_store(struct device_driver *drv, const char *buf,
  93		size_t count, enum id_action action)
  94{
  95	struct dax_device_driver *dax_drv = to_dax_drv(drv);
  96	unsigned int region_id, id;
  97	char devname[DAX_NAME_LEN];
  98	struct dax_id *dax_id;
  99	ssize_t rc = count;
 100	int fields;
 101
 102	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
 103	if (fields != 2)
 104		return -EINVAL;
 105	sprintf(devname, "dax%d.%d", region_id, id);
 106	if (!sysfs_streq(buf, devname))
 107		return -EINVAL;
 108
 109	mutex_lock(&dax_bus_lock);
 110	dax_id = __dax_match_id(dax_drv, buf);
 111	if (!dax_id) {
 112		if (action == ID_ADD) {
 113			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
 114			if (dax_id) {
 115				strscpy(dax_id->dev_name, buf, DAX_NAME_LEN);
 116				list_add(&dax_id->list, &dax_drv->ids);
 117			} else
 118				rc = -ENOMEM;
 119		}
 120	} else if (action == ID_REMOVE) {
 121		list_del(&dax_id->list);
 122		kfree(dax_id);
 123	}
 124	mutex_unlock(&dax_bus_lock);
 125
 126	if (rc < 0)
 127		return rc;
 128	if (action == ID_ADD)
 129		rc = driver_attach(drv);
 130	if (rc)
 131		return rc;
 132	return count;
 133}
 134
 135static ssize_t new_id_store(struct device_driver *drv, const char *buf,
 136		size_t count)
 137{
 138	return do_id_store(drv, buf, count, ID_ADD);
 139}
 140static DRIVER_ATTR_WO(new_id);
 141
 142static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
 143		size_t count)
 144{
 145	return do_id_store(drv, buf, count, ID_REMOVE);
 146}
 147static DRIVER_ATTR_WO(remove_id);
 148
 149static struct attribute *dax_drv_attrs[] = {
 150	&driver_attr_new_id.attr,
 151	&driver_attr_remove_id.attr,
 152	NULL,
 153};
 154ATTRIBUTE_GROUPS(dax_drv);
 155
 156static int dax_bus_match(struct device *dev, const struct device_driver *drv);
 157
 158/*
 159 * Static dax regions are regions created by an external subsystem
 160 * nvdimm where a single range is assigned. Its boundaries are by the external
 161 * subsystem and are usually limited to one physical memory range. For example,
 162 * for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a
 163 * single contiguous range)
 164 *
 165 * On dynamic dax regions, the assigned region can be partitioned by dax core
 166 * into multiple subdivisions. A subdivision is represented into one
 167 * /dev/daxN.M device composed by one or more potentially discontiguous ranges.
 168 *
 169 * When allocating a dax region, drivers must set whether it's static
 170 * (IORESOURCE_DAX_STATIC).  On static dax devices, the @pgmap is pre-assigned
 171 * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
 172 * devices it is NULL but afterwards allocated by dax core on device ->probe().
 173 * Care is needed to make sure that dynamic dax devices are torn down with a
 174 * cleared @pgmap field (see kill_dev_dax()).
 175 */
 176static bool is_static(struct dax_region *dax_region)
 177{
 178	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
 179}
 180
 181bool static_dev_dax(struct dev_dax *dev_dax)
 182{
 183	return is_static(dev_dax->region);
 184}
 185EXPORT_SYMBOL_GPL(static_dev_dax);
 186
 187static u64 dev_dax_size(struct dev_dax *dev_dax)
 188{
 189	u64 size = 0;
 190	int i;
 191
 192	lockdep_assert_held(&dax_dev_rwsem);
 193
 194	for (i = 0; i < dev_dax->nr_range; i++)
 195		size += range_len(&dev_dax->ranges[i].range);
 196
 197	return size;
 198}
 199
 200static int dax_bus_probe(struct device *dev)
 201{
 202	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
 203	struct dev_dax *dev_dax = to_dev_dax(dev);
 204	struct dax_region *dax_region = dev_dax->region;
 205	int rc;
 206	u64 size;
 207
 208	rc = down_read_interruptible(&dax_dev_rwsem);
 209	if (rc)
 210		return rc;
 211	size = dev_dax_size(dev_dax);
 212	up_read(&dax_dev_rwsem);
 213
 214	if (size == 0 || dev_dax->id < 0)
 215		return -ENXIO;
 216
 217	rc = dax_drv->probe(dev_dax);
 218
 219	if (rc || is_static(dax_region))
 220		return rc;
 221
 222	/*
 223	 * Track new seed creation only after successful probe of the
 224	 * previous seed.
 225	 */
 226	if (dax_region->seed == dev)
 227		dax_region->seed = NULL;
 228
 229	return 0;
 230}
 231
 232static void dax_bus_remove(struct device *dev)
 233{
 234	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
 235	struct dev_dax *dev_dax = to_dev_dax(dev);
 236
 237	if (dax_drv->remove)
 238		dax_drv->remove(dev_dax);
 239}
 240
 241static const struct bus_type dax_bus_type = {
 242	.name = "dax",
 243	.uevent = dax_bus_uevent,
 244	.match = dax_bus_match,
 245	.probe = dax_bus_probe,
 246	.remove = dax_bus_remove,
 247	.drv_groups = dax_drv_groups,
 248};
 249
 250static int dax_bus_match(struct device *dev, const struct device_driver *drv)
 251{
 252	const struct dax_device_driver *dax_drv = to_dax_drv(drv);
 253
 254	if (dax_match_id(dax_drv, dev))
 
 
 
 
 255		return 1;
 256	return dax_match_type(dax_drv, dev);
 
 257}
 258
 259/*
 260 * Rely on the fact that drvdata is set before the attributes are
 261 * registered, and that the attributes are unregistered before drvdata
 262 * is cleared to assume that drvdata is always valid.
 263 */
 264static ssize_t id_show(struct device *dev,
 265		struct device_attribute *attr, char *buf)
 266{
 267	struct dax_region *dax_region = dev_get_drvdata(dev);
 268
 269	return sysfs_emit(buf, "%d\n", dax_region->id);
 270}
 271static DEVICE_ATTR_RO(id);
 272
 273static ssize_t region_size_show(struct device *dev,
 274		struct device_attribute *attr, char *buf)
 275{
 276	struct dax_region *dax_region = dev_get_drvdata(dev);
 277
 278	return sysfs_emit(buf, "%llu\n",
 279			  (unsigned long long)resource_size(&dax_region->res));
 280}
 281static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
 282		region_size_show, NULL);
 283
 284static ssize_t region_align_show(struct device *dev,
 285		struct device_attribute *attr, char *buf)
 286{
 287	struct dax_region *dax_region = dev_get_drvdata(dev);
 288
 289	return sysfs_emit(buf, "%u\n", dax_region->align);
 290}
 291static struct device_attribute dev_attr_region_align =
 292		__ATTR(align, 0400, region_align_show, NULL);
 293
 294#define for_each_dax_region_resource(dax_region, res) \
 295	for (res = (dax_region)->res.child; res; res = res->sibling)
 296
 297static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
 298{
 299	resource_size_t size = resource_size(&dax_region->res);
 300	struct resource *res;
 301
 302	lockdep_assert_held(&dax_region_rwsem);
 303
 304	for_each_dax_region_resource(dax_region, res)
 305		size -= resource_size(res);
 306	return size;
 307}
 308
 309static ssize_t available_size_show(struct device *dev,
 310		struct device_attribute *attr, char *buf)
 311{
 312	struct dax_region *dax_region = dev_get_drvdata(dev);
 313	unsigned long long size;
 314	int rc;
 315
 316	rc = down_read_interruptible(&dax_region_rwsem);
 317	if (rc)
 318		return rc;
 319	size = dax_region_avail_size(dax_region);
 320	up_read(&dax_region_rwsem);
 321
 322	return sysfs_emit(buf, "%llu\n", size);
 323}
 324static DEVICE_ATTR_RO(available_size);
 325
 326static ssize_t seed_show(struct device *dev,
 327		struct device_attribute *attr, char *buf)
 328{
 329	struct dax_region *dax_region = dev_get_drvdata(dev);
 330	struct device *seed;
 331	ssize_t rc;
 332
 333	if (is_static(dax_region))
 334		return -EINVAL;
 335
 336	rc = down_read_interruptible(&dax_region_rwsem);
 337	if (rc)
 338		return rc;
 339	seed = dax_region->seed;
 340	rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
 341	up_read(&dax_region_rwsem);
 342
 343	return rc;
 344}
 345static DEVICE_ATTR_RO(seed);
 346
 347static ssize_t create_show(struct device *dev,
 348		struct device_attribute *attr, char *buf)
 349{
 350	struct dax_region *dax_region = dev_get_drvdata(dev);
 351	struct device *youngest;
 352	ssize_t rc;
 353
 354	if (is_static(dax_region))
 355		return -EINVAL;
 356
 357	rc = down_read_interruptible(&dax_region_rwsem);
 358	if (rc)
 359		return rc;
 360	youngest = dax_region->youngest;
 361	rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
 362	up_read(&dax_region_rwsem);
 363
 364	return rc;
 365}
 366
 367static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);
 368
 369static ssize_t create_store(struct device *dev, struct device_attribute *attr,
 370		const char *buf, size_t len)
 371{
 372	struct dax_region *dax_region = dev_get_drvdata(dev);
 373	unsigned long long avail;
 374	ssize_t rc;
 375	int val;
 376
 377	if (is_static(dax_region))
 378		return -EINVAL;
 379
 380	rc = kstrtoint(buf, 0, &val);
 381	if (rc)
 382		return rc;
 383	if (val != 1)
 384		return -EINVAL;
 385
 386	rc = down_write_killable(&dax_region_rwsem);
 387	if (rc)
 388		return rc;
 389	avail = dax_region_avail_size(dax_region);
 390	if (avail == 0)
 391		rc = -ENOSPC;
 392	else {
 393		struct dev_dax_data data = {
 394			.dax_region = dax_region,
 395			.size = 0,
 396			.id = -1,
 397			.memmap_on_memory = false,
 398		};
 399		struct dev_dax *dev_dax = __devm_create_dev_dax(&data);
 400
 401		if (IS_ERR(dev_dax))
 402			rc = PTR_ERR(dev_dax);
 403		else {
 404			/*
 405			 * In support of crafting multiple new devices
 406			 * simultaneously multiple seeds can be created,
 407			 * but only the first one that has not been
 408			 * successfully bound is tracked as the region
 409			 * seed.
 410			 */
 411			if (!dax_region->seed)
 412				dax_region->seed = &dev_dax->dev;
 413			dax_region->youngest = &dev_dax->dev;
 414			rc = len;
 415		}
 416	}
 417	up_write(&dax_region_rwsem);
 418
 419	return rc;
 420}
 421static DEVICE_ATTR_RW(create);
 422
 423void kill_dev_dax(struct dev_dax *dev_dax)
 424{
 425	struct dax_device *dax_dev = dev_dax->dax_dev;
 426	struct inode *inode = dax_inode(dax_dev);
 427
 428	kill_dax(dax_dev);
 429	unmap_mapping_range(inode->i_mapping, 0, 0, 1);
 430
 431	/*
 432	 * Dynamic dax region have the pgmap allocated via dev_kzalloc()
 433	 * and thus freed by devm. Clear the pgmap to not have stale pgmap
 434	 * ranges on probe() from previous reconfigurations of region devices.
 435	 */
 436	if (!static_dev_dax(dev_dax))
 437		dev_dax->pgmap = NULL;
 438}
 439EXPORT_SYMBOL_GPL(kill_dev_dax);
 440
 441static void trim_dev_dax_range(struct dev_dax *dev_dax)
 442{
 443	int i = dev_dax->nr_range - 1;
 444	struct range *range = &dev_dax->ranges[i].range;
 445	struct dax_region *dax_region = dev_dax->region;
 446
 447	lockdep_assert_held_write(&dax_region_rwsem);
 448	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
 449		(unsigned long long)range->start,
 450		(unsigned long long)range->end);
 451
 452	__release_region(&dax_region->res, range->start, range_len(range));
 453	if (--dev_dax->nr_range == 0) {
 454		kfree(dev_dax->ranges);
 455		dev_dax->ranges = NULL;
 456	}
 457}
 458
 459static void free_dev_dax_ranges(struct dev_dax *dev_dax)
 460{
 461	while (dev_dax->nr_range)
 462		trim_dev_dax_range(dev_dax);
 463}
 464
 465static void unregister_dev_dax(void *dev)
 466{
 467	struct dev_dax *dev_dax = to_dev_dax(dev);
 468
 469	dev_dbg(dev, "%s\n", __func__);
 470
 471	down_write(&dax_region_rwsem);
 472	kill_dev_dax(dev_dax);
 
 473	device_del(dev);
 474	free_dev_dax_ranges(dev_dax);
 475	put_device(dev);
 476	up_write(&dax_region_rwsem);
 477}
 478
 479static void dax_region_free(struct kref *kref)
 480{
 481	struct dax_region *dax_region;
 482
 483	dax_region = container_of(kref, struct dax_region, kref);
 484	kfree(dax_region);
 485}
 486
 487static void dax_region_put(struct dax_region *dax_region)
 488{
 489	kref_put(&dax_region->kref, dax_region_free);
 490}
 491
 492/* a return value >= 0 indicates this invocation invalidated the id */
 493static int __free_dev_dax_id(struct dev_dax *dev_dax)
 494{
 495	struct dax_region *dax_region;
 
 496	int rc = dev_dax->id;
 497
 498	lockdep_assert_held_write(&dax_dev_rwsem);
 499
 500	if (!dev_dax->dyn_id || dev_dax->id < 0)
 501		return -1;
 502	dax_region = dev_dax->region;
 503	ida_free(&dax_region->ida, dev_dax->id);
 504	dax_region_put(dax_region);
 505	dev_dax->id = -1;
 506	return rc;
 507}
 508
 509static int free_dev_dax_id(struct dev_dax *dev_dax)
 510{
 
 511	int rc;
 512
 513	rc = down_write_killable(&dax_dev_rwsem);
 514	if (rc)
 515		return rc;
 516	rc = __free_dev_dax_id(dev_dax);
 517	up_write(&dax_dev_rwsem);
 518	return rc;
 519}
 520
 521static int alloc_dev_dax_id(struct dev_dax *dev_dax)
 522{
 523	struct dax_region *dax_region = dev_dax->region;
 524	int id;
 525
 526	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
 527	if (id < 0)
 528		return id;
 529	kref_get(&dax_region->kref);
 530	dev_dax->dyn_id = true;
 531	dev_dax->id = id;
 532	return id;
 533}
 534
 535static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
 536		const char *buf, size_t len)
 537{
 538	struct dax_region *dax_region = dev_get_drvdata(dev);
 539	struct dev_dax *dev_dax;
 540	struct device *victim;
 541	bool do_del = false;
 542	int rc;
 543
 544	if (is_static(dax_region))
 545		return -EINVAL;
 546
 547	victim = device_find_child_by_name(dax_region->dev, buf);
 548	if (!victim)
 549		return -ENXIO;
 550
 551	device_lock(dev);
 552	device_lock(victim);
 553	dev_dax = to_dev_dax(victim);
 554	down_write(&dax_dev_rwsem);
 555	if (victim->driver || dev_dax_size(dev_dax))
 556		rc = -EBUSY;
 557	else {
 558		/*
 559		 * Invalidate the device so it does not become active
 560		 * again, but always preserve device-id-0 so that
 561		 * /sys/bus/dax/ is guaranteed to be populated while any
 562		 * dax_region is registered.
 563		 */
 564		if (dev_dax->id > 0) {
 565			do_del = __free_dev_dax_id(dev_dax) >= 0;
 566			rc = len;
 567			if (dax_region->seed == victim)
 568				dax_region->seed = NULL;
 569			if (dax_region->youngest == victim)
 570				dax_region->youngest = NULL;
 571		} else
 572			rc = -EBUSY;
 573	}
 574	up_write(&dax_dev_rwsem);
 575	device_unlock(victim);
 576
 577	/* won the race to invalidate the device, clean it up */
 578	if (do_del)
 579		devm_release_action(dev, unregister_dev_dax, victim);
 580	device_unlock(dev);
 581	put_device(victim);
 582
 583	return rc;
 584}
 585static DEVICE_ATTR_WO(delete);
 586
 587static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
 588		int n)
 589{
 590	struct device *dev = container_of(kobj, struct device, kobj);
 591	struct dax_region *dax_region = dev_get_drvdata(dev);
 592
 593	if (is_static(dax_region))
 594		if (a == &dev_attr_available_size.attr
 595				|| a == &dev_attr_create.attr
 596				|| a == &dev_attr_seed.attr
 597				|| a == &dev_attr_delete.attr)
 598			return 0;
 599	return a->mode;
 600}
 601
 602static struct attribute *dax_region_attributes[] = {
 603	&dev_attr_available_size.attr,
 604	&dev_attr_region_size.attr,
 605	&dev_attr_region_align.attr,
 606	&dev_attr_create.attr,
 607	&dev_attr_seed.attr,
 608	&dev_attr_delete.attr,
 609	&dev_attr_id.attr,
 610	NULL,
 611};
 612
 613static const struct attribute_group dax_region_attribute_group = {
 614	.name = "dax_region",
 615	.attrs = dax_region_attributes,
 616	.is_visible = dax_region_visible,
 617};
 618
 619static const struct attribute_group *dax_region_attribute_groups[] = {
 620	&dax_region_attribute_group,
 621	NULL,
 622};
 623
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 624static void dax_region_unregister(void *region)
 625{
 626	struct dax_region *dax_region = region;
 627
 628	sysfs_remove_groups(&dax_region->dev->kobj,
 629			dax_region_attribute_groups);
 630	dax_region_put(dax_region);
 631}
 632
 633struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 634		struct range *range, int target_node, unsigned int align,
 635		unsigned long flags)
 636{
 637	struct dax_region *dax_region;
 638
 639	/*
 640	 * The DAX core assumes that it can store its private data in
 641	 * parent->driver_data. This WARN is a reminder / safeguard for
 642	 * developers of device-dax drivers.
 643	 */
 644	if (dev_get_drvdata(parent)) {
 645		dev_WARN(parent, "dax core failed to setup private data\n");
 646		return NULL;
 647	}
 648
 649	if (!IS_ALIGNED(range->start, align)
 650			|| !IS_ALIGNED(range_len(range), align))
 651		return NULL;
 652
 653	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
 654	if (!dax_region)
 655		return NULL;
 656
 657	dev_set_drvdata(parent, dax_region);
 658	kref_init(&dax_region->kref);
 659	dax_region->id = region_id;
 660	dax_region->align = align;
 661	dax_region->dev = parent;
 662	dax_region->target_node = target_node;
 663	ida_init(&dax_region->ida);
 664	dax_region->res = (struct resource) {
 665		.start = range->start,
 666		.end = range->end,
 667		.flags = IORESOURCE_MEM | flags,
 668	};
 669
 670	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
 671		kfree(dax_region);
 672		return NULL;
 673	}
 674
 
 675	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
 676		return NULL;
 677	return dax_region;
 678}
 679EXPORT_SYMBOL_GPL(alloc_dax_region);
 680
 681static void dax_mapping_release(struct device *dev)
 682{
 683	struct dax_mapping *mapping = to_dax_mapping(dev);
 684	struct device *parent = dev->parent;
 685	struct dev_dax *dev_dax = to_dev_dax(parent);
 686
 687	ida_free(&dev_dax->ida, mapping->id);
 688	kfree(mapping);
 689	put_device(parent);
 690}
 691
 692static void unregister_dax_mapping(void *data)
 693{
 694	struct device *dev = data;
 695	struct dax_mapping *mapping = to_dax_mapping(dev);
 696	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
 
 697
 698	dev_dbg(dev, "%s\n", __func__);
 699
 
 
 700	dev_dax->ranges[mapping->range_id].mapping = NULL;
 701	mapping->range_id = -1;
 702
 703	device_unregister(dev);
 
 704}
 705
 706static struct dev_dax_range *get_dax_range(struct device *dev)
 707{
 708	struct dax_mapping *mapping = to_dax_mapping(dev);
 709	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
 710	int rc;
 711
 712	rc = down_write_killable(&dax_region_rwsem);
 713	if (rc)
 714		return NULL;
 715	if (mapping->range_id < 0) {
 716		up_write(&dax_region_rwsem);
 717		return NULL;
 718	}
 719
 720	return &dev_dax->ranges[mapping->range_id];
 721}
 722
 723static void put_dax_range(void)
 724{
 725	up_write(&dax_region_rwsem);
 
 
 
 
 726}
 727
 728static ssize_t start_show(struct device *dev,
 729		struct device_attribute *attr, char *buf)
 730{
 731	struct dev_dax_range *dax_range;
 732	ssize_t rc;
 733
 734	dax_range = get_dax_range(dev);
 735	if (!dax_range)
 736		return -ENXIO;
 737	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
 738	put_dax_range();
 739
 740	return rc;
 741}
 742static DEVICE_ATTR(start, 0400, start_show, NULL);
 743
 744static ssize_t end_show(struct device *dev,
 745		struct device_attribute *attr, char *buf)
 746{
 747	struct dev_dax_range *dax_range;
 748	ssize_t rc;
 749
 750	dax_range = get_dax_range(dev);
 751	if (!dax_range)
 752		return -ENXIO;
 753	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
 754	put_dax_range();
 755
 756	return rc;
 757}
 758static DEVICE_ATTR(end, 0400, end_show, NULL);
 759
 760static ssize_t pgoff_show(struct device *dev,
 761		struct device_attribute *attr, char *buf)
 762{
 763	struct dev_dax_range *dax_range;
 764	ssize_t rc;
 765
 766	dax_range = get_dax_range(dev);
 767	if (!dax_range)
 768		return -ENXIO;
 769	rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
 770	put_dax_range();
 771
 772	return rc;
 773}
 774static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);
 775
 776static struct attribute *dax_mapping_attributes[] = {
 777	&dev_attr_start.attr,
 778	&dev_attr_end.attr,
 779	&dev_attr_page_offset.attr,
 780	NULL,
 781};
 782
 783static const struct attribute_group dax_mapping_attribute_group = {
 784	.attrs = dax_mapping_attributes,
 785};
 786
 787static const struct attribute_group *dax_mapping_attribute_groups[] = {
 788	&dax_mapping_attribute_group,
 789	NULL,
 790};
 791
 792static const struct device_type dax_mapping_type = {
 793	.release = dax_mapping_release,
 794	.groups = dax_mapping_attribute_groups,
 795};
 796
 797static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
 798{
 799	struct dax_region *dax_region = dev_dax->region;
 800	struct dax_mapping *mapping;
 801	struct device *dev;
 802	int rc;
 803
 804	lockdep_assert_held_write(&dax_region_rwsem);
 805
 806	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
 807				"region disabled\n"))
 808		return -ENXIO;
 809
 810	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
 811	if (!mapping)
 812		return -ENOMEM;
 813	mapping->range_id = range_id;
 814	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
 815	if (mapping->id < 0) {
 816		kfree(mapping);
 817		return -ENOMEM;
 818	}
 819	dev_dax->ranges[range_id].mapping = mapping;
 820	dev = &mapping->dev;
 821	device_initialize(dev);
 822	dev->parent = &dev_dax->dev;
 823	get_device(dev->parent);
 824	dev->type = &dax_mapping_type;
 825	dev_set_name(dev, "mapping%d", mapping->id);
 826	rc = device_add(dev);
 827	if (rc) {
 828		put_device(dev);
 829		return rc;
 830	}
 831
 832	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
 833			dev);
 834	if (rc)
 835		return rc;
 836	return 0;
 837}
 838
 839static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
 840		resource_size_t size)
 841{
 842	struct dax_region *dax_region = dev_dax->region;
 843	struct resource *res = &dax_region->res;
 844	struct device *dev = &dev_dax->dev;
 845	struct dev_dax_range *ranges;
 846	unsigned long pgoff = 0;
 847	struct resource *alloc;
 848	int i, rc;
 849
 850	lockdep_assert_held_write(&dax_region_rwsem);
 851
 852	/* handle the seed alloc special case */
 853	if (!size) {
 854		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
 855					"0-size allocation must be first\n"))
 856			return -EBUSY;
 857		/* nr_range == 0 is elsewhere special cased as 0-size device */
 858		return 0;
 859	}
 860
 861	alloc = __request_region(res, start, size, dev_name(dev), 0);
 862	if (!alloc)
 863		return -ENOMEM;
 864
 865	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
 866			* (dev_dax->nr_range + 1), GFP_KERNEL);
 867	if (!ranges) {
 868		__release_region(res, alloc->start, resource_size(alloc));
 869		return -ENOMEM;
 870	}
 871
 872	for (i = 0; i < dev_dax->nr_range; i++)
 873		pgoff += PHYS_PFN(range_len(&ranges[i].range));
 874	dev_dax->ranges = ranges;
 875	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
 876		.pgoff = pgoff,
 877		.range = {
 878			.start = alloc->start,
 879			.end = alloc->end,
 880		},
 881	};
 882
 883	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
 884			&alloc->start, &alloc->end);
 885	/*
 886	 * A dev_dax instance must be registered before mapping device
 887	 * children can be added. Defer to devm_create_dev_dax() to add
 888	 * the initial mapping device.
 889	 */
 890	if (!device_is_registered(&dev_dax->dev))
 891		return 0;
 892
 893	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
 894	if (rc)
 895		trim_dev_dax_range(dev_dax);
 896
 897	return rc;
 898}
 899
 900static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
 901{
 902	int last_range = dev_dax->nr_range - 1;
 903	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
 
 904	bool is_shrink = resource_size(res) > size;
 905	struct range *range = &dax_range->range;
 906	struct device *dev = &dev_dax->dev;
 907	int rc;
 908
 909	lockdep_assert_held_write(&dax_region_rwsem);
 910
 911	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
 912		return -EINVAL;
 913
 914	rc = adjust_resource(res, range->start, size);
 915	if (rc)
 916		return rc;
 917
 918	*range = (struct range) {
 919		.start = range->start,
 920		.end = range->start + size - 1,
 921	};
 922
 923	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
 924			last_range, (unsigned long long) range->start,
 925			(unsigned long long) range->end);
 926
 927	return 0;
 928}
 929
 930static ssize_t size_show(struct device *dev,
 931		struct device_attribute *attr, char *buf)
 932{
 933	struct dev_dax *dev_dax = to_dev_dax(dev);
 934	unsigned long long size;
 935	int rc;
 936
 937	rc = down_read_interruptible(&dax_dev_rwsem);
 938	if (rc)
 939		return rc;
 940	size = dev_dax_size(dev_dax);
 941	up_read(&dax_dev_rwsem);
 942
 943	return sysfs_emit(buf, "%llu\n", size);
 944}
 945
 946static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
 947{
 948	/*
 949	 * The minimum mapping granularity for a device instance is a
 950	 * single subsection, unless the arch says otherwise.
 951	 */
 952	return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
 953}
 954
 955static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
 956{
 957	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
 958	struct dax_region *dax_region = dev_dax->region;
 959	struct device *dev = &dev_dax->dev;
 960	int i;
 961
 962	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
 963		struct range *range = &dev_dax->ranges[i].range;
 964		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
 965		struct resource *adjust = NULL, *res;
 966		resource_size_t shrink;
 967
 968		shrink = min_t(u64, to_shrink, range_len(range));
 969		if (shrink >= range_len(range)) {
 970			devm_release_action(dax_region->dev,
 971					unregister_dax_mapping, &mapping->dev);
 972			trim_dev_dax_range(dev_dax);
 973			to_shrink -= shrink;
 974			if (!to_shrink)
 975				break;
 976			continue;
 977		}
 978
 979		for_each_dax_region_resource(dax_region, res)
 980			if (strcmp(res->name, dev_name(dev)) == 0
 981					&& res->start == range->start) {
 982				adjust = res;
 983				break;
 984			}
 985
 986		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
 987					"failed to find matching resource\n"))
 988			return -ENXIO;
 989		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
 990				- shrink);
 991	}
 992	return 0;
 993}
 994
 995/*
 996 * Only allow adjustments that preserve the relative pgoff of existing
 997 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 998 */
 999static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
1000{
1001	struct dev_dax_range *last;
1002	int i;
1003
1004	if (dev_dax->nr_range == 0)
1005		return false;
1006	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
1007		return false;
1008	last = &dev_dax->ranges[dev_dax->nr_range - 1];
1009	if (last->range.start != res->start || last->range.end != res->end)
1010		return false;
1011	for (i = 0; i < dev_dax->nr_range - 1; i++) {
1012		struct dev_dax_range *dax_range = &dev_dax->ranges[i];
1013
1014		if (dax_range->pgoff > last->pgoff)
1015			return false;
1016	}
1017
1018	return true;
1019}
1020
1021static ssize_t dev_dax_resize(struct dax_region *dax_region,
1022		struct dev_dax *dev_dax, resource_size_t size)
1023{
1024	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
1025	resource_size_t dev_size = dev_dax_size(dev_dax);
1026	struct resource *region_res = &dax_region->res;
1027	struct device *dev = &dev_dax->dev;
1028	struct resource *res, *first;
1029	resource_size_t alloc = 0;
1030	int rc;
1031
1032	if (dev->driver)
1033		return -EBUSY;
1034	if (size == dev_size)
1035		return 0;
1036	if (size > dev_size && size - dev_size > avail)
1037		return -ENOSPC;
1038	if (size < dev_size)
1039		return dev_dax_shrink(dev_dax, size);
1040
1041	to_alloc = size - dev_size;
1042	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
1043			"resize of %pa misaligned\n", &to_alloc))
1044		return -ENXIO;
1045
1046	/*
1047	 * Expand the device into the unused portion of the region. This
1048	 * may involve adjusting the end of an existing resource, or
1049	 * allocating a new resource.
1050	 */
1051retry:
1052	first = region_res->child;
1053	if (!first)
1054		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);
1055
1056	rc = -ENOSPC;
1057	for (res = first; res; res = res->sibling) {
1058		struct resource *next = res->sibling;
1059
1060		/* space at the beginning of the region */
1061		if (res == first && res->start > dax_region->res.start) {
1062			alloc = min(res->start - dax_region->res.start, to_alloc);
1063			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
1064			break;
1065		}
1066
1067		alloc = 0;
1068		/* space between allocations */
1069		if (next && next->start > res->end + 1)
1070			alloc = min(next->start - (res->end + 1), to_alloc);
1071
1072		/* space at the end of the region */
1073		if (!alloc && !next && res->end < region_res->end)
1074			alloc = min(region_res->end - res->end, to_alloc);
1075
1076		if (!alloc)
1077			continue;
1078
1079		if (adjust_ok(dev_dax, res)) {
1080			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
1081			break;
1082		}
1083		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
1084		break;
1085	}
1086	if (rc)
1087		return rc;
1088	to_alloc -= alloc;
1089	if (to_alloc)
1090		goto retry;
1091	return 0;
1092}
1093
1094static ssize_t size_store(struct device *dev, struct device_attribute *attr,
1095		const char *buf, size_t len)
1096{
1097	ssize_t rc;
1098	unsigned long long val;
1099	struct dev_dax *dev_dax = to_dev_dax(dev);
1100	struct dax_region *dax_region = dev_dax->region;
1101
1102	rc = kstrtoull(buf, 0, &val);
1103	if (rc)
1104		return rc;
1105
1106	if (!alloc_is_aligned(dev_dax, val)) {
1107		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
1108		return -EINVAL;
1109	}
1110
1111	rc = down_write_killable(&dax_region_rwsem);
1112	if (rc)
1113		return rc;
1114	if (!dax_region->dev->driver) {
1115		rc = -ENXIO;
1116		goto err_region;
1117	}
1118	rc = down_write_killable(&dax_dev_rwsem);
1119	if (rc)
1120		goto err_dev;
1121
1122	rc = dev_dax_resize(dax_region, dev_dax, val);
 
 
1123
1124err_dev:
1125	up_write(&dax_dev_rwsem);
1126err_region:
1127	up_write(&dax_region_rwsem);
1128
1129	if (rc == 0)
1130		return len;
1131	return rc;
1132}
1133static DEVICE_ATTR_RW(size);
1134
1135static ssize_t range_parse(const char *opt, size_t len, struct range *range)
1136{
1137	unsigned long long addr = 0;
1138	char *start, *end, *str;
1139	ssize_t rc = -EINVAL;
1140
1141	str = kstrdup(opt, GFP_KERNEL);
1142	if (!str)
1143		return rc;
1144
1145	end = str;
1146	start = strsep(&end, "-");
1147	if (!start || !end)
1148		goto err;
1149
1150	rc = kstrtoull(start, 16, &addr);
1151	if (rc)
1152		goto err;
1153	range->start = addr;
1154
1155	rc = kstrtoull(end, 16, &addr);
1156	if (rc)
1157		goto err;
1158	range->end = addr;
1159
1160err:
1161	kfree(str);
1162	return rc;
1163}
1164
1165static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
1166		const char *buf, size_t len)
1167{
1168	struct dev_dax *dev_dax = to_dev_dax(dev);
1169	struct dax_region *dax_region = dev_dax->region;
1170	size_t to_alloc;
1171	struct range r;
1172	ssize_t rc;
1173
1174	rc = range_parse(buf, len, &r);
1175	if (rc)
1176		return rc;
1177
1178	rc = down_write_killable(&dax_region_rwsem);
1179	if (rc)
1180		return rc;
1181	if (!dax_region->dev->driver) {
1182		up_write(&dax_region_rwsem);
1183		return rc;
1184	}
1185	rc = down_write_killable(&dax_dev_rwsem);
1186	if (rc) {
1187		up_write(&dax_region_rwsem);
1188		return rc;
1189	}
 
1190
1191	to_alloc = range_len(&r);
1192	if (alloc_is_aligned(dev_dax, to_alloc))
1193		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
1194	up_write(&dax_dev_rwsem);
1195	up_write(&dax_region_rwsem);
1196
1197	return rc == 0 ? len : rc;
1198}
1199static DEVICE_ATTR_WO(mapping);
1200
1201static ssize_t align_show(struct device *dev,
1202		struct device_attribute *attr, char *buf)
1203{
1204	struct dev_dax *dev_dax = to_dev_dax(dev);
1205
1206	return sysfs_emit(buf, "%d\n", dev_dax->align);
1207}
1208
1209static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
1210{
1211	struct device *dev = &dev_dax->dev;
1212	int i;
1213
1214	for (i = 0; i < dev_dax->nr_range; i++) {
1215		size_t len = range_len(&dev_dax->ranges[i].range);
1216
1217		if (!alloc_is_aligned(dev_dax, len)) {
1218			dev_dbg(dev, "%s: align %u invalid for range %d\n",
1219				__func__, dev_dax->align, i);
1220			return -EINVAL;
1221		}
1222	}
1223
1224	return 0;
1225}
1226
1227static ssize_t align_store(struct device *dev, struct device_attribute *attr,
1228		const char *buf, size_t len)
1229{
1230	struct dev_dax *dev_dax = to_dev_dax(dev);
1231	struct dax_region *dax_region = dev_dax->region;
1232	unsigned long val, align_save;
1233	ssize_t rc;
1234
1235	rc = kstrtoul(buf, 0, &val);
1236	if (rc)
1237		return -ENXIO;
1238
1239	if (!dax_align_valid(val))
1240		return -EINVAL;
1241
1242	rc = down_write_killable(&dax_region_rwsem);
1243	if (rc)
1244		return rc;
1245	if (!dax_region->dev->driver) {
1246		up_write(&dax_region_rwsem);
1247		return -ENXIO;
1248	}
1249
1250	rc = down_write_killable(&dax_dev_rwsem);
1251	if (rc) {
1252		up_write(&dax_region_rwsem);
1253		return rc;
1254	}
1255	if (dev->driver) {
1256		rc = -EBUSY;
1257		goto out_unlock;
1258	}
1259
1260	align_save = dev_dax->align;
1261	dev_dax->align = val;
1262	rc = dev_dax_validate_align(dev_dax);
1263	if (rc)
1264		dev_dax->align = align_save;
1265out_unlock:
1266	up_write(&dax_dev_rwsem);
1267	up_write(&dax_region_rwsem);
1268	return rc == 0 ? len : rc;
1269}
1270static DEVICE_ATTR_RW(align);
1271
1272static int dev_dax_target_node(struct dev_dax *dev_dax)
1273{
1274	struct dax_region *dax_region = dev_dax->region;
1275
1276	return dax_region->target_node;
1277}
1278
1279static ssize_t target_node_show(struct device *dev,
1280		struct device_attribute *attr, char *buf)
1281{
1282	struct dev_dax *dev_dax = to_dev_dax(dev);
1283
1284	return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
1285}
1286static DEVICE_ATTR_RO(target_node);
1287
1288static ssize_t resource_show(struct device *dev,
1289		struct device_attribute *attr, char *buf)
1290{
1291	struct dev_dax *dev_dax = to_dev_dax(dev);
1292	struct dax_region *dax_region = dev_dax->region;
1293	unsigned long long start;
1294
1295	if (dev_dax->nr_range < 1)
1296		start = dax_region->res.start;
1297	else
1298		start = dev_dax->ranges[0].range.start;
1299
1300	return sysfs_emit(buf, "%#llx\n", start);
1301}
1302static DEVICE_ATTR(resource, 0400, resource_show, NULL);
1303
1304static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1305		char *buf)
1306{
1307	/*
1308	 * We only ever expect to handle device-dax instances, i.e. the
1309	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
1310	 */
1311	return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
1312}
1313static DEVICE_ATTR_RO(modalias);
1314
1315static ssize_t numa_node_show(struct device *dev,
1316		struct device_attribute *attr, char *buf)
1317{
1318	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
1319}
1320static DEVICE_ATTR_RO(numa_node);
1321
1322static ssize_t memmap_on_memory_show(struct device *dev,
1323				     struct device_attribute *attr, char *buf)
1324{
1325	struct dev_dax *dev_dax = to_dev_dax(dev);
1326
1327	return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
1328}
1329
1330static ssize_t memmap_on_memory_store(struct device *dev,
1331				      struct device_attribute *attr,
1332				      const char *buf, size_t len)
1333{
1334	struct dev_dax *dev_dax = to_dev_dax(dev);
1335	bool val;
1336	int rc;
1337
1338	rc = kstrtobool(buf, &val);
1339	if (rc)
1340		return rc;
1341
1342	if (val == true && !mhp_supports_memmap_on_memory()) {
1343		dev_dbg(dev, "memmap_on_memory is not available\n");
1344		return -EOPNOTSUPP;
1345	}
1346
1347	rc = down_write_killable(&dax_dev_rwsem);
1348	if (rc)
1349		return rc;
1350
1351	if (dev_dax->memmap_on_memory != val && dev->driver &&
1352	    to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
1353		up_write(&dax_dev_rwsem);
1354		return -EBUSY;
1355	}
1356
1357	dev_dax->memmap_on_memory = val;
1358	up_write(&dax_dev_rwsem);
1359
1360	return len;
1361}
1362static DEVICE_ATTR_RW(memmap_on_memory);
1363
1364static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
1365{
1366	struct device *dev = container_of(kobj, struct device, kobj);
1367	struct dev_dax *dev_dax = to_dev_dax(dev);
1368	struct dax_region *dax_region = dev_dax->region;
1369
1370	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
1371		return 0;
1372	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
1373		return 0;
1374	if (a == &dev_attr_mapping.attr && is_static(dax_region))
1375		return 0;
1376	if ((a == &dev_attr_align.attr ||
1377	     a == &dev_attr_size.attr) && is_static(dax_region))
1378		return 0444;
1379	return a->mode;
1380}
1381
1382static struct attribute *dev_dax_attributes[] = {
1383	&dev_attr_modalias.attr,
1384	&dev_attr_size.attr,
1385	&dev_attr_mapping.attr,
1386	&dev_attr_target_node.attr,
1387	&dev_attr_align.attr,
1388	&dev_attr_resource.attr,
1389	&dev_attr_numa_node.attr,
1390	&dev_attr_memmap_on_memory.attr,
1391	NULL,
1392};
1393
1394static const struct attribute_group dev_dax_attribute_group = {
1395	.attrs = dev_dax_attributes,
1396	.is_visible = dev_dax_visible,
1397};
1398
1399static const struct attribute_group *dax_attribute_groups[] = {
1400	&dev_dax_attribute_group,
1401	NULL,
1402};
1403
1404static void dev_dax_release(struct device *dev)
1405{
1406	struct dev_dax *dev_dax = to_dev_dax(dev);
 
1407	struct dax_device *dax_dev = dev_dax->dax_dev;
1408
1409	put_dax(dax_dev);
1410	free_dev_dax_id(dev_dax);
 
1411	kfree(dev_dax->pgmap);
1412	kfree(dev_dax);
1413}
1414
1415static const struct device_type dev_dax_type = {
1416	.release = dev_dax_release,
1417	.groups = dax_attribute_groups,
1418};
1419
1420static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
1421{
1422	struct dax_region *dax_region = data->dax_region;
1423	struct device *parent = dax_region->dev;
1424	struct dax_device *dax_dev;
1425	struct dev_dax *dev_dax;
1426	struct inode *inode;
1427	struct device *dev;
1428	int rc;
1429
1430	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
1431	if (!dev_dax)
1432		return ERR_PTR(-ENOMEM);
1433
1434	dev_dax->region = dax_region;
1435	if (is_static(dax_region)) {
1436		if (dev_WARN_ONCE(parent, data->id < 0,
1437				"dynamic id specified to static region\n")) {
1438			rc = -EINVAL;
1439			goto err_id;
1440		}
1441
1442		dev_dax->id = data->id;
1443	} else {
1444		if (dev_WARN_ONCE(parent, data->id >= 0,
1445				"static id specified to dynamic region\n")) {
1446			rc = -EINVAL;
1447			goto err_id;
1448		}
1449
1450		rc = alloc_dev_dax_id(dev_dax);
1451		if (rc < 0)
1452			goto err_id;
 
1453	}
1454
 
1455	dev = &dev_dax->dev;
1456	device_initialize(dev);
1457	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
1458
1459	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
1460	if (rc)
1461		goto err_range;
1462
1463	if (data->pgmap) {
1464		dev_WARN_ONCE(parent, !is_static(dax_region),
1465			"custom dev_pagemap requires a static dax_region\n");
1466
1467		dev_dax->pgmap = kmemdup(data->pgmap,
1468				sizeof(struct dev_pagemap), GFP_KERNEL);
1469		if (!dev_dax->pgmap) {
1470			rc = -ENOMEM;
1471			goto err_pgmap;
1472		}
1473	}
1474
1475	/*
1476	 * No dax_operations since there is no access to this device outside of
1477	 * mmap of the resulting character device.
1478	 */
1479	dax_dev = alloc_dax(dev_dax, NULL);
1480	if (IS_ERR(dax_dev)) {
1481		rc = PTR_ERR(dax_dev);
1482		goto err_alloc_dax;
1483	}
1484	set_dax_synchronous(dax_dev);
1485	set_dax_nocache(dax_dev);
1486	set_dax_nomc(dax_dev);
1487
1488	/* a device_dax instance is dead while the driver is not attached */
1489	kill_dax(dax_dev);
1490
1491	dev_dax->dax_dev = dax_dev;
1492	dev_dax->target_node = dax_region->target_node;
1493	dev_dax->align = dax_region->align;
1494	ida_init(&dev_dax->ida);
1495
1496	dev_dax->memmap_on_memory = data->memmap_on_memory;
1497
1498	inode = dax_inode(dax_dev);
1499	dev->devt = inode->i_rdev;
1500	dev->bus = &dax_bus_type;
1501	dev->parent = parent;
1502	dev->type = &dev_dax_type;
1503
1504	rc = device_add(dev);
1505	if (rc) {
1506		kill_dev_dax(dev_dax);
1507		put_device(dev);
1508		return ERR_PTR(rc);
1509	}
1510
1511	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
1512	if (rc)
1513		return ERR_PTR(rc);
1514
1515	/* register mapping device for the initial allocation range */
1516	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
1517		rc = devm_register_dax_mapping(dev_dax, 0);
1518		if (rc)
1519			return ERR_PTR(rc);
1520	}
1521
1522	return dev_dax;
1523
1524err_alloc_dax:
1525	kfree(dev_dax->pgmap);
1526err_pgmap:
1527	free_dev_dax_ranges(dev_dax);
1528err_range:
1529	free_dev_dax_id(dev_dax);
1530err_id:
1531	kfree(dev_dax);
1532
1533	return ERR_PTR(rc);
1534}
 
1535
1536struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
1537{
1538	struct dev_dax *dev_dax;
1539
1540	down_write(&dax_region_rwsem);
1541	dev_dax = __devm_create_dev_dax(data);
1542	up_write(&dax_region_rwsem);
1543
1544	return dev_dax;
1545}
1546EXPORT_SYMBOL_GPL(devm_create_dev_dax);
1547
1548int __dax_driver_register(struct dax_device_driver *dax_drv,
1549		struct module *module, const char *mod_name)
1550{
1551	struct device_driver *drv = &dax_drv->drv;
 
1552
1553	/*
1554	 * dax_bus_probe() calls dax_drv->probe() unconditionally.
1555	 * So better be safe than sorry and ensure it is provided.
1556	 */
1557	if (!dax_drv->probe)
1558		return -EINVAL;
1559
1560	INIT_LIST_HEAD(&dax_drv->ids);
1561	drv->owner = module;
1562	drv->name = mod_name;
1563	drv->mod_name = mod_name;
1564	drv->bus = &dax_bus_type;
1565
1566	return driver_register(drv);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1567}
1568EXPORT_SYMBOL_GPL(__dax_driver_register);
1569
1570void dax_driver_unregister(struct dax_device_driver *dax_drv)
1571{
1572	struct device_driver *drv = &dax_drv->drv;
1573	struct dax_id *dax_id, *_id;
1574
1575	mutex_lock(&dax_bus_lock);
 
1576	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
1577		list_del(&dax_id->list);
1578		kfree(dax_id);
1579	}
1580	mutex_unlock(&dax_bus_lock);
1581	driver_unregister(drv);
1582}
1583EXPORT_SYMBOL_GPL(dax_driver_unregister);
1584
1585int __init dax_bus_init(void)
1586{
1587	return bus_register(&dax_bus_type);
1588}
1589
1590void __exit dax_bus_exit(void)
1591{
1592	bus_unregister(&dax_bus_type);
1593}