v4.17 (drivers/dax/device.c)
  1/*
  2 * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
  3 *
  4 * This program is free software; you can redistribute it and/or modify
  5 * it under the terms of version 2 of the GNU General Public License as
  6 * published by the Free Software Foundation.
  7 *
  8 * This program is distributed in the hope that it will be useful, but
  9 * WITHOUT ANY WARRANTY; without even the implied warranty of
 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 11 * General Public License for more details.
 12 */
 13#include <linux/pagemap.h>
 14#include <linux/module.h>
 15#include <linux/device.h>
 16#include <linux/pfn_t.h>
 17#include <linux/cdev.h>
 18#include <linux/slab.h>
 19#include <linux/dax.h>
 20#include <linux/fs.h>
 21#include <linux/mm.h>
 22#include <linux/mman.h>
 23#include "dax-private.h"
 24#include "dax.h"
 25
 26static struct class *dax_class;
 27
 28/*
 29 * Rely on the fact that drvdata is set before the attributes are
 30 * registered, and that the attributes are unregistered before drvdata
 31 * is cleared to assume that drvdata is always valid.
 32 */
 33static ssize_t id_show(struct device *dev,
 34		struct device_attribute *attr, char *buf)
 35{
 36	struct dax_region *dax_region = dev_get_drvdata(dev);
 37
 38	return sprintf(buf, "%d\n", dax_region->id);
 39}
 40static DEVICE_ATTR_RO(id);
 41
 42static ssize_t region_size_show(struct device *dev,
 43		struct device_attribute *attr, char *buf)
 44{
 45	struct dax_region *dax_region = dev_get_drvdata(dev);
 46
 47	return sprintf(buf, "%llu\n", (unsigned long long)
 48			resource_size(&dax_region->res));
 49}
 50static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
 51		region_size_show, NULL);
 52
 53static ssize_t align_show(struct device *dev,
 54		struct device_attribute *attr, char *buf)
 55{
 56	struct dax_region *dax_region = dev_get_drvdata(dev);
 57
 58	return sprintf(buf, "%u\n", dax_region->align);
 59}
 60static DEVICE_ATTR_RO(align);
 61
 62static struct attribute *dax_region_attributes[] = {
 63	&dev_attr_region_size.attr,
 64	&dev_attr_align.attr,
 65	&dev_attr_id.attr,
 66	NULL,
 67};
 68
 69static const struct attribute_group dax_region_attribute_group = {
 70	.name = "dax_region",
 71	.attrs = dax_region_attributes,
 72};
 73
 74static const struct attribute_group *dax_region_attribute_groups[] = {
 75	&dax_region_attribute_group,
 76	NULL,
 77};
 78
 79static void dax_region_free(struct kref *kref)
 80{
 81	struct dax_region *dax_region;
 82
 83	dax_region = container_of(kref, struct dax_region, kref);
 84	kfree(dax_region);
 85}
 86
 87void dax_region_put(struct dax_region *dax_region)
 88{
 89	kref_put(&dax_region->kref, dax_region_free);
 90}
 91EXPORT_SYMBOL_GPL(dax_region_put);
 92
 93static void dax_region_unregister(void *region)
 94{
 95	struct dax_region *dax_region = region;
 96
 97	sysfs_remove_groups(&dax_region->dev->kobj,
 98			dax_region_attribute_groups);
 99	dax_region_put(dax_region);
100}
101
102struct dax_region *alloc_dax_region(struct device *parent, int region_id,
103		struct resource *res, unsigned int align, void *addr,
104		unsigned long pfn_flags)
105{
106	struct dax_region *dax_region;
107
108	/*
109	 * The DAX core assumes that it can store its private data in
110	 * parent->driver_data. This WARN is a reminder / safeguard for
111	 * developers of device-dax drivers.
112	 */
113	if (dev_get_drvdata(parent)) {
114		dev_WARN(parent, "dax core failed to setup private data\n");
115		return NULL;
116	}
117
118	if (!IS_ALIGNED(res->start, align)
119			|| !IS_ALIGNED(resource_size(res), align))
120		return NULL;
121
122	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
123	if (!dax_region)
124		return NULL;
125
126	dev_set_drvdata(parent, dax_region);
127	memcpy(&dax_region->res, res, sizeof(*res));
128	dax_region->pfn_flags = pfn_flags;
129	kref_init(&dax_region->kref);
130	dax_region->id = region_id;
131	ida_init(&dax_region->ida);
132	dax_region->align = align;
133	dax_region->dev = parent;
134	dax_region->base = addr;
135	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
136		kfree(dax_region);
137		return NULL;
138	}
139
140	kref_get(&dax_region->kref);
141	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
142		return NULL;
143	return dax_region;
144}
145EXPORT_SYMBOL_GPL(alloc_dax_region);
146
147static struct dev_dax *to_dev_dax(struct device *dev)
148{
149	return container_of(dev, struct dev_dax, dev);
150}
151
152static ssize_t size_show(struct device *dev,
153		struct device_attribute *attr, char *buf)
154{
155	struct dev_dax *dev_dax = to_dev_dax(dev);
156	unsigned long long size = 0;
157	int i;
158
159	for (i = 0; i < dev_dax->num_resources; i++)
160		size += resource_size(&dev_dax->res[i]);
161
162	return sprintf(buf, "%llu\n", size);
163}
164static DEVICE_ATTR_RO(size);
165
166static struct attribute *dev_dax_attributes[] = {
167	&dev_attr_size.attr,
168	NULL,
169};
170
171static const struct attribute_group dev_dax_attribute_group = {
172	.attrs = dev_dax_attributes,
173};
174
175static const struct attribute_group *dax_attribute_groups[] = {
176	&dev_dax_attribute_group,
177	NULL,
178};
179
180static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
181		const char *func)
182{
183	struct dax_region *dax_region = dev_dax->region;
184	struct device *dev = &dev_dax->dev;
185	unsigned long mask;
186
187	if (!dax_alive(dev_dax->dax_dev))
188		return -ENXIO;
189
190	/* prevent private mappings from being established */
191	if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
192		dev_info(dev, "%s: %s: fail, attempted private mapping\n",
193				current->comm, func);
194		return -EINVAL;
195	}
196
197	mask = dax_region->align - 1;
198	if (vma->vm_start & mask || vma->vm_end & mask) {
199		dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
200				current->comm, func, vma->vm_start, vma->vm_end,
201				mask);
202		return -EINVAL;
203	}
204
205	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
206			&& (vma->vm_flags & VM_DONTCOPY) == 0) {
207		dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n",
208				current->comm, func);
209		return -EINVAL;
210	}
211
212	if (!vma_is_dax(vma)) {
213		dev_info(dev, "%s: %s: fail, vma is not DAX capable\n",
214				current->comm, func);
215		return -EINVAL;
216	}
217
218	return 0;
219}
220
221/* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */
222__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
223		unsigned long size)
224{
225	struct resource *res;
226	/* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */
227	phys_addr_t uninitialized_var(phys);
228	int i;
229
230	for (i = 0; i < dev_dax->num_resources; i++) {
231		res = &dev_dax->res[i];
232		phys = pgoff * PAGE_SIZE + res->start;
233		if (phys >= res->start && phys <= res->end)
234			break;
235		pgoff -= PHYS_PFN(resource_size(res));
236	}
237
238	if (i < dev_dax->num_resources) {
239		res = &dev_dax->res[i];
240		if (phys + size - 1 <= res->end)
241			return phys;
242	}
243
244	return -1;
245}
246
247static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
248{
249	struct device *dev = &dev_dax->dev;
250	struct dax_region *dax_region;
251	int rc = VM_FAULT_SIGBUS;
252	phys_addr_t phys;
253	pfn_t pfn;
254	unsigned int fault_size = PAGE_SIZE;
255
256	if (check_vma(dev_dax, vmf->vma, __func__))
257		return VM_FAULT_SIGBUS;
258
259	dax_region = dev_dax->region;
260	if (dax_region->align > PAGE_SIZE) {
261		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
262			dax_region->align, fault_size);
263		return VM_FAULT_SIGBUS;
264	}
265
266	if (fault_size != dax_region->align)
267		return VM_FAULT_SIGBUS;
268
269	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
270	if (phys == -1) {
271		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
272		return VM_FAULT_SIGBUS;
273	}
274
275	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
276
277	rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);
278
279	if (rc == -ENOMEM)
280		return VM_FAULT_OOM;
281	if (rc < 0 && rc != -EBUSY)
282		return VM_FAULT_SIGBUS;
283
284	return VM_FAULT_NOPAGE;
285}
286
287static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
288{
289	unsigned long pmd_addr = vmf->address & PMD_MASK;
290	struct device *dev = &dev_dax->dev;
291	struct dax_region *dax_region;
292	phys_addr_t phys;
293	pgoff_t pgoff;
294	pfn_t pfn;
295	unsigned int fault_size = PMD_SIZE;
296
297	if (check_vma(dev_dax, vmf->vma, __func__))
298		return VM_FAULT_SIGBUS;
299
300	dax_region = dev_dax->region;
301	if (dax_region->align > PMD_SIZE) {
302		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
303			dax_region->align, fault_size);
304		return VM_FAULT_SIGBUS;
305	}
306
307	/* dax pmd mappings require pfn_t_devmap() */
308	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
309		dev_dbg(dev, "region lacks devmap flags\n");
310		return VM_FAULT_SIGBUS;
311	}
312
313	if (fault_size < dax_region->align)
314		return VM_FAULT_SIGBUS;
315	else if (fault_size > dax_region->align)
316		return VM_FAULT_FALLBACK;
317
318	/* if we are outside of the VMA */
319	if (pmd_addr < vmf->vma->vm_start ||
320			(pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
321		return VM_FAULT_SIGBUS;
322
323	pgoff = linear_page_index(vmf->vma, pmd_addr);
324	phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
325	if (phys == -1) {
326		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
327		return VM_FAULT_SIGBUS;
328	}
329
330	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
331
332	return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn,
333			vmf->flags & FAULT_FLAG_WRITE);
334}
335
336#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
337static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
338{
339	unsigned long pud_addr = vmf->address & PUD_MASK;
340	struct device *dev = &dev_dax->dev;
341	struct dax_region *dax_region;
342	phys_addr_t phys;
343	pgoff_t pgoff;
344	pfn_t pfn;
345	unsigned int fault_size = PUD_SIZE;
346
347
348	if (check_vma(dev_dax, vmf->vma, __func__))
349		return VM_FAULT_SIGBUS;
350
351	dax_region = dev_dax->region;
352	if (dax_region->align > PUD_SIZE) {
353		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
354			dax_region->align, fault_size);
355		return VM_FAULT_SIGBUS;
356	}
357
358	/* dax pud mappings require pfn_t_devmap() */
359	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
360		dev_dbg(dev, "region lacks devmap flags\n");
361		return VM_FAULT_SIGBUS;
362	}
363
364	if (fault_size < dax_region->align)
365		return VM_FAULT_SIGBUS;
366	else if (fault_size > dax_region->align)
367		return VM_FAULT_FALLBACK;
368
369	/* if we are outside of the VMA */
370	if (pud_addr < vmf->vma->vm_start ||
371			(pud_addr + PUD_SIZE) > vmf->vma->vm_end)
372		return VM_FAULT_SIGBUS;
373
374	pgoff = linear_page_index(vmf->vma, pud_addr);
375	phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
376	if (phys == -1) {
377		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
378		return VM_FAULT_SIGBUS;
379	}
380
381	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
382
383	return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, pfn,
384			vmf->flags & FAULT_FLAG_WRITE);
385}
386#else
387static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
388{
389	return VM_FAULT_FALLBACK;
390}
391#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
392
393static int dev_dax_huge_fault(struct vm_fault *vmf,
394		enum page_entry_size pe_size)
395{
396	int rc, id;
397	struct file *filp = vmf->vma->vm_file;
398	struct dev_dax *dev_dax = filp->private_data;
399
400	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
401			(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
402			vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
403
404	id = dax_read_lock();
405	switch (pe_size) {
406	case PE_SIZE_PTE:
407		rc = __dev_dax_pte_fault(dev_dax, vmf);
408		break;
409	case PE_SIZE_PMD:
410		rc = __dev_dax_pmd_fault(dev_dax, vmf);
411		break;
412	case PE_SIZE_PUD:
413		rc = __dev_dax_pud_fault(dev_dax, vmf);
414		break;
415	default:
416		rc = VM_FAULT_SIGBUS;
417	}
418	dax_read_unlock(id);
419
420	return rc;
421}
422
423static int dev_dax_fault(struct vm_fault *vmf)
424{
425	return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
426}
427
428static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
429{
430	struct file *filp = vma->vm_file;
431	struct dev_dax *dev_dax = filp->private_data;
432	struct dax_region *dax_region = dev_dax->region;
433
434	if (!IS_ALIGNED(addr, dax_region->align))
435		return -EINVAL;
436	return 0;
437}
438
439static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
440{
441	struct file *filp = vma->vm_file;
442	struct dev_dax *dev_dax = filp->private_data;
443	struct dax_region *dax_region = dev_dax->region;
444
445	return dax_region->align;
446}
447
448static const struct vm_operations_struct dax_vm_ops = {
449	.fault = dev_dax_fault,
450	.huge_fault = dev_dax_huge_fault,
451	.split = dev_dax_split,
452	.pagesize = dev_dax_pagesize,
453};
454
455static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
456{
457	struct dev_dax *dev_dax = filp->private_data;
458	int rc, id;
459
460	dev_dbg(&dev_dax->dev, "trace\n");
461
462	/*
463	 * We lock to check dax_dev liveness and will re-check at
464	 * fault time.
465	 */
466	id = dax_read_lock();
467	rc = check_vma(dev_dax, vma, __func__);
468	dax_read_unlock(id);
469	if (rc)
470		return rc;
471
472	vma->vm_ops = &dax_vm_ops;
473	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
474	return 0;
475}
476
477/* return an unmapped area aligned to the dax region specified alignment */
478static unsigned long dax_get_unmapped_area(struct file *filp,
479		unsigned long addr, unsigned long len, unsigned long pgoff,
480		unsigned long flags)
481{
482	unsigned long off, off_end, off_align, len_align, addr_align, align;
483	struct dev_dax *dev_dax = filp ? filp->private_data : NULL;
484	struct dax_region *dax_region;
485
486	if (!dev_dax || addr)
487		goto out;
488
489	dax_region = dev_dax->region;
490	align = dax_region->align;
491	off = pgoff << PAGE_SHIFT;
492	off_end = off + len;
493	off_align = round_up(off, align);
494
495	if ((off_end <= off_align) || ((off_end - off_align) < align))
496		goto out;
497
498	len_align = len + align;
499	if ((off + len_align) < off)
500		goto out;
501
502	addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
503			pgoff, flags);
504	if (!IS_ERR_VALUE(addr_align)) {
505		addr_align += (off - addr_align) & (align - 1);
506		return addr_align;
507	}
508 out:
509	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
510}
511
512static int dax_open(struct inode *inode, struct file *filp)
513{
514	struct dax_device *dax_dev = inode_dax(inode);
515	struct inode *__dax_inode = dax_inode(dax_dev);
516	struct dev_dax *dev_dax = dax_get_private(dax_dev);
517
518	dev_dbg(&dev_dax->dev, "trace\n");
519	inode->i_mapping = __dax_inode->i_mapping;
520	inode->i_mapping->host = __dax_inode;
521	filp->f_mapping = inode->i_mapping;
522	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
523	filp->private_data = dev_dax;
524	inode->i_flags = S_DAX;
525
526	return 0;
527}
528
529static int dax_release(struct inode *inode, struct file *filp)
530{
531	struct dev_dax *dev_dax = filp->private_data;
532
533	dev_dbg(&dev_dax->dev, "trace\n");
534	return 0;
535}
536
537static const struct file_operations dax_fops = {
538	.llseek = noop_llseek,
539	.owner = THIS_MODULE,
540	.open = dax_open,
541	.release = dax_release,
542	.get_unmapped_area = dax_get_unmapped_area,
543	.mmap = dax_mmap,
544	.mmap_supported_flags = MAP_SYNC,
545};
546
547static void dev_dax_release(struct device *dev)
548{
549	struct dev_dax *dev_dax = to_dev_dax(dev);
550	struct dax_region *dax_region = dev_dax->region;
551	struct dax_device *dax_dev = dev_dax->dax_dev;
552
553	if (dev_dax->id >= 0)
554		ida_simple_remove(&dax_region->ida, dev_dax->id);
555	dax_region_put(dax_region);
556	put_dax(dax_dev);
557	kfree(dev_dax);
558}
559
560static void kill_dev_dax(struct dev_dax *dev_dax)
561{
562	struct dax_device *dax_dev = dev_dax->dax_dev;
563	struct inode *inode = dax_inode(dax_dev);
564
565	kill_dax(dax_dev);
566	unmap_mapping_range(inode->i_mapping, 0, 0, 1);
567}
568
569static void unregister_dev_dax(void *dev)
570{
571	struct dev_dax *dev_dax = to_dev_dax(dev);
572	struct dax_device *dax_dev = dev_dax->dax_dev;
573	struct inode *inode = dax_inode(dax_dev);
574	struct cdev *cdev = inode->i_cdev;
575
576	dev_dbg(dev, "trace\n");
577
578	kill_dev_dax(dev_dax);
579	cdev_device_del(cdev, dev);
580	put_device(dev);
581}
582
583struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
584		int id, struct resource *res, int count)
585{
586	struct device *parent = dax_region->dev;
587	struct dax_device *dax_dev;
588	struct dev_dax *dev_dax;
589	struct inode *inode;
590	struct device *dev;
591	struct cdev *cdev;
592	int rc, i;
593
594	if (!count)
595		return ERR_PTR(-EINVAL);
596
597	dev_dax = kzalloc(sizeof(*dev_dax) + sizeof(*res) * count, GFP_KERNEL);
598	if (!dev_dax)
599		return ERR_PTR(-ENOMEM);
600
601	for (i = 0; i < count; i++) {
602		if (!IS_ALIGNED(res[i].start, dax_region->align)
603				|| !IS_ALIGNED(resource_size(&res[i]),
604					dax_region->align)) {
605			rc = -EINVAL;
606			break;
607		}
608		dev_dax->res[i].start = res[i].start;
609		dev_dax->res[i].end = res[i].end;
610	}
611
612	if (i < count)
613		goto err_id;
614
615	if (id < 0) {
616		id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
617		dev_dax->id = id;
618		if (id < 0) {
619			rc = id;
620			goto err_id;
621		}
622	} else {
623		/* region provider owns @id lifetime */
624		dev_dax->id = -1;
625	}
626
627	/*
628	 * No 'host' or dax_operations since there is no access to this
629	 * device outside of mmap of the resulting character device.
630	 */
631	dax_dev = alloc_dax(dev_dax, NULL, NULL);
632	if (!dax_dev) {
633		rc = -ENOMEM;
634		goto err_dax;
635	}
636
637	/* from here on we're committed to teardown via dax_dev_release() */
638	dev = &dev_dax->dev;
639	device_initialize(dev);
640
641	inode = dax_inode(dax_dev);
642	cdev = inode->i_cdev;
643	cdev_init(cdev, &dax_fops);
644	cdev->owner = parent->driver->owner;
645
646	dev_dax->num_resources = count;
647	dev_dax->dax_dev = dax_dev;
648	dev_dax->region = dax_region;
649	kref_get(&dax_region->kref);
650
651	dev->devt = inode->i_rdev;
652	dev->class = dax_class;
653	dev->parent = parent;
654	dev->groups = dax_attribute_groups;
655	dev->release = dev_dax_release;
656	dev_set_name(dev, "dax%d.%d", dax_region->id, id);
657
658	rc = cdev_device_add(cdev, dev);
659	if (rc) {
660		kill_dev_dax(dev_dax);
661		put_device(dev);
662		return ERR_PTR(rc);
663	}
664
665	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
666	if (rc)
667		return ERR_PTR(rc);
668
669	return dev_dax;
670
671 err_dax:
672	if (dev_dax->id >= 0)
673		ida_simple_remove(&dax_region->ida, dev_dax->id);
674 err_id:
675	kfree(dev_dax);
676
677	return ERR_PTR(rc);
678}
679EXPORT_SYMBOL_GPL(devm_create_dev_dax);
680
681static int __init dax_init(void)
682{
683	dax_class = class_create(THIS_MODULE, "dax");
684	return PTR_ERR_OR_ZERO(dax_class);
685}
686
687static void __exit dax_exit(void)
688{
689	class_destroy(dax_class);
690}
691
692MODULE_AUTHOR("Intel Corporation");
693MODULE_LICENSE("GPL v2");
694subsys_initcall(dax_init);
695module_exit(dax_exit);
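
The v4.17 driver above only accepts shared, alignment-respecting mappings of its character device (see check_vma()), and advertises MAP_SYNC via mmap_supported_flags. Below is a minimal userspace sketch of mapping such a device; the node name /dev/dax0.0 and the 2 MiB region alignment are illustrative assumptions, not values taken from this file.

/*
 * Hypothetical userspace sketch (not from the kernel tree): map a
 * device-dax node. The path /dev/dax0.0 and the 2 MiB alignment are
 * assumptions for illustration only.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define DAX_ALIGN	(2UL << 20)	/* assumed dax_region->align */

int main(void)
{
	size_t len = DAX_ALIGN;		/* length must be a multiple of the alignment */
	int fd = open("/dev/dax0.0", O_RDWR);
	void *p;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * check_vma() rejects private mappings and VMAs whose start/end
	 * are not aligned to the region alignment, so request MAP_SHARED
	 * and let dax_get_unmapped_area() pick an aligned address.
	 */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	*(volatile char *)p = 1;	/* first touch goes through the device-dax fault path */

	munmap(p, len);
	close(fd);
	return 0;
}

Passing MAP_SHARED_VALIDATE | MAP_SYNC instead of plain MAP_SHARED would also be accepted by this version, since the fops set mmap_supported_flags = MAP_SYNC.
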
v6.13.7 (drivers/dax/device.c)
  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright(c) 2016-2018 Intel Corporation. All rights reserved. */
  3#include <linux/memremap.h>
  4#include <linux/pagemap.h>
  5#include <linux/module.h>
  6#include <linux/device.h>
  7#include <linux/pfn_t.h>
  8#include <linux/cdev.h>
  9#include <linux/slab.h>
 10#include <linux/dax.h>
 11#include <linux/fs.h>
 12#include <linux/mm.h>
 13#include <linux/mman.h>
 14#include "dax-private.h"
 15#include "bus.h"
 16
 17static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
 18		const char *func)
 19{
 20	struct device *dev = &dev_dax->dev;
 21	unsigned long mask;
 22
 23	if (!dax_alive(dev_dax->dax_dev))
 24		return -ENXIO;
 25
 26	/* prevent private mappings from being established */
 27	if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
 28		dev_info_ratelimited(dev,
 29				"%s: %s: fail, attempted private mapping\n",
 30				current->comm, func);
 31		return -EINVAL;
 32	}
 33
 34	mask = dev_dax->align - 1;
 35	if (vma->vm_start & mask || vma->vm_end & mask) {
 36		dev_info_ratelimited(dev,
 37				"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
 38				current->comm, func, vma->vm_start, vma->vm_end,
 39				mask);
 40		return -EINVAL;
 41	}
 42
 43	if (!vma_is_dax(vma)) {
 44		dev_info_ratelimited(dev,
 45				"%s: %s: fail, vma is not DAX capable\n",
 46				current->comm, func);
 47		return -EINVAL;
 48	}
 49
 50	return 0;
 51}
 52
 53/* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */
 54__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 55		unsigned long size)
 56{
 57	int i;
 58
 59	for (i = 0; i < dev_dax->nr_range; i++) {
 60		struct dev_dax_range *dax_range = &dev_dax->ranges[i];
 61		struct range *range = &dax_range->range;
 62		unsigned long long pgoff_end;
 63		phys_addr_t phys;
 64
 65		pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
 66		if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
 67			continue;
 68		phys = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
 69		if (phys + size - 1 <= range->end)
 70			return phys;
 71		break;
 72	}
 73	return -1;
 74}
 75
 76static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
 77			      unsigned long fault_size)
 78{
 79	unsigned long i, nr_pages = fault_size / PAGE_SIZE;
 80	struct file *filp = vmf->vma->vm_file;
 81	struct dev_dax *dev_dax = filp->private_data;
 82	pgoff_t pgoff;
 83
 84	/* mapping is only set on the head */
 85	if (dev_dax->pgmap->vmemmap_shift)
 86		nr_pages = 1;
 87
 88	pgoff = linear_page_index(vmf->vma,
 89			ALIGN_DOWN(vmf->address, fault_size));
 90
 91	for (i = 0; i < nr_pages; i++) {
 92		struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
 93
 94		page = compound_head(page);
 95		if (page->mapping)
 96			continue;
 97
 98		page->mapping = filp->f_mapping;
 99		page->index = pgoff + i;
100	}
101}
102
103static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
104				struct vm_fault *vmf)
105{
106	struct device *dev = &dev_dax->dev;
107	phys_addr_t phys;
108	pfn_t pfn;
109	unsigned int fault_size = PAGE_SIZE;
110
111	if (check_vma(dev_dax, vmf->vma, __func__))
112		return VM_FAULT_SIGBUS;
113
114	if (dev_dax->align > PAGE_SIZE) {
115		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
116			dev_dax->align, fault_size);
117		return VM_FAULT_SIGBUS;
118	}
119
120	if (fault_size != dev_dax->align)
121		return VM_FAULT_SIGBUS;
122
123	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
124	if (phys == -1) {
125		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
126		return VM_FAULT_SIGBUS;
127	}
128
129	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
130
131	dax_set_mapping(vmf, pfn, fault_size);
132
133	return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
134}
135
136static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
137				struct vm_fault *vmf)
138{
139	unsigned long pmd_addr = vmf->address & PMD_MASK;
140	struct device *dev = &dev_dax->dev;
141	phys_addr_t phys;
142	pgoff_t pgoff;
143	pfn_t pfn;
144	unsigned int fault_size = PMD_SIZE;
145
146	if (check_vma(dev_dax, vmf->vma, __func__))
147		return VM_FAULT_SIGBUS;
148
149	if (dev_dax->align > PMD_SIZE) {
150		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
151			dev_dax->align, fault_size);
152		return VM_FAULT_SIGBUS;
153	}
154
155	if (fault_size < dev_dax->align)
156		return VM_FAULT_SIGBUS;
157	else if (fault_size > dev_dax->align)
158		return VM_FAULT_FALLBACK;
159
160	/* if we are outside of the VMA */
161	if (pmd_addr < vmf->vma->vm_start ||
162			(pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
163		return VM_FAULT_SIGBUS;
164
165	pgoff = linear_page_index(vmf->vma, pmd_addr);
166	phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
167	if (phys == -1) {
168		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
169		return VM_FAULT_SIGBUS;
170	}
171
172	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
173
174	dax_set_mapping(vmf, pfn, fault_size);
175
176	return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
177}
178
179#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
180static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
181				struct vm_fault *vmf)
182{
183	unsigned long pud_addr = vmf->address & PUD_MASK;
184	struct device *dev = &dev_dax->dev;
185	phys_addr_t phys;
186	pgoff_t pgoff;
187	pfn_t pfn;
188	unsigned int fault_size = PUD_SIZE;
189
190
191	if (check_vma(dev_dax, vmf->vma, __func__))
192		return VM_FAULT_SIGBUS;
193
194	if (dev_dax->align > PUD_SIZE) {
195		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
196			dev_dax->align, fault_size);
197		return VM_FAULT_SIGBUS;
198	}
199
200	if (fault_size < dev_dax->align)
201		return VM_FAULT_SIGBUS;
202	else if (fault_size > dev_dax->align)
203		return VM_FAULT_FALLBACK;
204
205	/* if we are outside of the VMA */
206	if (pud_addr < vmf->vma->vm_start ||
207			(pud_addr + PUD_SIZE) > vmf->vma->vm_end)
208		return VM_FAULT_SIGBUS;
209
210	pgoff = linear_page_index(vmf->vma, pud_addr);
211	phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
212	if (phys == -1) {
213		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
214		return VM_FAULT_SIGBUS;
215	}
216
217	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
218
219	dax_set_mapping(vmf, pfn, fault_size);
220
221	return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
222}
223#else
224static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
225				struct vm_fault *vmf)
226{
227	return VM_FAULT_FALLBACK;
228}
229#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
230
231static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
232{
233	struct file *filp = vmf->vma->vm_file;
234	vm_fault_t rc = VM_FAULT_SIGBUS;
235	int id;
236	struct dev_dax *dev_dax = filp->private_data;
237
238	dev_dbg(&dev_dax->dev, "%s: op=%s addr=%#lx order=%d\n", current->comm,
239		(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
240		vmf->address & ~((1UL << (order + PAGE_SHIFT)) - 1), order);
241
242	id = dax_read_lock();
243	if (order == 0)
244		rc = __dev_dax_pte_fault(dev_dax, vmf);
245	else if (order == PMD_ORDER)
246		rc = __dev_dax_pmd_fault(dev_dax, vmf);
247	else if (order == PUD_ORDER)
248		rc = __dev_dax_pud_fault(dev_dax, vmf);
249	else
250		rc = VM_FAULT_SIGBUS;
251
252	dax_read_unlock(id);
253
254	return rc;
255}
256
257static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
258{
259	return dev_dax_huge_fault(vmf, 0);
260}
261
262static int dev_dax_may_split(struct vm_area_struct *vma, unsigned long addr)
263{
264	struct file *filp = vma->vm_file;
265	struct dev_dax *dev_dax = filp->private_data;
266
267	if (!IS_ALIGNED(addr, dev_dax->align))
268		return -EINVAL;
269	return 0;
270}
271
272static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
273{
274	struct file *filp = vma->vm_file;
275	struct dev_dax *dev_dax = filp->private_data;
276
277	return dev_dax->align;
278}
279
280static const struct vm_operations_struct dax_vm_ops = {
281	.fault = dev_dax_fault,
282	.huge_fault = dev_dax_huge_fault,
283	.may_split = dev_dax_may_split,
284	.pagesize = dev_dax_pagesize,
285};
286
287static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
288{
289	struct dev_dax *dev_dax = filp->private_data;
290	int rc, id;
291
292	dev_dbg(&dev_dax->dev, "trace\n");
293
294	/*
295	 * We lock to check dax_dev liveness and will re-check at
296	 * fault time.
297	 */
298	id = dax_read_lock();
299	rc = check_vma(dev_dax, vma, __func__);
300	dax_read_unlock(id);
301	if (rc)
302		return rc;
303
304	vma->vm_ops = &dax_vm_ops;
305	vm_flags_set(vma, VM_HUGEPAGE);
306	return 0;
307}
308
309/* return an unmapped area aligned to the dax region specified alignment */
310static unsigned long dax_get_unmapped_area(struct file *filp,
311		unsigned long addr, unsigned long len, unsigned long pgoff,
312		unsigned long flags)
313{
314	unsigned long off, off_end, off_align, len_align, addr_align, align;
315	struct dev_dax *dev_dax = filp ? filp->private_data : NULL;
316
317	if (!dev_dax || addr)
318		goto out;
319
320	align = dev_dax->align;
321	off = pgoff << PAGE_SHIFT;
322	off_end = off + len;
323	off_align = round_up(off, align);
324
325	if ((off_end <= off_align) || ((off_end - off_align) < align))
326		goto out;
327
328	len_align = len + align;
329	if ((off + len_align) < off)
330		goto out;
331
332	addr_align = mm_get_unmapped_area(current->mm, filp, addr, len_align,
333					  pgoff, flags);
334	if (!IS_ERR_VALUE(addr_align)) {
335		addr_align += (off - addr_align) & (align - 1);
336		return addr_align;
337	}
338 out:
339	return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
340}
341
342static const struct address_space_operations dev_dax_aops = {
343	.dirty_folio	= noop_dirty_folio,
344};
345
346static int dax_open(struct inode *inode, struct file *filp)
347{
348	struct dax_device *dax_dev = inode_dax(inode);
349	struct inode *__dax_inode = dax_inode(dax_dev);
350	struct dev_dax *dev_dax = dax_get_private(dax_dev);
351
352	dev_dbg(&dev_dax->dev, "trace\n");
353	inode->i_mapping = __dax_inode->i_mapping;
354	inode->i_mapping->host = __dax_inode;
355	inode->i_mapping->a_ops = &dev_dax_aops;
356	filp->f_mapping = inode->i_mapping;
357	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
358	filp->f_sb_err = file_sample_sb_err(filp);
359	filp->private_data = dev_dax;
360	inode->i_flags = S_DAX;
361
362	return 0;
363}
364
365static int dax_release(struct inode *inode, struct file *filp)
366{
367	struct dev_dax *dev_dax = filp->private_data;
368
369	dev_dbg(&dev_dax->dev, "trace\n");
370	return 0;
371}
372
373static const struct file_operations dax_fops = {
374	.llseek = noop_llseek,
375	.owner = THIS_MODULE,
376	.open = dax_open,
377	.release = dax_release,
378	.get_unmapped_area = dax_get_unmapped_area,
379	.mmap = dax_mmap,
380	.fop_flags = FOP_MMAP_SYNC,
381};
382
383static void dev_dax_cdev_del(void *cdev)
384{
385	cdev_del(cdev);
386}
387
388static void dev_dax_kill(void *dev_dax)
389{
390	kill_dev_dax(dev_dax);
391}
392
393static int dev_dax_probe(struct dev_dax *dev_dax)
394{
395	struct dax_device *dax_dev = dev_dax->dax_dev;
396	struct device *dev = &dev_dax->dev;
397	struct dev_pagemap *pgmap;
398	struct inode *inode;
399	struct cdev *cdev;
400	void *addr;
401	int rc, i;
402
403	if (static_dev_dax(dev_dax))  {
404		if (dev_dax->nr_range > 1) {
405			dev_warn(dev,
406				"static pgmap / multi-range device conflict\n");
407			return -EINVAL;
408		}
409
410		pgmap = dev_dax->pgmap;
411	} else {
412		if (dev_dax->pgmap) {
413			dev_warn(dev,
414				 "dynamic-dax with pre-populated page map\n");
415			return -EINVAL;
416		}
417
418		pgmap = devm_kzalloc(dev,
419                       struct_size(pgmap, ranges, dev_dax->nr_range - 1),
420                       GFP_KERNEL);
421		if (!pgmap)
422			return -ENOMEM;
423
424		pgmap->nr_range = dev_dax->nr_range;
425		dev_dax->pgmap = pgmap;
426
427		for (i = 0; i < dev_dax->nr_range; i++) {
428			struct range *range = &dev_dax->ranges[i].range;
429			pgmap->ranges[i] = *range;
430		}
431	}
432
433	for (i = 0; i < dev_dax->nr_range; i++) {
434		struct range *range = &dev_dax->ranges[i].range;
435
436		if (!devm_request_mem_region(dev, range->start,
437					range_len(range), dev_name(dev))) {
438			dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
439					i, range->start, range->end);
440			return -EBUSY;
441		}
442	}
443
444	pgmap->type = MEMORY_DEVICE_GENERIC;
445	if (dev_dax->align > PAGE_SIZE)
446		pgmap->vmemmap_shift =
447			order_base_2(dev_dax->align >> PAGE_SHIFT);
448	addr = devm_memremap_pages(dev, pgmap);
449	if (IS_ERR(addr))
450		return PTR_ERR(addr);
451
452	inode = dax_inode(dax_dev);
453	cdev = inode->i_cdev;
454	cdev_init(cdev, &dax_fops);
455	cdev->owner = dev->driver->owner;
456	cdev_set_parent(cdev, &dev->kobj);
457	rc = cdev_add(cdev, dev->devt, 1);
458	if (rc)
459		return rc;
460
461	rc = devm_add_action_or_reset(dev, dev_dax_cdev_del, cdev);
462	if (rc)
463		return rc;
464
465	run_dax(dax_dev);
466	return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax);
467}
468
469static struct dax_device_driver device_dax_driver = {
470	.probe = dev_dax_probe,
471	.type = DAXDRV_DEVICE_TYPE,
472};
473
474static int __init dax_init(void)
475{
476	return dax_driver_register(&device_dax_driver);
477}
478
479static void __exit dax_exit(void)
480{
481	dax_driver_unregister(&device_dax_driver);
482}
483
484MODULE_AUTHOR("Intel Corporation");
485MODULE_DESCRIPTION("Device DAX: direct access device driver");
486MODULE_LICENSE("GPL v2");
487module_init(dax_init);
488module_exit(dax_exit);
489MODULE_ALIAS_DAX_DEVICE(0);
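
Both versions implement dax_get_unmapped_area() with the over-allocate-then-align trick: ask the mm for len + align bytes, then bump the returned address so it honours the region alignment. A hedged userspace analogue of the same technique, using an anonymous mapping and a hypothetical 2 MiB alignment, could look like this:

/*
 * Userspace analogue of the over-allocate-then-align trick used by
 * dax_get_unmapped_area(): reserve len + align bytes, align the start,
 * then trim the unused head and tail. Purely illustrative.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

static void *mmap_aligned(size_t len, size_t align)
{
	size_t slack = len + align;
	uint8_t *raw, *aligned;

	raw = mmap(NULL, slack, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (raw == MAP_FAILED)
		return NULL;

	/* round the start up to the requested alignment */
	aligned = (uint8_t *)(((uintptr_t)raw + align - 1) & ~(align - 1));

	/* give back the unaligned head and the unused tail */
	if (aligned > raw)
		munmap(raw, aligned - raw);
	if (aligned + len < raw + slack)
		munmap(aligned + len, (raw + slack) - (aligned + len));

	return aligned;
}

int main(void)
{
	/* 2 MiB is an assumed alignment, mirroring a typical device-dax region */
	void *p = mmap_aligned(2UL << 20, 2UL << 20);

	printf("aligned anonymous mapping at %p\n", p);
	return p ? 0 : 1;
}

The kernel version avoids the trim step: it only adjusts the address returned by get_unmapped_area() before the VMA is created, whereas the sketch above has to munmap() the unused head and tail after the fact.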