Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.9.
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Copyright (C) 2020 HiSilicon Limited.
  4 */
  5
  6#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
  7
  8#include <linux/debugfs.h>
  9#include <linux/delay.h>
 10#include <linux/device.h>
 11#include <linux/dma-mapping.h>
 12#include <linux/kernel.h>
 13#include <linux/kthread.h>
 14#include <linux/map_benchmark.h>
 15#include <linux/math64.h>
 16#include <linux/module.h>
 17#include <linux/pci.h>
 18#include <linux/platform_device.h>
 19#include <linux/slab.h>
 20#include <linux/timekeeping.h>
 21
 22struct map_benchmark_data {
 23	struct map_benchmark bparam;
 24	struct device *dev;
 25	struct dentry  *debugfs;
 26	enum dma_data_direction dir;
 27	atomic64_t sum_map_100ns;
 28	atomic64_t sum_unmap_100ns;
 29	atomic64_t sum_sq_map;
 30	atomic64_t sum_sq_unmap;
 31	atomic64_t loops;
 32};
 33
 34static int map_benchmark_thread(void *data)
 35{
 36	void *buf;
 37	dma_addr_t dma_addr;
 38	struct map_benchmark_data *map = data;
 39	int npages = map->bparam.granule;
 40	u64 size = npages * PAGE_SIZE;
 41	int ret = 0;
 42
 43	buf = alloc_pages_exact(size, GFP_KERNEL);
 44	if (!buf)
 45		return -ENOMEM;
 46
 47	while (!kthread_should_stop())  {
 48		u64 map_100ns, unmap_100ns, map_sq, unmap_sq;
 49		ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
 50		ktime_t map_delta, unmap_delta;
 51
 52		/*
 53		 * for a non-coherent device, if we don't stain them in the
 54		 * cache, this will give an underestimate of the real-world
 55		 * overhead of BIDIRECTIONAL or TO_DEVICE mappings;
 56		 * 66 means evertything goes well! 66 is lucky.
 57		 */
 58		if (map->dir != DMA_FROM_DEVICE)
 59			memset(buf, 0x66, size);
 60
 61		map_stime = ktime_get();
 62		dma_addr = dma_map_single(map->dev, buf, size, map->dir);
 63		if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
 64			pr_err("dma_map_single failed on %s\n",
 65				dev_name(map->dev));
 66			ret = -ENOMEM;
 67			goto out;
 68		}
 69		map_etime = ktime_get();
 70		map_delta = ktime_sub(map_etime, map_stime);
 71
 72		/* Pretend DMA is transmitting */
 73		ndelay(map->bparam.dma_trans_ns);
 74
 75		unmap_stime = ktime_get();
 76		dma_unmap_single(map->dev, dma_addr, size, map->dir);
 77		unmap_etime = ktime_get();
 78		unmap_delta = ktime_sub(unmap_etime, unmap_stime);
 79
 80		/* calculate sum and sum of squares */
 81
 82		map_100ns = div64_ul(map_delta,  100);
 83		unmap_100ns = div64_ul(unmap_delta, 100);
 84		map_sq = map_100ns * map_100ns;
 85		unmap_sq = unmap_100ns * unmap_100ns;
 86
 87		atomic64_add(map_100ns, &map->sum_map_100ns);
 88		atomic64_add(unmap_100ns, &map->sum_unmap_100ns);
 89		atomic64_add(map_sq, &map->sum_sq_map);
 90		atomic64_add(unmap_sq, &map->sum_sq_unmap);
 91		atomic64_inc(&map->loops);
 92	}
 93
 94out:
 95	free_pages_exact(buf, size);
 96	return ret;
 97}
 98
 99static int do_map_benchmark(struct map_benchmark_data *map)
100{
101	struct task_struct **tsk;
102	int threads = map->bparam.threads;
103	int node = map->bparam.node;
104	u64 loops;
105	int ret = 0;
106	int i;
107
108	tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
109	if (!tsk)
110		return -ENOMEM;
111
112	get_device(map->dev);
113
114	for (i = 0; i < threads; i++) {
115		tsk[i] = kthread_create_on_node(map_benchmark_thread, map,
116				map->bparam.node, "dma-map-benchmark/%d", i);
117		if (IS_ERR(tsk[i])) {
118			pr_err("create dma_map thread failed\n");
119			ret = PTR_ERR(tsk[i]);
120			while (--i >= 0)
121				kthread_stop(tsk[i]);
122			goto out;
123		}
124
125		if (node != NUMA_NO_NODE)
126			kthread_bind_mask(tsk[i], cpumask_of_node(node));
127	}
128
129	/* clear the old value in the previous benchmark */
130	atomic64_set(&map->sum_map_100ns, 0);
131	atomic64_set(&map->sum_unmap_100ns, 0);
132	atomic64_set(&map->sum_sq_map, 0);
133	atomic64_set(&map->sum_sq_unmap, 0);
134	atomic64_set(&map->loops, 0);
135
136	for (i = 0; i < threads; i++) {
137		get_task_struct(tsk[i]);
138		wake_up_process(tsk[i]);
139	}
140
141	msleep_interruptible(map->bparam.seconds * 1000);
142
143	/* wait for the completion of all started benchmark threads */
144	for (i = 0; i < threads; i++) {
145		int kthread_ret = kthread_stop_put(tsk[i]);
146
147		if (kthread_ret)
148			ret = kthread_ret;
149	}
150
151	if (ret)
152		goto out;
153
154	loops = atomic64_read(&map->loops);
155	if (likely(loops > 0)) {
156		u64 map_variance, unmap_variance;
157		u64 sum_map = atomic64_read(&map->sum_map_100ns);
158		u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns);
159		u64 sum_sq_map = atomic64_read(&map->sum_sq_map);
160		u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap);
161
162		/* average latency */
163		map->bparam.avg_map_100ns = div64_u64(sum_map, loops);
164		map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops);
165
166		/* standard deviation of latency */
167		map_variance = div64_u64(sum_sq_map, loops) -
168				map->bparam.avg_map_100ns *
169				map->bparam.avg_map_100ns;
170		unmap_variance = div64_u64(sum_sq_unmap, loops) -
171				map->bparam.avg_unmap_100ns *
172				map->bparam.avg_unmap_100ns;
173		map->bparam.map_stddev = int_sqrt64(map_variance);
174		map->bparam.unmap_stddev = int_sqrt64(unmap_variance);
175	}
176
177out:
178	put_device(map->dev);
179	kfree(tsk);
180	return ret;
181}
182
183static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
184		unsigned long arg)
185{
186	struct map_benchmark_data *map = file->private_data;
187	void __user *argp = (void __user *)arg;
188	u64 old_dma_mask;
189	int ret;
190
191	if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
192		return -EFAULT;
193
194	switch (cmd) {
195	case DMA_MAP_BENCHMARK:
196		if (map->bparam.threads == 0 ||
197		    map->bparam.threads > DMA_MAP_MAX_THREADS) {
198			pr_err("invalid thread number\n");
199			return -EINVAL;
200		}
201
202		if (map->bparam.seconds == 0 ||
203		    map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
204			pr_err("invalid duration seconds\n");
205			return -EINVAL;
206		}
207
208		if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
209			pr_err("invalid transmission delay\n");
210			return -EINVAL;
211		}
212
213		if (map->bparam.node != NUMA_NO_NODE &&
214		    (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES ||
215		     !node_possible(map->bparam.node))) {
216			pr_err("invalid numa node\n");
217			return -EINVAL;
218		}
219
220		if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
221			pr_err("invalid granule size\n");
222			return -EINVAL;
223		}
224
225		switch (map->bparam.dma_dir) {
226		case DMA_MAP_BIDIRECTIONAL:
227			map->dir = DMA_BIDIRECTIONAL;
228			break;
229		case DMA_MAP_FROM_DEVICE:
230			map->dir = DMA_FROM_DEVICE;
231			break;
232		case DMA_MAP_TO_DEVICE:
233			map->dir = DMA_TO_DEVICE;
234			break;
235		default:
236			pr_err("invalid DMA direction\n");
237			return -EINVAL;
238		}
239
240		old_dma_mask = dma_get_mask(map->dev);
241
242		ret = dma_set_mask(map->dev,
243				   DMA_BIT_MASK(map->bparam.dma_bits));
244		if (ret) {
245			pr_err("failed to set dma_mask on device %s\n",
246				dev_name(map->dev));
247			return -EINVAL;
248		}
249
250		ret = do_map_benchmark(map);
251
252		/*
253		 * restore the original dma_mask as many devices' dma_mask are
254		 * set by architectures, acpi, busses. When we bind them back
255		 * to their original drivers, those drivers shouldn't see
256		 * dma_mask changed by benchmark
257		 */
258		dma_set_mask(map->dev, old_dma_mask);
259		break;
260	default:
261		return -EINVAL;
262	}
263
264	if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
265		return -EFAULT;
266
267	return ret;
268}
269
270static const struct file_operations map_benchmark_fops = {
271	.open			= simple_open,
272	.unlocked_ioctl		= map_benchmark_ioctl,
273};
274
275static void map_benchmark_remove_debugfs(void *data)
276{
277	struct map_benchmark_data *map = (struct map_benchmark_data *)data;
278
279	debugfs_remove(map->debugfs);
280}
281
282static int __map_benchmark_probe(struct device *dev)
283{
284	struct dentry *entry;
285	struct map_benchmark_data *map;
286	int ret;
287
288	map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL);
289	if (!map)
290		return -ENOMEM;
291	map->dev = dev;
292
293	ret = devm_add_action(dev, map_benchmark_remove_debugfs, map);
294	if (ret) {
295		pr_err("Can't add debugfs remove action\n");
296		return ret;
297	}
298
299	/*
300	 * we only permit a device bound with this driver, 2nd probe
301	 * will fail
302	 */
303	entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map,
304			&map_benchmark_fops);
305	if (IS_ERR(entry))
306		return PTR_ERR(entry);
307	map->debugfs = entry;
308
309	return 0;
310}
311
312static int map_benchmark_platform_probe(struct platform_device *pdev)
313{
314	return __map_benchmark_probe(&pdev->dev);
315}
316
317static struct platform_driver map_benchmark_platform_driver = {
318	.driver		= {
319		.name	= "dma_map_benchmark",
320	},
321	.probe = map_benchmark_platform_probe,
322};
323
324static int
325map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
326{
327	return __map_benchmark_probe(&pdev->dev);
328}
329
330static struct pci_driver map_benchmark_pci_driver = {
331	.name	= "dma_map_benchmark",
332	.probe	= map_benchmark_pci_probe,
333};
334
335static int __init map_benchmark_init(void)
336{
337	int ret;
338
339	ret = pci_register_driver(&map_benchmark_pci_driver);
340	if (ret)
341		return ret;
342
343	ret = platform_driver_register(&map_benchmark_platform_driver);
344	if (ret) {
345		pci_unregister_driver(&map_benchmark_pci_driver);
346		return ret;
347	}
348
349	return 0;
350}
351
352static void __exit map_benchmark_cleanup(void)
353{
354	platform_driver_unregister(&map_benchmark_platform_driver);
355	pci_unregister_driver(&map_benchmark_pci_driver);
356}
357
358module_init(map_benchmark_init);
359module_exit(map_benchmark_cleanup);
360
361MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>");
362MODULE_DESCRIPTION("dma_map benchmark driver");