Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0
  2
  3/*
  4 * Copyright 2016-2019 HabanaLabs, Ltd.
  5 * All Rights Reserved.
  6 *
  7 */
  8
  9#define pr_fmt(fmt)		"habanalabs: " fmt
 10
 11#include "habanalabs.h"
 12
 13#include <linux/pci.h>
 14#include <linux/module.h>
 15
 16#define HL_DRIVER_AUTHOR	"HabanaLabs Kernel Driver Team"
 17
 18#define HL_DRIVER_DESC		"Driver for HabanaLabs's AI Accelerators"
 19
 20MODULE_AUTHOR(HL_DRIVER_AUTHOR);
 21MODULE_DESCRIPTION(HL_DRIVER_DESC);
 22MODULE_LICENSE("GPL v2");
 23
 24static int hl_major;
 25static struct class *hl_class;
 26static DEFINE_IDR(hl_devs_idr);
 27static DEFINE_MUTEX(hl_devs_idr_lock);
 28
 29static int timeout_locked = 5;
 30static int reset_on_lockup = 1;
 31
 32module_param(timeout_locked, int, 0444);
 33MODULE_PARM_DESC(timeout_locked,
 34	"Device lockup timeout in seconds (0 = disabled, default 5s)");
 35
 36module_param(reset_on_lockup, int, 0444);
 37MODULE_PARM_DESC(reset_on_lockup,
 38	"Do device reset on lockup (0 = no, 1 = yes, default yes)");
 39
 40#define PCI_VENDOR_ID_HABANALABS	0x1da3
 41
 42#define PCI_IDS_GOYA			0x0001
 43
 44static const struct pci_device_id ids[] = {
 45	{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
 46	{ 0, }
 47};
 48MODULE_DEVICE_TABLE(pci, ids);
 49
 50/*
 51 * get_asic_type - translate device id to asic type
 52 *
 53 * @device: id of the PCI device
 54 *
 55 * Translate device id to asic type.
 56 * In case of unidentified device, return -1
 57 */
 58static enum hl_asic_type get_asic_type(u16 device)
 59{
 60	enum hl_asic_type asic_type;
 61
 62	switch (device) {
 63	case PCI_IDS_GOYA:
 64		asic_type = ASIC_GOYA;
 65		break;
 66	default:
 67		asic_type = ASIC_INVALID;
 68		break;
 69	}
 70
 71	return asic_type;
 72}
 73
 74/*
 75 * hl_device_open - open function for habanalabs device
 76 *
 77 * @inode: pointer to inode structure
 78 * @filp: pointer to file structure
 79 *
 80 * Called when process opens an habanalabs device.
 81 */
 82int hl_device_open(struct inode *inode, struct file *filp)
 83{
 84	struct hl_device *hdev;
 85	struct hl_fpriv *hpriv;
 86	int rc;
 87
 88	mutex_lock(&hl_devs_idr_lock);
 89	hdev = idr_find(&hl_devs_idr, iminor(inode));
 90	mutex_unlock(&hl_devs_idr_lock);
 91
 92	if (!hdev) {
 93		pr_err("Couldn't find device %d:%d\n",
 94			imajor(inode), iminor(inode));
 95		return -ENXIO;
 96	}
 97
 98	hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 99	if (!hpriv)
100		return -ENOMEM;
101
102	hpriv->hdev = hdev;
103	filp->private_data = hpriv;
104	hpriv->filp = filp;
105	mutex_init(&hpriv->restore_phase_mutex);
106	kref_init(&hpriv->refcount);
107	nonseekable_open(inode, filp);
108
109	hl_cb_mgr_init(&hpriv->cb_mgr);
110	hl_ctx_mgr_init(&hpriv->ctx_mgr);
111
112	hpriv->taskpid = find_get_pid(current->pid);
113
114	mutex_lock(&hdev->fpriv_list_lock);
115
116	if (hl_device_disabled_or_in_reset(hdev)) {
117		dev_err_ratelimited(hdev->dev,
118			"Can't open %s because it is disabled or in reset\n",
119			dev_name(hdev->dev));
120		rc = -EPERM;
121		goto out_err;
122	}
123
124	if (hdev->in_debug) {
125		dev_err_ratelimited(hdev->dev,
126			"Can't open %s because it is being debugged by another user\n",
127			dev_name(hdev->dev));
128		rc = -EPERM;
129		goto out_err;
130	}
131
132	if (hdev->compute_ctx) {
133		dev_dbg_ratelimited(hdev->dev,
134			"Can't open %s because another user is working on it\n",
135			dev_name(hdev->dev));
136		rc = -EBUSY;
137		goto out_err;
138	}
139
140	rc = hl_ctx_create(hdev, hpriv);
141	if (rc) {
142		dev_err(hdev->dev, "Failed to create context %d\n", rc);
143		goto out_err;
144	}
145
146	/* Device is IDLE at this point so it is legal to change PLLs.
147	 * There is no need to check anything because if the PLL is
148	 * already HIGH, the set function will return without doing
149	 * anything
150	 */
151	hl_device_set_frequency(hdev, PLL_HIGH);
152
153	list_add(&hpriv->dev_node, &hdev->fpriv_list);
154	mutex_unlock(&hdev->fpriv_list_lock);
155
156	hl_debugfs_add_file(hpriv);
157
158	return 0;
159
160out_err:
161	mutex_unlock(&hdev->fpriv_list_lock);
162
163	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
164	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
165	filp->private_data = NULL;
166	mutex_destroy(&hpriv->restore_phase_mutex);
167	put_pid(hpriv->taskpid);
168
169	kfree(hpriv);
170	return rc;
171}
172
173int hl_device_open_ctrl(struct inode *inode, struct file *filp)
174{
175	struct hl_device *hdev;
176	struct hl_fpriv *hpriv;
177	int rc;
178
179	mutex_lock(&hl_devs_idr_lock);
180	hdev = idr_find(&hl_devs_idr, iminor(inode));
181	mutex_unlock(&hl_devs_idr_lock);
182
183	if (!hdev) {
184		pr_err("Couldn't find device %d:%d\n",
185			imajor(inode), iminor(inode));
186		return -ENXIO;
187	}
188
189	hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
190	if (!hpriv)
191		return -ENOMEM;
192
193	mutex_lock(&hdev->fpriv_list_lock);
194
195	if (hl_device_disabled_or_in_reset(hdev)) {
196		dev_err_ratelimited(hdev->dev_ctrl,
197			"Can't open %s because it is disabled or in reset\n",
198			dev_name(hdev->dev_ctrl));
199		rc = -EPERM;
200		goto out_err;
201	}
202
203	list_add(&hpriv->dev_node, &hdev->fpriv_list);
204	mutex_unlock(&hdev->fpriv_list_lock);
205
206	hpriv->hdev = hdev;
207	filp->private_data = hpriv;
208	hpriv->filp = filp;
209	hpriv->is_control = true;
210	nonseekable_open(inode, filp);
211
212	hpriv->taskpid = find_get_pid(current->pid);
213
214	return 0;
215
216out_err:
217	mutex_unlock(&hdev->fpriv_list_lock);
218	kfree(hpriv);
219	return rc;
220}
221
222static void set_driver_behavior_per_device(struct hl_device *hdev)
223{
224	hdev->mmu_enable = 1;
225	hdev->cpu_enable = 1;
226	hdev->fw_loading = 1;
227	hdev->cpu_queues_enable = 1;
228	hdev->heartbeat = 1;
229
230	hdev->reset_pcilink = 0;
231}
232
233/*
234 * create_hdev - create habanalabs device instance
235 *
236 * @dev: will hold the pointer to the new habanalabs device structure
237 * @pdev: pointer to the pci device
238 * @asic_type: in case of simulator device, which device is it
239 * @minor: in case of simulator device, the minor of the device
240 *
241 * Allocate memory for habanalabs device and initialize basic fields
242 * Identify the ASIC type
243 * Allocate ID (minor) for the device (only for real devices)
244 */
245int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
246		enum hl_asic_type asic_type, int minor)
247{
248	struct hl_device *hdev;
249	int rc, main_id, ctrl_id = 0;
250
251	*dev = NULL;
252
253	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
254	if (!hdev)
255		return -ENOMEM;
256
257	/* First, we must find out which ASIC are we handling. This is needed
258	 * to configure the behavior of the driver (kernel parameters)
259	 */
260	if (pdev) {
261		hdev->asic_type = get_asic_type(pdev->device);
262		if (hdev->asic_type == ASIC_INVALID) {
263			dev_err(&pdev->dev, "Unsupported ASIC\n");
264			rc = -ENODEV;
265			goto free_hdev;
266		}
267	} else {
268		hdev->asic_type = asic_type;
269	}
270
271	hdev->major = hl_major;
272	hdev->reset_on_lockup = reset_on_lockup;
273	hdev->pldm = 0;
274
275	set_driver_behavior_per_device(hdev);
276
277	if (timeout_locked)
278		hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
279	else
280		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
281
282	hdev->disabled = true;
283	hdev->pdev = pdev; /* can be NULL in case of simulator device */
284
285	/* Set default DMA mask to 32 bits */
286	hdev->dma_mask = 32;
287
288	mutex_lock(&hl_devs_idr_lock);
289
290	/* Always save 2 numbers, 1 for main device and 1 for control.
291	 * They must be consecutive
292	 */
293	main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
294				GFP_KERNEL);
295
296	if (main_id >= 0)
297		ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
298					main_id + 2, GFP_KERNEL);
299
300	mutex_unlock(&hl_devs_idr_lock);
301
302	if ((main_id < 0) || (ctrl_id < 0)) {
303		if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
304			pr_err("too many devices in the system\n");
305
306		if (main_id >= 0) {
307			mutex_lock(&hl_devs_idr_lock);
308			idr_remove(&hl_devs_idr, main_id);
309			mutex_unlock(&hl_devs_idr_lock);
310		}
311
312		rc = -EBUSY;
313		goto free_hdev;
314	}
315
316	hdev->id = main_id;
317	hdev->id_control = ctrl_id;
318
319	*dev = hdev;
320
321	return 0;
322
323free_hdev:
324	kfree(hdev);
325	return rc;
326}
327
328/*
329 * destroy_hdev - destroy habanalabs device instance
330 *
331 * @dev: pointer to the habanalabs device structure
332 *
333 */
334void destroy_hdev(struct hl_device *hdev)
335{
336	/* Remove device from the device list */
337	mutex_lock(&hl_devs_idr_lock);
338	idr_remove(&hl_devs_idr, hdev->id);
339	idr_remove(&hl_devs_idr, hdev->id_control);
340	mutex_unlock(&hl_devs_idr_lock);
341
342	kfree(hdev);
343}
344
/*
 * hl_pmops_suspend - suspend callback of the driver's dev_pm_ops
 *
 * @dev: generic device whose driver data is the habanalabs device
 *
 * Return: the result of hl_device_suspend(), or 0 when no habanalabs device
 * is attached to @dev (an error is logged in that case).
 */
static int hl_pmops_suspend(struct device *dev)
{
	struct hl_device *hdev;

	pr_debug("Going to suspend PCI device\n");

	hdev = dev_get_drvdata(dev);
	if (hdev)
		return hl_device_suspend(hdev);

	pr_err("device pointer is NULL in suspend\n");
	return 0;
}
358
/*
 * hl_pmops_resume - resume callback of the driver's dev_pm_ops
 *
 * @dev: generic device whose driver data is the habanalabs device
 *
 * Return: the result of hl_device_resume(), or 0 when no habanalabs device
 * is attached to @dev (an error is logged in that case).
 */
static int hl_pmops_resume(struct device *dev)
{
	struct hl_device *hdev;

	pr_debug("Going to resume PCI device\n");

	hdev = dev_get_drvdata(dev);
	if (hdev)
		return hl_device_resume(hdev);

	pr_err("device pointer is NULL in resume\n");
	return 0;
}
372
373/*
374 * hl_pci_probe - probe PCI habanalabs devices
375 *
376 * @pdev: pointer to pci device
377 * @id: pointer to pci device id structure
378 *
379 * Standard PCI probe function for habanalabs device.
380 * Create a new habanalabs device and initialize it according to the
381 * device's type
382 */
383static int hl_pci_probe(struct pci_dev *pdev,
384				const struct pci_device_id *id)
385{
386	struct hl_device *hdev;
387	int rc;
388
389	dev_info(&pdev->dev, HL_NAME
390		 " device found [%04x:%04x] (rev %x)\n",
391		 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
392
393	rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
394	if (rc)
395		return rc;
396
397	pci_set_drvdata(pdev, hdev);
398
399	rc = hl_device_init(hdev, hl_class);
400	if (rc) {
401		dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
402		rc = -ENODEV;
403		goto disable_device;
404	}
405
406	return 0;
407
408disable_device:
409	pci_set_drvdata(pdev, NULL);
410	destroy_hdev(hdev);
411
412	return rc;
413}
414
415/*
416 * hl_pci_remove - remove PCI habanalabs devices
417 *
418 * @pdev: pointer to pci device
419 *
420 * Standard PCI remove function for habanalabs device
421 */
422static void hl_pci_remove(struct pci_dev *pdev)
423{
424	struct hl_device *hdev;
425
426	hdev = pci_get_drvdata(pdev);
427	if (!hdev)
428		return;
429
430	hl_device_fini(hdev);
431	pci_set_drvdata(pdev, NULL);
432
433	destroy_hdev(hdev);
434}
435
436static const struct dev_pm_ops hl_pm_ops = {
437	.suspend = hl_pmops_suspend,
438	.resume = hl_pmops_resume,
439};
440
441static struct pci_driver hl_pci_driver = {
442	.name = HL_NAME,
443	.id_table = ids,
444	.probe = hl_pci_probe,
445	.remove = hl_pci_remove,
446	.driver.pm = &hl_pm_ops,
447};
448
449/*
450 * hl_init - Initialize the habanalabs kernel driver
451 */
452static int __init hl_init(void)
453{
454	int rc;
455	dev_t dev;
456
457	pr_info("loading driver\n");
458
459	rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
460	if (rc < 0) {
461		pr_err("unable to get major\n");
462		return rc;
463	}
464
465	hl_major = MAJOR(dev);
466
467	hl_class = class_create(THIS_MODULE, HL_NAME);
468	if (IS_ERR(hl_class)) {
469		pr_err("failed to allocate class\n");
470		rc = PTR_ERR(hl_class);
471		goto remove_major;
472	}
473
474	hl_debugfs_init();
475
476	rc = pci_register_driver(&hl_pci_driver);
477	if (rc) {
478		pr_err("failed to register pci device\n");
479		goto remove_debugfs;
480	}
481
482	pr_debug("driver loaded\n");
483
484	return 0;
485
486remove_debugfs:
487	hl_debugfs_fini();
488	class_destroy(hl_class);
489remove_major:
490	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
491	return rc;
492}
493
494/*
495 * hl_exit - Release all resources of the habanalabs kernel driver
496 */
497static void __exit hl_exit(void)
498{
499	pci_unregister_driver(&hl_pci_driver);
500
501	/*
502	 * Removing debugfs must be after all devices or simulator devices
503	 * have been removed because otherwise we get a bug in the
504	 * debugfs module for referencing NULL objects
505	 */
506	hl_debugfs_fini();
507
508	class_destroy(hl_class);
509	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
510
511	idr_destroy(&hl_devs_idr);
512
513	pr_debug("driver removed\n");
514}
515
/* Module entry and exit points */
module_init(hl_init);
module_exit(hl_exit);