1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
3
4#include <linux/init.h>
5#include <linux/kernel.h>
6#include <linux/module.h>
7#include <linux/pci.h>
8#include <linux/device.h>
9#include <linux/iommu.h>
10#include <uapi/linux/idxd.h>
11#include <linux/highmem.h>
12#include <linux/sched/smt.h>
13#include <crypto/internal/acompress.h>
14
15#include "idxd.h"
16#include "iaa_crypto.h"
17#include "iaa_crypto_stats.h"
18
19#ifdef pr_fmt
20#undef pr_fmt
21#endif
22
23#define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt
24
25#define IAA_ALG_PRIORITY 300
26
27/* number of iaa instances probed */
28static unsigned int nr_iaa;
29static unsigned int nr_cpus;
30static unsigned int nr_nodes;
31static unsigned int nr_cpus_per_node;
32
33/* Number of physical cpus sharing each iaa instance */
34static unsigned int cpus_per_iaa;
35
36static struct crypto_comp *deflate_generic_tfm;
37
38/* Per-cpu lookup table for balanced wqs */
39static struct wq_table_entry __percpu *wq_table;
40
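/*
 * Return the next wq to submit to for a given cpu, cycling round-robin
 * through the wqs assigned to that cpu in the per-cpu wq_table.
 */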
41static struct idxd_wq *wq_table_next_wq(int cpu)
42{
43 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
44
45 if (++entry->cur_wq >= entry->n_wqs)
46 entry->cur_wq = 0;
47
48 if (!entry->wqs[entry->cur_wq])
49 return NULL;
50
51 pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
52 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
53 entry->wqs[entry->cur_wq]->id, cpu);
54
55 return entry->wqs[entry->cur_wq];
56}
57
58static void wq_table_add(int cpu, struct idxd_wq *wq)
59{
60 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
61
62 if (WARN_ON(entry->n_wqs == entry->max_wqs))
63 return;
64
65 entry->wqs[entry->n_wqs++] = wq;
66
67 pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
68 entry->wqs[entry->n_wqs - 1]->idxd->id,
69 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
70}
71
72static void wq_table_free_entry(int cpu)
73{
74 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
75
76 kfree(entry->wqs);
77 memset(entry, 0, sizeof(*entry));
78}
79
80static void wq_table_clear_entry(int cpu)
81{
82 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
83
84 entry->n_wqs = 0;
85 entry->cur_wq = 0;
86 memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
87}
88
89LIST_HEAD(iaa_devices);
90DEFINE_MUTEX(iaa_devices_lock);
91
92/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
93static bool iaa_crypto_enabled;
94static bool iaa_crypto_registered;
95
96/* Verify results of IAA compress or not */
97static bool iaa_verify_compress = true;
98
99static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
100{
101 return sprintf(buf, "%d\n", iaa_verify_compress);
102}
103
104static ssize_t verify_compress_store(struct device_driver *driver,
105 const char *buf, size_t count)
106{
107 int ret = -EBUSY;
108
109 mutex_lock(&iaa_devices_lock);
110
111 if (iaa_crypto_enabled)
112 goto out;
113
114 ret = kstrtobool(buf, &iaa_verify_compress);
115 if (ret)
116 goto out;
117
118 ret = count;
119out:
120 mutex_unlock(&iaa_devices_lock);
121
122 return ret;
123}
124static DRIVER_ATTR_RW(verify_compress);
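/*
 * Illustrative sysfs usage for the attribute above (the path assumes
 * the idxd "crypto" sub-driver name; the value can only be changed
 * before any IAA wqs are enabled, otherwise the store returns -EBUSY):
 *
 *	echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
 *	cat /sys/bus/dsa/drivers/crypto/verify_compress
 */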
125
126/*
127 * The iaa crypto driver supports three 'sync' methods determining how
128 * compressions and decompressions are performed:
129 *
130 * - sync: the compression or decompression completes before
131 * returning. This is the mode used by the async crypto
132 * interface when the sync mode is set to 'sync' and by
133 * the sync crypto interface regardless of setting.
134 *
135 * - async: the compression or decompression is submitted and returns
136 * immediately. Completion interrupts are not used so
137 * the caller is responsible for polling the descriptor
138 * for completion. This mode is applicable to only the
139 * async crypto interface and is ignored for anything
140 * else.
141 *
142 * - async_irq: the compression or decompression is submitted and
143 * returns immediately. Completion interrupts are
144 * enabled so the caller can wait for the completion and
145 * yield to other threads. When the compression or
146 * decompression completes, the completion is signaled
147 * and the caller awakened. This mode is applicable to
148 * only the async crypto interface and is ignored for
149 * anything else.
150 *
151 * These modes can be set using the iaa_crypto sync_mode driver
152 * attribute.
153 */
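/*
 * Illustrative sysfs usage for the sync_mode attribute described above
 * (the path assumes the idxd "crypto" sub-driver name; like
 * verify_compress, it must be set before any IAA wqs are enabled):
 *
 *	echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *	cat /sys/bus/dsa/drivers/crypto/sync_mode
 */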
154
155/* Use async mode */
156static bool async_mode;
157/* Use interrupts */
158static bool use_irq;
159
160/**
161 * set_iaa_sync_mode - Set IAA sync mode
162 * @name: The name of the sync mode
163 *
164 * Make the IAA sync mode named @name the current sync mode used by
165 * compression/decompression.
166 */
167
168static int set_iaa_sync_mode(const char *name)
169{
170 int ret = 0;
171
172 if (sysfs_streq(name, "sync")) {
173 async_mode = false;
174 use_irq = false;
175 } else if (sysfs_streq(name, "async")) {
176 async_mode = true;
177 use_irq = false;
178 } else if (sysfs_streq(name, "async_irq")) {
179 async_mode = true;
180 use_irq = true;
181 } else {
182 ret = -EINVAL;
183 }
184
185 return ret;
186}
187
188static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
189{
190 int ret = 0;
191
192 if (!async_mode && !use_irq)
193 ret = sprintf(buf, "%s\n", "sync");
194 else if (async_mode && !use_irq)
195 ret = sprintf(buf, "%s\n", "async");
196 else if (async_mode && use_irq)
197 ret = sprintf(buf, "%s\n", "async_irq");
198
199 return ret;
200}
201
202static ssize_t sync_mode_store(struct device_driver *driver,
203 const char *buf, size_t count)
204{
205 int ret = -EBUSY;
206
207 mutex_lock(&iaa_devices_lock);
208
209 if (iaa_crypto_enabled)
210 goto out;
211
212 ret = set_iaa_sync_mode(buf);
213 if (ret == 0)
214 ret = count;
215out:
216 mutex_unlock(&iaa_devices_lock);
217
218 return ret;
219}
220static DRIVER_ATTR_RW(sync_mode);
221
222static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
223
224static int find_empty_iaa_compression_mode(void)
225{
226 int i;
227
228 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
229 if (!iaa_compression_modes[i])
230 return i;
231 }
232
233 /* all compression mode slots in use */
234 return -EINVAL;
235}
236
237static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
238{
239 struct iaa_compression_mode *mode;
240 int i;
241
242 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
243 mode = iaa_compression_modes[i];
244 if (!mode)
245 continue;
246
247 if (!strcmp(mode->name, name)) {
248 *idx = i;
249 return iaa_compression_modes[i];
250 }
251 }
252
253 return NULL;
254}
255
256static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
257{
258 kfree(mode->name);
259 kfree(mode->ll_table);
260 kfree(mode->d_table);
261 kfree(mode->header_table);
262
263 kfree(mode);
264}
265
266/*
267 * IAA Compression modes are defined by an ll_table, a d_table, and an
268 * optional header_table. These tables are typically generated and
269 * captured using statistics collected from running actual
270 * compress/decompress workloads.
271 *
272 * A module or other kernel code can add and remove compression modes
273 * with a given name using the exported @add_iaa_compression_mode()
274 * and @remove_iaa_compression_mode() functions.
275 *
276 * When a new compression mode is added, the tables are saved in a
277 * global compression mode list. When IAA devices are added, a
278 * per-IAA device dma mapping is created for each IAA device, for each
279 * compression mode. These are the tables used to do the actual
280 * compression/decompression and are unmapped if/when the devices are
281 * removed. Currently, compression modes must be added before any
282 * device is added, and removed after all devices have been removed.
283 */
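/*
 * Illustrative sketch of how a module might register and later remove
 * a custom mode (the "my-mode" name and the my_*_table arrays are
 * hypothetical placeholders, not part of this driver):
 *
 *	ret = add_iaa_compression_mode("my-mode",
 *				       my_ll_table, sizeof(my_ll_table),
 *				       my_d_table, sizeof(my_d_table),
 *				       NULL, 0, 0, NULL, NULL);
 *
 *	...
 *
 *	remove_iaa_compression_mode("my-mode");
 */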
284
285/**
286 * remove_iaa_compression_mode - Remove an IAA compression mode
287 * @name: The name of the compression mode to remove
288 *
289 * Remove the IAA compression mode named @name.
290 */
291void remove_iaa_compression_mode(const char *name)
292{
293 struct iaa_compression_mode *mode;
294 int idx;
295
296 mutex_lock(&iaa_devices_lock);
297
298 if (!list_empty(&iaa_devices))
299 goto out;
300
301 mode = find_iaa_compression_mode(name, &idx);
302 if (mode) {
303 free_iaa_compression_mode(mode);
304 iaa_compression_modes[idx] = NULL;
305 }
306out:
307 mutex_unlock(&iaa_devices_lock);
308}
309EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
310
311/**
312 * add_iaa_compression_mode - Add an IAA compression mode
313 * @name: The name the compression mode will be known as
314 * @ll_table: The ll table
315 * @ll_table_size: The ll table size in bytes
316 * @d_table: The d table
317 * @d_table_size: The d table size in bytes
318 * @header_table: Optional header table
319 * @header_table_size: Optional header table size in bytes
320 * @gen_decomp_table_flags: Optional flags used to generate the decomp table
321 * @init: Optional callback function to init the compression mode data
322 * @free: Optional callback function to free the compression mode data
323 *
324 * Add a new IAA compression mode named @name.
325 *
326 * Returns 0 if successful, errcode otherwise.
327 */
328int add_iaa_compression_mode(const char *name,
329 const u32 *ll_table,
330 int ll_table_size,
331 const u32 *d_table,
332 int d_table_size,
333 const u8 *header_table,
334 int header_table_size,
335 u16 gen_decomp_table_flags,
336 iaa_dev_comp_init_fn_t init,
337 iaa_dev_comp_free_fn_t free)
338{
339 struct iaa_compression_mode *mode;
340 int idx, ret = -ENOMEM;
341
342 mutex_lock(&iaa_devices_lock);
343
344 if (!list_empty(&iaa_devices)) {
345 ret = -EBUSY;
346 goto out;
347 }
348
349 mode = kzalloc(sizeof(*mode), GFP_KERNEL);
350 if (!mode)
351 goto out;
352
353 mode->name = kstrdup(name, GFP_KERNEL);
354 if (!mode->name)
355 goto free;
356
357 if (ll_table) {
358 mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL);
359 if (!mode->ll_table)
360 goto free;
361 memcpy(mode->ll_table, ll_table, ll_table_size);
362 mode->ll_table_size = ll_table_size;
363 }
364
365 if (d_table) {
366 mode->d_table = kzalloc(d_table_size, GFP_KERNEL);
367 if (!mode->d_table)
368 goto free;
369 memcpy(mode->d_table, d_table, d_table_size);
370 mode->d_table_size = d_table_size;
371 }
372
373 if (header_table) {
374 mode->header_table = kzalloc(header_table_size, GFP_KERNEL);
375 if (!mode->header_table)
376 goto free;
377 memcpy(mode->header_table, header_table, header_table_size);
378 mode->header_table_size = header_table_size;
379 }
380
381 mode->gen_decomp_table_flags = gen_decomp_table_flags;
382
383 mode->init = init;
384 mode->free = free;
385
386 idx = find_empty_iaa_compression_mode();
387 if (idx < 0)
388 goto free;
389
390 pr_debug("IAA compression mode %s added at idx %d\n",
391 mode->name, idx);
392
393 iaa_compression_modes[idx] = mode;
394
395 ret = 0;
396out:
397 mutex_unlock(&iaa_devices_lock);
398
399 return ret;
400free:
401 free_iaa_compression_mode(mode);
402 goto out;
403}
404EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
405
406static struct iaa_device_compression_mode *
407get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
408{
409 return iaa_device->compression_modes[idx];
410}
411
412static void free_device_compression_mode(struct iaa_device *iaa_device,
413 struct iaa_device_compression_mode *device_mode)
414{
415 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
416 struct device *dev = &iaa_device->idxd->pdev->dev;
417
418 kfree(device_mode->name);
419
420 if (device_mode->aecs_comp_table)
421 dma_free_coherent(dev, size, device_mode->aecs_comp_table,
422 device_mode->aecs_comp_table_dma_addr);
423 if (device_mode->aecs_decomp_table)
424 dma_free_coherent(dev, size, device_mode->aecs_decomp_table,
425 device_mode->aecs_decomp_table_dma_addr);
426
427 kfree(device_mode);
428}
429
430#define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000
431#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
432#define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
433#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
434#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
435 IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
436 IDXD_OP_FLAG_AECS_RW_TGLS)
437
438static int check_completion(struct device *dev,
439 struct iax_completion_record *comp,
440 bool compress,
441 bool only_once);
442
443static int decompress_header(struct iaa_device_compression_mode *device_mode,
444 struct iaa_compression_mode *mode,
445 struct idxd_wq *wq)
446{
447 dma_addr_t src_addr, src2_addr;
448 struct idxd_desc *idxd_desc;
449 struct iax_hw_desc *desc;
450 struct device *dev;
451 int ret = 0;
452
453 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
454 if (IS_ERR(idxd_desc))
455 return PTR_ERR(idxd_desc);
456
457 desc = idxd_desc->iax_hw;
458
459 dev = &wq->idxd->pdev->dev;
460
461 src_addr = dma_map_single(dev, (void *)mode->header_table,
462 mode->header_table_size, DMA_TO_DEVICE);
463 dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n",
464 __func__, mode->name, src_addr, dev,
465 mode->header_table, mode->header_table_size);
466 if (unlikely(dma_mapping_error(dev, src_addr))) {
467 dev_dbg(dev, "dma_map_single err, exiting\n");
468 ret = -ENOMEM;
idxd_free_desc(wq, idxd_desc);
469 return ret;
470 }
471
472 desc->flags = IAX_AECS_GEN_FLAG;
473 desc->opcode = IAX_OPCODE_DECOMPRESS;
474
475 desc->src1_addr = (u64)src_addr;
476 desc->src1_size = mode->header_table_size;
477
478 src2_addr = device_mode->aecs_decomp_table_dma_addr;
479 desc->src2_addr = (u64)src2_addr;
480 desc->src2_size = 1088;
481 dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n",
482 __func__, mode->name, desc->src2_addr, dev, desc->src2_size);
483 desc->max_dst_size = 0; // suppressed output
484
485 desc->decompr_flags = mode->gen_decomp_table_flags;
486
487 desc->priv = 0;
488
489 desc->completion_addr = idxd_desc->compl_dma;
490
491 ret = idxd_submit_desc(wq, idxd_desc);
492 if (ret) {
493 pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret);
494 goto out;
495 }
496
497 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
498 if (ret)
499 dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n",
500 __func__, mode->name, ret);
501 else
502 dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__,
503 mode->name);
504out:
505 dma_unmap_single(dev, src_addr, mode->header_table_size, DMA_TO_DEVICE);
506
507 return ret;
508}
509
510static int init_device_compression_mode(struct iaa_device *iaa_device,
511 struct iaa_compression_mode *mode,
512 int idx, struct idxd_wq *wq)
513{
514 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
515 struct device *dev = &iaa_device->idxd->pdev->dev;
516 struct iaa_device_compression_mode *device_mode;
517 int ret = -ENOMEM;
518
519 device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
520 if (!device_mode)
521 return -ENOMEM;
522
523 device_mode->name = kstrdup(mode->name, GFP_KERNEL);
524 if (!device_mode->name)
525 goto free;
526
527 device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
528 &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
529 if (!device_mode->aecs_comp_table)
530 goto free;
531
532 device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size,
533 &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL);
534 if (!device_mode->aecs_decomp_table)
535 goto free;
536
537 /* Add Huffman table to aecs */
538 memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
539 memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
540 memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
541
542 if (mode->header_table) {
543 ret = decompress_header(device_mode, mode, wq);
544 if (ret) {
545 pr_debug("iaa header decompression failed: ret=%d\n", ret);
546 goto free;
547 }
548 }
549
550 if (mode->init) {
551 ret = mode->init(device_mode);
552 if (ret)
553 goto free;
554 }
555
556 /* mode index should match iaa_compression_modes idx */
557 iaa_device->compression_modes[idx] = device_mode;
558
559 pr_debug("IAA %s compression mode initialized for iaa device %d\n",
560 mode->name, iaa_device->idxd->id);
561
562 ret = 0;
563out:
564 return ret;
565free:
566 pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
567 mode->name, iaa_device->idxd->id);
568
569 free_device_compression_mode(iaa_device, device_mode);
570 goto out;
571}
572
573static int init_device_compression_modes(struct iaa_device *iaa_device,
574 struct idxd_wq *wq)
575{
576 struct iaa_compression_mode *mode;
577 int i, ret = 0;
578
579 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
580 mode = iaa_compression_modes[i];
581 if (!mode)
582 continue;
583
584 ret = init_device_compression_mode(iaa_device, mode, i, wq);
585 if (ret)
586 break;
587 }
588
589 return ret;
590}
591
592static void remove_device_compression_modes(struct iaa_device *iaa_device)
593{
594 struct iaa_device_compression_mode *device_mode;
595 int i;
596
597 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
598 device_mode = iaa_device->compression_modes[i];
599 if (!device_mode)
600 continue;
601
602 if (iaa_compression_modes[i]->free)
603 iaa_compression_modes[i]->free(device_mode);
604 free_device_compression_mode(iaa_device, device_mode);
605 iaa_device->compression_modes[i] = NULL;
606 }
607}
608
609static struct iaa_device *iaa_device_alloc(void)
610{
611 struct iaa_device *iaa_device;
612
613 iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
614 if (!iaa_device)
615 return NULL;
616
617 INIT_LIST_HEAD(&iaa_device->wqs);
618
619 return iaa_device;
620}
621
622static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
623{
624 struct iaa_wq *iaa_wq;
625
626 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
627 if (iaa_wq->wq == wq)
628 return true;
629 }
630
631 return false;
632}
633
634static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
635{
636 struct iaa_device *iaa_device;
637
638 iaa_device = iaa_device_alloc();
639 if (!iaa_device)
640 return NULL;
641
642 iaa_device->idxd = idxd;
643
644 list_add_tail(&iaa_device->list, &iaa_devices);
645
646 nr_iaa++;
647
648 return iaa_device;
649}
650
651static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
652{
653 int ret = 0;
654
655 ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
656 if (ret)
657 return ret;
658
659 return ret;
660}
661
662static void del_iaa_device(struct iaa_device *iaa_device)
663{
664 list_del(&iaa_device->list);
665
666 nr_iaa--;
667}
668
669static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
670 struct iaa_wq **new_wq)
671{
672 struct idxd_device *idxd = iaa_device->idxd;
673 struct pci_dev *pdev = idxd->pdev;
674 struct device *dev = &pdev->dev;
675 struct iaa_wq *iaa_wq;
676
677 iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
678 if (!iaa_wq)
679 return -ENOMEM;
680
681 iaa_wq->wq = wq;
682 iaa_wq->iaa_device = iaa_device;
683 idxd_wq_set_private(wq, iaa_wq);
684
685 list_add_tail(&iaa_wq->list, &iaa_device->wqs);
686
687 iaa_device->n_wq++;
688
689 if (new_wq)
690 *new_wq = iaa_wq;
691
692 dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
693 wq->id, iaa_device->idxd->id, iaa_device->n_wq);
694
695 return 0;
696}
697
698static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
699{
700 struct idxd_device *idxd = iaa_device->idxd;
701 struct pci_dev *pdev = idxd->pdev;
702 struct device *dev = &pdev->dev;
703 struct iaa_wq *iaa_wq;
704
705 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
706 if (iaa_wq->wq == wq) {
707 list_del(&iaa_wq->list);
708 iaa_device->n_wq--;
709
710 dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
711 wq->id, iaa_device->idxd->id,
712 iaa_device->n_wq, nr_iaa);
713
714 if (iaa_device->n_wq == 0)
715 del_iaa_device(iaa_device);
716 break;
717 }
718 }
719}
720
721static void clear_wq_table(void)
722{
723 int cpu;
724
725 for (cpu = 0; cpu < nr_cpus; cpu++)
726 wq_table_clear_entry(cpu);
727
728 pr_debug("cleared wq table\n");
729}
730
731static void free_iaa_device(struct iaa_device *iaa_device)
732{
733 if (!iaa_device)
734 return;
735
736 remove_device_compression_modes(iaa_device);
737 kfree(iaa_device);
738}
739
740static void __free_iaa_wq(struct iaa_wq *iaa_wq)
741{
742 struct iaa_device *iaa_device;
743
744 if (!iaa_wq)
745 return;
746
747 iaa_device = iaa_wq->iaa_device;
748 if (iaa_device->n_wq == 0)
749 free_iaa_device(iaa_wq->iaa_device);
750}
751
752static void free_iaa_wq(struct iaa_wq *iaa_wq)
753{
754 struct idxd_wq *wq;
755
756 __free_iaa_wq(iaa_wq);
757
758 wq = iaa_wq->wq;
759
760 kfree(iaa_wq);
761 idxd_wq_set_private(wq, NULL);
762}
763
764static int iaa_wq_get(struct idxd_wq *wq)
765{
766 struct idxd_device *idxd = wq->idxd;
767 struct iaa_wq *iaa_wq;
768 int ret = 0;
769
770 spin_lock(&idxd->dev_lock);
771 iaa_wq = idxd_wq_get_private(wq);
772 if (iaa_wq && !iaa_wq->remove) {
773 iaa_wq->ref++;
774 idxd_wq_get(wq);
775 } else {
776 ret = -ENODEV;
777 }
778 spin_unlock(&idxd->dev_lock);
779
780 return ret;
781}
782
783static int iaa_wq_put(struct idxd_wq *wq)
784{
785 struct idxd_device *idxd = wq->idxd;
786 struct iaa_wq *iaa_wq;
787 bool free = false;
788 int ret = 0;
789
790 spin_lock(&idxd->dev_lock);
791 iaa_wq = idxd_wq_get_private(wq);
792 if (iaa_wq) {
793 iaa_wq->ref--;
794 if (iaa_wq->ref == 0 && iaa_wq->remove) {
795 idxd_wq_set_private(wq, NULL);
796 free = true;
797 }
798 idxd_wq_put(wq);
799 } else {
800 ret = -ENODEV;
801 }
802 spin_unlock(&idxd->dev_lock);
803 if (free) {
804 __free_iaa_wq(iaa_wq);
805 kfree(iaa_wq);
806 }
807
808 return ret;
809}
810
811static void free_wq_table(void)
812{
813 int cpu;
814
815 for (cpu = 0; cpu < nr_cpus; cpu++)
816 wq_table_free_entry(cpu);
817
818 free_percpu(wq_table);
819
820 pr_debug("freed wq table\n");
821}
822
823static int alloc_wq_table(int max_wqs)
824{
825 struct wq_table_entry *entry;
826 int cpu;
827
828 wq_table = alloc_percpu(struct wq_table_entry);
829 if (!wq_table)
830 return -ENOMEM;
831
832 for (cpu = 0; cpu < nr_cpus; cpu++) {
833 entry = per_cpu_ptr(wq_table, cpu);
834 entry->wqs = kcalloc(max_wqs, sizeof(struct idxd_wq *), GFP_KERNEL);
835 if (!entry->wqs) {
836 free_wq_table();
837 return -ENOMEM;
838 }
839
840 entry->max_wqs = max_wqs;
841 }
842
843 pr_debug("initialized wq table\n");
844
845 return 0;
846}
847
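/*
 * Associate an idxd wq with its iaa_device, creating and initializing
 * a new iaa_device if this is the first wq seen for that idxd device,
 * then recompute how many cpus each IAA instance serves.
 */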
848static int save_iaa_wq(struct idxd_wq *wq)
849{
850 struct iaa_device *iaa_device, *found = NULL;
851 struct idxd_device *idxd;
852 struct pci_dev *pdev;
853 struct device *dev;
854 int ret = 0;
855
856 list_for_each_entry(iaa_device, &iaa_devices, list) {
857 if (iaa_device->idxd == wq->idxd) {
858 idxd = iaa_device->idxd;
859 pdev = idxd->pdev;
860 dev = &pdev->dev;
861 /*
862 * Check to see that we don't already have this wq.
863 * Shouldn't happen but we don't control probing.
864 */
865 if (iaa_has_wq(iaa_device, wq)) {
866 dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
867 iaa_device);
868 goto out;
869 }
870
871 found = iaa_device;
872
873 ret = add_iaa_wq(iaa_device, wq, NULL);
874 if (ret)
875 goto out;
876
877 break;
878 }
879 }
880
881 if (!found) {
882 struct iaa_device *new_device;
883 struct iaa_wq *new_wq;
884
885 new_device = add_iaa_device(wq->idxd);
886 if (!new_device) {
887 ret = -ENOMEM;
888 goto out;
889 }
890
891 ret = add_iaa_wq(new_device, wq, &new_wq);
892 if (ret) {
893 del_iaa_device(new_device);
894 free_iaa_device(new_device);
895 goto out;
896 }
897
898 ret = init_iaa_device(new_device, new_wq);
899 if (ret) {
900 del_iaa_wq(new_device, new_wq->wq);
901 del_iaa_device(new_device);
902 free_iaa_wq(new_wq);
903 goto out;
904 }
905 }
906
907 if (WARN_ON(nr_iaa == 0))
908 return -EINVAL;
909
910 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
911out:
912 return ret;
913}
914
915static void remove_iaa_wq(struct idxd_wq *wq)
916{
917 struct iaa_device *iaa_device;
918
919 list_for_each_entry(iaa_device, &iaa_devices, list) {
920 if (iaa_has_wq(iaa_device, wq)) {
921 del_iaa_wq(iaa_device, wq);
922 break;
923 }
924 }
925
926 if (nr_iaa)
927 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
928 else
929 cpus_per_iaa = 0;
930}
931
932static int wq_table_add_wqs(int iaa, int cpu)
933{
934 struct iaa_device *iaa_device, *found_device = NULL;
935 int ret = 0, cur_iaa = 0, n_wqs_added = 0;
936 struct idxd_device *idxd;
937 struct iaa_wq *iaa_wq;
938 struct pci_dev *pdev;
939 struct device *dev;
940
941 list_for_each_entry(iaa_device, &iaa_devices, list) {
942 idxd = iaa_device->idxd;
943 pdev = idxd->pdev;
944 dev = &pdev->dev;
945
946 if (cur_iaa != iaa) {
947 cur_iaa++;
948 continue;
949 }
950
951 found_device = iaa_device;
952 dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
953 found_device->idxd->id, cur_iaa);
954 break;
955 }
956
957 if (!found_device) {
958 found_device = list_first_entry_or_null(&iaa_devices,
959 struct iaa_device, list);
960 if (!found_device) {
961 pr_debug("couldn't find any iaa devices with wqs!\n");
962 ret = -EINVAL;
963 goto out;
964 }
965 cur_iaa = 0;
966
967 idxd = found_device->idxd;
968 pdev = idxd->pdev;
969 dev = &pdev->dev;
970 dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
971 found_device->idxd->id, cur_iaa);
972 }
973
974 list_for_each_entry(iaa_wq, &found_device->wqs, list) {
975 wq_table_add(cpu, iaa_wq->wq);
976 pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
977 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
978 n_wqs_added++;
979 }
980
981 if (!n_wqs_added) {
982 pr_debug("couldn't find any iaa wqs!\n");
983 ret = -EINVAL;
984 goto out;
985 }
986out:
987 return ret;
988}
989
990/*
991 * Rebalance the wq table so that given a cpu, it's easy to find the
992 * closest IAA instance. The idea is to try to choose the most
993 * appropriate IAA instance for a caller and spread available
994 * workqueues around to clients.
995 */
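/*
 * For example (hypothetical topology): with 2 nodes, 64 cpus per node
 * and 8 IAA instances, cpus_per_iaa = (2 * 64) / 8 = 16, so within each
 * node every block of 16 consecutive cpus shares one IAA instance.
 */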
996static void rebalance_wq_table(void)
997{
998 const struct cpumask *node_cpus;
999 int node, cpu, iaa = -1;
1000
1001 if (nr_iaa == 0)
1002 return;
1003
1004 pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
1005 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
1006
1007 clear_wq_table();
1008
1009 if (nr_iaa == 1) {
1010 for (cpu = 0; cpu < nr_cpus; cpu++) {
1011 if (WARN_ON(wq_table_add_wqs(0, cpu))) {
1012 pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
1013 return;
1014 }
1015 }
1016
1017 return;
1018 }
1019
1020 for_each_node_with_cpus(node) {
1021 node_cpus = cpumask_of_node(node);
1022
1023 for (cpu = 0; cpu < nr_cpus_per_node; cpu++) {
1024 int node_cpu = cpumask_nth(cpu, node_cpus);
1025
1026 if (WARN_ON(node_cpu >= nr_cpu_ids)) {
1027 pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
1028 return;
1029 }
1030
1031 if ((cpu % cpus_per_iaa) == 0)
1032 iaa++;
1033
1034 if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
1035 pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
1036 return;
1037 }
1038 }
1039 }
1040}
1041
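/*
 * Poll the hardware completion record for a submitted descriptor. If
 * only_once is set, return -EAGAIN rather than spinning when it isn't
 * done yet; otherwise translate hardware status/error codes into
 * errnos and update the corresponding error statistics.
 */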
1042static inline int check_completion(struct device *dev,
1043 struct iax_completion_record *comp,
1044 bool compress,
1045 bool only_once)
1046{
1047 char *op_str = compress ? "compress" : "decompress";
1048 int ret = 0;
1049
1050 while (!comp->status) {
1051 if (only_once)
1052 return -EAGAIN;
1053 cpu_relax();
1054 }
1055
1056 if (comp->status != IAX_COMP_SUCCESS) {
1057 if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
1058 ret = -ETIMEDOUT;
1059 dev_dbg(dev, "%s timed out, size=0x%x\n",
1060 op_str, comp->output_size);
1061 update_completion_timeout_errs();
1062 goto out;
1063 }
1064
1065 if (comp->status == IAA_ANALYTICS_ERROR &&
1066 comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
1067 ret = -E2BIG;
1068 dev_dbg(dev, "compressed > uncompressed size,"
1069 " not compressing, size=0x%x\n",
1070 comp->output_size);
1071 update_completion_comp_buf_overflow_errs();
1072 goto out;
1073 }
1074
1075 if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
1076 ret = -EOVERFLOW;
1077 goto out;
1078 }
1079
1080 ret = -EINVAL;
1081 dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
1082 op_str, comp->status, comp->error_code, comp->output_size);
1083 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
1084 update_completion_einval_errs();
1085
1086 goto out;
1087 }
1088out:
1089 return ret;
1090}
1091
1092static int deflate_generic_decompress(struct acomp_req *req)
1093{
1094 void *src, *dst;
1095 int ret;
1096
1097 src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1098 dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1099
1100 ret = crypto_comp_decompress(deflate_generic_tfm,
1101 src, req->slen, dst, &req->dlen);
1102
1103 kunmap_local(src);
1104 kunmap_local(dst);
1105
1106 update_total_sw_decomp_calls();
1107
1108 return ret;
1109}
1110
1111static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1112 struct acomp_req *req,
1113 dma_addr_t *src_addr, dma_addr_t *dst_addr);
1114
1115static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1116 struct idxd_wq *wq,
1117 dma_addr_t src_addr, unsigned int slen,
1118 dma_addr_t dst_addr, unsigned int *dlen,
1119 u32 compression_crc);
1120
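/*
 * Completion callback invoked by the idxd sub-driver when an
 * asynchronously submitted descriptor completes: check the completion
 * record, fall back to software deflate decompression on analytics
 * errors, optionally verify compression results, then unmap the
 * buffers and complete the acomp request.
 */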
1121static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1122 enum idxd_complete_type comp_type,
1123 bool free_desc, void *__ctx,
1124 u32 *status)
1125{
1126 struct iaa_device_compression_mode *active_compression_mode;
1127 struct iaa_compression_ctx *compression_ctx;
1128 struct crypto_ctx *ctx = __ctx;
1129 struct iaa_device *iaa_device;
1130 struct idxd_device *idxd;
1131 struct iaa_wq *iaa_wq;
1132 struct pci_dev *pdev;
1133 struct device *dev;
1134 int ret, err = 0;
1135
1136 compression_ctx = crypto_tfm_ctx(ctx->tfm);
1137
1138 iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1139 iaa_device = iaa_wq->iaa_device;
1140 idxd = iaa_device->idxd;
1141 pdev = idxd->pdev;
1142 dev = &pdev->dev;
1143
1144 active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1145 compression_ctx->mode);
1146 dev_dbg(dev, "%s: compression mode %s,"
1147 " ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1148 active_compression_mode->name,
1149 ctx->src_addr, ctx->dst_addr);
1150
1151 ret = check_completion(dev, idxd_desc->iax_completion,
1152 ctx->compress, false);
1153 if (ret) {
1154 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1155 if (!ctx->compress &&
1156 idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1157 pr_warn("%s: falling back to deflate-generic decompress, "
1158 "analytics error code %x\n", __func__,
1159 idxd_desc->iax_completion->error_code);
1160 ret = deflate_generic_decompress(ctx->req);
1161 if (ret) {
1162 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1163 __func__, ret);
1164 err = -EIO;
1165 goto err;
1166 }
1167 } else {
1168 err = -EIO;
1169 goto err;
1170 }
1171 } else {
1172 ctx->req->dlen = idxd_desc->iax_completion->output_size;
1173 }
1174
1175 /* Update stats */
1176 if (ctx->compress) {
1177 update_total_comp_bytes_out(ctx->req->dlen);
1178 update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1179 } else {
1180 update_total_decomp_bytes_in(ctx->req->dlen);
1181 update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen);
1182 }
1183
1184 if (ctx->compress && compression_ctx->verify_compress) {
1185 dma_addr_t src_addr, dst_addr;
1186 u32 compression_crc;
1187
1188 compression_crc = idxd_desc->iax_completion->crc;
1189
1190 ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1191 if (ret) {
1192 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1193 err = -EIO;
1194 goto out;
1195 }
1196
1197 ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1198 ctx->req->slen, dst_addr, &ctx->req->dlen,
1199 compression_crc);
1200 if (ret) {
1201 dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1202 err = -EIO;
1203 }
1204
1205 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1206 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1207
1208 goto out;
1209 }
1210err:
1211 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1212 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1213out:
1214 if (ret != 0)
1215 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1216
1217 if (ctx->req->base.complete)
1218 acomp_request_complete(ctx->req, err);
1219
1220 if (free_desc)
1221 idxd_free_desc(idxd_desc->wq, idxd_desc);
1222 iaa_wq_put(idxd_desc->wq);
1223}
1224
1225static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
1226 struct idxd_wq *wq,
1227 dma_addr_t src_addr, unsigned int slen,
1228 dma_addr_t dst_addr, unsigned int *dlen,
1229 u32 *compression_crc,
1230 bool disable_async)
1231{
1232 struct iaa_device_compression_mode *active_compression_mode;
1233 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1234 struct iaa_device *iaa_device;
1235 struct idxd_desc *idxd_desc;
1236 struct iax_hw_desc *desc;
1237 struct idxd_device *idxd;
1238 struct iaa_wq *iaa_wq;
1239 struct pci_dev *pdev;
1240 struct device *dev;
1241 int ret = 0;
1242
1243 iaa_wq = idxd_wq_get_private(wq);
1244 iaa_device = iaa_wq->iaa_device;
1245 idxd = iaa_device->idxd;
1246 pdev = idxd->pdev;
1247 dev = &pdev->dev;
1248
1249 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1250
1251 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1252 if (IS_ERR(idxd_desc)) {
1253 dev_dbg(dev, "idxd descriptor allocation failed\n");
1254 dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1255 return PTR_ERR(idxd_desc);
1256 }
1257 desc = idxd_desc->iax_hw;
1258
1259 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1260 IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1261 desc->opcode = IAX_OPCODE_COMPRESS;
1262 desc->compr_flags = IAA_COMP_FLAGS;
1263 desc->priv = 0;
1264
1265 desc->src1_addr = (u64)src_addr;
1266 desc->src1_size = slen;
1267 desc->dst_addr = (u64)dst_addr;
1268 desc->max_dst_size = *dlen;
1269 desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1270 desc->src2_size = sizeof(struct aecs_comp_table_record);
1271 desc->completion_addr = idxd_desc->compl_dma;
1272
1273 if (ctx->use_irq && !disable_async) {
1274 desc->flags |= IDXD_OP_FLAG_RCI;
1275
1276 idxd_desc->crypto.req = req;
1277 idxd_desc->crypto.tfm = tfm;
1278 idxd_desc->crypto.src_addr = src_addr;
1279 idxd_desc->crypto.dst_addr = dst_addr;
1280 idxd_desc->crypto.compress = true;
1281
1282 dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1283 " src_addr %llx, dst_addr %llx\n", __func__,
1284 active_compression_mode->name,
1285 src_addr, dst_addr);
1286 } else if (ctx->async_mode && !disable_async)
1287 req->base.data = idxd_desc;
1288
1289 dev_dbg(dev, "%s: compression mode %s,"
1290 " desc->src1_addr %llx, desc->src1_size %d,"
1291 " desc->dst_addr %llx, desc->max_dst_size %d,"
1292 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1293 active_compression_mode->name,
1294 desc->src1_addr, desc->src1_size, desc->dst_addr,
1295 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1296
1297 ret = idxd_submit_desc(wq, idxd_desc);
1298 if (ret) {
1299 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1300 goto err;
1301 }
1302
1303 /* Update stats */
1304 update_total_comp_calls();
1305 update_wq_comp_calls(wq);
1306
1307 if (ctx->async_mode && !disable_async) {
1308 ret = -EINPROGRESS;
1309 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1310 goto out;
1311 }
1312
1313 ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1314 if (ret) {
1315 dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1316 goto err;
1317 }
1318
1319 *dlen = idxd_desc->iax_completion->output_size;
1320
1321 /* Update stats */
1322 update_total_comp_bytes_out(*dlen);
1323 update_wq_comp_bytes(wq, *dlen);
1324
1325 *compression_crc = idxd_desc->iax_completion->crc;
1326
1327 if (!ctx->async_mode)
1328 idxd_free_desc(wq, idxd_desc);
1329out:
1330 return ret;
1331err:
1332 idxd_free_desc(wq, idxd_desc);
1333 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1334
1335 goto out;
1336}
1337
1338static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1339 struct acomp_req *req,
1340 dma_addr_t *src_addr, dma_addr_t *dst_addr)
1341{
1342 int ret = 0;
1343 int nr_sgs;
1344
1345 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1346 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1347
1348 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1349 if (nr_sgs <= 0 || nr_sgs > 1) {
1350 dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1351 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1352 iaa_wq->wq->id, ret);
1353 ret = -EIO;
1354 goto out;
1355 }
1356 *src_addr = sg_dma_address(req->src);
1357 dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1358 " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1359 req->src, req->slen, sg_dma_len(req->src));
1360
1361 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1362 if (nr_sgs <= 0 || nr_sgs > 1) {
1363 dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1364 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1365 iaa_wq->wq->id, ret);
1366 ret = -EIO;
1367 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1368 goto out;
1369 }
1370 *dst_addr = sg_dma_address(req->dst);
1371 dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1372 " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1373 req->dst, req->dlen, sg_dma_len(req->dst));
1374out:
1375 return ret;
1376}
1377
1378static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1379 struct idxd_wq *wq,
1380 dma_addr_t src_addr, unsigned int slen,
1381 dma_addr_t dst_addr, unsigned int *dlen,
1382 u32 compression_crc)
1383{
1384 struct iaa_device_compression_mode *active_compression_mode;
1385 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1386 struct iaa_device *iaa_device;
1387 struct idxd_desc *idxd_desc;
1388 struct iax_hw_desc *desc;
1389 struct idxd_device *idxd;
1390 struct iaa_wq *iaa_wq;
1391 struct pci_dev *pdev;
1392 struct device *dev;
1393 int ret = 0;
1394
1395 iaa_wq = idxd_wq_get_private(wq);
1396 iaa_device = iaa_wq->iaa_device;
1397 idxd = iaa_device->idxd;
1398 pdev = idxd->pdev;
1399 dev = &pdev->dev;
1400
1401 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1402
1403 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1404 if (IS_ERR(idxd_desc)) {
1405 dev_dbg(dev, "idxd descriptor allocation failed\n");
1406 dev_dbg(dev, "iaa compress failed: ret=%ld\n",
1407 PTR_ERR(idxd_desc));
1408 return PTR_ERR(idxd_desc);
1409 }
1410 desc = idxd_desc->iax_hw;
1411
1412 /* Verify (optional) - decompress and check crc, suppress dest write */
1413
1414 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1415 desc->opcode = IAX_OPCODE_DECOMPRESS;
1416 desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1417 desc->priv = 0;
1418
1419 desc->src1_addr = (u64)dst_addr;
1420 desc->src1_size = *dlen;
1421 desc->dst_addr = (u64)src_addr;
1422 desc->max_dst_size = slen;
1423 desc->completion_addr = idxd_desc->compl_dma;
1424
1425 dev_dbg(dev, "(verify) compression mode %s,"
1426 " desc->src1_addr %llx, desc->src1_size %d,"
1427 " desc->dst_addr %llx, desc->max_dst_size %d,"
1428 " desc->src2_addr %llx, desc->src2_size %d\n",
1429 active_compression_mode->name,
1430 desc->src1_addr, desc->src1_size, desc->dst_addr,
1431 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1432
1433 ret = idxd_submit_desc(wq, idxd_desc);
1434 if (ret) {
1435 dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1436 goto err;
1437 }
1438
1439 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1440 if (ret) {
1441 dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1442 goto err;
1443 }
1444
1445 if (compression_crc != idxd_desc->iax_completion->crc) {
1446 ret = -EINVAL;
1447 dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1448 " comp=0x%x, decomp=0x%x\n", compression_crc,
1449 idxd_desc->iax_completion->crc);
1450 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1451 8, 1, idxd_desc->iax_completion, 64, 0);
1452 goto err;
1453 }
1454
1455 idxd_free_desc(wq, idxd_desc);
1456out:
1457 return ret;
1458err:
1459 idxd_free_desc(wq, idxd_desc);
1460 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1461
1462 goto out;
1463}
1464
1465static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1466 struct idxd_wq *wq,
1467 dma_addr_t src_addr, unsigned int slen,
1468 dma_addr_t dst_addr, unsigned int *dlen,
1469 bool disable_async)
1470{
1471 struct iaa_device_compression_mode *active_compression_mode;
1472 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1473 struct iaa_device *iaa_device;
1474 struct idxd_desc *idxd_desc;
1475 struct iax_hw_desc *desc;
1476 struct idxd_device *idxd;
1477 struct iaa_wq *iaa_wq;
1478 struct pci_dev *pdev;
1479 struct device *dev;
1480 int ret = 0;
1481
1482 iaa_wq = idxd_wq_get_private(wq);
1483 iaa_device = iaa_wq->iaa_device;
1484 idxd = iaa_device->idxd;
1485 pdev = idxd->pdev;
1486 dev = &pdev->dev;
1487
1488 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1489
1490 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1491 if (IS_ERR(idxd_desc)) {
1492 dev_dbg(dev, "idxd descriptor allocation failed\n");
1493 dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1494 PTR_ERR(idxd_desc));
1495 return PTR_ERR(idxd_desc);
1496 }
1497 desc = idxd_desc->iax_hw;
1498
1499 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1500 desc->opcode = IAX_OPCODE_DECOMPRESS;
1501 desc->max_dst_size = PAGE_SIZE;
1502 desc->decompr_flags = IAA_DECOMP_FLAGS;
1503 desc->priv = 0;
1504
1505 desc->src1_addr = (u64)src_addr;
1506 desc->dst_addr = (u64)dst_addr;
1507 desc->max_dst_size = *dlen;
1508 desc->src1_size = slen;
1509 desc->completion_addr = idxd_desc->compl_dma;
1510
1511 if (ctx->use_irq && !disable_async) {
1512 desc->flags |= IDXD_OP_FLAG_RCI;
1513
1514 idxd_desc->crypto.req = req;
1515 idxd_desc->crypto.tfm = tfm;
1516 idxd_desc->crypto.src_addr = src_addr;
1517 idxd_desc->crypto.dst_addr = dst_addr;
1518 idxd_desc->crypto.compress = false;
1519
1520 dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1521 " src_addr %llx, dst_addr %llx\n", __func__,
1522 active_compression_mode->name,
1523 src_addr, dst_addr);
1524 } else if (ctx->async_mode && !disable_async)
1525 req->base.data = idxd_desc;
1526
1527 dev_dbg(dev, "%s: decompression mode %s,"
1528 " desc->src1_addr %llx, desc->src1_size %d,"
1529 " desc->dst_addr %llx, desc->max_dst_size %d,"
1530 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1531 active_compression_mode->name,
1532 desc->src1_addr, desc->src1_size, desc->dst_addr,
1533 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1534
1535 ret = idxd_submit_desc(wq, idxd_desc);
1536 if (ret) {
1537 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1538 goto err;
1539 }
1540
1541 /* Update stats */
1542 update_total_decomp_calls();
1543 update_wq_decomp_calls(wq);
1544
1545 if (ctx->async_mode && !disable_async) {
1546 ret = -EINPROGRESS;
1547 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1548 goto out;
1549 }
1550
1551 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1552 if (ret) {
1553 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1554 if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1555 pr_warn("%s: falling back to deflate-generic decompress, "
1556 "analytics error code %x\n", __func__,
1557 idxd_desc->iax_completion->error_code);
1558 ret = deflate_generic_decompress(req);
1559 if (ret) {
1560 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1561 __func__, ret);
1562 goto err;
1563 }
1564 } else {
1565 goto err;
1566 }
1567 } else {
1568 req->dlen = idxd_desc->iax_completion->output_size;
1569 }
1570
1571 *dlen = req->dlen;
1572
1573 if (!ctx->async_mode)
1574 idxd_free_desc(wq, idxd_desc);
1575
1576 /* Update stats */
1577 update_total_decomp_bytes_in(slen);
1578 update_wq_decomp_bytes(wq, slen);
1579out:
1580 return ret;
1581err:
1582 idxd_free_desc(wq, idxd_desc);
1583 dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1584
1585 goto out;
1586}
1587
1588static int iaa_comp_acompress(struct acomp_req *req)
1589{
1590 struct iaa_compression_ctx *compression_ctx;
1591 struct crypto_tfm *tfm = req->base.tfm;
1592 dma_addr_t src_addr, dst_addr;
1593 bool disable_async = false;
1594 int nr_sgs, cpu, ret = 0;
1595 struct iaa_wq *iaa_wq;
1596 u32 compression_crc;
1597 struct idxd_wq *wq;
1598 struct device *dev;
1599 int order = -1;
1600
1601 compression_ctx = crypto_tfm_ctx(tfm);
1602
1603 if (!iaa_crypto_enabled) {
1604 pr_debug("iaa_crypto disabled, not compressing\n");
1605 return -ENODEV;
1606 }
1607
1608 if (!req->src || !req->slen) {
1609 pr_debug("invalid src, not compressing\n");
1610 return -EINVAL;
1611 }
1612
1613 cpu = get_cpu();
1614 wq = wq_table_next_wq(cpu);
1615 put_cpu();
1616 if (!wq) {
1617 pr_debug("no wq configured for cpu=%d\n", cpu);
1618 return -ENODEV;
1619 }
1620
1621 ret = iaa_wq_get(wq);
1622 if (ret) {
1623 pr_debug("no wq available for cpu=%d\n", cpu);
1624 return -ENODEV;
1625 }
1626
1627 iaa_wq = idxd_wq_get_private(wq);
1628
1629 if (!req->dst) {
1630 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1631
1632 /* incompressible data will always be < 2 * slen */
1633 req->dlen = 2 * req->slen;
1634 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1635 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1636 if (!req->dst) {
1637 ret = -ENOMEM;
1638 order = -1;
1639 goto out;
1640 }
1641 disable_async = true;
1642 }
1643
1644 dev = &wq->idxd->pdev->dev;
1645
1646 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1647 if (nr_sgs <= 0 || nr_sgs > 1) {
1648 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1649 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1650 iaa_wq->wq->id, ret);
1651 ret = -EIO;
1652 goto out;
1653 }
1654 src_addr = sg_dma_address(req->src);
1655 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1656 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1657 req->src, req->slen, sg_dma_len(req->src));
1658
1659 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1660 if (nr_sgs <= 0 || nr_sgs > 1) {
1661 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1662 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1663 iaa_wq->wq->id, ret);
1664 ret = -EIO;
1665 goto err_map_dst;
1666 }
1667 dst_addr = sg_dma_address(req->dst);
1668 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1669 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1670 req->dst, req->dlen, sg_dma_len(req->dst));
1671
1672 ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1673 &req->dlen, &compression_crc, disable_async);
1674 if (ret == -EINPROGRESS)
1675 return ret;
1676
1677 if (!ret && compression_ctx->verify_compress) {
1678 ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1679 if (ret) {
1680 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1681 goto out;
1682 }
1683
1684 ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1685 dst_addr, &req->dlen, compression_crc);
1686 if (ret)
1687 dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1688
1689 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1690 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1691
1692 goto out;
1693 }
1694
1695 if (ret)
1696 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1697
1698 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1699err_map_dst:
1700 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1701out:
1702 iaa_wq_put(wq);
1703
1704 if (order >= 0)
1705 sgl_free_order(req->dst, order);
1706
1707 return ret;
1708}
1709
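/*
 * Decompress when the caller didn't supply a destination: allocate a
 * destination sgl starting at 4 * slen (roughly the average compression
 * ratio) and double it on -EOVERFLOW until the output fits or
 * CRYPTO_ACOMP_DST_MAX is exceeded.
 */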
1710static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1711{
1712 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1713 GFP_KERNEL : GFP_ATOMIC;
1714 struct crypto_tfm *tfm = req->base.tfm;
1715 dma_addr_t src_addr, dst_addr;
1716 int nr_sgs, cpu, ret = 0;
1717 struct iaa_wq *iaa_wq;
1718 struct device *dev;
1719 struct idxd_wq *wq;
1720 int order = -1;
1721
1722 cpu = get_cpu();
1723 wq = wq_table_next_wq(cpu);
1724 put_cpu();
1725 if (!wq) {
1726 pr_debug("no wq configured for cpu=%d\n", cpu);
1727 return -ENODEV;
1728 }
1729
1730 ret = iaa_wq_get(wq);
1731 if (ret) {
1732 pr_debug("no wq available for cpu=%d\n", cpu);
1733 return -ENODEV;
1734 }
1735
1736 iaa_wq = idxd_wq_get_private(wq);
1737
1738 dev = &wq->idxd->pdev->dev;
1739
1740 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1741 if (nr_sgs <= 0 || nr_sgs > 1) {
1742 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1743 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1744 iaa_wq->wq->id, ret);
1745 ret = -EIO;
1746 goto out;
1747 }
1748 src_addr = sg_dma_address(req->src);
1749 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1750 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1751 req->src, req->slen, sg_dma_len(req->src));
1752
1753 req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
1754alloc_dest:
1755 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1756 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1757 if (!req->dst) {
1758 ret = -ENOMEM;
1759 order = -1;
1760 goto out;
1761 }
1762
1763 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1764 if (nr_sgs <= 0 || nr_sgs > 1) {
1765 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1766 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1767 iaa_wq->wq->id, ret);
1768 ret = -EIO;
1769 goto err_map_dst;
1770 }
1771
1772 dst_addr = sg_dma_address(req->dst);
1773 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1774 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1775 req->dst, req->dlen, sg_dma_len(req->dst));
1776 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1777 dst_addr, &req->dlen, true);
1778 if (ret == -EOVERFLOW) {
1779 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1780 req->dlen *= 2;
1781 if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1782 goto err_map_dst;
1783 goto alloc_dest;
1784 }
1785
1786 if (ret != 0)
1787 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1788
1789 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1790err_map_dst:
1791 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1792out:
1793 iaa_wq_put(wq);
1794
1795 if (order >= 0)
1796 sgl_free_order(req->dst, order);
1797
1798 return ret;
1799}
1800
1801static int iaa_comp_adecompress(struct acomp_req *req)
1802{
1803 struct crypto_tfm *tfm = req->base.tfm;
1804 dma_addr_t src_addr, dst_addr;
1805 int nr_sgs, cpu, ret = 0;
1806 struct iaa_wq *iaa_wq;
1807 struct device *dev;
1808 struct idxd_wq *wq;
1809
1810 if (!iaa_crypto_enabled) {
1811 pr_debug("iaa_crypto disabled, not decompressing\n");
1812 return -ENODEV;
1813 }
1814
1815 if (!req->src || !req->slen) {
1816 pr_debug("invalid src, not decompressing\n");
1817 return -EINVAL;
1818 }
1819
1820 if (!req->dst)
1821 return iaa_comp_adecompress_alloc_dest(req);
1822
1823 cpu = get_cpu();
1824 wq = wq_table_next_wq(cpu);
1825 put_cpu();
1826 if (!wq) {
1827 pr_debug("no wq configured for cpu=%d\n", cpu);
1828 return -ENODEV;
1829 }
1830
1831 ret = iaa_wq_get(wq);
1832 if (ret) {
1833 pr_debug("no wq available for cpu=%d\n", cpu);
1834 return -ENODEV;
1835 }
1836
1837 iaa_wq = idxd_wq_get_private(wq);
1838
1839 dev = &wq->idxd->pdev->dev;
1840
1841 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1842 if (nr_sgs <= 0 || nr_sgs > 1) {
1843 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1844 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1845 iaa_wq->wq->id, ret);
1846 ret = -EIO;
1847 goto out;
1848 }
1849 src_addr = sg_dma_address(req->src);
1850 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1851 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1852 req->src, req->slen, sg_dma_len(req->src));
1853
1854 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1855 if (nr_sgs <= 0 || nr_sgs > 1) {
1856 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1857 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1858 iaa_wq->wq->id, ret);
1859 ret = -EIO;
1860 goto err_map_dst;
1861 }
1862 dst_addr = sg_dma_address(req->dst);
1863 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1864 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1865 req->dst, req->dlen, sg_dma_len(req->dst));
1866
1867 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1868 dst_addr, &req->dlen, false);
1869 if (ret == -EINPROGRESS)
1870 return ret;
1871
1872 if (ret != 0)
1873 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1874
1875 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1876err_map_dst:
1877 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1878out:
1879 iaa_wq_put(wq);
1880
1881 return ret;
1882}
1883
1884static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1885{
1886 ctx->verify_compress = iaa_verify_compress;
1887 ctx->async_mode = async_mode;
1888 ctx->use_irq = use_irq;
1889}
1890
1891static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1892{
1893 struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1894 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1895
1896 compression_ctx_init(ctx);
1897
1898 ctx->mode = IAA_MODE_FIXED;
1899
1900 return 0;
1901}
1902
1903static void dst_free(struct scatterlist *sgl)
1904{
1905 /*
1906 * Called for req->dst = NULL cases but we free elsewhere
1907 * using sgl_free_order().
1908 */
1909}
1910
1911static struct acomp_alg iaa_acomp_fixed_deflate = {
1912 .init = iaa_comp_init_fixed,
1913 .compress = iaa_comp_acompress,
1914 .decompress = iaa_comp_adecompress,
1915 .dst_free = dst_free,
1916 .base = {
1917 .cra_name = "deflate",
1918 .cra_driver_name = "deflate-iaa",
1919 .cra_ctxsize = sizeof(struct iaa_compression_ctx),
1920 .cra_module = THIS_MODULE,
1921 .cra_priority = IAA_ALG_PRIORITY,
1922 }
1923};
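/*
 * Illustrative sketch (not part of this driver) of how a kernel caller
 * might use the registered "deflate-iaa" algorithm through the acomp
 * API; src_sgl/dst_sgl/slen/dlen are placeholders and error handling
 * is omitted:
 *
 *	struct crypto_acomp *tfm = crypto_alloc_acomp("deflate-iaa", 0, 0);
 *	struct acomp_req *req = acomp_request_alloc(tfm);
 *
 *	acomp_request_set_params(req, src_sgl, dst_sgl, slen, dlen);
 *	ret = crypto_acomp_compress(req);
 *
 *	acomp_request_free(req);
 *	crypto_free_acomp(tfm);
 */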
1924
1925static int iaa_register_compression_device(void)
1926{
1927 int ret;
1928
1929 ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1930 if (ret) {
1931 pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1932 goto out;
1933 }
1934
1935 iaa_crypto_registered = true;
1936out:
1937 return ret;
1938}
1939
1940static int iaa_unregister_compression_device(void)
1941{
1942 if (iaa_crypto_registered)
1943 crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
1944
1945 return 0;
1946}
1947
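/*
 * idxd wq sub-driver probe: enable the wq, record it in the global IAA
 * device list, rebalance the per-cpu wq table, and register the
 * "deflate-iaa" acomp algorithm when the first wq comes online.
 */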
static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct idxd_driver_data *data = idxd->data;
	struct device *dev = &idxd_dev->conf_dev;
	bool first_wq = false;
	int ret = 0;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	if (data->type != IDXD_TYPE_IAX)
		return -ENODEV;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_get_private(wq)) {
		mutex_unlock(&wq->wq_lock);
		return -EBUSY;
	}

	if (!idxd_wq_driver_name_match(wq, dev)) {
		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
			idxd->id, wq->id, wq->driver_name, dev->driver->name);
		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
		ret = -ENODEV;
		goto err;
	}

	wq->type = IDXD_WQT_KERNEL;

	ret = idxd_drv_enable_wq(wq);
	if (ret < 0) {
		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
			idxd->id, wq->id, ret);
		ret = -ENXIO;
		goto err;
	}

	mutex_lock(&iaa_devices_lock);

	if (list_empty(&iaa_devices)) {
		ret = alloc_wq_table(wq->idxd->max_wqs);
		if (ret)
			goto err_alloc;
		first_wq = true;
	}

	ret = save_iaa_wq(wq);
	if (ret)
		goto err_save;

	rebalance_wq_table();

	if (first_wq) {
		iaa_crypto_enabled = true;
		ret = iaa_register_compression_device();
		if (ret != 0) {
			iaa_crypto_enabled = false;
			dev_dbg(dev, "IAA compression device registration failed\n");
			goto err_register;
		}
		try_module_get(THIS_MODULE);

		pr_info("iaa_crypto now ENABLED\n");
	}

	mutex_unlock(&iaa_devices_lock);
out:
	mutex_unlock(&wq->wq_lock);

	return ret;

err_register:
	remove_iaa_wq(wq);
	free_iaa_wq(idxd_wq_get_private(wq));
err_save:
	if (first_wq)
		free_wq_table();
err_alloc:
	mutex_unlock(&iaa_devices_lock);
	idxd_drv_disable_wq(wq);
err:
	wq->type = IDXD_WQT_NONE;

	goto out;
}

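/*
 * idxd wq sub-driver remove: quiesce the wq and tear down its iaa_wq
 * state.  If the iaa_wq still has outstanding references it is only
 * marked for removal; otherwise it is freed here.  When the last IAA
 * instance is gone, crypto support is disabled and the wq table freed.
 */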
static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;

	idxd_wq_quiesce(wq);

	mutex_lock(&wq->wq_lock);
	mutex_lock(&iaa_devices_lock);

	remove_iaa_wq(wq);

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (!iaa_wq) {
		spin_unlock(&idxd->dev_lock);
		pr_err("%s: no iaa_wq available to remove\n", __func__);
		goto out;
	}

	if (iaa_wq->ref) {
		iaa_wq->remove = true;
	} else {
		wq = iaa_wq->wq;
		idxd_wq_set_private(wq, NULL);
		free = true;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	idxd_drv_disable_wq(wq);
	rebalance_wq_table();

	if (nr_iaa == 0) {
		iaa_crypto_enabled = false;
		free_wq_table();
		module_put(THIS_MODULE);

		pr_info("iaa_crypto now DISABLED\n");
	}
out:
	mutex_unlock(&iaa_devices_lock);
	mutex_unlock(&wq->wq_lock);
}

static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

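/* The iaa_crypto idxd sub-driver, bound to IAX wq devices */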
static struct idxd_device_driver iaa_crypto_driver = {
	.probe = iaa_crypto_probe,
	.remove = iaa_crypto_remove,
	.name = IDXD_SUBDRIVER_NAME,
	.type = dev_types,
	.desc_complete = iaa_desc_complete,
};

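/*
 * Module init: determine the cpu/node counts used for wq table
 * balancing, allocate the generic software deflate tfm used as a
 * decompression fallback, initialize the fixed compression mode,
 * register the idxd wq sub-driver, and create the driver attributes.
 */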
static int __init iaa_crypto_init_module(void)
{
	int ret = 0;
	int node;

	nr_cpus = num_online_cpus();
	for_each_node_with_cpus(node)
		nr_nodes++;
	if (!nr_nodes) {
		pr_err("IAA couldn't find any nodes with cpus\n");
		return -ENODEV;
	}
	nr_cpus_per_node = nr_cpus / nr_nodes;

	if (crypto_has_comp("deflate-generic", 0, 0))
		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);

	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
		return -ENOMEM;
	}

	ret = iaa_aecs_init_fixed();
	if (ret < 0) {
		pr_debug("IAA fixed compression mode init failed\n");
		goto err_aecs_init;
	}

	ret = idxd_driver_register(&iaa_crypto_driver);
	if (ret) {
		pr_debug("IAA wq sub-driver registration failed\n");
		goto err_driver_reg;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_verify_compress);
	if (ret) {
		pr_debug("IAA verify_compress attr creation failed\n");
		goto err_verify_attr_create;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_sync_mode);
	if (ret) {
		pr_debug("IAA sync mode attr creation failed\n");
		goto err_sync_attr_create;
	}

	if (iaa_crypto_debugfs_init())
		pr_warn("debugfs init failed, stats not available\n");

	pr_debug("initialized\n");
out:
	return ret;

err_sync_attr_create:
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
err_verify_attr_create:
	idxd_driver_unregister(&iaa_crypto_driver);
err_driver_reg:
	iaa_aecs_cleanup_fixed();
err_aecs_init:
	crypto_free_comp(deflate_generic_tfm);

	goto out;
}

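/*
 * Module exit: unregister the algorithm if it was registered, then
 * tear down debugfs, the driver attributes, the idxd wq sub-driver,
 * the fixed compression mode, and the software deflate tfm.
 */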
static void __exit iaa_crypto_cleanup_module(void)
{
	if (iaa_unregister_compression_device())
		pr_debug("IAA compression device unregister failed\n");

	iaa_crypto_debugfs_cleanup();
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_sync_mode);
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
	idxd_driver_unregister(&iaa_crypto_driver);
	iaa_aecs_cleanup_fixed();
	crypto_free_comp(deflate_generic_tfm);

	pr_debug("cleaned up\n");
}

MODULE_IMPORT_NS(IDXD);
MODULE_LICENSE("GPL");
MODULE_ALIAS_IDXD_DEVICE(0);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");

module_init(iaa_crypto_init_module);
module_exit(iaa_crypto_cleanup_module);
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
3
4#include <linux/init.h>
5#include <linux/kernel.h>
6#include <linux/module.h>
7#include <linux/pci.h>
8#include <linux/device.h>
9#include <linux/iommu.h>
10#include <uapi/linux/idxd.h>
11#include <linux/highmem.h>
12#include <linux/sched/smt.h>
13#include <crypto/internal/acompress.h>
14
15#include "idxd.h"
16#include "iaa_crypto.h"
17#include "iaa_crypto_stats.h"
18
19#ifdef pr_fmt
20#undef pr_fmt
21#endif
22
23#define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt
24
25#define IAA_ALG_PRIORITY 300
26
27/* number of iaa instances probed */
28static unsigned int nr_iaa;
29static unsigned int nr_cpus;
30static unsigned int nr_nodes;
31static unsigned int nr_cpus_per_node;
32
33/* Number of physical cpus sharing each iaa instance */
34static unsigned int cpus_per_iaa;
35
36static struct crypto_comp *deflate_generic_tfm;
37
38/* Per-cpu lookup table for balanced wqs */
39static struct wq_table_entry __percpu *wq_table;
40
41static struct idxd_wq *wq_table_next_wq(int cpu)
42{
43 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
44
45 if (++entry->cur_wq >= entry->n_wqs)
46 entry->cur_wq = 0;
47
48 if (!entry->wqs[entry->cur_wq])
49 return NULL;
50
51 pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
52 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
53 entry->wqs[entry->cur_wq]->id, cpu);
54
55 return entry->wqs[entry->cur_wq];
56}
57
58static void wq_table_add(int cpu, struct idxd_wq *wq)
59{
60 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
61
62 if (WARN_ON(entry->n_wqs == entry->max_wqs))
63 return;
64
65 entry->wqs[entry->n_wqs++] = wq;
66
67 pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
68 entry->wqs[entry->n_wqs - 1]->idxd->id,
69 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
70}
71
72static void wq_table_free_entry(int cpu)
73{
74 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
75
76 kfree(entry->wqs);
77 memset(entry, 0, sizeof(*entry));
78}
79
80static void wq_table_clear_entry(int cpu)
81{
82 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
83
84 entry->n_wqs = 0;
85 entry->cur_wq = 0;
86 memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
87}
88
89LIST_HEAD(iaa_devices);
90DEFINE_MUTEX(iaa_devices_lock);
91
92/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
93static bool iaa_crypto_enabled;
94static bool iaa_crypto_registered;
95
96/* Verify results of IAA compress or not */
97static bool iaa_verify_compress = true;
98
99static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
100{
101 return sprintf(buf, "%d\n", iaa_verify_compress);
102}
103
104static ssize_t verify_compress_store(struct device_driver *driver,
105 const char *buf, size_t count)
106{
107 int ret = -EBUSY;
108
109 mutex_lock(&iaa_devices_lock);
110
111 if (iaa_crypto_enabled)
112 goto out;
113
114 ret = kstrtobool(buf, &iaa_verify_compress);
115 if (ret)
116 goto out;
117
118 ret = count;
119out:
120 mutex_unlock(&iaa_devices_lock);
121
122 return ret;
123}
124static DRIVER_ATTR_RW(verify_compress);
125
126/*
127 * The iaa crypto driver supports three 'sync' methods determining how
128 * compressions and decompressions are performed:
129 *
130 * - sync: the compression or decompression completes before
131 * returning. This is the mode used by the async crypto
132 * interface when the sync mode is set to 'sync' and by
133 * the sync crypto interface regardless of setting.
134 *
135 * - async: the compression or decompression is submitted and returns
136 * immediately. Completion interrupts are not used so
137 * the caller is responsible for polling the descriptor
138 * for completion. This mode is applicable to only the
139 * async crypto interface and is ignored for anything
140 * else.
141 *
142 * - async_irq: the compression or decompression is submitted and
143 * returns immediately. Completion interrupts are
144 * enabled so the caller can wait for the completion and
145 * yield to other threads. When the compression or
146 * decompression completes, the completion is signaled
147 * and the caller awakened. This mode is applicable to
148 * only the async crypto interface and is ignored for
149 * anything else.
150 *
151 * These modes can be set using the iaa_crypto sync_mode driver
152 * attribute.
153 */
154
155/* Use async mode */
156static bool async_mode;
157/* Use interrupts */
158static bool use_irq;
159
160/**
161 * set_iaa_sync_mode - Set IAA sync mode
162 * @name: The name of the sync mode
163 *
164 * Make the IAA sync mode named @name the current sync mode used by
165 * compression/decompression.
166 */
167
168static int set_iaa_sync_mode(const char *name)
169{
170 int ret = 0;
171
172 if (sysfs_streq(name, "sync")) {
173 async_mode = false;
174 use_irq = false;
175 } else if (sysfs_streq(name, "async")) {
176 async_mode = true;
177 use_irq = false;
178 } else if (sysfs_streq(name, "async_irq")) {
179 async_mode = true;
180 use_irq = true;
181 } else {
182 ret = -EINVAL;
183 }
184
185 return ret;
186}
187
188static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
189{
190 int ret = 0;
191
192 if (!async_mode && !use_irq)
193 ret = sprintf(buf, "%s\n", "sync");
194 else if (async_mode && !use_irq)
195 ret = sprintf(buf, "%s\n", "async");
196 else if (async_mode && use_irq)
197 ret = sprintf(buf, "%s\n", "async_irq");
198
199 return ret;
200}
201
202static ssize_t sync_mode_store(struct device_driver *driver,
203 const char *buf, size_t count)
204{
205 int ret = -EBUSY;
206
207 mutex_lock(&iaa_devices_lock);
208
209 if (iaa_crypto_enabled)
210 goto out;
211
212 ret = set_iaa_sync_mode(buf);
213 if (ret == 0)
214 ret = count;
215out:
216 mutex_unlock(&iaa_devices_lock);
217
218 return ret;
219}
220static DRIVER_ATTR_RW(sync_mode);
221
222static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
223
224static int find_empty_iaa_compression_mode(void)
225{
226 int i = -EINVAL;
227
228 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
229 if (iaa_compression_modes[i])
230 continue;
231 break;
232 }
233
234 return i;
235}
236
237static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
238{
239 struct iaa_compression_mode *mode;
240 int i;
241
242 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
243 mode = iaa_compression_modes[i];
244 if (!mode)
245 continue;
246
247 if (!strcmp(mode->name, name)) {
248 *idx = i;
249 return iaa_compression_modes[i];
250 }
251 }
252
253 return NULL;
254}
255
256static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
257{
258 kfree(mode->name);
259 kfree(mode->ll_table);
260 kfree(mode->d_table);
261
262 kfree(mode);
263}
264
265/*
266 * IAA Compression modes are defined by an ll_table and a d_table.
267 * These tables are typically generated and captured using statistics
268 * collected from running actual compress/decompress workloads.
269 *
270 * A module or other kernel code can add and remove compression modes
271 * with a given name using the exported @add_iaa_compression_mode()
272 * and @remove_iaa_compression_mode functions.
273 *
274 * When a new compression mode is added, the tables are saved in a
275 * global compression mode list. When IAA devices are added, a
276 * per-IAA device dma mapping is created for each IAA device, for each
277 * compression mode. These are the tables used to do the actual
278 * compression/deccompression and are unmapped if/when the devices are
279 * removed. Currently, compression modes must be added before any
280 * device is added, and removed after all devices have been removed.
281 */
282
283/**
284 * remove_iaa_compression_mode - Remove an IAA compression mode
285 * @name: The name the compression mode will be known as
286 *
287 * Remove the IAA compression mode named @name.
288 */
289void remove_iaa_compression_mode(const char *name)
290{
291 struct iaa_compression_mode *mode;
292 int idx;
293
294 mutex_lock(&iaa_devices_lock);
295
296 if (!list_empty(&iaa_devices))
297 goto out;
298
299 mode = find_iaa_compression_mode(name, &idx);
300 if (mode) {
301 free_iaa_compression_mode(mode);
302 iaa_compression_modes[idx] = NULL;
303 }
304out:
305 mutex_unlock(&iaa_devices_lock);
306}
307EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
308
309/**
310 * add_iaa_compression_mode - Add an IAA compression mode
311 * @name: The name the compression mode will be known as
312 * @ll_table: The ll table
313 * @ll_table_size: The ll table size in bytes
314 * @d_table: The d table
315 * @d_table_size: The d table size in bytes
316 * @init: Optional callback function to init the compression mode data
317 * @free: Optional callback function to free the compression mode data
318 *
319 * Add a new IAA compression mode named @name.
320 *
321 * Returns 0 if successful, errcode otherwise.
322 */
323int add_iaa_compression_mode(const char *name,
324 const u32 *ll_table,
325 int ll_table_size,
326 const u32 *d_table,
327 int d_table_size,
328 iaa_dev_comp_init_fn_t init,
329 iaa_dev_comp_free_fn_t free)
330{
331 struct iaa_compression_mode *mode;
332 int idx, ret = -ENOMEM;
333
334 mutex_lock(&iaa_devices_lock);
335
336 if (!list_empty(&iaa_devices)) {
337 ret = -EBUSY;
338 goto out;
339 }
340
341 mode = kzalloc(sizeof(*mode), GFP_KERNEL);
342 if (!mode)
343 goto out;
344
345 mode->name = kstrdup(name, GFP_KERNEL);
346 if (!mode->name)
347 goto free;
348
349 if (ll_table) {
350 mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL);
351 if (!mode->ll_table)
352 goto free;
353 memcpy(mode->ll_table, ll_table, ll_table_size);
354 mode->ll_table_size = ll_table_size;
355 }
356
357 if (d_table) {
358 mode->d_table = kzalloc(d_table_size, GFP_KERNEL);
359 if (!mode->d_table)
360 goto free;
361 memcpy(mode->d_table, d_table, d_table_size);
362 mode->d_table_size = d_table_size;
363 }
364
365 mode->init = init;
366 mode->free = free;
367
368 idx = find_empty_iaa_compression_mode();
369 if (idx < 0)
370 goto free;
371
372 pr_debug("IAA compression mode %s added at idx %d\n",
373 mode->name, idx);
374
375 iaa_compression_modes[idx] = mode;
376
377 ret = 0;
378out:
379 mutex_unlock(&iaa_devices_lock);
380
381 return ret;
382free:
383 free_iaa_compression_mode(mode);
384 goto out;
385}
386EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
387
388static struct iaa_device_compression_mode *
389get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
390{
391 return iaa_device->compression_modes[idx];
392}
393
394static void free_device_compression_mode(struct iaa_device *iaa_device,
395 struct iaa_device_compression_mode *device_mode)
396{
397 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
398 struct device *dev = &iaa_device->idxd->pdev->dev;
399
400 kfree(device_mode->name);
401
402 if (device_mode->aecs_comp_table)
403 dma_free_coherent(dev, size, device_mode->aecs_comp_table,
404 device_mode->aecs_comp_table_dma_addr);
405 kfree(device_mode);
406}
407
408#define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000
409#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
410#define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
411#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
412#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
413 IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
414 IDXD_OP_FLAG_AECS_RW_TGLS)
415
416static int check_completion(struct device *dev,
417 struct iax_completion_record *comp,
418 bool compress,
419 bool only_once);
420
421static int init_device_compression_mode(struct iaa_device *iaa_device,
422 struct iaa_compression_mode *mode,
423 int idx, struct idxd_wq *wq)
424{
425 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
426 struct device *dev = &iaa_device->idxd->pdev->dev;
427 struct iaa_device_compression_mode *device_mode;
428 int ret = -ENOMEM;
429
430 device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
431 if (!device_mode)
432 return -ENOMEM;
433
434 device_mode->name = kstrdup(mode->name, GFP_KERNEL);
435 if (!device_mode->name)
436 goto free;
437
438 device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
439 &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
440 if (!device_mode->aecs_comp_table)
441 goto free;
442
443 /* Add Huffman table to aecs */
444 memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
445 memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
446 memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
447
448 if (mode->init) {
449 ret = mode->init(device_mode);
450 if (ret)
451 goto free;
452 }
453
454 /* mode index should match iaa_compression_modes idx */
455 iaa_device->compression_modes[idx] = device_mode;
456
457 pr_debug("IAA %s compression mode initialized for iaa device %d\n",
458 mode->name, iaa_device->idxd->id);
459
460 ret = 0;
461out:
462 return ret;
463free:
464 pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
465 mode->name, iaa_device->idxd->id);
466
467 free_device_compression_mode(iaa_device, device_mode);
468 goto out;
469}
470
471static int init_device_compression_modes(struct iaa_device *iaa_device,
472 struct idxd_wq *wq)
473{
474 struct iaa_compression_mode *mode;
475 int i, ret = 0;
476
477 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
478 mode = iaa_compression_modes[i];
479 if (!mode)
480 continue;
481
482 ret = init_device_compression_mode(iaa_device, mode, i, wq);
483 if (ret)
484 break;
485 }
486
487 return ret;
488}
489
490static void remove_device_compression_modes(struct iaa_device *iaa_device)
491{
492 struct iaa_device_compression_mode *device_mode;
493 int i;
494
495 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
496 device_mode = iaa_device->compression_modes[i];
497 if (!device_mode)
498 continue;
499
500 free_device_compression_mode(iaa_device, device_mode);
501 iaa_device->compression_modes[i] = NULL;
502 if (iaa_compression_modes[i]->free)
503 iaa_compression_modes[i]->free(device_mode);
504 }
505}
506
507static struct iaa_device *iaa_device_alloc(void)
508{
509 struct iaa_device *iaa_device;
510
511 iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
512 if (!iaa_device)
513 return NULL;
514
515 INIT_LIST_HEAD(&iaa_device->wqs);
516
517 return iaa_device;
518}
519
520static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
521{
522 struct iaa_wq *iaa_wq;
523
524 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
525 if (iaa_wq->wq == wq)
526 return true;
527 }
528
529 return false;
530}
531
532static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
533{
534 struct iaa_device *iaa_device;
535
536 iaa_device = iaa_device_alloc();
537 if (!iaa_device)
538 return NULL;
539
540 iaa_device->idxd = idxd;
541
542 list_add_tail(&iaa_device->list, &iaa_devices);
543
544 nr_iaa++;
545
546 return iaa_device;
547}
548
549static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
550{
551 int ret = 0;
552
553 ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
554 if (ret)
555 return ret;
556
557 return ret;
558}
559
560static void del_iaa_device(struct iaa_device *iaa_device)
561{
562 list_del(&iaa_device->list);
563
564 nr_iaa--;
565}
566
567static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
568 struct iaa_wq **new_wq)
569{
570 struct idxd_device *idxd = iaa_device->idxd;
571 struct pci_dev *pdev = idxd->pdev;
572 struct device *dev = &pdev->dev;
573 struct iaa_wq *iaa_wq;
574
575 iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
576 if (!iaa_wq)
577 return -ENOMEM;
578
579 iaa_wq->wq = wq;
580 iaa_wq->iaa_device = iaa_device;
581 idxd_wq_set_private(wq, iaa_wq);
582
583 list_add_tail(&iaa_wq->list, &iaa_device->wqs);
584
585 iaa_device->n_wq++;
586
587 if (new_wq)
588 *new_wq = iaa_wq;
589
590 dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
591 wq->id, iaa_device->idxd->id, iaa_device->n_wq);
592
593 return 0;
594}
595
596static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
597{
598 struct idxd_device *idxd = iaa_device->idxd;
599 struct pci_dev *pdev = idxd->pdev;
600 struct device *dev = &pdev->dev;
601 struct iaa_wq *iaa_wq;
602
603 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
604 if (iaa_wq->wq == wq) {
605 list_del(&iaa_wq->list);
606 iaa_device->n_wq--;
607
608 dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
609 wq->id, iaa_device->idxd->id,
610 iaa_device->n_wq, nr_iaa);
611
612 if (iaa_device->n_wq == 0)
613 del_iaa_device(iaa_device);
614 break;
615 }
616 }
617}
618
619static void clear_wq_table(void)
620{
621 int cpu;
622
623 for (cpu = 0; cpu < nr_cpus; cpu++)
624 wq_table_clear_entry(cpu);
625
626 pr_debug("cleared wq table\n");
627}
628
629static void free_iaa_device(struct iaa_device *iaa_device)
630{
631 if (!iaa_device)
632 return;
633
634 remove_device_compression_modes(iaa_device);
635 kfree(iaa_device);
636}
637
638static void __free_iaa_wq(struct iaa_wq *iaa_wq)
639{
640 struct iaa_device *iaa_device;
641
642 if (!iaa_wq)
643 return;
644
645 iaa_device = iaa_wq->iaa_device;
646 if (iaa_device->n_wq == 0)
647 free_iaa_device(iaa_wq->iaa_device);
648}
649
650static void free_iaa_wq(struct iaa_wq *iaa_wq)
651{
652 struct idxd_wq *wq;
653
654 __free_iaa_wq(iaa_wq);
655
656 wq = iaa_wq->wq;
657
658 kfree(iaa_wq);
659 idxd_wq_set_private(wq, NULL);
660}
661
662static int iaa_wq_get(struct idxd_wq *wq)
663{
664 struct idxd_device *idxd = wq->idxd;
665 struct iaa_wq *iaa_wq;
666 int ret = 0;
667
668 spin_lock(&idxd->dev_lock);
669 iaa_wq = idxd_wq_get_private(wq);
670 if (iaa_wq && !iaa_wq->remove) {
671 iaa_wq->ref++;
672 idxd_wq_get(wq);
673 } else {
674 ret = -ENODEV;
675 }
676 spin_unlock(&idxd->dev_lock);
677
678 return ret;
679}
680
681static int iaa_wq_put(struct idxd_wq *wq)
682{
683 struct idxd_device *idxd = wq->idxd;
684 struct iaa_wq *iaa_wq;
685 bool free = false;
686 int ret = 0;
687
688 spin_lock(&idxd->dev_lock);
689 iaa_wq = idxd_wq_get_private(wq);
690 if (iaa_wq) {
691 iaa_wq->ref--;
692 if (iaa_wq->ref == 0 && iaa_wq->remove) {
693 idxd_wq_set_private(wq, NULL);
694 free = true;
695 }
696 idxd_wq_put(wq);
697 } else {
698 ret = -ENODEV;
699 }
700 spin_unlock(&idxd->dev_lock);
701 if (free) {
702 __free_iaa_wq(iaa_wq);
703 kfree(iaa_wq);
704 }
705
706 return ret;
707}
708
709static void free_wq_table(void)
710{
711 int cpu;
712
713 for (cpu = 0; cpu < nr_cpus; cpu++)
714 wq_table_free_entry(cpu);
715
716 free_percpu(wq_table);
717
718 pr_debug("freed wq table\n");
719}
720
721static int alloc_wq_table(int max_wqs)
722{
723 struct wq_table_entry *entry;
724 int cpu;
725
726 wq_table = alloc_percpu(struct wq_table_entry);
727 if (!wq_table)
728 return -ENOMEM;
729
730 for (cpu = 0; cpu < nr_cpus; cpu++) {
731 entry = per_cpu_ptr(wq_table, cpu);
732 entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL);
733 if (!entry->wqs) {
734 free_wq_table();
735 return -ENOMEM;
736 }
737
738 entry->max_wqs = max_wqs;
739 }
740
741 pr_debug("initialized wq table\n");
742
743 return 0;
744}
745
746static int save_iaa_wq(struct idxd_wq *wq)
747{
748 struct iaa_device *iaa_device, *found = NULL;
749 struct idxd_device *idxd;
750 struct pci_dev *pdev;
751 struct device *dev;
752 int ret = 0;
753
754 list_for_each_entry(iaa_device, &iaa_devices, list) {
755 if (iaa_device->idxd == wq->idxd) {
756 idxd = iaa_device->idxd;
757 pdev = idxd->pdev;
758 dev = &pdev->dev;
759 /*
760 * Check to see that we don't already have this wq.
761 * Shouldn't happen but we don't control probing.
762 */
763 if (iaa_has_wq(iaa_device, wq)) {
764 dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
765 iaa_device);
766 goto out;
767 }
768
769 found = iaa_device;
770
771 ret = add_iaa_wq(iaa_device, wq, NULL);
772 if (ret)
773 goto out;
774
775 break;
776 }
777 }
778
779 if (!found) {
780 struct iaa_device *new_device;
781 struct iaa_wq *new_wq;
782
783 new_device = add_iaa_device(wq->idxd);
784 if (!new_device) {
785 ret = -ENOMEM;
786 goto out;
787 }
788
789 ret = add_iaa_wq(new_device, wq, &new_wq);
790 if (ret) {
791 del_iaa_device(new_device);
792 free_iaa_device(new_device);
793 goto out;
794 }
795
796 ret = init_iaa_device(new_device, new_wq);
797 if (ret) {
798 del_iaa_wq(new_device, new_wq->wq);
799 del_iaa_device(new_device);
800 free_iaa_wq(new_wq);
801 goto out;
802 }
803 }
804
805 if (WARN_ON(nr_iaa == 0))
806 return -EINVAL;
807
808 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
809 if (!cpus_per_iaa)
810 cpus_per_iaa = 1;
811out:
812 return 0;
813}
814
815static void remove_iaa_wq(struct idxd_wq *wq)
816{
817 struct iaa_device *iaa_device;
818
819 list_for_each_entry(iaa_device, &iaa_devices, list) {
820 if (iaa_has_wq(iaa_device, wq)) {
821 del_iaa_wq(iaa_device, wq);
822 break;
823 }
824 }
825
826 if (nr_iaa) {
827 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
828 if (!cpus_per_iaa)
829 cpus_per_iaa = 1;
830 } else
831 cpus_per_iaa = 1;
832}
833
834static int wq_table_add_wqs(int iaa, int cpu)
835{
836 struct iaa_device *iaa_device, *found_device = NULL;
837 int ret = 0, cur_iaa = 0, n_wqs_added = 0;
838 struct idxd_device *idxd;
839 struct iaa_wq *iaa_wq;
840 struct pci_dev *pdev;
841 struct device *dev;
842
843 list_for_each_entry(iaa_device, &iaa_devices, list) {
844 idxd = iaa_device->idxd;
845 pdev = idxd->pdev;
846 dev = &pdev->dev;
847
848 if (cur_iaa != iaa) {
849 cur_iaa++;
850 continue;
851 }
852
853 found_device = iaa_device;
854 dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
855 found_device->idxd->id, cur_iaa);
856 break;
857 }
858
859 if (!found_device) {
860 found_device = list_first_entry_or_null(&iaa_devices,
861 struct iaa_device, list);
862 if (!found_device) {
863 pr_debug("couldn't find any iaa devices with wqs!\n");
864 ret = -EINVAL;
865 goto out;
866 }
867 cur_iaa = 0;
868
869 idxd = found_device->idxd;
870 pdev = idxd->pdev;
871 dev = &pdev->dev;
872 dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
873 found_device->idxd->id, cur_iaa);
874 }
875
876 list_for_each_entry(iaa_wq, &found_device->wqs, list) {
877 wq_table_add(cpu, iaa_wq->wq);
878 pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
879 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
880 n_wqs_added++;
881 }
882
883 if (!n_wqs_added) {
884 pr_debug("couldn't find any iaa wqs!\n");
885 ret = -EINVAL;
886 goto out;
887 }
888out:
889 return ret;
890}
891
892/*
893 * Rebalance the wq table so that given a cpu, it's easy to find the
894 * closest IAA instance. The idea is to try to choose the most
895 * appropriate IAA instance for a caller and spread available
896 * workqueues around to clients.
897 */
898static void rebalance_wq_table(void)
899{
900 const struct cpumask *node_cpus;
901 int node, cpu, iaa = -1;
902
903 if (nr_iaa == 0)
904 return;
905
906 pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
907 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
908
909 clear_wq_table();
910
911 if (nr_iaa == 1) {
912 for (cpu = 0; cpu < nr_cpus; cpu++) {
913 if (WARN_ON(wq_table_add_wqs(0, cpu))) {
914 pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
915 return;
916 }
917 }
918
919 return;
920 }
921
922 for_each_node_with_cpus(node) {
923 node_cpus = cpumask_of_node(node);
924
925 for (cpu = 0; cpu < nr_cpus_per_node; cpu++) {
926 int node_cpu = cpumask_nth(cpu, node_cpus);
927
928 if (WARN_ON(node_cpu >= nr_cpu_ids)) {
929 pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
930 return;
931 }
932
933 if ((cpu % cpus_per_iaa) == 0)
934 iaa++;
935
936 if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
937 pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
938 return;
939 }
940 }
941 }
942}
943
944static inline int check_completion(struct device *dev,
945 struct iax_completion_record *comp,
946 bool compress,
947 bool only_once)
948{
949 char *op_str = compress ? "compress" : "decompress";
950 int ret = 0;
951
952 while (!comp->status) {
953 if (only_once)
954 return -EAGAIN;
955 cpu_relax();
956 }
957
958 if (comp->status != IAX_COMP_SUCCESS) {
959 if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
960 ret = -ETIMEDOUT;
961 dev_dbg(dev, "%s timed out, size=0x%x\n",
962 op_str, comp->output_size);
963 update_completion_timeout_errs();
964 goto out;
965 }
966
967 if (comp->status == IAA_ANALYTICS_ERROR &&
968 comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
969 ret = -E2BIG;
970 dev_dbg(dev, "compressed > uncompressed size,"
971 " not compressing, size=0x%x\n",
972 comp->output_size);
973 update_completion_comp_buf_overflow_errs();
974 goto out;
975 }
976
977 if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
978 ret = -EOVERFLOW;
979 goto out;
980 }
981
982 ret = -EINVAL;
983 dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
984 op_str, comp->status, comp->error_code, comp->output_size);
985 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
986 update_completion_einval_errs();
987
988 goto out;
989 }
990out:
991 return ret;
992}
993
994static int deflate_generic_decompress(struct acomp_req *req)
995{
996 void *src, *dst;
997 int ret;
998
999 src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1000 dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1001
1002 ret = crypto_comp_decompress(deflate_generic_tfm,
1003 src, req->slen, dst, &req->dlen);
1004
1005 kunmap_local(src);
1006 kunmap_local(dst);
1007
1008 update_total_sw_decomp_calls();
1009
1010 return ret;
1011}
1012
1013static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1014 struct acomp_req *req,
1015 dma_addr_t *src_addr, dma_addr_t *dst_addr);
1016
1017static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1018 struct idxd_wq *wq,
1019 dma_addr_t src_addr, unsigned int slen,
1020 dma_addr_t dst_addr, unsigned int *dlen,
1021 u32 compression_crc);
1022
1023static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1024 enum idxd_complete_type comp_type,
1025 bool free_desc, void *__ctx,
1026 u32 *status)
1027{
1028 struct iaa_device_compression_mode *active_compression_mode;
1029 struct iaa_compression_ctx *compression_ctx;
1030 struct crypto_ctx *ctx = __ctx;
1031 struct iaa_device *iaa_device;
1032 struct idxd_device *idxd;
1033 struct iaa_wq *iaa_wq;
1034 struct pci_dev *pdev;
1035 struct device *dev;
1036 int ret, err = 0;
1037
1038 compression_ctx = crypto_tfm_ctx(ctx->tfm);
1039
1040 iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1041 iaa_device = iaa_wq->iaa_device;
1042 idxd = iaa_device->idxd;
1043 pdev = idxd->pdev;
1044 dev = &pdev->dev;
1045
1046 active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1047 compression_ctx->mode);
1048 dev_dbg(dev, "%s: compression mode %s,"
1049 " ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1050 active_compression_mode->name,
1051 ctx->src_addr, ctx->dst_addr);
1052
1053 ret = check_completion(dev, idxd_desc->iax_completion,
1054 ctx->compress, false);
1055 if (ret) {
1056 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1057 if (!ctx->compress &&
1058 idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1059 pr_warn("%s: falling back to deflate-generic decompress, "
1060 "analytics error code %x\n", __func__,
1061 idxd_desc->iax_completion->error_code);
1062 ret = deflate_generic_decompress(ctx->req);
1063 if (ret) {
1064 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1065 __func__, ret);
1066 err = -EIO;
1067 goto err;
1068 }
1069 } else {
1070 err = -EIO;
1071 goto err;
1072 }
1073 } else {
1074 ctx->req->dlen = idxd_desc->iax_completion->output_size;
1075 }
1076
1077 /* Update stats */
1078 if (ctx->compress) {
1079 update_total_comp_bytes_out(ctx->req->dlen);
1080 update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1081 } else {
1082 update_total_decomp_bytes_in(ctx->req->dlen);
1083 update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen);
1084 }
1085
1086 if (ctx->compress && compression_ctx->verify_compress) {
1087 dma_addr_t src_addr, dst_addr;
1088 u32 compression_crc;
1089
1090 compression_crc = idxd_desc->iax_completion->crc;
1091
1092 ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1093 if (ret) {
1094 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1095 err = -EIO;
1096 goto out;
1097 }
1098
1099 ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1100 ctx->req->slen, dst_addr, &ctx->req->dlen,
1101 compression_crc);
1102 if (ret) {
1103 dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1104 err = -EIO;
1105 }
1106
1107 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1108 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1109
1110 goto out;
1111 }
1112err:
1113 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1114 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1115out:
1116 if (ret != 0)
1117 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1118
1119 if (ctx->req->base.complete)
1120 acomp_request_complete(ctx->req, err);
1121
1122 if (free_desc)
1123 idxd_free_desc(idxd_desc->wq, idxd_desc);
1124 iaa_wq_put(idxd_desc->wq);
1125}
1126
1127static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
1128 struct idxd_wq *wq,
1129 dma_addr_t src_addr, unsigned int slen,
1130 dma_addr_t dst_addr, unsigned int *dlen,
1131 u32 *compression_crc,
1132 bool disable_async)
1133{
1134 struct iaa_device_compression_mode *active_compression_mode;
1135 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1136 struct iaa_device *iaa_device;
1137 struct idxd_desc *idxd_desc;
1138 struct iax_hw_desc *desc;
1139 struct idxd_device *idxd;
1140 struct iaa_wq *iaa_wq;
1141 struct pci_dev *pdev;
1142 struct device *dev;
1143 int ret = 0;
1144
1145 iaa_wq = idxd_wq_get_private(wq);
1146 iaa_device = iaa_wq->iaa_device;
1147 idxd = iaa_device->idxd;
1148 pdev = idxd->pdev;
1149 dev = &pdev->dev;
1150
1151 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1152
1153 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1154 if (IS_ERR(idxd_desc)) {
1155 dev_dbg(dev, "idxd descriptor allocation failed\n");
1156 dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1157 return PTR_ERR(idxd_desc);
1158 }
1159 desc = idxd_desc->iax_hw;
1160
1161 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1162 IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1163 desc->opcode = IAX_OPCODE_COMPRESS;
1164 desc->compr_flags = IAA_COMP_FLAGS;
1165 desc->priv = 0;
1166
1167 desc->src1_addr = (u64)src_addr;
1168 desc->src1_size = slen;
1169 desc->dst_addr = (u64)dst_addr;
1170 desc->max_dst_size = *dlen;
1171 desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1172 desc->src2_size = sizeof(struct aecs_comp_table_record);
1173 desc->completion_addr = idxd_desc->compl_dma;
1174
1175 if (ctx->use_irq && !disable_async) {
1176 desc->flags |= IDXD_OP_FLAG_RCI;
1177
1178 idxd_desc->crypto.req = req;
1179 idxd_desc->crypto.tfm = tfm;
1180 idxd_desc->crypto.src_addr = src_addr;
1181 idxd_desc->crypto.dst_addr = dst_addr;
1182 idxd_desc->crypto.compress = true;
1183
1184 dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1185 " src_addr %llx, dst_addr %llx\n", __func__,
1186 active_compression_mode->name,
1187 src_addr, dst_addr);
1188 } else if (ctx->async_mode && !disable_async)
1189 req->base.data = idxd_desc;
1190
1191 dev_dbg(dev, "%s: compression mode %s,"
1192 " desc->src1_addr %llx, desc->src1_size %d,"
1193 " desc->dst_addr %llx, desc->max_dst_size %d,"
1194 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1195 active_compression_mode->name,
1196 desc->src1_addr, desc->src1_size, desc->dst_addr,
1197 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1198
1199 ret = idxd_submit_desc(wq, idxd_desc);
1200 if (ret) {
1201 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1202 goto err;
1203 }
1204
1205 /* Update stats */
1206 update_total_comp_calls();
1207 update_wq_comp_calls(wq);
1208
1209 if (ctx->async_mode && !disable_async) {
1210 ret = -EINPROGRESS;
1211 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1212 goto out;
1213 }
1214
1215 ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1216 if (ret) {
1217 dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1218 goto err;
1219 }
1220
1221 *dlen = idxd_desc->iax_completion->output_size;
1222
1223 /* Update stats */
1224 update_total_comp_bytes_out(*dlen);
1225 update_wq_comp_bytes(wq, *dlen);
1226
1227 *compression_crc = idxd_desc->iax_completion->crc;
1228
1229 if (!ctx->async_mode || disable_async)
1230 idxd_free_desc(wq, idxd_desc);
1231out:
1232 return ret;
1233err:
1234 idxd_free_desc(wq, idxd_desc);
1235 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1236
1237 goto out;
1238}
1239
1240static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1241 struct acomp_req *req,
1242 dma_addr_t *src_addr, dma_addr_t *dst_addr)
1243{
1244 int ret = 0;
1245 int nr_sgs;
1246
1247 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1248 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1249
1250 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1251 if (nr_sgs <= 0 || nr_sgs > 1) {
1252 dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1253 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1254 iaa_wq->wq->id, ret);
1255 ret = -EIO;
1256 goto out;
1257 }
1258 *src_addr = sg_dma_address(req->src);
1259 dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1260 " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1261 req->src, req->slen, sg_dma_len(req->src));
1262
1263 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1264 if (nr_sgs <= 0 || nr_sgs > 1) {
1265 dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1266 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1267 iaa_wq->wq->id, ret);
1268 ret = -EIO;
1269 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1270 goto out;
1271 }
1272 *dst_addr = sg_dma_address(req->dst);
1273 dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1274 " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1275 req->dst, req->dlen, sg_dma_len(req->dst));
1276out:
1277 return ret;
1278}
1279
1280static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1281 struct idxd_wq *wq,
1282 dma_addr_t src_addr, unsigned int slen,
1283 dma_addr_t dst_addr, unsigned int *dlen,
1284 u32 compression_crc)
1285{
1286 struct iaa_device_compression_mode *active_compression_mode;
1287 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1288 struct iaa_device *iaa_device;
1289 struct idxd_desc *idxd_desc;
1290 struct iax_hw_desc *desc;
1291 struct idxd_device *idxd;
1292 struct iaa_wq *iaa_wq;
1293 struct pci_dev *pdev;
1294 struct device *dev;
1295 int ret = 0;
1296
1297 iaa_wq = idxd_wq_get_private(wq);
1298 iaa_device = iaa_wq->iaa_device;
1299 idxd = iaa_device->idxd;
1300 pdev = idxd->pdev;
1301 dev = &pdev->dev;
1302
1303 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1304
1305 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1306 if (IS_ERR(idxd_desc)) {
1307 dev_dbg(dev, "idxd descriptor allocation failed\n");
1308 dev_dbg(dev, "iaa compress failed: ret=%ld\n",
1309 PTR_ERR(idxd_desc));
1310 return PTR_ERR(idxd_desc);
1311 }
1312 desc = idxd_desc->iax_hw;
1313
1314 /* Verify (optional) - decompress and check crc, suppress dest write */
1315
1316 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1317 desc->opcode = IAX_OPCODE_DECOMPRESS;
1318 desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1319 desc->priv = 0;
1320
1321 desc->src1_addr = (u64)dst_addr;
1322 desc->src1_size = *dlen;
1323 desc->dst_addr = (u64)src_addr;
1324 desc->max_dst_size = slen;
1325 desc->completion_addr = idxd_desc->compl_dma;
1326
1327 dev_dbg(dev, "(verify) compression mode %s,"
1328 " desc->src1_addr %llx, desc->src1_size %d,"
1329 " desc->dst_addr %llx, desc->max_dst_size %d,"
1330 " desc->src2_addr %llx, desc->src2_size %d\n",
1331 active_compression_mode->name,
1332 desc->src1_addr, desc->src1_size, desc->dst_addr,
1333 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1334
1335 ret = idxd_submit_desc(wq, idxd_desc);
1336 if (ret) {
1337 dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1338 goto err;
1339 }
1340
1341 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1342 if (ret) {
1343 dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1344 goto err;
1345 }
1346
1347 if (compression_crc != idxd_desc->iax_completion->crc) {
1348 ret = -EINVAL;
1349 dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1350 " comp=0x%x, decomp=0x%x\n", compression_crc,
1351 idxd_desc->iax_completion->crc);
1352 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1353 8, 1, idxd_desc->iax_completion, 64, 0);
1354 goto err;
1355 }
1356
1357 idxd_free_desc(wq, idxd_desc);
1358out:
1359 return ret;
1360err:
1361 idxd_free_desc(wq, idxd_desc);
1362 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1363
1364 goto out;
1365}
1366
1367static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1368 struct idxd_wq *wq,
1369 dma_addr_t src_addr, unsigned int slen,
1370 dma_addr_t dst_addr, unsigned int *dlen,
1371 bool disable_async)
1372{
1373 struct iaa_device_compression_mode *active_compression_mode;
1374 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1375 struct iaa_device *iaa_device;
1376 struct idxd_desc *idxd_desc;
1377 struct iax_hw_desc *desc;
1378 struct idxd_device *idxd;
1379 struct iaa_wq *iaa_wq;
1380 struct pci_dev *pdev;
1381 struct device *dev;
1382 int ret = 0;
1383
1384 iaa_wq = idxd_wq_get_private(wq);
1385 iaa_device = iaa_wq->iaa_device;
1386 idxd = iaa_device->idxd;
1387 pdev = idxd->pdev;
1388 dev = &pdev->dev;
1389
1390 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1391
1392 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1393 if (IS_ERR(idxd_desc)) {
1394 dev_dbg(dev, "idxd descriptor allocation failed\n");
1395 dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1396 PTR_ERR(idxd_desc));
1397 return PTR_ERR(idxd_desc);
1398 }
1399 desc = idxd_desc->iax_hw;
1400
1401 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1402 desc->opcode = IAX_OPCODE_DECOMPRESS;
1403 desc->max_dst_size = PAGE_SIZE;
1404 desc->decompr_flags = IAA_DECOMP_FLAGS;
1405 desc->priv = 0;
1406
1407 desc->src1_addr = (u64)src_addr;
1408 desc->dst_addr = (u64)dst_addr;
1409 desc->max_dst_size = *dlen;
1410 desc->src1_size = slen;
1411 desc->completion_addr = idxd_desc->compl_dma;
1412
1413 if (ctx->use_irq && !disable_async) {
1414 desc->flags |= IDXD_OP_FLAG_RCI;
1415
1416 idxd_desc->crypto.req = req;
1417 idxd_desc->crypto.tfm = tfm;
1418 idxd_desc->crypto.src_addr = src_addr;
1419 idxd_desc->crypto.dst_addr = dst_addr;
1420 idxd_desc->crypto.compress = false;
1421
1422 dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1423 " src_addr %llx, dst_addr %llx\n", __func__,
1424 active_compression_mode->name,
1425 src_addr, dst_addr);
1426 } else if (ctx->async_mode && !disable_async)
1427 req->base.data = idxd_desc;
1428
1429 dev_dbg(dev, "%s: decompression mode %s,"
1430 " desc->src1_addr %llx, desc->src1_size %d,"
1431 " desc->dst_addr %llx, desc->max_dst_size %d,"
1432 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1433 active_compression_mode->name,
1434 desc->src1_addr, desc->src1_size, desc->dst_addr,
1435 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1436
1437 ret = idxd_submit_desc(wq, idxd_desc);
1438 if (ret) {
1439 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1440 goto err;
1441 }
1442
1443 /* Update stats */
1444 update_total_decomp_calls();
1445 update_wq_decomp_calls(wq);
1446
1447 if (ctx->async_mode && !disable_async) {
1448 ret = -EINPROGRESS;
1449 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1450 goto out;
1451 }
1452
1453 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1454 if (ret) {
1455 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1456 if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1457 pr_warn("%s: falling back to deflate-generic decompress, "
1458 "analytics error code %x\n", __func__,
1459 idxd_desc->iax_completion->error_code);
1460 ret = deflate_generic_decompress(req);
1461 if (ret) {
1462 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1463 __func__, ret);
1464 goto err;
1465 }
1466 } else {
1467 goto err;
1468 }
1469 } else {
1470 req->dlen = idxd_desc->iax_completion->output_size;
1471 }
1472
1473 *dlen = req->dlen;
1474
1475 if (!ctx->async_mode || disable_async)
1476 idxd_free_desc(wq, idxd_desc);
1477
1478 /* Update stats */
1479 update_total_decomp_bytes_in(slen);
1480 update_wq_decomp_bytes(wq, slen);
1481out:
1482 return ret;
1483err:
1484 idxd_free_desc(wq, idxd_desc);
1485 dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1486
1487 goto out;
1488}
1489
1490static int iaa_comp_acompress(struct acomp_req *req)
1491{
1492 struct iaa_compression_ctx *compression_ctx;
1493 struct crypto_tfm *tfm = req->base.tfm;
1494 dma_addr_t src_addr, dst_addr;
1495 bool disable_async = false;
1496 int nr_sgs, cpu, ret = 0;
1497 struct iaa_wq *iaa_wq;
1498 u32 compression_crc;
1499 struct idxd_wq *wq;
1500 struct device *dev;
1501 u64 start_time_ns;
1502 int order = -1;
1503
1504 compression_ctx = crypto_tfm_ctx(tfm);
1505
1506 if (!iaa_crypto_enabled) {
1507 pr_debug("iaa_crypto disabled, not compressing\n");
1508 return -ENODEV;
1509 }
1510
1511 if (!req->src || !req->slen) {
1512 pr_debug("invalid src, not compressing\n");
1513 return -EINVAL;
1514 }
1515
1516 cpu = get_cpu();
1517 wq = wq_table_next_wq(cpu);
1518 put_cpu();
1519 if (!wq) {
1520 pr_debug("no wq configured for cpu=%d\n", cpu);
1521 return -ENODEV;
1522 }
1523
1524 ret = iaa_wq_get(wq);
1525 if (ret) {
1526 pr_debug("no wq available for cpu=%d\n", cpu);
1527 return -ENODEV;
1528 }
1529
1530 iaa_wq = idxd_wq_get_private(wq);
1531
1532 if (!req->dst) {
1533 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1534
1535 /* incompressible data will always be < 2 * slen */
1536 req->dlen = 2 * req->slen;
1537 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1538 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1539 if (!req->dst) {
1540 ret = -ENOMEM;
1541 order = -1;
1542 goto out;
1543 }
1544 disable_async = true;
1545 }
1546
1547 dev = &wq->idxd->pdev->dev;
1548
1549 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1550 if (nr_sgs <= 0 || nr_sgs > 1) {
1551 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1552 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1553 iaa_wq->wq->id, ret);
1554 ret = -EIO;
1555 goto out;
1556 }
1557 src_addr = sg_dma_address(req->src);
1558 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1559 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1560 req->src, req->slen, sg_dma_len(req->src));
1561
1562 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1563 if (nr_sgs <= 0 || nr_sgs > 1) {
1564 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1565 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1566 iaa_wq->wq->id, ret);
1567 ret = -EIO;
1568 goto err_map_dst;
1569 }
1570 dst_addr = sg_dma_address(req->dst);
1571 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1572 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1573 req->dst, req->dlen, sg_dma_len(req->dst));
1574
1575 start_time_ns = iaa_get_ts();
1576 ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1577 &req->dlen, &compression_crc, disable_async);
1578 update_max_comp_delay_ns(start_time_ns);
1579 if (ret == -EINPROGRESS)
1580 return ret;
1581
1582 if (!ret && compression_ctx->verify_compress) {
1583 ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1584 if (ret) {
1585 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1586 goto out;
1587 }
1588
1589 ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1590 dst_addr, &req->dlen, compression_crc);
1591 if (ret)
1592 dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1593
1594 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1595 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1596
1597 goto out;
1598 }
1599
1600 if (ret)
1601 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1602
1603 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1604err_map_dst:
1605 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1606out:
1607 iaa_wq_put(wq);
1608
1609 if (order >= 0)
1610 sgl_free_order(req->dst, order);
1611
1612 return ret;
1613}
1614
1615static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1616{
1617 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1618 GFP_KERNEL : GFP_ATOMIC;
1619 struct crypto_tfm *tfm = req->base.tfm;
1620 dma_addr_t src_addr, dst_addr;
1621 int nr_sgs, cpu, ret = 0;
1622 struct iaa_wq *iaa_wq;
1623 struct device *dev;
1624 struct idxd_wq *wq;
1625 u64 start_time_ns;
1626 int order = -1;
1627
1628 cpu = get_cpu();
1629 wq = wq_table_next_wq(cpu);
1630 put_cpu();
1631 if (!wq) {
1632 pr_debug("no wq configured for cpu=%d\n", cpu);
1633 return -ENODEV;
1634 }
1635
1636 ret = iaa_wq_get(wq);
1637 if (ret) {
1638 pr_debug("no wq available for cpu=%d\n", cpu);
1639 return -ENODEV;
1640 }
1641
1642 iaa_wq = idxd_wq_get_private(wq);
1643
1644 dev = &wq->idxd->pdev->dev;
1645
1646 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1647 if (nr_sgs <= 0 || nr_sgs > 1) {
1648 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1649 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1650 iaa_wq->wq->id, ret);
1651 ret = -EIO;
1652 goto out;
1653 }
1654 src_addr = sg_dma_address(req->src);
1655 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1656 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1657 req->src, req->slen, sg_dma_len(req->src));
1658
1659 req->dlen = 4 * req->slen; /* start with ~avg comp rato */
1660alloc_dest:
1661 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1662 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1663 if (!req->dst) {
1664 ret = -ENOMEM;
1665 order = -1;
1666 goto out;
1667 }
1668
1669 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1670 if (nr_sgs <= 0 || nr_sgs > 1) {
1671 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1672 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1673 iaa_wq->wq->id, ret);
1674 ret = -EIO;
1675 goto err_map_dst;
1676 }
1677
1678 dst_addr = sg_dma_address(req->dst);
1679 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1680 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1681 req->dst, req->dlen, sg_dma_len(req->dst));
1682 start_time_ns = iaa_get_ts();
1683 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1684 dst_addr, &req->dlen, true);
1685 update_max_decomp_delay_ns(start_time_ns);
1686 if (ret == -EOVERFLOW) {
1687 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1688 req->dlen *= 2;
1689 if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1690 goto err_map_dst;
1691 goto alloc_dest;
1692 }
1693
1694 if (ret != 0)
1695 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1696
1697 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1698err_map_dst:
1699 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1700out:
1701 iaa_wq_put(wq);
1702
1703 if (order >= 0)
1704 sgl_free_order(req->dst, order);
1705
1706 return ret;
1707}
1708
1709static int iaa_comp_adecompress(struct acomp_req *req)
1710{
1711 struct crypto_tfm *tfm = req->base.tfm;
1712 dma_addr_t src_addr, dst_addr;
1713 int nr_sgs, cpu, ret = 0;
1714 struct iaa_wq *iaa_wq;
1715 struct device *dev;
1716 u64 start_time_ns;
1717 struct idxd_wq *wq;
1718
1719 if (!iaa_crypto_enabled) {
1720 pr_debug("iaa_crypto disabled, not decompressing\n");
1721 return -ENODEV;
1722 }
1723
1724 if (!req->src || !req->slen) {
1725 pr_debug("invalid src, not decompressing\n");
1726 return -EINVAL;
1727 }
1728
1729 if (!req->dst)
1730 return iaa_comp_adecompress_alloc_dest(req);
1731
1732 cpu = get_cpu();
1733 wq = wq_table_next_wq(cpu);
1734 put_cpu();
1735 if (!wq) {
1736 pr_debug("no wq configured for cpu=%d\n", cpu);
1737 return -ENODEV;
1738 }
1739
1740 ret = iaa_wq_get(wq);
1741 if (ret) {
1742 pr_debug("no wq available for cpu=%d\n", cpu);
1743 return -ENODEV;
1744 }
1745
1746 iaa_wq = idxd_wq_get_private(wq);
1747
1748 dev = &wq->idxd->pdev->dev;
1749
1750 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1751 if (nr_sgs <= 0 || nr_sgs > 1) {
1752 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1753 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1754 iaa_wq->wq->id, ret);
1755 ret = -EIO;
1756 goto out;
1757 }
1758 src_addr = sg_dma_address(req->src);
1759 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1760 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1761 req->src, req->slen, sg_dma_len(req->src));
1762
1763 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1764 if (nr_sgs <= 0 || nr_sgs > 1) {
1765 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1766 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1767 iaa_wq->wq->id, ret);
1768 ret = -EIO;
1769 goto err_map_dst;
1770 }
1771 dst_addr = sg_dma_address(req->dst);
1772 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1773 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1774 req->dst, req->dlen, sg_dma_len(req->dst));
1775
1776 start_time_ns = iaa_get_ts();
1777 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1778 dst_addr, &req->dlen, false);
1779 update_max_decomp_delay_ns(start_time_ns);
1780 if (ret == -EINPROGRESS)
1781 return ret;
1782
1783 if (ret != 0)
1784 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1785
1786 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1787err_map_dst:
1788 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1789out:
1790 iaa_wq_put(wq);
1791
1792 return ret;
1793}
1794
1795static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1796{
1797 ctx->verify_compress = iaa_verify_compress;
1798 ctx->async_mode = async_mode;
1799 ctx->use_irq = use_irq;
1800}
1801
1802static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1803{
1804 struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1805 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1806
1807 compression_ctx_init(ctx);
1808
1809 ctx->mode = IAA_MODE_FIXED;
1810
1811 return 0;
1812}
1813
1814static void dst_free(struct scatterlist *sgl)
1815{
1816 /*
1817 * Called for req->dst = NULL cases but we free elsewhere
1818 * using sgl_free_order().
1819 */
1820}
1821
static struct acomp_alg iaa_acomp_fixed_deflate = {
	.init = iaa_comp_init_fixed,
	.compress = iaa_comp_acompress,
	.decompress = iaa_comp_adecompress,
	.dst_free = dst_free,
	.base = {
		.cra_name = "deflate",
		.cra_driver_name = "deflate-iaa",
		.cra_flags = CRYPTO_ALG_ASYNC,
		.cra_ctxsize = sizeof(struct iaa_compression_ctx),
		.cra_module = THIS_MODULE,
		.cra_priority = IAA_ALG_PRIORITY,
	}
};

static int iaa_register_compression_device(void)
{
	int ret;

	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
	if (ret) {
		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
		goto out;
	}

	iaa_crypto_registered = true;
out:
	return ret;
}

static int iaa_unregister_compression_device(void)
{
	if (iaa_crypto_registered)
		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);

	return 0;
}

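/*
 * idxd sub-driver probe: called when a wq on an enabled IAA device that
 * names this driver is bound to it. The wq is switched to kernel mode,
 * enabled, saved to the global iaa_devices list, and the per-cpu wq
 * lookup table is rebalanced. Registration of the crypto algorithm is
 * deferred until the first wq shows up.
 */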
static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct idxd_driver_data *data = idxd->data;
	struct device *dev = &idxd_dev->conf_dev;
	bool first_wq = false;
	int ret = 0;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	if (data->type != IDXD_TYPE_IAX)
		return -ENODEV;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_get_private(wq)) {
		mutex_unlock(&wq->wq_lock);
		return -EBUSY;
	}

	if (!idxd_wq_driver_name_match(wq, dev)) {
		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
			idxd->id, wq->id, wq->driver_name, dev->driver->name);
		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
		ret = -ENODEV;
		goto err;
	}

	wq->type = IDXD_WQT_KERNEL;

	ret = idxd_drv_enable_wq(wq);
	if (ret < 0) {
		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
			idxd->id, wq->id, ret);
		ret = -ENXIO;
		goto err;
	}

	mutex_lock(&iaa_devices_lock);

	if (list_empty(&iaa_devices)) {
		ret = alloc_wq_table(wq->idxd->max_wqs);
		if (ret)
			goto err_alloc;
		first_wq = true;
	}

	ret = save_iaa_wq(wq);
	if (ret)
		goto err_save;

	rebalance_wq_table();

	if (first_wq) {
		iaa_crypto_enabled = true;
		ret = iaa_register_compression_device();
		if (ret != 0) {
			iaa_crypto_enabled = false;
			dev_dbg(dev, "IAA compression device registration failed\n");
			goto err_register;
		}
		try_module_get(THIS_MODULE);

		pr_info("iaa_crypto now ENABLED\n");
	}

	mutex_unlock(&iaa_devices_lock);
out:
	mutex_unlock(&wq->wq_lock);

	return ret;

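	/*
	 * Error unwind: each label undoes the step above it, in reverse
	 * order of setup, before falling through to the common unlock
	 * path via the 'out' label.
	 */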
err_register:
	remove_iaa_wq(wq);
	free_iaa_wq(idxd_wq_get_private(wq));
err_save:
	if (first_wq)
		free_wq_table();
err_alloc:
	mutex_unlock(&iaa_devices_lock);
	idxd_drv_disable_wq(wq);
err:
	wq->type = IDXD_WQT_NONE;

	goto out;
}

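/*
 * idxd sub-driver remove: quiesce the wq, drop it from the global list
 * and per-cpu table, and free its iaa_wq unless references are still
 * held, in which case freeing is deferred until the last reference is
 * dropped (the 'remove' flag). When the last IAA wq goes away, hardware
 * compression is disabled (the algorithm itself stays registered until
 * module exit).
 */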
static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;

	idxd_wq_quiesce(wq);

	mutex_lock(&wq->wq_lock);
	mutex_lock(&iaa_devices_lock);

	remove_iaa_wq(wq);

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (!iaa_wq) {
		spin_unlock(&idxd->dev_lock);
		pr_err("%s: no iaa_wq available to remove\n", __func__);
		goto out;
	}

	if (iaa_wq->ref) {
		iaa_wq->remove = true;
	} else {
		wq = iaa_wq->wq;
		idxd_wq_set_private(wq, NULL);
		free = true;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	idxd_drv_disable_wq(wq);
	rebalance_wq_table();

	if (nr_iaa == 0) {
		iaa_crypto_enabled = false;
		free_wq_table();
		module_put(THIS_MODULE);

		pr_info("iaa_crypto now DISABLED\n");
	}
out:
	mutex_unlock(&iaa_devices_lock);
	mutex_unlock(&wq->wq_lock);
}

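/*
 * This sub-driver binds to idxd workqueue devices only; completed
 * descriptors are routed back through iaa_desc_complete().
 */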
static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

static struct idxd_device_driver iaa_crypto_driver = {
	.probe = iaa_crypto_probe,
	.remove = iaa_crypto_remove,
	.name = IDXD_SUBDRIVER_NAME,
	.type = dev_types,
	.desc_complete = iaa_desc_complete,
};

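/*
 * Module init: discover the cpu/node topology used for wq balancing,
 * allocate the software deflate tfm, initialize the fixed compression
 * mode (AECS), register the idxd sub-driver and its sysfs attributes,
 * and create the debugfs stats entries.
 */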
static int __init iaa_crypto_init_module(void)
{
	int ret = 0;
	int node;

	nr_cpus = num_online_cpus();
	for_each_node_with_cpus(node)
		nr_nodes++;
	if (!nr_nodes) {
		pr_err("IAA couldn't find any nodes with cpus\n");
		return -ENODEV;
	}
	nr_cpus_per_node = nr_cpus / nr_nodes;

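	/*
	 * Keep a software deflate tfm around; the decompress path can
	 * fall back to it when the hardware reports an error it cannot
	 * recover from.
	 */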
	if (crypto_has_comp("deflate-generic", 0, 0))
		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);

	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
		return -ENOMEM;
	}

	ret = iaa_aecs_init_fixed();
	if (ret < 0) {
		pr_debug("IAA fixed compression mode init failed\n");
		goto err_aecs_init;
	}

	ret = idxd_driver_register(&iaa_crypto_driver);
	if (ret) {
		pr_debug("IAA wq sub-driver registration failed\n");
		goto err_driver_reg;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_verify_compress);
	if (ret) {
		pr_debug("IAA verify_compress attr creation failed\n");
		goto err_verify_attr_create;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_sync_mode);
	if (ret) {
		pr_debug("IAA sync mode attr creation failed\n");
		goto err_sync_attr_create;
	}

	if (iaa_crypto_debugfs_init())
		pr_warn("debugfs init failed, stats not available\n");

	pr_debug("initialized\n");
out:
	return ret;

err_sync_attr_create:
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
err_verify_attr_create:
	idxd_driver_unregister(&iaa_crypto_driver);
err_driver_reg:
	iaa_aecs_cleanup_fixed();
err_aecs_init:
	crypto_free_comp(deflate_generic_tfm);

	goto out;
}

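/*
 * Module exit: unregister the acomp algorithm if it is still registered,
 * then tear down the rest in reverse order of init.
 */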
static void __exit iaa_crypto_cleanup_module(void)
{
	if (iaa_unregister_compression_device())
		pr_debug("IAA compression device unregister failed\n");

	iaa_crypto_debugfs_cleanup();
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_sync_mode);
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
	idxd_driver_unregister(&iaa_crypto_driver);
	iaa_aecs_cleanup_fixed();
	crypto_free_comp(deflate_generic_tfm);

	pr_debug("cleaned up\n");
}

MODULE_IMPORT_NS(IDXD);
MODULE_LICENSE("GPL");
MODULE_ALIAS_IDXD_DEVICE(0);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");

module_init(iaa_crypto_init_module);
module_exit(iaa_crypto_cleanup_module);