   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
   3
   4#include <linux/init.h>
   5#include <linux/kernel.h>
   6#include <linux/module.h>
   7#include <linux/pci.h>
   8#include <linux/device.h>
   9#include <linux/iommu.h>
  10#include <uapi/linux/idxd.h>
  11#include <linux/highmem.h>
  12#include <linux/sched/smt.h>
  13#include <crypto/internal/acompress.h>
  14
  15#include "idxd.h"
  16#include "iaa_crypto.h"
  17#include "iaa_crypto_stats.h"
  18
  19#ifdef pr_fmt
  20#undef pr_fmt
  21#endif
  22
  23#define pr_fmt(fmt)			"idxd: " IDXD_SUBDRIVER_NAME ": " fmt
  24
  25#define IAA_ALG_PRIORITY               300
  26
  27/* number of iaa instances probed */
  28static unsigned int nr_iaa;
  29static unsigned int nr_cpus;
  30static unsigned int nr_nodes;
  31static unsigned int nr_cpus_per_node;
  32
  33/* Number of physical cpus sharing each iaa instance */
  34static unsigned int cpus_per_iaa;
  35
  36static struct crypto_comp *deflate_generic_tfm;
  37
  38/* Per-cpu lookup table for balanced wqs */
  39static struct wq_table_entry __percpu *wq_table;
  40
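/*
 * Return the next wq from this cpu's per-cpu table, round-robin:
 * advance cur_wq, wrapping to 0 past the last installed wq, and
 * return NULL if no wq has been installed in that slot.
 */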
  41static struct idxd_wq *wq_table_next_wq(int cpu)
  42{
  43	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  44
  45	if (++entry->cur_wq >= entry->n_wqs)
  46		entry->cur_wq = 0;
  47
  48	if (!entry->wqs[entry->cur_wq])
  49		return NULL;
  50
  51	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
  52		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
  53		 entry->wqs[entry->cur_wq]->id, cpu);
  54
  55	return entry->wqs[entry->cur_wq];
  56}
  57
  58static void wq_table_add(int cpu, struct idxd_wq *wq)
  59{
  60	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  61
  62	if (WARN_ON(entry->n_wqs == entry->max_wqs))
  63		return;
  64
  65	entry->wqs[entry->n_wqs++] = wq;
  66
  67	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
  68		 entry->wqs[entry->n_wqs - 1]->idxd->id,
  69		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
  70}
  71
  72static void wq_table_free_entry(int cpu)
  73{
  74	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  75
  76	kfree(entry->wqs);
  77	memset(entry, 0, sizeof(*entry));
  78}
  79
  80static void wq_table_clear_entry(int cpu)
  81{
  82	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  83
  84	entry->n_wqs = 0;
  85	entry->cur_wq = 0;
  86	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
  87}
  88
  89LIST_HEAD(iaa_devices);
  90DEFINE_MUTEX(iaa_devices_lock);
  91
  92/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
  93static bool iaa_crypto_enabled;
  94static bool iaa_crypto_registered;
  95
  96/* Whether to verify the results of IAA compression */
  97static bool iaa_verify_compress = true;
  98
  99static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
 100{
 101	return sprintf(buf, "%d\n", iaa_verify_compress);
 102}
 103
 104static ssize_t verify_compress_store(struct device_driver *driver,
 105				     const char *buf, size_t count)
 106{
 107	int ret = -EBUSY;
 108
 109	mutex_lock(&iaa_devices_lock);
 110
 111	if (iaa_crypto_enabled)
 112		goto out;
 113
 114	ret = kstrtobool(buf, &iaa_verify_compress);
 115	if (ret)
 116		goto out;
 117
 118	ret = count;
 119out:
 120	mutex_unlock(&iaa_devices_lock);
 121
 122	return ret;
 123}
 124static DRIVER_ATTR_RW(verify_compress);
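
/*
 * Note: verify_compress (and sync_mode below) can only be changed while
 * iaa_crypto is not enabled, i.e. before any IAA wqs have been bound;
 * the store handlers return -EBUSY otherwise.
 */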
 125
 126/*
 127 * The iaa crypto driver supports three 'sync' methods determining how
 128 * compressions and decompressions are performed:
 129 *
 130 * - sync:      the compression or decompression completes before
 131 *              returning.  This is the mode used by the async crypto
 132 *              interface when the sync mode is set to 'sync' and by
 133 *              the sync crypto interface regardless of setting.
 134 *
 135 * - async:     the compression or decompression is submitted and returns
 136 *              immediately.  Completion interrupts are not used so
 137 *              the caller is responsible for polling the descriptor
 138 *              for completion.  This mode is applicable to only the
 139 *              async crypto interface and is ignored for anything
 140 *              else.
 141 *
 142 * - async_irq: the compression or decompression is submitted and
 143 *              returns immediately.  Completion interrupts are
 144 *              enabled so the caller can wait for the completion and
 145 *              yield to other threads.  When the compression or
 146 *              decompression completes, the completion is signaled
 147 *              and the caller awakened.  This mode is applicable to
 148 *              only the async crypto interface and is ignored for
 149 *              anything else.
 150 *
 151 * These modes can be set using the iaa_crypto sync_mode driver
 152 * attribute.
 153 */
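
/*
 * For example (editor sketch; the attribute path assumes the usual
 * dsa/idxd sysfs layout and that no wqs are currently bound), switching
 * to fully asynchronous, interrupt-driven operation:
 *
 *	echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 */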
 154
 155/* Use async mode */
 156static bool async_mode;
 157/* Use interrupts */
 158static bool use_irq;
 159
 160/**
 161 * set_iaa_sync_mode - Set IAA sync mode
 162 * @name: The name of the sync mode
 163 *
 164 * Make the IAA sync mode named @name the current sync mode used by
 165 * compression/decompression.
 166 */
 167
 168static int set_iaa_sync_mode(const char *name)
 169{
 170	int ret = 0;
 171
 172	if (sysfs_streq(name, "sync")) {
 173		async_mode = false;
 174		use_irq = false;
 175	} else if (sysfs_streq(name, "async")) {
 176		async_mode = true;
 177		use_irq = false;
 178	} else if (sysfs_streq(name, "async_irq")) {
 179		async_mode = true;
 180		use_irq = true;
 181	} else {
 182		ret = -EINVAL;
 183	}
 184
 185	return ret;
 186}
 187
 188static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
 189{
 190	int ret = 0;
 191
 192	if (!async_mode && !use_irq)
 193		ret = sprintf(buf, "%s\n", "sync");
 194	else if (async_mode && !use_irq)
 195		ret = sprintf(buf, "%s\n", "async");
 196	else if (async_mode && use_irq)
 197		ret = sprintf(buf, "%s\n", "async_irq");
 198
 199	return ret;
 200}
 201
 202static ssize_t sync_mode_store(struct device_driver *driver,
 203			       const char *buf, size_t count)
 204{
 205	int ret = -EBUSY;
 206
 207	mutex_lock(&iaa_devices_lock);
 208
 209	if (iaa_crypto_enabled)
 210		goto out;
 211
 212	ret = set_iaa_sync_mode(buf);
 213	if (ret == 0)
 214		ret = count;
 215out:
 216	mutex_unlock(&iaa_devices_lock);
 217
 218	return ret;
 219}
 220static DRIVER_ATTR_RW(sync_mode);
 221
 222static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
 223
 224static int find_empty_iaa_compression_mode(void)
 225{
 226	int i = -EINVAL;
 227
 228	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 229		if (iaa_compression_modes[i])
 230			continue;
 231		break;
 232	}
 233
 234	return i;
 235}
 236
 237static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
 238{
 239	struct iaa_compression_mode *mode;
 240	int i;
 241
 242	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 243		mode = iaa_compression_modes[i];
 244		if (!mode)
 245			continue;
 246
 247		if (!strcmp(mode->name, name)) {
 248			*idx = i;
 249			return iaa_compression_modes[i];
 250		}
 251	}
 252
 253	return NULL;
 254}
 255
 256static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
 257{
 258	kfree(mode->name);
 259	kfree(mode->ll_table);
 260	kfree(mode->d_table);
 261	kfree(mode->header_table);
 262
 263	kfree(mode);
 264}
 265
 266/*
 267 * IAA Compression modes are defined by an ll_table, a d_table, and an
 268 * optional header_table.  These tables are typically generated and
 269 * captured using statistics collected from running actual
 270 * compress/decompress workloads.
 271 *
 272 * A module or other kernel code can add and remove compression modes
 273 * with a given name using the exported @add_iaa_compression_mode()
 274 * and @remove_iaa_compression_mode() functions.
 275 *
 276 * When a new compression mode is added, the tables are saved in a
 277 * global compression mode list.  When IAA devices are added, a
 278 * per-IAA device dma mapping is created for each IAA device, for each
 279 * compression mode.  These are the tables used to do the actual
 280 * compression/decompression and are unmapped if/when the devices are
 281 * removed.  Currently, compression modes must be added before any
 282 * device is added, and removed after all devices have been removed.
 283 */
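
/*
 * Illustrative sketch (not part of the driver): a module supplying its
 * own statically generated tables might register a mode before any IAA
 * device is probed, roughly as below, where my_ll_table[], my_d_table[]
 * and the optional my_hdr_table[] are hypothetical arrays defined by
 * that module:
 *
 *	ret = add_iaa_compression_mode("my-mode",
 *				       my_ll_table, sizeof(my_ll_table),
 *				       my_d_table, sizeof(my_d_table),
 *				       my_hdr_table, sizeof(my_hdr_table),
 *				       0, NULL, NULL);
 *
 * and, after all IAA devices have been removed, tear it down again with
 * remove_iaa_compression_mode("my-mode").
 */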
 284
 285/**
 286 * remove_iaa_compression_mode - Remove an IAA compression mode
 287 * @name: The name of the compression mode to remove
 288 *
 289 * Remove the IAA compression mode named @name.
 290 */
 291void remove_iaa_compression_mode(const char *name)
 292{
 293	struct iaa_compression_mode *mode;
 294	int idx;
 295
 296	mutex_lock(&iaa_devices_lock);
 297
 298	if (!list_empty(&iaa_devices))
 299		goto out;
 300
 301	mode = find_iaa_compression_mode(name, &idx);
 302	if (mode) {
 303		free_iaa_compression_mode(mode);
 304		iaa_compression_modes[idx] = NULL;
 305	}
 306out:
 307	mutex_unlock(&iaa_devices_lock);
 308}
 309EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
 310
 311/**
 312 * add_iaa_compression_mode - Add an IAA compression mode
 313 * @name: The name the compression mode will be known as
 314 * @ll_table: The ll table
 315 * @ll_table_size: The ll table size in bytes
 316 * @d_table: The d table
 317 * @d_table_size: The d table size in bytes
 318 * @header_table: Optional header table
 319 * @header_table_size: Optional header table size in bytes
 320 * @gen_decomp_table_flags: Optional flags used to generate the decomp table
 321 * @init: Optional callback function to init the compression mode data
 322 * @free: Optional callback function to free the compression mode data
 323 *
 324 * Add a new IAA compression mode named @name.
 325 *
 326 * Returns 0 if successful, errcode otherwise.
 327 */
 328int add_iaa_compression_mode(const char *name,
 329			     const u32 *ll_table,
 330			     int ll_table_size,
 331			     const u32 *d_table,
 332			     int d_table_size,
 333			     const u8 *header_table,
 334			     int header_table_size,
 335			     u16 gen_decomp_table_flags,
 336			     iaa_dev_comp_init_fn_t init,
 337			     iaa_dev_comp_free_fn_t free)
 338{
 339	struct iaa_compression_mode *mode;
 340	int idx, ret = -ENOMEM;
 341
 342	mutex_lock(&iaa_devices_lock);
 343
 344	if (!list_empty(&iaa_devices)) {
 345		ret = -EBUSY;
 346		goto out;
 347	}
 348
 349	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
 350	if (!mode)
 351		goto out;
 352
 353	mode->name = kstrdup(name, GFP_KERNEL);
 354	if (!mode->name)
 355		goto free;
 356
 357	if (ll_table) {
 358		mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL);
 359		if (!mode->ll_table)
 360			goto free;
 361		memcpy(mode->ll_table, ll_table, ll_table_size);
 362		mode->ll_table_size = ll_table_size;
 363	}
 364
 365	if (d_table) {
 366		mode->d_table = kzalloc(d_table_size, GFP_KERNEL);
 367		if (!mode->d_table)
 368			goto free;
 369		memcpy(mode->d_table, d_table, d_table_size);
 370		mode->d_table_size = d_table_size;
 371	}
 372
 373	if (header_table) {
 374		mode->header_table = kzalloc(header_table_size, GFP_KERNEL);
 375		if (!mode->header_table)
 376			goto free;
 377		memcpy(mode->header_table, header_table, header_table_size);
 378		mode->header_table_size = header_table_size;
 379	}
 380
 381	mode->gen_decomp_table_flags = gen_decomp_table_flags;
 382
 383	mode->init = init;
 384	mode->free = free;
 385
 386	idx = find_empty_iaa_compression_mode();
 387	if (idx < 0)
 388		goto free;
 389
 390	pr_debug("IAA compression mode %s added at idx %d\n",
 391		 mode->name, idx);
 392
 393	iaa_compression_modes[idx] = mode;
 394
 395	ret = 0;
 396out:
 397	mutex_unlock(&iaa_devices_lock);
 398
 399	return ret;
 400free:
 401	free_iaa_compression_mode(mode);
 402	goto out;
 403}
 404EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
 405
 406static struct iaa_device_compression_mode *
 407get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
 408{
 409	return iaa_device->compression_modes[idx];
 410}
 411
 412static void free_device_compression_mode(struct iaa_device *iaa_device,
 413					 struct iaa_device_compression_mode *device_mode)
 414{
 415	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
 416	struct device *dev = &iaa_device->idxd->pdev->dev;
 417
 418	kfree(device_mode->name);
 419
 420	if (device_mode->aecs_comp_table)
 421		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
 422				  device_mode->aecs_comp_table_dma_addr);
 423	if (device_mode->aecs_decomp_table)
 424		dma_free_coherent(dev, size, device_mode->aecs_decomp_table,
 425				  device_mode->aecs_decomp_table_dma_addr);
 426
 427	kfree(device_mode);
 428}
 429
 430#define IDXD_OP_FLAG_AECS_RW_TGLS       0x400000
 431#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
 432#define IAX_AECS_COMPRESS_FLAG	(IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
 433#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
 434#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
 435						IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
 436						IDXD_OP_FLAG_AECS_RW_TGLS)
 437
 438static int check_completion(struct device *dev,
 439			    struct iax_completion_record *comp,
 440			    bool compress,
 441			    bool only_once);
 442
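/*
 * Prime a device's per-mode decompression AECS: run the mode's optional
 * header table through a decompress op with output suppressed
 * (max_dst_size == 0); IAX_AECS_GEN_FLAG has the hardware write the
 * resulting AECS state back to the src2 buffer.
 */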
 443static int decompress_header(struct iaa_device_compression_mode *device_mode,
 444			     struct iaa_compression_mode *mode,
 445			     struct idxd_wq *wq)
 446{
 447	dma_addr_t src_addr, src2_addr;
 448	struct idxd_desc *idxd_desc;
 449	struct iax_hw_desc *desc;
 450	struct device *dev;
 451	int ret = 0;
 452
 453	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
 454	if (IS_ERR(idxd_desc))
 455		return PTR_ERR(idxd_desc);
 456
 457	desc = idxd_desc->iax_hw;
 458
 459	dev = &wq->idxd->pdev->dev;
 460
 461	src_addr = dma_map_single(dev, (void *)mode->header_table,
 462				  mode->header_table_size, DMA_TO_DEVICE);
 463	dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n",
 464		__func__, mode->name, src_addr,	dev,
 465		mode->header_table, mode->header_table_size);
 466	if (unlikely(dma_mapping_error(dev, src_addr))) {
 467		dev_dbg(dev, "dma_map_single err, exiting\n");
 468		ret = -ENOMEM;
 469		return ret;
 470	}
 471
 472	desc->flags = IAX_AECS_GEN_FLAG;
 473	desc->opcode = IAX_OPCODE_DECOMPRESS;
 474
 475	desc->src1_addr = (u64)src_addr;
 476	desc->src1_size = mode->header_table_size;
 477
 478	src2_addr = device_mode->aecs_decomp_table_dma_addr;
 479	desc->src2_addr = (u64)src2_addr;
 480	desc->src2_size = 1088;
 481	dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n",
 482		__func__, mode->name, desc->src2_addr, dev, desc->src2_size);
 483	desc->max_dst_size = 0; // suppressed output
 484
 485	desc->decompr_flags = mode->gen_decomp_table_flags;
 486
 487	desc->priv = 0;
 488
 489	desc->completion_addr = idxd_desc->compl_dma;
 490
 491	ret = idxd_submit_desc(wq, idxd_desc);
 492	if (ret) {
 493		pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret);
 494		goto out;
 495	}
 496
 497	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
 498	if (ret)
 499		dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n",
 500			__func__, mode->name, ret);
 501	else
 502		dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__,
 503			mode->name);
 504out:
 505	dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE);
 506
 507	return ret;
 508}
 509
 510static int init_device_compression_mode(struct iaa_device *iaa_device,
 511					struct iaa_compression_mode *mode,
 512					int idx, struct idxd_wq *wq)
 513{
 514	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
 515	struct device *dev = &iaa_device->idxd->pdev->dev;
 516	struct iaa_device_compression_mode *device_mode;
 517	int ret = -ENOMEM;
 518
 519	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
 520	if (!device_mode)
 521		return -ENOMEM;
 522
 523	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
 524	if (!device_mode->name)
 525		goto free;
 526
 527	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
 528							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
 529	if (!device_mode->aecs_comp_table)
 530		goto free;
 531
 532	device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size,
 533							    &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL);
 534	if (!device_mode->aecs_decomp_table)
 535		goto free;
 536
 537	/* Add Huffman table to aecs */
 538	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
 539	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
 540	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
 541
 542	if (mode->header_table) {
 543		ret = decompress_header(device_mode, mode, wq);
 544		if (ret) {
 545			pr_debug("iaa header decompression failed: ret=%d\n", ret);
 546			goto free;
 547		}
 548	}
 549
 550	if (mode->init) {
 551		ret = mode->init(device_mode);
 552		if (ret)
 553			goto free;
 554	}
 555
 556	/* mode index should match iaa_compression_modes idx */
 557	iaa_device->compression_modes[idx] = device_mode;
 558
 559	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
 560		 mode->name, iaa_device->idxd->id);
 561
 562	ret = 0;
 563out:
 564	return ret;
 565free:
 566	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
 567		 mode->name, iaa_device->idxd->id);
 568
 569	free_device_compression_mode(iaa_device, device_mode);
 570	goto out;
 571}
 572
 573static int init_device_compression_modes(struct iaa_device *iaa_device,
 574					 struct idxd_wq *wq)
 575{
 576	struct iaa_compression_mode *mode;
 577	int i, ret = 0;
 578
 579	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 580		mode = iaa_compression_modes[i];
 581		if (!mode)
 582			continue;
 583
 584		ret = init_device_compression_mode(iaa_device, mode, i, wq);
 585		if (ret)
 586			break;
 587	}
 588
 589	return ret;
 590}
 591
 592static void remove_device_compression_modes(struct iaa_device *iaa_device)
 593{
 594	struct iaa_device_compression_mode *device_mode;
 595	int i;
 596
 597	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 598		device_mode = iaa_device->compression_modes[i];
 599		if (!device_mode)
 600			continue;
 601
 602		free_device_compression_mode(iaa_device, device_mode);
 603		iaa_device->compression_modes[i] = NULL;
 604		if (iaa_compression_modes[i]->free)
 605			iaa_compression_modes[i]->free(device_mode);
 606	}
 607}
 608
 609static struct iaa_device *iaa_device_alloc(void)
 610{
 611	struct iaa_device *iaa_device;
 612
 613	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
 614	if (!iaa_device)
 615		return NULL;
 616
 617	INIT_LIST_HEAD(&iaa_device->wqs);
 618
 619	return iaa_device;
 620}
 621
 622static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
 623{
 624	struct iaa_wq *iaa_wq;
 625
 626	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
 627		if (iaa_wq->wq == wq)
 628			return true;
 629	}
 630
 631	return false;
 632}
 633
 634static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
 635{
 636	struct iaa_device *iaa_device;
 637
 638	iaa_device = iaa_device_alloc();
 639	if (!iaa_device)
 640		return NULL;
 641
 642	iaa_device->idxd = idxd;
 643
 644	list_add_tail(&iaa_device->list, &iaa_devices);
 645
 646	nr_iaa++;
 647
 648	return iaa_device;
 649}
 650
 651static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
 652{
 653	int ret = 0;
 654
 655	ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
 656	if (ret)
 657		return ret;
 658
 659	return ret;
 660}
 661
 662static void del_iaa_device(struct iaa_device *iaa_device)
 663{
 664	list_del(&iaa_device->list);
 665
 666	nr_iaa--;
 667}
 668
 669static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
 670		      struct iaa_wq **new_wq)
 671{
 672	struct idxd_device *idxd = iaa_device->idxd;
 673	struct pci_dev *pdev = idxd->pdev;
 674	struct device *dev = &pdev->dev;
 675	struct iaa_wq *iaa_wq;
 676
 677	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
 678	if (!iaa_wq)
 679		return -ENOMEM;
 680
 681	iaa_wq->wq = wq;
 682	iaa_wq->iaa_device = iaa_device;
 683	idxd_wq_set_private(wq, iaa_wq);
 684
 685	list_add_tail(&iaa_wq->list, &iaa_device->wqs);
 686
 687	iaa_device->n_wq++;
 688
 689	if (new_wq)
 690		*new_wq = iaa_wq;
 691
 692	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
 693		wq->id, iaa_device->idxd->id, iaa_device->n_wq);
 694
 695	return 0;
 696}
 697
 698static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
 699{
 700	struct idxd_device *idxd = iaa_device->idxd;
 701	struct pci_dev *pdev = idxd->pdev;
 702	struct device *dev = &pdev->dev;
 703	struct iaa_wq *iaa_wq;
 704
 705	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
 706		if (iaa_wq->wq == wq) {
 707			list_del(&iaa_wq->list);
 708			iaa_device->n_wq--;
 709
 710			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
 711				wq->id, iaa_device->idxd->id,
 712				iaa_device->n_wq, nr_iaa);
 713
 714			if (iaa_device->n_wq == 0)
 715				del_iaa_device(iaa_device);
 716			break;
 717		}
 718	}
 719}
 720
 721static void clear_wq_table(void)
 722{
 723	int cpu;
 724
 725	for (cpu = 0; cpu < nr_cpus; cpu++)
 726		wq_table_clear_entry(cpu);
 727
 728	pr_debug("cleared wq table\n");
 729}
 730
 731static void free_iaa_device(struct iaa_device *iaa_device)
 732{
 733	if (!iaa_device)
 734		return;
 735
 736	remove_device_compression_modes(iaa_device);
 737	kfree(iaa_device);
 738}
 739
 740static void __free_iaa_wq(struct iaa_wq *iaa_wq)
 741{
 742	struct iaa_device *iaa_device;
 743
 744	if (!iaa_wq)
 745		return;
 746
 747	iaa_device = iaa_wq->iaa_device;
 748	if (iaa_device->n_wq == 0)
 749		free_iaa_device(iaa_wq->iaa_device);
 750}
 751
 752static void free_iaa_wq(struct iaa_wq *iaa_wq)
 753{
 754	struct idxd_wq *wq;
 755
 756	__free_iaa_wq(iaa_wq);
 757
 758	wq = iaa_wq->wq;
 759
 760	kfree(iaa_wq);
 761	idxd_wq_set_private(wq, NULL);
 762}
 763
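/*
 * iaa_wq_get()/iaa_wq_put() take and drop a reference on the iaa_wq
 * attached to a wq, under idxd->dev_lock.  If the wq has been flagged
 * for removal, iaa_wq_get() fails with -ENODEV; iaa_wq_put() defers the
 * actual free until the last reference is dropped.
 */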
 764static int iaa_wq_get(struct idxd_wq *wq)
 765{
 766	struct idxd_device *idxd = wq->idxd;
 767	struct iaa_wq *iaa_wq;
 768	int ret = 0;
 769
 770	spin_lock(&idxd->dev_lock);
 771	iaa_wq = idxd_wq_get_private(wq);
 772	if (iaa_wq && !iaa_wq->remove) {
 773		iaa_wq->ref++;
 774		idxd_wq_get(wq);
 775	} else {
 776		ret = -ENODEV;
 777	}
 778	spin_unlock(&idxd->dev_lock);
 779
 780	return ret;
 781}
 782
 783static int iaa_wq_put(struct idxd_wq *wq)
 784{
 785	struct idxd_device *idxd = wq->idxd;
 786	struct iaa_wq *iaa_wq;
 787	bool free = false;
 788	int ret = 0;
 789
 790	spin_lock(&idxd->dev_lock);
 791	iaa_wq = idxd_wq_get_private(wq);
 792	if (iaa_wq) {
 793		iaa_wq->ref--;
 794		if (iaa_wq->ref == 0 && iaa_wq->remove) {
 795			idxd_wq_set_private(wq, NULL);
 796			free = true;
 797		}
 798		idxd_wq_put(wq);
 799	} else {
 800		ret = -ENODEV;
 801	}
 802	spin_unlock(&idxd->dev_lock);
 803	if (free) {
 804		__free_iaa_wq(iaa_wq);
 805		kfree(iaa_wq);
 806	}
 807
 808	return ret;
 809}
 810
 811static void free_wq_table(void)
 812{
 813	int cpu;
 814
 815	for (cpu = 0; cpu < nr_cpus; cpu++)
 816		wq_table_free_entry(cpu);
 817
 818	free_percpu(wq_table);
 819
 820	pr_debug("freed wq table\n");
 821}
 822
 823static int alloc_wq_table(int max_wqs)
 824{
 825	struct wq_table_entry *entry;
 826	int cpu;
 827
 828	wq_table = alloc_percpu(struct wq_table_entry);
 829	if (!wq_table)
 830		return -ENOMEM;
 831
 832	for (cpu = 0; cpu < nr_cpus; cpu++) {
 833		entry = per_cpu_ptr(wq_table, cpu);
 834		entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL);
 835		if (!entry->wqs) {
 836			free_wq_table();
 837			return -ENOMEM;
 838		}
 839
 840		entry->max_wqs = max_wqs;
 841	}
 842
 843	pr_debug("initialized wq table\n");
 844
 845	return 0;
 846}
 847
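/*
 * Record a newly probed wq: attach it to its iaa_device, creating and
 * initializing the device entry on first use, and recompute how many
 * cpus share each IAA instance.
 */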
 848static int save_iaa_wq(struct idxd_wq *wq)
 849{
 850	struct iaa_device *iaa_device, *found = NULL;
 851	struct idxd_device *idxd;
 852	struct pci_dev *pdev;
 853	struct device *dev;
 854	int ret = 0;
 855
 856	list_for_each_entry(iaa_device, &iaa_devices, list) {
 857		if (iaa_device->idxd == wq->idxd) {
 858			idxd = iaa_device->idxd;
 859			pdev = idxd->pdev;
 860			dev = &pdev->dev;
 861			/*
 862			 * Check to see that we don't already have this wq.
 863			 * Shouldn't happen but we don't control probing.
 864			 */
 865			if (iaa_has_wq(iaa_device, wq)) {
 866				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
 867					iaa_device);
 868				goto out;
 869			}
 870
 871			found = iaa_device;
 872
 873			ret = add_iaa_wq(iaa_device, wq, NULL);
 874			if (ret)
 875				goto out;
 876
 877			break;
 878		}
 879	}
 880
 881	if (!found) {
 882		struct iaa_device *new_device;
 883		struct iaa_wq *new_wq;
 884
 885		new_device = add_iaa_device(wq->idxd);
 886		if (!new_device) {
 887			ret = -ENOMEM;
 888			goto out;
 889		}
 890
 891		ret = add_iaa_wq(new_device, wq, &new_wq);
 892		if (ret) {
 893			del_iaa_device(new_device);
 894			free_iaa_device(new_device);
 895			goto out;
 896		}
 897
 898		ret = init_iaa_device(new_device, new_wq);
 899		if (ret) {
 900			del_iaa_wq(new_device, new_wq->wq);
 901			del_iaa_device(new_device);
 902			free_iaa_wq(new_wq);
 903			goto out;
 904		}
 905	}
 906
 907	if (WARN_ON(nr_iaa == 0))
 908		return -EINVAL;
 909
 910	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
 911out:
 912	return 0;
 913}
 914
 915static void remove_iaa_wq(struct idxd_wq *wq)
 916{
 917	struct iaa_device *iaa_device;
 918
 919	list_for_each_entry(iaa_device, &iaa_devices, list) {
 920		if (iaa_has_wq(iaa_device, wq)) {
 921			del_iaa_wq(iaa_device, wq);
 922			break;
 923		}
 924	}
 925
 926	if (nr_iaa)
 927		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
 928	else
 929		cpus_per_iaa = 0;
 930}
 931
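/*
 * Point one cpu's wq table at all the wqs of the iaa'th device on the
 * iaa_devices list, falling back to the first device if that index
 * isn't reached.
 */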
 932static int wq_table_add_wqs(int iaa, int cpu)
 933{
 934	struct iaa_device *iaa_device, *found_device = NULL;
 935	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
 936	struct idxd_device *idxd;
 937	struct iaa_wq *iaa_wq;
 938	struct pci_dev *pdev;
 939	struct device *dev;
 940
 941	list_for_each_entry(iaa_device, &iaa_devices, list) {
 942		idxd = iaa_device->idxd;
 943		pdev = idxd->pdev;
 944		dev = &pdev->dev;
 945
 946		if (cur_iaa != iaa) {
 947			cur_iaa++;
 948			continue;
 949		}
 950
 951		found_device = iaa_device;
 952		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
 953			found_device->idxd->id, cur_iaa);
 954		break;
 955	}
 956
 957	if (!found_device) {
 958		found_device = list_first_entry_or_null(&iaa_devices,
 959							struct iaa_device, list);
 960		if (!found_device) {
 961			pr_debug("couldn't find any iaa devices with wqs!\n");
 962			ret = -EINVAL;
 963			goto out;
 964		}
 965		cur_iaa = 0;
 966
 967		idxd = found_device->idxd;
 968		pdev = idxd->pdev;
 969		dev = &pdev->dev;
 970		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
 971			found_device->idxd->id, cur_iaa);
 972	}
 973
 974	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
 975		wq_table_add(cpu, iaa_wq->wq);
 976		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
 977			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
 978		n_wqs_added++;
 979	}
 980
 981	if (!n_wqs_added) {
 982		pr_debug("couldn't find any iaa wqs!\n");
 983		ret = -EINVAL;
 984		goto out;
 985	}
 986out:
 987	return ret;
 988}
 989
 990/*
 991 * Rebalance the wq table so that given a cpu, it's easy to find the
 992 * closest IAA instance.  The idea is to try to choose the most
 993 * appropriate IAA instance for a caller and spread available
 994 * workqueues around to clients.
 995 */
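/*
 * Editor illustration: with 2 nodes, 16 cpus per node and 4 IAA
 * instances, cpus_per_iaa is (2 * 16) / 4 = 8, so within each node the
 * first 8 cpus get the wqs of one IAA device (in probe order) and the
 * next 8 cpus get the wqs of the next one.
 */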
 996static void rebalance_wq_table(void)
 997{
 998	const struct cpumask *node_cpus;
 999	int node, cpu, iaa = -1;
1000
1001	if (nr_iaa == 0)
1002		return;
1003
1004	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
1005		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
1006
1007	clear_wq_table();
1008
1009	if (nr_iaa == 1) {
1010		for (cpu = 0; cpu < nr_cpus; cpu++) {
1011			if (WARN_ON(wq_table_add_wqs(0, cpu))) {
1012				pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
1013				return;
1014			}
1015		}
1016
1017		return;
1018	}
1019
1020	for_each_node_with_cpus(node) {
1021		node_cpus = cpumask_of_node(node);
1022
1023		for (cpu = 0; cpu < nr_cpus_per_node; cpu++) {
1024			int node_cpu = cpumask_nth(cpu, node_cpus);
1025
1026			if (WARN_ON(node_cpu >= nr_cpu_ids)) {
1027				pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
1028				return;
1029			}
1030
1031			if ((cpu % cpus_per_iaa) == 0)
1032				iaa++;
1033
1034			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
1035				pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
1036				return;
1037			}
1038		}
1039	}
1040}
1041
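/*
 * Poll the completion record until the hardware sets a status, then map
 * IAA status/error codes onto errnos (-ETIMEDOUT, -E2BIG, -EOVERFLOW,
 * -EINVAL).  With only_once set, a still-pending completion returns
 * -EAGAIN instead of busy-waiting.
 */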
1042static inline int check_completion(struct device *dev,
1043				   struct iax_completion_record *comp,
1044				   bool compress,
1045				   bool only_once)
1046{
1047	char *op_str = compress ? "compress" : "decompress";
1048	int ret = 0;
1049
1050	while (!comp->status) {
1051		if (only_once)
1052			return -EAGAIN;
1053		cpu_relax();
1054	}
1055
1056	if (comp->status != IAX_COMP_SUCCESS) {
1057		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
1058			ret = -ETIMEDOUT;
1059			dev_dbg(dev, "%s timed out, size=0x%x\n",
1060				op_str, comp->output_size);
1061			update_completion_timeout_errs();
1062			goto out;
1063		}
1064
1065		if (comp->status == IAA_ANALYTICS_ERROR &&
1066		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
1067			ret = -E2BIG;
1068			dev_dbg(dev, "compressed > uncompressed size,"
1069				" not compressing, size=0x%x\n",
1070				comp->output_size);
1071			update_completion_comp_buf_overflow_errs();
1072			goto out;
1073		}
1074
1075		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
1076			ret = -EOVERFLOW;
1077			goto out;
1078		}
1079
1080		ret = -EINVAL;
1081		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
1082			op_str, comp->status, comp->error_code, comp->output_size);
1083		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
1084		update_completion_einval_errs();
1085
1086		goto out;
1087	}
1088out:
1089	return ret;
1090}
1091
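/*
 * Software fallback: decompress via the generic "deflate" crypto_comp
 * tfm.  Used when the hardware reports an analytics error on a
 * decompress, so callers still get correct output.
 */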
1092static int deflate_generic_decompress(struct acomp_req *req)
1093{
1094	void *src, *dst;
1095	int ret;
1096
1097	src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1098	dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1099
1100	ret = crypto_comp_decompress(deflate_generic_tfm,
1101				     src, req->slen, dst, &req->dlen);
1102
1103	kunmap_local(src);
1104	kunmap_local(dst);
1105
1106	update_total_sw_decomp_calls();
1107
1108	return ret;
1109}
1110
1111static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1112				struct acomp_req *req,
1113				dma_addr_t *src_addr, dma_addr_t *dst_addr);
1114
1115static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1116			       struct idxd_wq *wq,
1117			       dma_addr_t src_addr, unsigned int slen,
1118			       dma_addr_t dst_addr, unsigned int *dlen,
1119			       u32 compression_crc);
1120
1121static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1122			      enum idxd_complete_type comp_type,
1123			      bool free_desc, void *__ctx,
1124			      u32 *status)
1125{
1126	struct iaa_device_compression_mode *active_compression_mode;
1127	struct iaa_compression_ctx *compression_ctx;
1128	struct crypto_ctx *ctx = __ctx;
1129	struct iaa_device *iaa_device;
1130	struct idxd_device *idxd;
1131	struct iaa_wq *iaa_wq;
1132	struct pci_dev *pdev;
1133	struct device *dev;
1134	int ret, err = 0;
1135
1136	compression_ctx = crypto_tfm_ctx(ctx->tfm);
1137
1138	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1139	iaa_device = iaa_wq->iaa_device;
1140	idxd = iaa_device->idxd;
1141	pdev = idxd->pdev;
1142	dev = &pdev->dev;
1143
1144	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1145								  compression_ctx->mode);
1146	dev_dbg(dev, "%s: compression mode %s,"
1147		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1148		active_compression_mode->name,
1149		ctx->src_addr, ctx->dst_addr);
1150
1151	ret = check_completion(dev, idxd_desc->iax_completion,
1152			       ctx->compress, false);
1153	if (ret) {
1154		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1155		if (!ctx->compress &&
1156		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1157			pr_warn("%s: falling back to deflate-generic decompress, "
1158				"analytics error code %x\n", __func__,
1159				idxd_desc->iax_completion->error_code);
1160			ret = deflate_generic_decompress(ctx->req);
1161			if (ret) {
1162				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1163					__func__, ret);
1164				err = -EIO;
1165				goto err;
1166			}
1167		} else {
1168			err = -EIO;
1169			goto err;
1170		}
1171	} else {
1172		ctx->req->dlen = idxd_desc->iax_completion->output_size;
1173	}
1174
1175	/* Update stats */
1176	if (ctx->compress) {
1177		update_total_comp_bytes_out(ctx->req->dlen);
1178		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1179	} else {
1180		update_total_decomp_bytes_in(ctx->req->dlen);
1181		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen);
1182	}
1183
1184	if (ctx->compress && compression_ctx->verify_compress) {
1185		dma_addr_t src_addr, dst_addr;
1186		u32 compression_crc;
1187
1188		compression_crc = idxd_desc->iax_completion->crc;
1189
1190		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1191		if (ret) {
1192			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1193			err = -EIO;
1194			goto out;
1195		}
1196
1197		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1198					  ctx->req->slen, dst_addr, &ctx->req->dlen,
1199					  compression_crc);
1200		if (ret) {
1201			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1202			err = -EIO;
1203		}
1204
1205		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1206		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1207
1208		goto out;
1209	}
1210err:
1211	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1212	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1213out:
1214	if (ret != 0)
1215		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1216
1217	if (ctx->req->base.complete)
1218		acomp_request_complete(ctx->req, err);
1219
1220	if (free_desc)
1221		idxd_free_desc(idxd_desc->wq, idxd_desc);
1222	iaa_wq_put(idxd_desc->wq);
1223}
1224
1225static int iaa_compress(struct crypto_tfm *tfm,	struct acomp_req *req,
1226			struct idxd_wq *wq,
1227			dma_addr_t src_addr, unsigned int slen,
1228			dma_addr_t dst_addr, unsigned int *dlen,
1229			u32 *compression_crc,
1230			bool disable_async)
1231{
1232	struct iaa_device_compression_mode *active_compression_mode;
1233	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1234	struct iaa_device *iaa_device;
1235	struct idxd_desc *idxd_desc;
1236	struct iax_hw_desc *desc;
1237	struct idxd_device *idxd;
1238	struct iaa_wq *iaa_wq;
1239	struct pci_dev *pdev;
1240	struct device *dev;
1241	int ret = 0;
1242
1243	iaa_wq = idxd_wq_get_private(wq);
1244	iaa_device = iaa_wq->iaa_device;
1245	idxd = iaa_device->idxd;
1246	pdev = idxd->pdev;
1247	dev = &pdev->dev;
1248
1249	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1250
1251	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1252	if (IS_ERR(idxd_desc)) {
1253		dev_dbg(dev, "idxd descriptor allocation failed\n");
1254		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1255		return PTR_ERR(idxd_desc);
1256	}
1257	desc = idxd_desc->iax_hw;
1258
1259	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1260		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1261	desc->opcode = IAX_OPCODE_COMPRESS;
1262	desc->compr_flags = IAA_COMP_FLAGS;
1263	desc->priv = 0;
1264
1265	desc->src1_addr = (u64)src_addr;
1266	desc->src1_size = slen;
1267	desc->dst_addr = (u64)dst_addr;
1268	desc->max_dst_size = *dlen;
1269	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1270	desc->src2_size = sizeof(struct aecs_comp_table_record);
1271	desc->completion_addr = idxd_desc->compl_dma;
1272
1273	if (ctx->use_irq && !disable_async) {
1274		desc->flags |= IDXD_OP_FLAG_RCI;
1275
1276		idxd_desc->crypto.req = req;
1277		idxd_desc->crypto.tfm = tfm;
1278		idxd_desc->crypto.src_addr = src_addr;
1279		idxd_desc->crypto.dst_addr = dst_addr;
1280		idxd_desc->crypto.compress = true;
1281
1282		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1283			" src_addr %llx, dst_addr %llx\n", __func__,
1284			active_compression_mode->name,
1285			src_addr, dst_addr);
1286	} else if (ctx->async_mode && !disable_async)
1287		req->base.data = idxd_desc;
1288
1289	dev_dbg(dev, "%s: compression mode %s,"
1290		" desc->src1_addr %llx, desc->src1_size %d,"
1291		" desc->dst_addr %llx, desc->max_dst_size %d,"
1292		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1293		active_compression_mode->name,
1294		desc->src1_addr, desc->src1_size, desc->dst_addr,
1295		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1296
1297	ret = idxd_submit_desc(wq, idxd_desc);
1298	if (ret) {
1299		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1300		goto err;
1301	}
1302
1303	/* Update stats */
1304	update_total_comp_calls();
1305	update_wq_comp_calls(wq);
1306
1307	if (ctx->async_mode && !disable_async) {
1308		ret = -EINPROGRESS;
1309		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1310		goto out;
1311	}
1312
1313	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1314	if (ret) {
1315		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1316		goto err;
1317	}
1318
1319	*dlen = idxd_desc->iax_completion->output_size;
1320
1321	/* Update stats */
1322	update_total_comp_bytes_out(*dlen);
1323	update_wq_comp_bytes(wq, *dlen);
1324
1325	*compression_crc = idxd_desc->iax_completion->crc;
1326
1327	if (!ctx->async_mode)
1328		idxd_free_desc(wq, idxd_desc);
1329out:
1330	return ret;
1331err:
1332	idxd_free_desc(wq, idxd_desc);
1333	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1334
1335	goto out;
1336}
1337
1338static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1339				struct acomp_req *req,
1340				dma_addr_t *src_addr, dma_addr_t *dst_addr)
1341{
1342	int ret = 0;
1343	int nr_sgs;
1344
1345	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1346	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1347
1348	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1349	if (nr_sgs <= 0 || nr_sgs > 1) {
1350		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1351			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1352			iaa_wq->wq->id, ret);
1353		ret = -EIO;
1354		goto out;
1355	}
1356	*src_addr = sg_dma_address(req->src);
1357	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1358		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1359		req->src, req->slen, sg_dma_len(req->src));
1360
1361	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1362	if (nr_sgs <= 0 || nr_sgs > 1) {
1363		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1364			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1365			iaa_wq->wq->id, ret);
1366		ret = -EIO;
1367		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1368		goto out;
1369	}
1370	*dst_addr = sg_dma_address(req->dst);
1371	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1372		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1373		req->dst, req->dlen, sg_dma_len(req->dst));
1374out:
1375	return ret;
1376}
1377
1378static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1379			       struct idxd_wq *wq,
1380			       dma_addr_t src_addr, unsigned int slen,
1381			       dma_addr_t dst_addr, unsigned int *dlen,
1382			       u32 compression_crc)
1383{
1384	struct iaa_device_compression_mode *active_compression_mode;
1385	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1386	struct iaa_device *iaa_device;
1387	struct idxd_desc *idxd_desc;
1388	struct iax_hw_desc *desc;
1389	struct idxd_device *idxd;
1390	struct iaa_wq *iaa_wq;
1391	struct pci_dev *pdev;
1392	struct device *dev;
1393	int ret = 0;
1394
1395	iaa_wq = idxd_wq_get_private(wq);
1396	iaa_device = iaa_wq->iaa_device;
1397	idxd = iaa_device->idxd;
1398	pdev = idxd->pdev;
1399	dev = &pdev->dev;
1400
1401	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1402
1403	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1404	if (IS_ERR(idxd_desc)) {
1405		dev_dbg(dev, "idxd descriptor allocation failed\n");
1406		dev_dbg(dev, "iaa compress failed: ret=%ld\n",
1407			PTR_ERR(idxd_desc));
1408		return PTR_ERR(idxd_desc);
1409	}
1410	desc = idxd_desc->iax_hw;
1411
1412	/* Verify (optional) - decompress and check crc, suppress dest write */
1413
1414	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1415	desc->opcode = IAX_OPCODE_DECOMPRESS;
1416	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1417	desc->priv = 0;
1418
1419	desc->src1_addr = (u64)dst_addr;
1420	desc->src1_size = *dlen;
1421	desc->dst_addr = (u64)src_addr;
1422	desc->max_dst_size = slen;
1423	desc->completion_addr = idxd_desc->compl_dma;
1424
1425	dev_dbg(dev, "(verify) compression mode %s,"
1426		" desc->src1_addr %llx, desc->src1_size %d,"
1427		" desc->dst_addr %llx, desc->max_dst_size %d,"
1428		" desc->src2_addr %llx, desc->src2_size %d\n",
1429		active_compression_mode->name,
1430		desc->src1_addr, desc->src1_size, desc->dst_addr,
1431		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1432
1433	ret = idxd_submit_desc(wq, idxd_desc);
1434	if (ret) {
1435		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1436		goto err;
1437	}
1438
1439	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1440	if (ret) {
1441		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1442		goto err;
1443	}
1444
1445	if (compression_crc != idxd_desc->iax_completion->crc) {
1446		ret = -EINVAL;
1447		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1448			" comp=0x%x, decomp=0x%x\n", compression_crc,
1449			idxd_desc->iax_completion->crc);
1450		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1451			       8, 1, idxd_desc->iax_completion, 64, 0);
1452		goto err;
1453	}
1454
1455	idxd_free_desc(wq, idxd_desc);
1456out:
1457	return ret;
1458err:
1459	idxd_free_desc(wq, idxd_desc);
1460	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1461
1462	goto out;
1463}
1464
1465static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1466			  struct idxd_wq *wq,
1467			  dma_addr_t src_addr, unsigned int slen,
1468			  dma_addr_t dst_addr, unsigned int *dlen,
1469			  bool disable_async)
1470{
1471	struct iaa_device_compression_mode *active_compression_mode;
1472	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1473	struct iaa_device *iaa_device;
1474	struct idxd_desc *idxd_desc;
1475	struct iax_hw_desc *desc;
1476	struct idxd_device *idxd;
1477	struct iaa_wq *iaa_wq;
1478	struct pci_dev *pdev;
1479	struct device *dev;
1480	int ret = 0;
1481
1482	iaa_wq = idxd_wq_get_private(wq);
1483	iaa_device = iaa_wq->iaa_device;
1484	idxd = iaa_device->idxd;
1485	pdev = idxd->pdev;
1486	dev = &pdev->dev;
1487
1488	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1489
1490	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1491	if (IS_ERR(idxd_desc)) {
1492		dev_dbg(dev, "idxd descriptor allocation failed\n");
1493		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1494			PTR_ERR(idxd_desc));
1495		return PTR_ERR(idxd_desc);
1496	}
1497	desc = idxd_desc->iax_hw;
1498
1499	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1500	desc->opcode = IAX_OPCODE_DECOMPRESS;
1501	desc->max_dst_size = PAGE_SIZE;
1502	desc->decompr_flags = IAA_DECOMP_FLAGS;
1503	desc->priv = 0;
1504
1505	desc->src1_addr = (u64)src_addr;
1506	desc->dst_addr = (u64)dst_addr;
1507	desc->max_dst_size = *dlen;
1508	desc->src1_size = slen;
1509	desc->completion_addr = idxd_desc->compl_dma;
1510
1511	if (ctx->use_irq && !disable_async) {
1512		desc->flags |= IDXD_OP_FLAG_RCI;
1513
1514		idxd_desc->crypto.req = req;
1515		idxd_desc->crypto.tfm = tfm;
1516		idxd_desc->crypto.src_addr = src_addr;
1517		idxd_desc->crypto.dst_addr = dst_addr;
1518		idxd_desc->crypto.compress = false;
1519
1520		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1521			" src_addr %llx, dst_addr %llx\n", __func__,
1522			active_compression_mode->name,
1523			src_addr, dst_addr);
1524	} else if (ctx->async_mode && !disable_async)
1525		req->base.data = idxd_desc;
1526
1527	dev_dbg(dev, "%s: decompression mode %s,"
1528		" desc->src1_addr %llx, desc->src1_size %d,"
1529		" desc->dst_addr %llx, desc->max_dst_size %d,"
1530		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1531		active_compression_mode->name,
1532		desc->src1_addr, desc->src1_size, desc->dst_addr,
1533		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1534
1535	ret = idxd_submit_desc(wq, idxd_desc);
1536	if (ret) {
1537		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1538		goto err;
1539	}
1540
1541	/* Update stats */
1542	update_total_decomp_calls();
1543	update_wq_decomp_calls(wq);
1544
1545	if (ctx->async_mode && !disable_async) {
1546		ret = -EINPROGRESS;
1547		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1548		goto out;
1549	}
1550
1551	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1552	if (ret) {
1553		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1554		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1555			pr_warn("%s: falling back to deflate-generic decompress, "
1556				"analytics error code %x\n", __func__,
1557				idxd_desc->iax_completion->error_code);
1558			ret = deflate_generic_decompress(req);
1559			if (ret) {
1560				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1561					__func__, ret);
1562				goto err;
1563			}
1564		} else {
1565			goto err;
1566		}
1567	} else {
1568		req->dlen = idxd_desc->iax_completion->output_size;
1569	}
1570
1571	*dlen = req->dlen;
1572
1573	if (!ctx->async_mode)
1574		idxd_free_desc(wq, idxd_desc);
1575
1576	/* Update stats */
1577	update_total_decomp_bytes_in(slen);
1578	update_wq_decomp_bytes(wq, slen);
1579out:
1580	return ret;
1581err:
1582	idxd_free_desc(wq, idxd_desc);
1583	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1584
1585	goto out;
1586}
1587
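/*
 * acomp ->compress() entry point: pick the next wq for this cpu, DMA-map
 * the src/dst scatterlists (allocating a dst buffer if the caller didn't
 * provide one), hand the buffers to iaa_compress(), and optionally
 * verify the result by decompressing it and comparing CRCs.
 */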
1588static int iaa_comp_acompress(struct acomp_req *req)
1589{
1590	struct iaa_compression_ctx *compression_ctx;
1591	struct crypto_tfm *tfm = req->base.tfm;
1592	dma_addr_t src_addr, dst_addr;
1593	bool disable_async = false;
1594	int nr_sgs, cpu, ret = 0;
1595	struct iaa_wq *iaa_wq;
1596	u32 compression_crc;
1597	struct idxd_wq *wq;
1598	struct device *dev;
1599	int order = -1;
1600
1601	compression_ctx = crypto_tfm_ctx(tfm);
1602
1603	if (!iaa_crypto_enabled) {
1604		pr_debug("iaa_crypto disabled, not compressing\n");
1605		return -ENODEV;
1606	}
1607
1608	if (!req->src || !req->slen) {
1609		pr_debug("invalid src, not compressing\n");
1610		return -EINVAL;
1611	}
1612
1613	cpu = get_cpu();
1614	wq = wq_table_next_wq(cpu);
1615	put_cpu();
1616	if (!wq) {
1617		pr_debug("no wq configured for cpu=%d\n", cpu);
1618		return -ENODEV;
1619	}
1620
1621	ret = iaa_wq_get(wq);
1622	if (ret) {
1623		pr_debug("no wq available for cpu=%d\n", cpu);
1624		return -ENODEV;
1625	}
1626
1627	iaa_wq = idxd_wq_get_private(wq);
1628
1629	if (!req->dst) {
1630		gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1631
1632		/* incompressible data will always be < 2 * slen */
1633		req->dlen = 2 * req->slen;
1634		order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1635		req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1636		if (!req->dst) {
1637			ret = -ENOMEM;
1638			order = -1;
1639			goto out;
1640		}
1641		disable_async = true;
1642	}
1643
1644	dev = &wq->idxd->pdev->dev;
1645
1646	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1647	if (nr_sgs <= 0 || nr_sgs > 1) {
1648		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1649			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1650			iaa_wq->wq->id, ret);
1651		ret = -EIO;
1652		goto out;
1653	}
1654	src_addr = sg_dma_address(req->src);
1655	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1656		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1657		req->src, req->slen, sg_dma_len(req->src));
1658
1659	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1660	if (nr_sgs <= 0 || nr_sgs > 1) {
1661		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1662			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1663			iaa_wq->wq->id, ret);
1664		ret = -EIO;
1665		goto err_map_dst;
1666	}
1667	dst_addr = sg_dma_address(req->dst);
1668	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1669		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1670		req->dst, req->dlen, sg_dma_len(req->dst));
1671
1672	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1673			   &req->dlen, &compression_crc, disable_async);
1674	if (ret == -EINPROGRESS)
1675		return ret;
1676
1677	if (!ret && compression_ctx->verify_compress) {
1678		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1679		if (ret) {
1680			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1681			goto out;
1682		}
1683
1684		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1685					  dst_addr, &req->dlen, compression_crc);
1686		if (ret)
1687			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1688
1689		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1690		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1691
1692		goto out;
1693	}
1694
1695	if (ret)
1696		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1697
1698	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1699err_map_dst:
1700	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1701out:
1702	iaa_wq_put(wq);
1703
1704	if (order >= 0)
1705		sgl_free_order(req->dst, order);
1706
1707	return ret;
1708}
1709
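/*
 * Handle a decompress request with no destination buffer: allocate one
 * starting at 4 * slen and keep doubling it (up to CRYPTO_ACOMP_DST_MAX)
 * while the hardware reports -EOVERFLOW.
 */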
1710static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1711{
1712	gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1713		GFP_KERNEL : GFP_ATOMIC;
1714	struct crypto_tfm *tfm = req->base.tfm;
1715	dma_addr_t src_addr, dst_addr;
1716	int nr_sgs, cpu, ret = 0;
1717	struct iaa_wq *iaa_wq;
1718	struct device *dev;
1719	struct idxd_wq *wq;
1720	int order = -1;
1721
1722	cpu = get_cpu();
1723	wq = wq_table_next_wq(cpu);
1724	put_cpu();
1725	if (!wq) {
1726		pr_debug("no wq configured for cpu=%d\n", cpu);
1727		return -ENODEV;
1728	}
1729
1730	ret = iaa_wq_get(wq);
1731	if (ret) {
1732		pr_debug("no wq available for cpu=%d\n", cpu);
1733		return -ENODEV;
1734	}
1735
1736	iaa_wq = idxd_wq_get_private(wq);
1737
1738	dev = &wq->idxd->pdev->dev;
1739
1740	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1741	if (nr_sgs <= 0 || nr_sgs > 1) {
1742		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1743			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1744			iaa_wq->wq->id, ret);
1745		ret = -EIO;
1746		goto out;
1747	}
1748	src_addr = sg_dma_address(req->src);
1749	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1750		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1751		req->src, req->slen, sg_dma_len(req->src));
1752
 1753	req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
1754alloc_dest:
1755	order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1756	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1757	if (!req->dst) {
1758		ret = -ENOMEM;
1759		order = -1;
1760		goto out;
1761	}
1762
1763	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1764	if (nr_sgs <= 0 || nr_sgs > 1) {
1765		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1766			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1767			iaa_wq->wq->id, ret);
1768		ret = -EIO;
1769		goto err_map_dst;
1770	}
1771
1772	dst_addr = sg_dma_address(req->dst);
1773	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1774		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1775		req->dst, req->dlen, sg_dma_len(req->dst));
1776	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1777			     dst_addr, &req->dlen, true);
1778	if (ret == -EOVERFLOW) {
1779		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1780		req->dlen *= 2;
1781		if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1782			goto err_map_dst;
1783		goto alloc_dest;
1784	}
1785
1786	if (ret != 0)
1787		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1788
1789	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1790err_map_dst:
1791	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1792out:
1793	iaa_wq_put(wq);
1794
1795	if (order >= 0)
1796		sgl_free_order(req->dst, order);
1797
1798	return ret;
1799}
1800
1801static int iaa_comp_adecompress(struct acomp_req *req)
1802{
1803	struct crypto_tfm *tfm = req->base.tfm;
1804	dma_addr_t src_addr, dst_addr;
1805	int nr_sgs, cpu, ret = 0;
1806	struct iaa_wq *iaa_wq;
1807	struct device *dev;
1808	struct idxd_wq *wq;
1809
1810	if (!iaa_crypto_enabled) {
1811		pr_debug("iaa_crypto disabled, not decompressing\n");
1812		return -ENODEV;
1813	}
1814
1815	if (!req->src || !req->slen) {
1816		pr_debug("invalid src, not decompressing\n");
1817		return -EINVAL;
1818	}
1819
1820	if (!req->dst)
1821		return iaa_comp_adecompress_alloc_dest(req);
1822
1823	cpu = get_cpu();
1824	wq = wq_table_next_wq(cpu);
1825	put_cpu();
1826	if (!wq) {
1827		pr_debug("no wq configured for cpu=%d\n", cpu);
1828		return -ENODEV;
1829	}
1830
1831	ret = iaa_wq_get(wq);
1832	if (ret) {
1833		pr_debug("no wq available for cpu=%d\n", cpu);
1834		return -ENODEV;
1835	}
1836
1837	iaa_wq = idxd_wq_get_private(wq);
1838
1839	dev = &wq->idxd->pdev->dev;
1840
1841	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1842	if (nr_sgs <= 0 || nr_sgs > 1) {
1843		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1844			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1845			iaa_wq->wq->id, ret);
1846		ret = -EIO;
1847		goto out;
1848	}
1849	src_addr = sg_dma_address(req->src);
1850	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1851		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1852		req->src, req->slen, sg_dma_len(req->src));
1853
1854	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1855	if (nr_sgs <= 0 || nr_sgs > 1) {
1856		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1857			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1858			iaa_wq->wq->id, ret);
1859		ret = -EIO;
1860		goto err_map_dst;
1861	}
1862	dst_addr = sg_dma_address(req->dst);
1863	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1864		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1865		req->dst, req->dlen, sg_dma_len(req->dst));
1866
1867	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1868			     dst_addr, &req->dlen, false);
1869	if (ret == -EINPROGRESS)
1870		return ret;
1871
1872	if (ret != 0)
1873		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1874
1875	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1876err_map_dst:
1877	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1878out:
1879	iaa_wq_put(wq);
1880
1881	return ret;
1882}
1883
1884static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1885{
1886	ctx->verify_compress = iaa_verify_compress;
1887	ctx->async_mode = async_mode;
1888	ctx->use_irq = use_irq;
1889}
1890
1891static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1892{
1893	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1894	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1895
1896	compression_ctx_init(ctx);
1897
1898	ctx->mode = IAA_MODE_FIXED;
1899
1900	return 0;
1901}
1902
1903static void dst_free(struct scatterlist *sgl)
1904{
1905	/*
1906	 * Called for req->dst = NULL cases but we free elsewhere
1907	 * using sgl_free_order().
1908	 */
1909}
1910
1911static struct acomp_alg iaa_acomp_fixed_deflate = {
1912	.init			= iaa_comp_init_fixed,
1913	.compress		= iaa_comp_acompress,
1914	.decompress		= iaa_comp_adecompress,
1915	.dst_free               = dst_free,
1916	.base			= {
1917		.cra_name		= "deflate",
1918		.cra_driver_name	= "deflate-iaa",
1919		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
1920		.cra_module		= THIS_MODULE,
1921		.cra_priority		= IAA_ALG_PRIORITY,
1922	}
1923};
1924
1925static int iaa_register_compression_device(void)
1926{
1927	int ret;
1928
1929	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1930	if (ret) {
1931		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1932		goto out;
1933	}
1934
1935	iaa_crypto_registered = true;
1936out:
1937	return ret;
1938}
1939
1940static int iaa_unregister_compression_device(void)
1941{
1942	if (iaa_crypto_registered)
1943		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
1944
1945	return 0;
1946}
1947
1948static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
1949{
1950	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1951	struct idxd_device *idxd = wq->idxd;
1952	struct idxd_driver_data *data = idxd->data;
1953	struct device *dev = &idxd_dev->conf_dev;
1954	bool first_wq = false;
1955	int ret = 0;
1956
1957	if (idxd->state != IDXD_DEV_ENABLED)
1958		return -ENXIO;
1959
1960	if (data->type != IDXD_TYPE_IAX)
1961		return -ENODEV;
1962
1963	mutex_lock(&wq->wq_lock);
1964
1965	if (idxd_wq_get_private(wq)) {
1966		mutex_unlock(&wq->wq_lock);
1967		return -EBUSY;
1968	}
1969
1970	if (!idxd_wq_driver_name_match(wq, dev)) {
1971		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
1972			idxd->id, wq->id, wq->driver_name, dev->driver->name);
1973		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
1974		ret = -ENODEV;
1975		goto err;
1976	}
1977
1978	wq->type = IDXD_WQT_KERNEL;
1979
1980	ret = idxd_drv_enable_wq(wq);
1981	if (ret < 0) {
1982		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
1983			idxd->id, wq->id, ret);
1984		ret = -ENXIO;
1985		goto err;
1986	}
1987
1988	mutex_lock(&iaa_devices_lock);
1989
1990	if (list_empty(&iaa_devices)) {
1991		ret = alloc_wq_table(wq->idxd->max_wqs);
1992		if (ret)
1993			goto err_alloc;
1994		first_wq = true;
1995	}
1996
1997	ret = save_iaa_wq(wq);
1998	if (ret)
1999		goto err_save;
2000
2001	rebalance_wq_table();
2002
2003	if (first_wq) {
2004		iaa_crypto_enabled = true;
2005		ret = iaa_register_compression_device();
2006		if (ret != 0) {
2007			iaa_crypto_enabled = false;
2008			dev_dbg(dev, "IAA compression device registration failed\n");
2009			goto err_register;
2010		}
2011		try_module_get(THIS_MODULE);
2012
2013		pr_info("iaa_crypto now ENABLED\n");
2014	}
2015
2016	mutex_unlock(&iaa_devices_lock);
2017out:
2018	mutex_unlock(&wq->wq_lock);
2019
2020	return ret;
2021
2022err_register:
2023	remove_iaa_wq(wq);
2024	free_iaa_wq(idxd_wq_get_private(wq));
2025err_save:
2026	if (first_wq)
2027		free_wq_table();
2028err_alloc:
2029	mutex_unlock(&iaa_devices_lock);
2030	idxd_drv_disable_wq(wq);
2031err:
2032	wq->type = IDXD_WQT_NONE;
2033
2034	goto out;
2035}
2036
2037static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
2038{
2039	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
2040	struct idxd_device *idxd = wq->idxd;
2041	struct iaa_wq *iaa_wq;
2042	bool free = false;
2043
2044	idxd_wq_quiesce(wq);
2045
2046	mutex_lock(&wq->wq_lock);
2047	mutex_lock(&iaa_devices_lock);
2048
2049	remove_iaa_wq(wq);
2050
2051	spin_lock(&idxd->dev_lock);
2052	iaa_wq = idxd_wq_get_private(wq);
2053	if (!iaa_wq) {
2054		spin_unlock(&idxd->dev_lock);
2055		pr_err("%s: no iaa_wq available to remove\n", __func__);
2056		goto out;
2057	}
2058
2059	if (iaa_wq->ref) {
2060		iaa_wq->remove = true;
2061	} else {
2062		wq = iaa_wq->wq;
2063		idxd_wq_set_private(wq, NULL);
2064		free = true;
2065	}
2066	spin_unlock(&idxd->dev_lock);
2067	if (free) {
2068		__free_iaa_wq(iaa_wq);
2069		kfree(iaa_wq);
2070	}
2071
2072	idxd_drv_disable_wq(wq);
2073	rebalance_wq_table();
2074
2075	if (nr_iaa == 0) {
2076		iaa_crypto_enabled = false;
2077		free_wq_table();
2078		module_put(THIS_MODULE);
2079
2080		pr_info("iaa_crypto now DISABLED\n");
2081	}
2082out:
2083	mutex_unlock(&iaa_devices_lock);
2084	mutex_unlock(&wq->wq_lock);
2085}
2086
2087static enum idxd_dev_type dev_types[] = {
2088	IDXD_DEV_WQ,
2089	IDXD_DEV_NONE,
2090};
2091
2092static struct idxd_device_driver iaa_crypto_driver = {
2093	.probe = iaa_crypto_probe,
2094	.remove = iaa_crypto_remove,
2095	.name = IDXD_SUBDRIVER_NAME,
2096	.type = dev_types,
2097	.desc_complete = iaa_desc_complete,
2098};
2099
2100static int __init iaa_crypto_init_module(void)
2101{
2102	int ret = 0;
2103	int node;
2104
2105	nr_cpus = num_online_cpus();
2106	for_each_node_with_cpus(node)
2107		nr_nodes++;
2108	if (!nr_nodes) {
2109		pr_err("IAA couldn't find any nodes with cpus\n");
2110		return -ENODEV;
2111	}
2112	nr_cpus_per_node = nr_cpus / nr_nodes;
2113
2114	if (crypto_has_comp("deflate-generic", 0, 0))
2115		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);
2116
2117	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
2118		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
2119		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
2120		return -ENOMEM;
2121	}
2122
2123	ret = iaa_aecs_init_fixed();
2124	if (ret < 0) {
2125		pr_debug("IAA fixed compression mode init failed\n");
2126		goto err_aecs_init;
2127	}
2128
2129	ret = idxd_driver_register(&iaa_crypto_driver);
2130	if (ret) {
2131		pr_debug("IAA wq sub-driver registration failed\n");
2132		goto err_driver_reg;
2133	}
2134
2135	ret = driver_create_file(&iaa_crypto_driver.drv,
2136				 &driver_attr_verify_compress);
2137	if (ret) {
2138		pr_debug("IAA verify_compress attr creation failed\n");
2139		goto err_verify_attr_create;
2140	}
2141
2142	ret = driver_create_file(&iaa_crypto_driver.drv,
2143				 &driver_attr_sync_mode);
2144	if (ret) {
2145		pr_debug("IAA sync mode attr creation failed\n");
2146		goto err_sync_attr_create;
2147	}
2148
2149	if (iaa_crypto_debugfs_init())
2150		pr_warn("debugfs init failed, stats not available\n");
2151
2152	pr_debug("initialized\n");
2153out:
2154	return ret;
2155
2156err_sync_attr_create:
2157	driver_remove_file(&iaa_crypto_driver.drv,
2158			   &driver_attr_verify_compress);
2159err_verify_attr_create:
2160	idxd_driver_unregister(&iaa_crypto_driver);
2161err_driver_reg:
2162	iaa_aecs_cleanup_fixed();
2163err_aecs_init:
2164	crypto_free_comp(deflate_generic_tfm);
2165
2166	goto out;
2167}
2168
2169static void __exit iaa_crypto_cleanup_module(void)
2170{
2171	if (iaa_unregister_compression_device())
2172		pr_debug("IAA compression device unregister failed\n");
2173
2174	iaa_crypto_debugfs_cleanup();
2175	driver_remove_file(&iaa_crypto_driver.drv,
2176			   &driver_attr_sync_mode);
2177	driver_remove_file(&iaa_crypto_driver.drv,
2178			   &driver_attr_verify_compress);
2179	idxd_driver_unregister(&iaa_crypto_driver);
2180	iaa_aecs_cleanup_fixed();
2181	crypto_free_comp(deflate_generic_tfm);
2182
2183	pr_debug("cleaned up\n");
2184}
2185
2186MODULE_IMPORT_NS(IDXD);
2187MODULE_LICENSE("GPL");
2188MODULE_ALIAS_IDXD_DEVICE(0);
2189MODULE_AUTHOR("Intel Corporation");
2190MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");
2191
2192module_init(iaa_crypto_init_module);
2193module_exit(iaa_crypto_cleanup_module);
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
   3
   4#include <linux/init.h>
   5#include <linux/kernel.h>
   6#include <linux/module.h>
   7#include <linux/pci.h>
   8#include <linux/device.h>
   9#include <linux/iommu.h>
  10#include <uapi/linux/idxd.h>
  11#include <linux/highmem.h>
  12#include <linux/sched/smt.h>
  13#include <crypto/internal/acompress.h>
  14
  15#include "idxd.h"
  16#include "iaa_crypto.h"
  17#include "iaa_crypto_stats.h"
  18
  19#ifdef pr_fmt
  20#undef pr_fmt
  21#endif
  22
  23#define pr_fmt(fmt)			"idxd: " IDXD_SUBDRIVER_NAME ": " fmt
  24
  25#define IAA_ALG_PRIORITY               300
  26
  27/* number of iaa instances probed */
  28static unsigned int nr_iaa;
  29static unsigned int nr_cpus;
  30static unsigned int nr_nodes;
  31static unsigned int nr_cpus_per_node;
  32
  33/* Number of physical cpus sharing each iaa instance */
  34static unsigned int cpus_per_iaa;
  35
  36static struct crypto_comp *deflate_generic_tfm;
  37
  38/* Per-cpu lookup table for balanced wqs */
  39static struct wq_table_entry __percpu *wq_table;
  40
  41static struct idxd_wq *wq_table_next_wq(int cpu)
  42{
  43	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  44
  45	if (++entry->cur_wq >= entry->n_wqs)
  46		entry->cur_wq = 0;
  47
  48	if (!entry->wqs[entry->cur_wq])
  49		return NULL;
  50
  51	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
  52		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
  53		 entry->wqs[entry->cur_wq]->id, cpu);
  54
  55	return entry->wqs[entry->cur_wq];
  56}
  57
  58static void wq_table_add(int cpu, struct idxd_wq *wq)
  59{
  60	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  61
  62	if (WARN_ON(entry->n_wqs == entry->max_wqs))
  63		return;
  64
  65	entry->wqs[entry->n_wqs++] = wq;
  66
  67	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
  68		 entry->wqs[entry->n_wqs - 1]->idxd->id,
  69		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
  70}
  71
  72static void wq_table_free_entry(int cpu)
  73{
  74	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  75
  76	kfree(entry->wqs);
  77	memset(entry, 0, sizeof(*entry));
  78}
  79
  80static void wq_table_clear_entry(int cpu)
  81{
  82	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
  83
  84	entry->n_wqs = 0;
  85	entry->cur_wq = 0;
  86	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
  87}
  88
  89LIST_HEAD(iaa_devices);
  90DEFINE_MUTEX(iaa_devices_lock);
  91
  92/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
  93static bool iaa_crypto_enabled;
  94static bool iaa_crypto_registered;
  95
  96/* Verify results of IAA compress or not */
  97static bool iaa_verify_compress = true;
  98
  99static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
 100{
 101	return sprintf(buf, "%d\n", iaa_verify_compress);
 102}
 103
 104static ssize_t verify_compress_store(struct device_driver *driver,
 105				     const char *buf, size_t count)
 106{
 107	int ret = -EBUSY;
 108
 109	mutex_lock(&iaa_devices_lock);
 110
 111	if (iaa_crypto_enabled)
 112		goto out;
 113
 114	ret = kstrtobool(buf, &iaa_verify_compress);
 115	if (ret)
 116		goto out;
 117
 118	ret = count;
 119out:
 120	mutex_unlock(&iaa_devices_lock);
 121
 122	return ret;
 123}
 124static DRIVER_ATTR_RW(verify_compress);
 125
 126/*
 127 * The iaa crypto driver supports three 'sync' methods determining how
 128 * compressions and decompressions are performed:
 129 *
 130 * - sync:      the compression or decompression completes before
 131 *              returning.  This is the mode used by the async crypto
 132 *              interface when the sync mode is set to 'sync' and by
 133 *              the sync crypto interface regardless of setting.
 134 *
 135 * - async:     the compression or decompression is submitted and returns
 136 *              immediately.  Completion interrupts are not used so
 137 *              the caller is responsible for polling the descriptor
 138 *              for completion.  This mode is applicable to only the
 139 *              async crypto interface and is ignored for anything
 140 *              else.
 141 *
 142 * - async_irq: the compression or decompression is submitted and
 143 *              returns immediately.  Completion interrupts are
 144 *              enabled so the caller can wait for the completion and
 145 *              yield to other threads.  When the compression or
 146 *              decompression completes, the completion is signaled
 147 *              and the caller awakened.  This mode is applicable to
 148 *              only the async crypto interface and is ignored for
 149 *              anything else.
 150 *
 151 * These modes can be set using the iaa_crypto sync_mode driver
 152 * attribute.
 153 */
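/*
 * Illustrative usage (not part of this file): a minimal sketch of how
 * the sync_mode attribute described above is typically set, assuming
 * the sysfs path documented for the IAA crypto sub-driver on the dsa
 * bus.  Note that sync_mode_store() below rejects writes with -EBUSY
 * while iaa_crypto is enabled, so the mode must be chosen before any
 * IAA wq is bound to this driver:
 *
 *   echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *   cat /sys/bus/dsa/drivers/crypto/sync_mode
 */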
 154
 155/* Use async mode */
 156static bool async_mode;
 157/* Use interrupts */
 158static bool use_irq;
 159
 160/**
 161 * set_iaa_sync_mode - Set IAA sync mode
 162 * @name: The name of the sync mode
 163 *
 164 * Make the IAA sync mode named @name the current sync mode used by
 165 * compression/decompression.
 166 */
 167
 168static int set_iaa_sync_mode(const char *name)
 169{
 170	int ret = 0;
 171
 172	if (sysfs_streq(name, "sync")) {
 173		async_mode = false;
 174		use_irq = false;
 175	} else if (sysfs_streq(name, "async")) {
 176		async_mode = false;
 177		use_irq = false;
 178	} else if (sysfs_streq(name, "async_irq")) {
 179		async_mode = true;
 180		use_irq = true;
 181	} else {
 182		ret = -EINVAL;
 183	}
 184
 185	return ret;
 186}
 187
 188static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
 189{
 190	int ret = 0;
 191
 192	if (!async_mode && !use_irq)
 193		ret = sprintf(buf, "%s\n", "sync");
 194	else if (async_mode && !use_irq)
 195		ret = sprintf(buf, "%s\n", "async");
 196	else if (async_mode && use_irq)
 197		ret = sprintf(buf, "%s\n", "async_irq");
 198
 199	return ret;
 200}
 201
 202static ssize_t sync_mode_store(struct device_driver *driver,
 203			       const char *buf, size_t count)
 204{
 205	int ret = -EBUSY;
 206
 207	mutex_lock(&iaa_devices_lock);
 208
 209	if (iaa_crypto_enabled)
 210		goto out;
 211
 212	ret = set_iaa_sync_mode(buf);
 213	if (ret == 0)
 214		ret = count;
 215out:
 216	mutex_unlock(&iaa_devices_lock);
 217
 218	return ret;
 219}
 220static DRIVER_ATTR_RW(sync_mode);
 221
 222static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
 223
 224static int find_empty_iaa_compression_mode(void)
 225{
 226	int i = -EINVAL;
 227
 228	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 229		if (iaa_compression_modes[i])
 230			continue;
 231		break;
 232	}
 233
 234	return i;
 235}
 236
 237static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
 238{
 239	struct iaa_compression_mode *mode;
 240	int i;
 241
 242	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 243		mode = iaa_compression_modes[i];
 244		if (!mode)
 245			continue;
 246
 247		if (!strcmp(mode->name, name)) {
 248			*idx = i;
 249			return iaa_compression_modes[i];
 250		}
 251	}
 252
 253	return NULL;
 254}
 255
 256static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
 257{
 258	kfree(mode->name);
 259	kfree(mode->ll_table);
 260	kfree(mode->d_table);
 261
 262	kfree(mode);
 263}
 264
 265/*
 266 * IAA Compression modes are defined by an ll_table and a d_table.
 267 * These tables are typically generated and captured using statistics
 268 * collected from running actual compress/decompress workloads.
 269 *
 270 * A module or other kernel code can add and remove compression modes
 271 * with a given name using the exported @add_iaa_compression_mode()
  272 * and @remove_iaa_compression_mode() functions.
 273 *
 274 * When a new compression mode is added, the tables are saved in a
 275 * global compression mode list.  When IAA devices are added, a
 276 * per-IAA device dma mapping is created for each IAA device, for each
 277 * compression mode.  These are the tables used to do the actual
  278 * compression/decompression and are unmapped if/when the devices are
 279 * removed.  Currently, compression modes must be added before any
 280 * device is added, and removed after all devices have been removed.
 281 */
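/*
 * Illustrative only, not part of this file: a minimal sketch of how a
 * separate kernel module might use the exported API described above.
 * The "example" name and table contents are hypothetical placeholders;
 * real ll/d tables are generated from workload statistics as noted
 * above, and add_iaa_compression_mode() must run before any IAA device
 * is probed (it returns -EBUSY otherwise).
 */
#include <linux/module.h>
#include "iaa_crypto.h"	/* assumed location of the exported declarations */

static const u32 example_ll_table[] = { 0 /* hypothetical ll table data */ };
static const u32 example_d_table[]  = { 0 /* hypothetical d table data */ };

static int __init example_mode_init(void)
{
	/* init/free callbacks are optional, so pass NULL for both */
	return add_iaa_compression_mode("example",
					example_ll_table, sizeof(example_ll_table),
					example_d_table, sizeof(example_d_table),
					NULL, NULL);
}

static void __exit example_mode_exit(void)
{
	/* must be called after all IAA devices have been removed */
	remove_iaa_compression_mode("example");
}

module_init(example_mode_init);
module_exit(example_mode_exit);
MODULE_LICENSE("GPL");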
 282
 283/**
 284 * remove_iaa_compression_mode - Remove an IAA compression mode
  285 * @name: The name of the compression mode to remove
 286 *
 287 * Remove the IAA compression mode named @name.
 288 */
 289void remove_iaa_compression_mode(const char *name)
 290{
 291	struct iaa_compression_mode *mode;
 292	int idx;
 293
 294	mutex_lock(&iaa_devices_lock);
 295
 296	if (!list_empty(&iaa_devices))
 297		goto out;
 298
 299	mode = find_iaa_compression_mode(name, &idx);
 300	if (mode) {
 301		free_iaa_compression_mode(mode);
 302		iaa_compression_modes[idx] = NULL;
 303	}
 304out:
 305	mutex_unlock(&iaa_devices_lock);
 306}
 307EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
 308
 309/**
 310 * add_iaa_compression_mode - Add an IAA compression mode
 311 * @name: The name the compression mode will be known as
 312 * @ll_table: The ll table
 313 * @ll_table_size: The ll table size in bytes
 314 * @d_table: The d table
 315 * @d_table_size: The d table size in bytes
 316 * @init: Optional callback function to init the compression mode data
 317 * @free: Optional callback function to free the compression mode data
 318 *
 319 * Add a new IAA compression mode named @name.
 320 *
 321 * Returns 0 if successful, errcode otherwise.
 322 */
 323int add_iaa_compression_mode(const char *name,
 324			     const u32 *ll_table,
 325			     int ll_table_size,
 326			     const u32 *d_table,
 327			     int d_table_size,
 328			     iaa_dev_comp_init_fn_t init,
 329			     iaa_dev_comp_free_fn_t free)
 330{
 331	struct iaa_compression_mode *mode;
 332	int idx, ret = -ENOMEM;
 333
 334	mutex_lock(&iaa_devices_lock);
 335
 336	if (!list_empty(&iaa_devices)) {
 337		ret = -EBUSY;
 338		goto out;
 339	}
 340
 341	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
 342	if (!mode)
 343		goto out;
 344
 345	mode->name = kstrdup(name, GFP_KERNEL);
 346	if (!mode->name)
 347		goto free;
 348
 349	if (ll_table) {
 350		mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
 351		if (!mode->ll_table)
 352			goto free;
 353		mode->ll_table_size = ll_table_size;
 354	}
 355
 356	if (d_table) {
 357		mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
 358		if (!mode->d_table)
 359			goto free;
 360		mode->d_table_size = d_table_size;
 361	}
 362
 363	mode->init = init;
 364	mode->free = free;
 365
 366	idx = find_empty_iaa_compression_mode();
 367	if (idx < 0)
 368		goto free;
 369
 370	pr_debug("IAA compression mode %s added at idx %d\n",
 371		 mode->name, idx);
 372
 373	iaa_compression_modes[idx] = mode;
 374
 375	ret = 0;
 376out:
 377	mutex_unlock(&iaa_devices_lock);
 378
 379	return ret;
 380free:
 381	free_iaa_compression_mode(mode);
 382	goto out;
 383}
 384EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
 385
 386static struct iaa_device_compression_mode *
 387get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
 388{
 389	return iaa_device->compression_modes[idx];
 390}
 391
 392static void free_device_compression_mode(struct iaa_device *iaa_device,
 393					 struct iaa_device_compression_mode *device_mode)
 394{
 395	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
 396	struct device *dev = &iaa_device->idxd->pdev->dev;
 397
 398	kfree(device_mode->name);
 399
 400	if (device_mode->aecs_comp_table)
 401		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
 402				  device_mode->aecs_comp_table_dma_addr);
 403	kfree(device_mode);
 404}
 405
 406#define IDXD_OP_FLAG_AECS_RW_TGLS       0x400000
 407#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
 408#define IAX_AECS_COMPRESS_FLAG	(IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
 409#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
 410#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
 411						IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
 412						IDXD_OP_FLAG_AECS_RW_TGLS)
 413
 414static int check_completion(struct device *dev,
 415			    struct iax_completion_record *comp,
 416			    bool compress,
 417			    bool only_once);
 418
 419static int init_device_compression_mode(struct iaa_device *iaa_device,
 420					struct iaa_compression_mode *mode,
 421					int idx, struct idxd_wq *wq)
 422{
 423	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
 424	struct device *dev = &iaa_device->idxd->pdev->dev;
 425	struct iaa_device_compression_mode *device_mode;
 426	int ret = -ENOMEM;
 427
 428	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
 429	if (!device_mode)
 430		return -ENOMEM;
 431
 432	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
 433	if (!device_mode->name)
 434		goto free;
 435
 436	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
 437							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
 438	if (!device_mode->aecs_comp_table)
 439		goto free;
 440
 441	/* Add Huffman table to aecs */
 442	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
 443	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
 444	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
 445
 446	if (mode->init) {
 447		ret = mode->init(device_mode);
 448		if (ret)
 449			goto free;
 450	}
 451
 452	/* mode index should match iaa_compression_modes idx */
 453	iaa_device->compression_modes[idx] = device_mode;
 454
 455	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
 456		 mode->name, iaa_device->idxd->id);
 457
 458	ret = 0;
 459out:
 460	return ret;
 461free:
 462	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
 463		 mode->name, iaa_device->idxd->id);
 464
 465	free_device_compression_mode(iaa_device, device_mode);
 466	goto out;
 467}
 468
 469static int init_device_compression_modes(struct iaa_device *iaa_device,
 470					 struct idxd_wq *wq)
 471{
 472	struct iaa_compression_mode *mode;
 473	int i, ret = 0;
 474
 475	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 476		mode = iaa_compression_modes[i];
 477		if (!mode)
 478			continue;
 479
 480		ret = init_device_compression_mode(iaa_device, mode, i, wq);
 481		if (ret)
 482			break;
 483	}
 484
 485	return ret;
 486}
 487
 488static void remove_device_compression_modes(struct iaa_device *iaa_device)
 489{
 490	struct iaa_device_compression_mode *device_mode;
 491	int i;
 492
 493	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
 494		device_mode = iaa_device->compression_modes[i];
 495		if (!device_mode)
 496			continue;
 497
 498		if (iaa_compression_modes[i]->free)
 499			iaa_compression_modes[i]->free(device_mode);
 500		free_device_compression_mode(iaa_device, device_mode);
 501		iaa_device->compression_modes[i] = NULL;
 502	}
 503}
 504
 505static struct iaa_device *iaa_device_alloc(void)
 506{
 507	struct iaa_device *iaa_device;
 508
 509	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
 510	if (!iaa_device)
 511		return NULL;
 512
 513	INIT_LIST_HEAD(&iaa_device->wqs);
 514
 515	return iaa_device;
 516}
 517
 518static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
 519{
 520	struct iaa_wq *iaa_wq;
 521
 522	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
 523		if (iaa_wq->wq == wq)
 524			return true;
 525	}
 526
 527	return false;
 528}
 529
 530static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
 531{
 532	struct iaa_device *iaa_device;
 533
 534	iaa_device = iaa_device_alloc();
 535	if (!iaa_device)
 536		return NULL;
 537
 538	iaa_device->idxd = idxd;
 539
 540	list_add_tail(&iaa_device->list, &iaa_devices);
 541
 542	nr_iaa++;
 543
 544	return iaa_device;
 545}
 546
 547static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
 548{
 549	int ret = 0;
 550
 551	ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
 552	if (ret)
 553		return ret;
 554
 555	return ret;
 556}
 557
 558static void del_iaa_device(struct iaa_device *iaa_device)
 559{
 560	list_del(&iaa_device->list);
 561
 562	nr_iaa--;
 563}
 564
 565static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
 566		      struct iaa_wq **new_wq)
 567{
 568	struct idxd_device *idxd = iaa_device->idxd;
 569	struct pci_dev *pdev = idxd->pdev;
 570	struct device *dev = &pdev->dev;
 571	struct iaa_wq *iaa_wq;
 572
 573	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
 574	if (!iaa_wq)
 575		return -ENOMEM;
 576
 577	iaa_wq->wq = wq;
 578	iaa_wq->iaa_device = iaa_device;
 579	idxd_wq_set_private(wq, iaa_wq);
 580
 581	list_add_tail(&iaa_wq->list, &iaa_device->wqs);
 582
 583	iaa_device->n_wq++;
 584
 585	if (new_wq)
 586		*new_wq = iaa_wq;
 587
 588	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
 589		wq->id, iaa_device->idxd->id, iaa_device->n_wq);
 590
 591	return 0;
 592}
 593
 594static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
 595{
 596	struct idxd_device *idxd = iaa_device->idxd;
 597	struct pci_dev *pdev = idxd->pdev;
 598	struct device *dev = &pdev->dev;
 599	struct iaa_wq *iaa_wq;
 600
 601	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
 602		if (iaa_wq->wq == wq) {
 603			list_del(&iaa_wq->list);
 604			iaa_device->n_wq--;
 605
 606			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
 607				wq->id, iaa_device->idxd->id,
 608				iaa_device->n_wq, nr_iaa);
 609
 610			if (iaa_device->n_wq == 0)
 611				del_iaa_device(iaa_device);
 612			break;
 613		}
 614	}
 615}
 616
 617static void clear_wq_table(void)
 618{
 619	int cpu;
 620
 621	for (cpu = 0; cpu < nr_cpus; cpu++)
 622		wq_table_clear_entry(cpu);
 623
 624	pr_debug("cleared wq table\n");
 625}
 626
 627static void free_iaa_device(struct iaa_device *iaa_device)
 628{
 629	if (!iaa_device)
 630		return;
 631
 632	remove_device_compression_modes(iaa_device);
 633	kfree(iaa_device);
 634}
 635
 636static void __free_iaa_wq(struct iaa_wq *iaa_wq)
 637{
 638	struct iaa_device *iaa_device;
 639
 640	if (!iaa_wq)
 641		return;
 642
 643	iaa_device = iaa_wq->iaa_device;
 644	if (iaa_device->n_wq == 0)
 645		free_iaa_device(iaa_wq->iaa_device);
 646}
 647
 648static void free_iaa_wq(struct iaa_wq *iaa_wq)
 649{
 650	struct idxd_wq *wq;
 651
 652	__free_iaa_wq(iaa_wq);
 653
 654	wq = iaa_wq->wq;
 655
 656	kfree(iaa_wq);
 657	idxd_wq_set_private(wq, NULL);
 658}
 659
 660static int iaa_wq_get(struct idxd_wq *wq)
 661{
 662	struct idxd_device *idxd = wq->idxd;
 663	struct iaa_wq *iaa_wq;
 664	int ret = 0;
 665
 666	spin_lock(&idxd->dev_lock);
 667	iaa_wq = idxd_wq_get_private(wq);
 668	if (iaa_wq && !iaa_wq->remove) {
 669		iaa_wq->ref++;
 670		idxd_wq_get(wq);
 671	} else {
 672		ret = -ENODEV;
 673	}
 674	spin_unlock(&idxd->dev_lock);
 675
 676	return ret;
 677}
 678
 679static int iaa_wq_put(struct idxd_wq *wq)
 680{
 681	struct idxd_device *idxd = wq->idxd;
 682	struct iaa_wq *iaa_wq;
 683	bool free = false;
 684	int ret = 0;
 685
 686	spin_lock(&idxd->dev_lock);
 687	iaa_wq = idxd_wq_get_private(wq);
 688	if (iaa_wq) {
 689		iaa_wq->ref--;
 690		if (iaa_wq->ref == 0 && iaa_wq->remove) {
 691			idxd_wq_set_private(wq, NULL);
 692			free = true;
 693		}
 694		idxd_wq_put(wq);
 695	} else {
 696		ret = -ENODEV;
 697	}
 698	spin_unlock(&idxd->dev_lock);
 699	if (free) {
 700		__free_iaa_wq(iaa_wq);
 701		kfree(iaa_wq);
 702	}
 703
 704	return ret;
 705}
 706
 707static void free_wq_table(void)
 708{
 709	int cpu;
 710
 711	for (cpu = 0; cpu < nr_cpus; cpu++)
 712		wq_table_free_entry(cpu);
 713
 714	free_percpu(wq_table);
 715
 716	pr_debug("freed wq table\n");
 717}
 718
 719static int alloc_wq_table(int max_wqs)
 720{
 721	struct wq_table_entry *entry;
 722	int cpu;
 723
 724	wq_table = alloc_percpu(struct wq_table_entry);
 725	if (!wq_table)
 726		return -ENOMEM;
 727
 728	for (cpu = 0; cpu < nr_cpus; cpu++) {
 729		entry = per_cpu_ptr(wq_table, cpu);
 730		entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL);
 731		if (!entry->wqs) {
 732			free_wq_table();
 733			return -ENOMEM;
 734		}
 735
 736		entry->max_wqs = max_wqs;
 737	}
 738
 739	pr_debug("initialized wq table\n");
 740
 741	return 0;
 742}
 743
 744static int save_iaa_wq(struct idxd_wq *wq)
 745{
 746	struct iaa_device *iaa_device, *found = NULL;
 747	struct idxd_device *idxd;
 748	struct pci_dev *pdev;
 749	struct device *dev;
 750	int ret = 0;
 751
 752	list_for_each_entry(iaa_device, &iaa_devices, list) {
 753		if (iaa_device->idxd == wq->idxd) {
 754			idxd = iaa_device->idxd;
 755			pdev = idxd->pdev;
 756			dev = &pdev->dev;
 757			/*
 758			 * Check to see that we don't already have this wq.
 759			 * Shouldn't happen but we don't control probing.
 760			 */
 761			if (iaa_has_wq(iaa_device, wq)) {
 762				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
 763					iaa_device);
 764				goto out;
 765			}
 766
 767			found = iaa_device;
 768
 769			ret = add_iaa_wq(iaa_device, wq, NULL);
 770			if (ret)
 771				goto out;
 772
 773			break;
 774		}
 775	}
 776
 777	if (!found) {
 778		struct iaa_device *new_device;
 779		struct iaa_wq *new_wq;
 780
 781		new_device = add_iaa_device(wq->idxd);
 782		if (!new_device) {
 783			ret = -ENOMEM;
 784			goto out;
 785		}
 786
 787		ret = add_iaa_wq(new_device, wq, &new_wq);
 788		if (ret) {
 789			del_iaa_device(new_device);
 790			free_iaa_device(new_device);
 791			goto out;
 792		}
 793
 794		ret = init_iaa_device(new_device, new_wq);
 795		if (ret) {
 796			del_iaa_wq(new_device, new_wq->wq);
 797			del_iaa_device(new_device);
 798			free_iaa_wq(new_wq);
 799			goto out;
 800		}
 801	}
 802
 803	if (WARN_ON(nr_iaa == 0))
 804		return -EINVAL;
 805
 806	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
 807	if (!cpus_per_iaa)
 808		cpus_per_iaa = 1;
 809out:
  810	return ret;
 811}
 812
 813static void remove_iaa_wq(struct idxd_wq *wq)
 814{
 815	struct iaa_device *iaa_device;
 816
 817	list_for_each_entry(iaa_device, &iaa_devices, list) {
 818		if (iaa_has_wq(iaa_device, wq)) {
 819			del_iaa_wq(iaa_device, wq);
 820			break;
 821		}
 822	}
 823
 824	if (nr_iaa) {
 825		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
 826		if (!cpus_per_iaa)
 827			cpus_per_iaa = 1;
 828	} else
 829		cpus_per_iaa = 1;
 830}
 831
 832static int wq_table_add_wqs(int iaa, int cpu)
 833{
 834	struct iaa_device *iaa_device, *found_device = NULL;
 835	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
 836	struct idxd_device *idxd;
 837	struct iaa_wq *iaa_wq;
 838	struct pci_dev *pdev;
 839	struct device *dev;
 840
 841	list_for_each_entry(iaa_device, &iaa_devices, list) {
 842		idxd = iaa_device->idxd;
 843		pdev = idxd->pdev;
 844		dev = &pdev->dev;
 845
 846		if (cur_iaa != iaa) {
 847			cur_iaa++;
 848			continue;
 849		}
 850
 851		found_device = iaa_device;
 852		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
 853			found_device->idxd->id, cur_iaa);
 854		break;
 855	}
 856
 857	if (!found_device) {
 858		found_device = list_first_entry_or_null(&iaa_devices,
 859							struct iaa_device, list);
 860		if (!found_device) {
 861			pr_debug("couldn't find any iaa devices with wqs!\n");
 862			ret = -EINVAL;
 863			goto out;
 864		}
 865		cur_iaa = 0;
 866
 867		idxd = found_device->idxd;
 868		pdev = idxd->pdev;
 869		dev = &pdev->dev;
 870		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
 871			found_device->idxd->id, cur_iaa);
 872	}
 873
 874	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
 875		wq_table_add(cpu, iaa_wq->wq);
 876		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
 877			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
 878		n_wqs_added++;
 879	}
 880
 881	if (!n_wqs_added) {
 882		pr_debug("couldn't find any iaa wqs!\n");
 883		ret = -EINVAL;
 884		goto out;
 885	}
 886out:
 887	return ret;
 888}
 889
 890/*
 891 * Rebalance the wq table so that given a cpu, it's easy to find the
 892 * closest IAA instance.  The idea is to try to choose the most
 893 * appropriate IAA instance for a caller and spread available
 894 * workqueues around to clients.
 895 */
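/*
 * Worked example (hypothetical topology, for illustration only): with
 * nr_nodes = 2, nr_cpus_per_node = 8 and nr_iaa = 4, cpus_per_iaa is
 * (2 * 8) / 4 = 4.  In the per-node loop below, 'iaa' advances every
 * cpus_per_iaa cpus and does not reset between nodes, so node 0 cpus
 * 0-3 get the wqs of IAA instance 0, node 0 cpus 4-7 get instance 1,
 * node 1 cpus 0-3 get instance 2 and node 1 cpus 4-7 get instance 3.
 */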
 896static void rebalance_wq_table(void)
 897{
 898	const struct cpumask *node_cpus;
 899	int node, cpu, iaa = -1;
 900
 901	if (nr_iaa == 0)
 902		return;
 903
 904	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
 905		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
 906
 907	clear_wq_table();
 908
 909	if (nr_iaa == 1) {
 910		for (cpu = 0; cpu < nr_cpus; cpu++) {
 911			if (WARN_ON(wq_table_add_wqs(0, cpu))) {
 912				pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
 913				return;
 914			}
 915		}
 916
 917		return;
 918	}
 919
 920	for_each_node_with_cpus(node) {
 921		node_cpus = cpumask_of_node(node);
 922
 923		for (cpu = 0; cpu <  cpumask_weight(node_cpus); cpu++) {
 924			int node_cpu = cpumask_nth(cpu, node_cpus);
 925
 926			if (WARN_ON(node_cpu >= nr_cpu_ids)) {
 927				pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
 928				return;
 929			}
 930
 931			if ((cpu % cpus_per_iaa) == 0)
 932				iaa++;
 933
 934			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
 935				pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
 936				return;
 937			}
 938		}
 939	}
 940}
 941
 942static inline int check_completion(struct device *dev,
 943				   struct iax_completion_record *comp,
 944				   bool compress,
 945				   bool only_once)
 946{
 947	char *op_str = compress ? "compress" : "decompress";
 948	int status_checks = 0;
 949	int ret = 0;
 950
 951	while (!comp->status) {
 952		if (only_once)
 953			return -EAGAIN;
 954		cpu_relax();
 955		if (status_checks++ >= IAA_COMPLETION_TIMEOUT) {
 956			/* Something is wrong with the hw, disable it. */
 957			dev_err(dev, "%s completion timed out - "
 958				"assuming broken hw, iaa_crypto now DISABLED\n",
 959				op_str);
 960			iaa_crypto_enabled = false;
 961			ret = -ETIMEDOUT;
 962			goto out;
 963		}
 964	}
 965
 966	if (comp->status != IAX_COMP_SUCCESS) {
 967		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
 968			ret = -ETIMEDOUT;
 969			dev_dbg(dev, "%s timed out, size=0x%x\n",
 970				op_str, comp->output_size);
 971			update_completion_timeout_errs();
 972			goto out;
 973		}
 974
 975		if (comp->status == IAA_ANALYTICS_ERROR &&
 976		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
 977			ret = -E2BIG;
 978			dev_dbg(dev, "compressed > uncompressed size,"
 979				" not compressing, size=0x%x\n",
 980				comp->output_size);
 981			update_completion_comp_buf_overflow_errs();
 982			goto out;
 983		}
 984
 985		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
 986			ret = -EOVERFLOW;
 987			goto out;
 988		}
 989
 990		ret = -EINVAL;
 991		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
 992			op_str, comp->status, comp->error_code, comp->output_size);
 993		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
 994		update_completion_einval_errs();
 995
 996		goto out;
 997	}
 998out:
 999	return ret;
1000}
1001
1002static int deflate_generic_decompress(struct acomp_req *req)
1003{
1004	void *src, *dst;
1005	int ret;
1006
1007	src = kmap_local_page(sg_page(req->src)) + req->src->offset;
1008	dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1009
1010	ret = crypto_comp_decompress(deflate_generic_tfm,
1011				     src, req->slen, dst, &req->dlen);
1012
1013	kunmap_local(src);
1014	kunmap_local(dst);
1015
1016	update_total_sw_decomp_calls();
1017
1018	return ret;
1019}
1020
1021static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1022				struct acomp_req *req,
1023				dma_addr_t *src_addr, dma_addr_t *dst_addr);
1024
1025static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1026			       struct idxd_wq *wq,
1027			       dma_addr_t src_addr, unsigned int slen,
1028			       dma_addr_t dst_addr, unsigned int *dlen,
1029			       u32 compression_crc);
1030
1031static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1032			      enum idxd_complete_type comp_type,
1033			      bool free_desc, void *__ctx,
1034			      u32 *status)
1035{
1036	struct iaa_device_compression_mode *active_compression_mode;
1037	struct iaa_compression_ctx *compression_ctx;
1038	struct crypto_ctx *ctx = __ctx;
1039	struct iaa_device *iaa_device;
1040	struct idxd_device *idxd;
1041	struct iaa_wq *iaa_wq;
1042	struct pci_dev *pdev;
1043	struct device *dev;
1044	int ret, err = 0;
1045
1046	compression_ctx = crypto_tfm_ctx(ctx->tfm);
1047
1048	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1049	iaa_device = iaa_wq->iaa_device;
1050	idxd = iaa_device->idxd;
1051	pdev = idxd->pdev;
1052	dev = &pdev->dev;
1053
1054	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1055								  compression_ctx->mode);
1056	dev_dbg(dev, "%s: compression mode %s,"
1057		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1058		active_compression_mode->name,
1059		ctx->src_addr, ctx->dst_addr);
1060
1061	ret = check_completion(dev, idxd_desc->iax_completion,
1062			       ctx->compress, false);
1063	if (ret) {
1064		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1065		if (!ctx->compress &&
1066		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1067			pr_warn("%s: falling back to deflate-generic decompress, "
1068				"analytics error code %x\n", __func__,
1069				idxd_desc->iax_completion->error_code);
1070			ret = deflate_generic_decompress(ctx->req);
1071			if (ret) {
1072				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1073					__func__, ret);
1074				err = -EIO;
1075				goto err;
1076			}
1077		} else {
1078			err = -EIO;
1079			goto err;
1080		}
1081	} else {
1082		ctx->req->dlen = idxd_desc->iax_completion->output_size;
1083	}
1084
1085	/* Update stats */
1086	if (ctx->compress) {
1087		update_total_comp_bytes_out(ctx->req->dlen);
1088		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1089	} else {
1090		update_total_decomp_bytes_in(ctx->req->slen);
1091		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
1092	}
1093
1094	if (ctx->compress && compression_ctx->verify_compress) {
1095		dma_addr_t src_addr, dst_addr;
1096		u32 compression_crc;
1097
1098		compression_crc = idxd_desc->iax_completion->crc;
1099
1100		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1101		if (ret) {
1102			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1103			err = -EIO;
1104			goto out;
1105		}
1106
1107		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1108					  ctx->req->slen, dst_addr, &ctx->req->dlen,
1109					  compression_crc);
1110		if (ret) {
1111			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1112			err = -EIO;
1113		}
1114
1115		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1116		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1117
1118		goto out;
1119	}
1120err:
1121	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1122	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1123out:
1124	if (ret != 0)
1125		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1126
1127	if (ctx->req->base.complete)
1128		acomp_request_complete(ctx->req, err);
1129
1130	if (free_desc)
1131		idxd_free_desc(idxd_desc->wq, idxd_desc);
1132	iaa_wq_put(idxd_desc->wq);
1133}
1134
1135static int iaa_compress(struct crypto_tfm *tfm,	struct acomp_req *req,
1136			struct idxd_wq *wq,
1137			dma_addr_t src_addr, unsigned int slen,
1138			dma_addr_t dst_addr, unsigned int *dlen,
1139			u32 *compression_crc,
1140			bool disable_async)
1141{
1142	struct iaa_device_compression_mode *active_compression_mode;
1143	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1144	struct iaa_device *iaa_device;
1145	struct idxd_desc *idxd_desc;
1146	struct iax_hw_desc *desc;
1147	struct idxd_device *idxd;
1148	struct iaa_wq *iaa_wq;
1149	struct pci_dev *pdev;
1150	struct device *dev;
1151	int ret = 0;
1152
1153	iaa_wq = idxd_wq_get_private(wq);
1154	iaa_device = iaa_wq->iaa_device;
1155	idxd = iaa_device->idxd;
1156	pdev = idxd->pdev;
1157	dev = &pdev->dev;
1158
1159	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1160
1161	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1162	if (IS_ERR(idxd_desc)) {
1163		dev_dbg(dev, "idxd descriptor allocation failed\n");
1164		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1165		return PTR_ERR(idxd_desc);
1166	}
1167	desc = idxd_desc->iax_hw;
1168
1169	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1170		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1171	desc->opcode = IAX_OPCODE_COMPRESS;
1172	desc->compr_flags = IAA_COMP_FLAGS;
1173	desc->priv = 0;
1174
1175	desc->src1_addr = (u64)src_addr;
1176	desc->src1_size = slen;
1177	desc->dst_addr = (u64)dst_addr;
1178	desc->max_dst_size = *dlen;
1179	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1180	desc->src2_size = sizeof(struct aecs_comp_table_record);
1181	desc->completion_addr = idxd_desc->compl_dma;
1182
1183	if (ctx->use_irq && !disable_async) {
1184		desc->flags |= IDXD_OP_FLAG_RCI;
1185
1186		idxd_desc->crypto.req = req;
1187		idxd_desc->crypto.tfm = tfm;
1188		idxd_desc->crypto.src_addr = src_addr;
1189		idxd_desc->crypto.dst_addr = dst_addr;
1190		idxd_desc->crypto.compress = true;
1191
1192		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1193			" src_addr %llx, dst_addr %llx\n", __func__,
1194			active_compression_mode->name,
1195			src_addr, dst_addr);
1196	} else if (ctx->async_mode && !disable_async)
1197		req->base.data = idxd_desc;
1198
1199	dev_dbg(dev, "%s: compression mode %s,"
1200		" desc->src1_addr %llx, desc->src1_size %d,"
1201		" desc->dst_addr %llx, desc->max_dst_size %d,"
1202		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1203		active_compression_mode->name,
1204		desc->src1_addr, desc->src1_size, desc->dst_addr,
1205		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1206
1207	ret = idxd_submit_desc(wq, idxd_desc);
1208	if (ret) {
1209		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1210		goto err;
1211	}
1212
1213	/* Update stats */
1214	update_total_comp_calls();
1215	update_wq_comp_calls(wq);
1216
1217	if (ctx->async_mode && !disable_async) {
1218		ret = -EINPROGRESS;
1219		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1220		goto out;
1221	}
1222
1223	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1224	if (ret) {
1225		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1226		goto err;
1227	}
1228
1229	*dlen = idxd_desc->iax_completion->output_size;
1230
1231	/* Update stats */
1232	update_total_comp_bytes_out(*dlen);
1233	update_wq_comp_bytes(wq, *dlen);
1234
1235	*compression_crc = idxd_desc->iax_completion->crc;
1236
1237	if (!ctx->async_mode || disable_async)
1238		idxd_free_desc(wq, idxd_desc);
1239out:
1240	return ret;
1241err:
1242	idxd_free_desc(wq, idxd_desc);
1243	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1244
1245	goto out;
1246}
1247
1248static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1249				struct acomp_req *req,
1250				dma_addr_t *src_addr, dma_addr_t *dst_addr)
1251{
1252	int ret = 0;
1253	int nr_sgs;
1254
1255	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1256	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1257
1258	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1259	if (nr_sgs <= 0 || nr_sgs > 1) {
1260		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1261			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1262			iaa_wq->wq->id, ret);
1263		ret = -EIO;
1264		goto out;
1265	}
1266	*src_addr = sg_dma_address(req->src);
1267	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1268		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1269		req->src, req->slen, sg_dma_len(req->src));
1270
1271	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1272	if (nr_sgs <= 0 || nr_sgs > 1) {
1273		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1274			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1275			iaa_wq->wq->id, ret);
1276		ret = -EIO;
1277		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1278		goto out;
1279	}
1280	*dst_addr = sg_dma_address(req->dst);
1281	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1282		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1283		req->dst, req->dlen, sg_dma_len(req->dst));
1284out:
1285	return ret;
1286}
1287
1288static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1289			       struct idxd_wq *wq,
1290			       dma_addr_t src_addr, unsigned int slen,
1291			       dma_addr_t dst_addr, unsigned int *dlen,
1292			       u32 compression_crc)
1293{
1294	struct iaa_device_compression_mode *active_compression_mode;
1295	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1296	struct iaa_device *iaa_device;
1297	struct idxd_desc *idxd_desc;
1298	struct iax_hw_desc *desc;
1299	struct idxd_device *idxd;
1300	struct iaa_wq *iaa_wq;
1301	struct pci_dev *pdev;
1302	struct device *dev;
1303	int ret = 0;
1304
1305	iaa_wq = idxd_wq_get_private(wq);
1306	iaa_device = iaa_wq->iaa_device;
1307	idxd = iaa_device->idxd;
1308	pdev = idxd->pdev;
1309	dev = &pdev->dev;
1310
1311	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1312
1313	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1314	if (IS_ERR(idxd_desc)) {
1315		dev_dbg(dev, "idxd descriptor allocation failed\n");
1316		dev_dbg(dev, "iaa compress failed: ret=%ld\n",
1317			PTR_ERR(idxd_desc));
1318		return PTR_ERR(idxd_desc);
1319	}
1320	desc = idxd_desc->iax_hw;
1321
1322	/* Verify (optional) - decompress and check crc, suppress dest write */
1323
1324	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1325	desc->opcode = IAX_OPCODE_DECOMPRESS;
1326	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1327	desc->priv = 0;
1328
1329	desc->src1_addr = (u64)dst_addr;
1330	desc->src1_size = *dlen;
1331	desc->dst_addr = (u64)src_addr;
1332	desc->max_dst_size = slen;
1333	desc->completion_addr = idxd_desc->compl_dma;
1334
1335	dev_dbg(dev, "(verify) compression mode %s,"
1336		" desc->src1_addr %llx, desc->src1_size %d,"
1337		" desc->dst_addr %llx, desc->max_dst_size %d,"
1338		" desc->src2_addr %llx, desc->src2_size %d\n",
1339		active_compression_mode->name,
1340		desc->src1_addr, desc->src1_size, desc->dst_addr,
1341		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1342
1343	ret = idxd_submit_desc(wq, idxd_desc);
1344	if (ret) {
1345		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1346		goto err;
1347	}
1348
1349	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1350	if (ret) {
1351		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1352		goto err;
1353	}
1354
1355	if (compression_crc != idxd_desc->iax_completion->crc) {
1356		ret = -EINVAL;
1357		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1358			" comp=0x%x, decomp=0x%x\n", compression_crc,
1359			idxd_desc->iax_completion->crc);
1360		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1361			       8, 1, idxd_desc->iax_completion, 64, 0);
1362		goto err;
1363	}
1364
1365	idxd_free_desc(wq, idxd_desc);
1366out:
1367	return ret;
1368err:
1369	idxd_free_desc(wq, idxd_desc);
1370	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1371
1372	goto out;
1373}
1374
1375static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1376			  struct idxd_wq *wq,
1377			  dma_addr_t src_addr, unsigned int slen,
1378			  dma_addr_t dst_addr, unsigned int *dlen,
1379			  bool disable_async)
1380{
1381	struct iaa_device_compression_mode *active_compression_mode;
1382	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1383	struct iaa_device *iaa_device;
1384	struct idxd_desc *idxd_desc;
1385	struct iax_hw_desc *desc;
1386	struct idxd_device *idxd;
1387	struct iaa_wq *iaa_wq;
1388	struct pci_dev *pdev;
1389	struct device *dev;
1390	int ret = 0;
1391
1392	iaa_wq = idxd_wq_get_private(wq);
1393	iaa_device = iaa_wq->iaa_device;
1394	idxd = iaa_device->idxd;
1395	pdev = idxd->pdev;
1396	dev = &pdev->dev;
1397
1398	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1399
1400	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1401	if (IS_ERR(idxd_desc)) {
1402		dev_dbg(dev, "idxd descriptor allocation failed\n");
1403		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1404			PTR_ERR(idxd_desc));
1405		return PTR_ERR(idxd_desc);
1406	}
1407	desc = idxd_desc->iax_hw;
1408
1409	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1410	desc->opcode = IAX_OPCODE_DECOMPRESS;
1411	desc->max_dst_size = PAGE_SIZE;
1412	desc->decompr_flags = IAA_DECOMP_FLAGS;
1413	desc->priv = 0;
1414
1415	desc->src1_addr = (u64)src_addr;
1416	desc->dst_addr = (u64)dst_addr;
1417	desc->max_dst_size = *dlen;
1418	desc->src1_size = slen;
1419	desc->completion_addr = idxd_desc->compl_dma;
1420
1421	if (ctx->use_irq && !disable_async) {
1422		desc->flags |= IDXD_OP_FLAG_RCI;
1423
1424		idxd_desc->crypto.req = req;
1425		idxd_desc->crypto.tfm = tfm;
1426		idxd_desc->crypto.src_addr = src_addr;
1427		idxd_desc->crypto.dst_addr = dst_addr;
1428		idxd_desc->crypto.compress = false;
1429
1430		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1431			" src_addr %llx, dst_addr %llx\n", __func__,
1432			active_compression_mode->name,
1433			src_addr, dst_addr);
1434	} else if (ctx->async_mode && !disable_async)
1435		req->base.data = idxd_desc;
1436
1437	dev_dbg(dev, "%s: decompression mode %s,"
1438		" desc->src1_addr %llx, desc->src1_size %d,"
1439		" desc->dst_addr %llx, desc->max_dst_size %d,"
1440		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1441		active_compression_mode->name,
1442		desc->src1_addr, desc->src1_size, desc->dst_addr,
1443		desc->max_dst_size, desc->src2_addr, desc->src2_size);
1444
1445	ret = idxd_submit_desc(wq, idxd_desc);
1446	if (ret) {
1447		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1448		goto err;
1449	}
1450
1451	/* Update stats */
1452	update_total_decomp_calls();
1453	update_wq_decomp_calls(wq);
1454
1455	if (ctx->async_mode && !disable_async) {
1456		ret = -EINPROGRESS;
1457		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1458		goto out;
1459	}
1460
1461	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1462	if (ret) {
1463		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1464		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1465			pr_warn("%s: falling back to deflate-generic decompress, "
1466				"analytics error code %x\n", __func__,
1467				idxd_desc->iax_completion->error_code);
1468			ret = deflate_generic_decompress(req);
1469			if (ret) {
1470				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1471					__func__, ret);
1472				goto err;
1473			}
1474		} else {
1475			goto err;
1476		}
1477	} else {
1478		req->dlen = idxd_desc->iax_completion->output_size;
1479	}
1480
1481	*dlen = req->dlen;
1482
1483	if (!ctx->async_mode || disable_async)
1484		idxd_free_desc(wq, idxd_desc);
1485
1486	/* Update stats */
1487	update_total_decomp_bytes_in(slen);
1488	update_wq_decomp_bytes(wq, slen);
1489out:
1490	return ret;
1491err:
1492	idxd_free_desc(wq, idxd_desc);
1493	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1494
1495	goto out;
1496}
1497
1498static int iaa_comp_acompress(struct acomp_req *req)
1499{
1500	struct iaa_compression_ctx *compression_ctx;
1501	struct crypto_tfm *tfm = req->base.tfm;
1502	dma_addr_t src_addr, dst_addr;
1503	bool disable_async = false;
1504	int nr_sgs, cpu, ret = 0;
1505	struct iaa_wq *iaa_wq;
1506	u32 compression_crc;
1507	struct idxd_wq *wq;
1508	struct device *dev;
1509	int order = -1;
1510
1511	compression_ctx = crypto_tfm_ctx(tfm);
1512
1513	if (!iaa_crypto_enabled) {
1514		pr_debug("iaa_crypto disabled, not compressing\n");
1515		return -ENODEV;
1516	}
1517
1518	if (!req->src || !req->slen) {
1519		pr_debug("invalid src, not compressing\n");
1520		return -EINVAL;
1521	}
1522
1523	cpu = get_cpu();
1524	wq = wq_table_next_wq(cpu);
1525	put_cpu();
1526	if (!wq) {
1527		pr_debug("no wq configured for cpu=%d\n", cpu);
1528		return -ENODEV;
1529	}
1530
1531	ret = iaa_wq_get(wq);
1532	if (ret) {
1533		pr_debug("no wq available for cpu=%d\n", cpu);
1534		return -ENODEV;
1535	}
1536
1537	iaa_wq = idxd_wq_get_private(wq);
1538
1539	if (!req->dst) {
1540		gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1541
1542		/* incompressible data will always be < 2 * slen */
1543		req->dlen = 2 * req->slen;
1544		order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1545		req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1546		if (!req->dst) {
1547			ret = -ENOMEM;
1548			order = -1;
1549			goto out;
1550		}
1551		disable_async = true;
1552	}
1553
1554	dev = &wq->idxd->pdev->dev;
1555
1556	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1557	if (nr_sgs <= 0 || nr_sgs > 1) {
1558		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1559			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1560			iaa_wq->wq->id, ret);
1561		ret = -EIO;
1562		goto out;
1563	}
1564	src_addr = sg_dma_address(req->src);
1565	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1566		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1567		req->src, req->slen, sg_dma_len(req->src));
1568
1569	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1570	if (nr_sgs <= 0 || nr_sgs > 1) {
1571		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1572			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1573			iaa_wq->wq->id, ret);
1574		ret = -EIO;
1575		goto err_map_dst;
1576	}
1577	dst_addr = sg_dma_address(req->dst);
1578	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1579		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1580		req->dst, req->dlen, sg_dma_len(req->dst));
1581
1582	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1583			   &req->dlen, &compression_crc, disable_async);
1584	if (ret == -EINPROGRESS)
1585		return ret;
1586
1587	if (!ret && compression_ctx->verify_compress) {
1588		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1589		if (ret) {
1590			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1591			goto out;
1592		}
1593
1594		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1595					  dst_addr, &req->dlen, compression_crc);
1596		if (ret)
1597			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1598
1599		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1600		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1601
1602		goto out;
1603	}
1604
1605	if (ret)
1606		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1607
1608	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1609err_map_dst:
1610	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1611out:
1612	iaa_wq_put(wq);
1613
1614	if (order >= 0)
1615		sgl_free_order(req->dst, order);
1616
1617	return ret;
1618}
1619
1620static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1621{
1622	gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1623		GFP_KERNEL : GFP_ATOMIC;
1624	struct crypto_tfm *tfm = req->base.tfm;
1625	dma_addr_t src_addr, dst_addr;
1626	int nr_sgs, cpu, ret = 0;
1627	struct iaa_wq *iaa_wq;
1628	struct device *dev;
1629	struct idxd_wq *wq;
1630	int order = -1;
1631
1632	cpu = get_cpu();
1633	wq = wq_table_next_wq(cpu);
1634	put_cpu();
1635	if (!wq) {
1636		pr_debug("no wq configured for cpu=%d\n", cpu);
1637		return -ENODEV;
1638	}
1639
1640	ret = iaa_wq_get(wq);
1641	if (ret) {
1642		pr_debug("no wq available for cpu=%d\n", cpu);
1643		return -ENODEV;
1644	}
1645
1646	iaa_wq = idxd_wq_get_private(wq);
1647
1648	dev = &wq->idxd->pdev->dev;
1649
1650	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1651	if (nr_sgs <= 0 || nr_sgs > 1) {
1652		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1653			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1654			iaa_wq->wq->id, ret);
1655		ret = -EIO;
1656		goto out;
1657	}
1658	src_addr = sg_dma_address(req->src);
1659	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1660		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1661		req->src, req->slen, sg_dma_len(req->src));
1662
 1663	req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
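	/*
	 * If this initial guess is too small, iaa_decompress() below
	 * returns -EOVERFLOW and the destination is reallocated at twice
	 * the previous size (bounded by CRYPTO_ACOMP_DST_MAX) before
	 * retrying.
	 */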
1664alloc_dest:
1665	order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
1666	req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1667	if (!req->dst) {
1668		ret = -ENOMEM;
1669		order = -1;
1670		goto out;
1671	}
1672
1673	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1674	if (nr_sgs <= 0 || nr_sgs > 1) {
1675		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1676			" wq %d: nr_sgs=%d\n", iaa_wq->iaa_device->idxd->id,
1677			iaa_wq->wq->id, nr_sgs);
1678		ret = -EIO;
1679		goto err_map_dst;
1680	}
1681
1682	dst_addr = sg_dma_address(req->dst);
1683	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1684		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1685		req->dst, req->dlen, sg_dma_len(req->dst));
1686	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1687			     dst_addr, &req->dlen, true);
1688	if (ret == -EOVERFLOW) {
1689		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1690		req->dlen *= 2;
1691		if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1692			goto err_map_dst;
		/* free the undersized dst sgl before retrying with a larger one */
		sgl_free_order(req->dst, order);
1693		goto alloc_dest;
1694	}
1695
1696	if (ret != 0)
1697		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1698
1699	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1700err_map_dst:
1701	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1702out:
1703	iaa_wq_put(wq);
1704
1705	if (order >= 0)
1706		sgl_free_order(req->dst, order);
1707
1708	return ret;
1709}
1710
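/*
 * Main acomp ->decompress() entry point: validate the request, pick the
 * per-cpu wq, DMA-map src/dst and submit the decompress descriptor.
 * A return value of -EINPROGRESS means the result is reported later
 * through the request's acomp callback (see iaa_desc_complete()).
 */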
1711static int iaa_comp_adecompress(struct acomp_req *req)
1712{
1713	struct crypto_tfm *tfm = req->base.tfm;
1714	dma_addr_t src_addr, dst_addr;
1715	int nr_sgs, cpu, ret = 0;
1716	struct iaa_wq *iaa_wq;
1717	struct device *dev;
1718	struct idxd_wq *wq;
1719
1720	if (!iaa_crypto_enabled) {
1721		pr_debug("iaa_crypto disabled, not decompressing\n");
1722		return -ENODEV;
1723	}
1724
1725	if (!req->src || !req->slen) {
1726		pr_debug("invalid src, not decompressing\n");
1727		return -EINVAL;
1728	}
1729
1730	if (!req->dst)
1731		return iaa_comp_adecompress_alloc_dest(req);
1732
1733	cpu = get_cpu();
1734	wq = wq_table_next_wq(cpu);
1735	put_cpu();
1736	if (!wq) {
1737		pr_debug("no wq configured for cpu=%d\n", cpu);
1738		return -ENODEV;
1739	}
1740
1741	ret = iaa_wq_get(wq);
1742	if (ret) {
1743		pr_debug("no wq available for cpu=%d\n", cpu);
1744		return -ENODEV;
1745	}
1746
1747	iaa_wq = idxd_wq_get_private(wq);
1748
1749	dev = &wq->idxd->pdev->dev;
1750
1751	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1752	if (nr_sgs <= 0 || nr_sgs > 1) {
1753		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1754			" wq %d: nr_sgs=%d\n", iaa_wq->iaa_device->idxd->id,
1755			iaa_wq->wq->id, nr_sgs);
1756		ret = -EIO;
1757		goto out;
1758	}
1759	src_addr = sg_dma_address(req->src);
1760	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1761		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1762		req->src, req->slen, sg_dma_len(req->src));
1763
1764	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1765	if (nr_sgs <= 0 || nr_sgs > 1) {
1766		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1767			" wq %d: nr_sgs=%d\n", iaa_wq->iaa_device->idxd->id,
1768			iaa_wq->wq->id, nr_sgs);
1769		ret = -EIO;
1770		goto err_map_dst;
1771	}
1772	dst_addr = sg_dma_address(req->dst);
1773	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1774		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1775		req->dst, req->dlen, sg_dma_len(req->dst));
1776
1777	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1778			     dst_addr, &req->dlen, false);
1779	if (ret == -EINPROGRESS)
1780		return ret;
1781
1782	if (ret != 0)
1783		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1784
1785	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1786err_map_dst:
1787	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1788out:
1789	iaa_wq_put(wq);
1790
1791	return ret;
1792}
1793
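/* Snapshot the module-wide tunables into the per-tfm compression context */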
1794static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1795{
1796	ctx->verify_compress = iaa_verify_compress;
1797	ctx->async_mode = async_mode;
1798	ctx->use_irq = use_irq;
1799}
1800
1801static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1802{
1803	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1804	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1805
1806	compression_ctx_init(ctx);
1807
1808	ctx->mode = IAA_MODE_FIXED;
1809
1810	return 0;
1811}
1812
1813static void dst_free(struct scatterlist *sgl)
1814{
1815	/*
1816	 * Called for req->dst == NULL requests; the driver-allocated dst sgl is
1817	 * freed in the compress/decompress paths via sgl_free_order().
1818	 */
1819}
1820
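/*
 * Illustrative sketch only (not part of this driver): callers normally reach
 * this algorithm through the generic acomp API rather than these ops.  The
 * names my_src_sgl, my_dst_sgl, my_done and my_ctx below are placeholders:
 *
 *	struct crypto_acomp *tfm = crypto_alloc_acomp("deflate", 0, 0);
 *	struct acomp_req *areq = acomp_request_alloc(tfm);
 *
 *	acomp_request_set_params(areq, my_src_sgl, my_dst_sgl, slen, dlen);
 *	acomp_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG,
 *				   my_done, my_ctx);
 *	ret = crypto_acomp_compress(areq);	(may return -EINPROGRESS)
 *
 * "deflate-iaa" wins over the software implementations when IAA_ALG_PRIORITY
 * is the highest "deflate" priority, or it can be requested explicitly via
 * its cra_driver_name.
 */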
1821static struct acomp_alg iaa_acomp_fixed_deflate = {
1822	.init			= iaa_comp_init_fixed,
1823	.compress		= iaa_comp_acompress,
1824	.decompress		= iaa_comp_adecompress,
1825		.dst_free		= dst_free,
1826	.base			= {
1827		.cra_name		= "deflate",
1828		.cra_driver_name	= "deflate-iaa",
1829		.cra_flags		= CRYPTO_ALG_ASYNC,
1830		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
1831		.cra_module		= THIS_MODULE,
1832		.cra_priority		= IAA_ALG_PRIORITY,
1833	}
1834};
1835
1836static int iaa_register_compression_device(void)
1837{
1838	int ret;
1839
1840	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1841	if (ret) {
1842		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1843		goto out;
1844	}
1845
1846	iaa_crypto_registered = true;
1847out:
1848	return ret;
1849}
1850
1851static int iaa_unregister_compression_device(void)
1852{
1853	if (iaa_crypto_registered)
1854		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
1855
1856	return 0;
1857}
1858
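/*
 * idxd sub-driver probe: bind to an enabled IAX wq whose driver_name
 * matches, mark it as a kernel wq and enable it, then add it to the
 * per-cpu wq table.  The "deflate-iaa" acomp algorithm is registered and
 * iaa_crypto_enabled is set when the first wq is probed.
 */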
1859static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
1860{
1861	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1862	struct idxd_device *idxd = wq->idxd;
1863	struct idxd_driver_data *data = idxd->data;
1864	struct device *dev = &idxd_dev->conf_dev;
1865	bool first_wq = false;
1866	int ret = 0;
1867
1868	if (idxd->state != IDXD_DEV_ENABLED)
1869		return -ENXIO;
1870
1871	if (data->type != IDXD_TYPE_IAX)
1872		return -ENODEV;
1873
1874	mutex_lock(&wq->wq_lock);
1875
1876	if (idxd_wq_get_private(wq)) {
1877		mutex_unlock(&wq->wq_lock);
1878		return -EBUSY;
1879	}
1880
1881	if (!idxd_wq_driver_name_match(wq, dev)) {
1882		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
1883			idxd->id, wq->id, wq->driver_name, dev->driver->name);
1884		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
1885		ret = -ENODEV;
1886		goto err;
1887	}
1888
1889	wq->type = IDXD_WQT_KERNEL;
1890
1891	ret = idxd_drv_enable_wq(wq);
1892	if (ret < 0) {
1893		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
1894			idxd->id, wq->id, ret);
1895		ret = -ENXIO;
1896		goto err;
1897	}
1898
1899	mutex_lock(&iaa_devices_lock);
1900
1901	if (list_empty(&iaa_devices)) {
1902		ret = alloc_wq_table(wq->idxd->max_wqs);
1903		if (ret)
1904			goto err_alloc;
1905		first_wq = true;
1906	}
1907
1908	ret = save_iaa_wq(wq);
1909	if (ret)
1910		goto err_save;
1911
1912	rebalance_wq_table();
1913
1914	if (first_wq) {
1915		iaa_crypto_enabled = true;
1916		ret = iaa_register_compression_device();
1917		if (ret != 0) {
1918			iaa_crypto_enabled = false;
1919			dev_dbg(dev, "IAA compression device registration failed\n");
1920			goto err_register;
1921		}
1922		try_module_get(THIS_MODULE);
1923
1924		pr_info("iaa_crypto now ENABLED\n");
1925	}
1926
1927	mutex_unlock(&iaa_devices_lock);
1928out:
1929	mutex_unlock(&wq->wq_lock);
1930
1931	return ret;
1932
1933err_register:
1934	remove_iaa_wq(wq);
1935	free_iaa_wq(idxd_wq_get_private(wq));
1936err_save:
1937	if (first_wq)
1938		free_wq_table();
1939err_alloc:
1940	mutex_unlock(&iaa_devices_lock);
1941	idxd_drv_disable_wq(wq);
1942err:
1943	wq->type = IDXD_WQT_NONE;
1944
1945	goto out;
1946}
1947
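/*
 * idxd sub-driver remove: quiesce and detach the wq.  If requests still
 * hold a reference, the iaa_wq is only flagged for removal and freed once
 * the last reference is dropped.  When the last wq goes away (nr_iaa == 0),
 * the wq table is freed and iaa_crypto_enabled is cleared, so new requests
 * fail with -ENODEV until another wq is probed.
 */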
1948static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
1949{
1950	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1951	struct idxd_device *idxd = wq->idxd;
1952	struct iaa_wq *iaa_wq;
1953	bool free = false;
1954
1955	idxd_wq_quiesce(wq);
1956
1957	mutex_lock(&wq->wq_lock);
1958	mutex_lock(&iaa_devices_lock);
1959
1960	remove_iaa_wq(wq);
1961
1962	spin_lock(&idxd->dev_lock);
1963	iaa_wq = idxd_wq_get_private(wq);
1964	if (!iaa_wq) {
1965		spin_unlock(&idxd->dev_lock);
1966		pr_err("%s: no iaa_wq available to remove\n", __func__);
1967		goto out;
1968	}
1969
1970	if (iaa_wq->ref) {
1971		iaa_wq->remove = true;
1972	} else {
1973		wq = iaa_wq->wq;
1974		idxd_wq_set_private(wq, NULL);
1975		free = true;
1976	}
1977	spin_unlock(&idxd->dev_lock);
1978	if (free) {
1979		__free_iaa_wq(iaa_wq);
1980		kfree(iaa_wq);
1981	}
1982
1983	idxd_drv_disable_wq(wq);
1984	rebalance_wq_table();
1985
1986	if (nr_iaa == 0) {
1987		iaa_crypto_enabled = false;
1988		free_wq_table();
1989		module_put(THIS_MODULE);
1990
1991		pr_info("iaa_crypto now DISABLED\n");
1992	}
1993out:
1994	mutex_unlock(&iaa_devices_lock);
1995	mutex_unlock(&wq->wq_lock);
1996}
1997
1998static enum idxd_dev_type dev_types[] = {
1999	IDXD_DEV_WQ,
2000	IDXD_DEV_NONE,
2001};
2002
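/*
 * idxd sub-driver definition: probe/remove bind IAA wqs to this module and
 * iaa_desc_complete() handles hardware descriptor completions.
 */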
2003static struct idxd_device_driver iaa_crypto_driver = {
2004	.probe = iaa_crypto_probe,
2005	.remove = iaa_crypto_remove,
2006	.name = IDXD_SUBDRIVER_NAME,
2007	.type = dev_types,
2008	.desc_complete = iaa_desc_complete,
2009};
2010
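/*
 * Module init: discover the cpu/node topology, allocate the software
 * "deflate-generic" tfm the driver relies on, initialize the fixed
 * compression mode, register the idxd sub-driver and its driver attributes,
 * and create the debugfs stats.  The acomp algorithm itself is registered
 * later, when the first wq is probed.
 */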
2011static int __init iaa_crypto_init_module(void)
2012{
2013	int ret = 0;
2014	int node;
2015
2016	nr_cpus = num_possible_cpus();
2017	for_each_node_with_cpus(node)
2018		nr_nodes++;
2019	if (!nr_nodes) {
2020		pr_err("IAA couldn't find any nodes with cpus\n");
2021		return -ENODEV;
2022	}
2023	nr_cpus_per_node = nr_cpus / nr_nodes;
2024
2025	if (crypto_has_comp("deflate-generic", 0, 0))
2026		deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);
2027
2028	if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
2029		pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
2030		       "deflate-generic", PTR_ERR(deflate_generic_tfm));
2031		return -ENOMEM;
2032	}
2033
2034	ret = iaa_aecs_init_fixed();
2035	if (ret < 0) {
2036		pr_debug("IAA fixed compression mode init failed\n");
2037		goto err_aecs_init;
2038	}
2039
2040	ret = idxd_driver_register(&iaa_crypto_driver);
2041	if (ret) {
2042		pr_debug("IAA wq sub-driver registration failed\n");
2043		goto err_driver_reg;
2044	}
2045
2046	ret = driver_create_file(&iaa_crypto_driver.drv,
2047				 &driver_attr_verify_compress);
2048	if (ret) {
2049		pr_debug("IAA verify_compress attr creation failed\n");
2050		goto err_verify_attr_create;
2051	}
2052
2053	ret = driver_create_file(&iaa_crypto_driver.drv,
2054				 &driver_attr_sync_mode);
2055	if (ret) {
2056		pr_debug("IAA sync mode attr creation failed\n");
2057		goto err_sync_attr_create;
2058	}
2059
2060	if (iaa_crypto_debugfs_init())
2061		pr_warn("debugfs init failed, stats not available\n");
2062
2063	pr_debug("initialized\n");
2064out:
2065	return ret;
2066
2067err_sync_attr_create:
2068	driver_remove_file(&iaa_crypto_driver.drv,
2069			   &driver_attr_verify_compress);
2070err_verify_attr_create:
2071	idxd_driver_unregister(&iaa_crypto_driver);
2072err_driver_reg:
2073	iaa_aecs_cleanup_fixed();
2074err_aecs_init:
2075	crypto_free_comp(deflate_generic_tfm);
2076
2077	goto out;
2078}
2079
2080static void __exit iaa_crypto_cleanup_module(void)
2081{
2082	if (iaa_unregister_compression_device())
2083		pr_debug("IAA compression device unregister failed\n");
2084
2085	iaa_crypto_debugfs_cleanup();
2086	driver_remove_file(&iaa_crypto_driver.drv,
2087			   &driver_attr_sync_mode);
2088	driver_remove_file(&iaa_crypto_driver.drv,
2089			   &driver_attr_verify_compress);
2090	idxd_driver_unregister(&iaa_crypto_driver);
2091	iaa_aecs_cleanup_fixed();
2092	crypto_free_comp(deflate_generic_tfm);
2093
2094	pr_debug("cleaned up\n");
2095}
2096
2097MODULE_IMPORT_NS("IDXD");
2098MODULE_LICENSE("GPL");
2099MODULE_ALIAS_IDXD_DEVICE(0);
2100MODULE_AUTHOR("Intel Corporation");
2101MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");
2102
2103module_init(iaa_crypto_init_module);
2104module_exit(iaa_crypto_cleanup_module);