v4.6
   1/*
   2 * AMD Cryptographic Coprocessor (CCP) driver
   3 *
   4 * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
   5 *
   6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/kernel.h>
  15#include <linux/pci.h>
  16#include <linux/interrupt.h>
  17#include <crypto/scatterwalk.h>
  18#include <linux/ccp.h>
  19
  20#include "ccp-dev.h"
  21
  22/* SHA initial context values */
  23static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
  24	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
  25	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
  26	cpu_to_be32(SHA1_H4), 0, 0, 0,
  27};
  28
  29static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
  30	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
  31	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
  32	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
  33	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
  34};
  35
  36static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
  37	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
  38	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
  39	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
  40	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
  41};
  42
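/* Note (editorial): these tables hold the standard FIPS 180 initial hash
 * values in big-endian form; e.g. cpu_to_be32(SHA1_H0) == cpu_to_be32(0x67452301)
 * occupies memory as the bytes 67 45 23 01 on any host.  ccp_run_sha_cmd()
 * later pushes the context through the 256-bit byteswap passthru so the KSB
 * receives it in the little-endian layout the engine expects.
 */
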
  43static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
  44{
  45	int start;
  46
  47	for (;;) {
  48		mutex_lock(&ccp->ksb_mutex);
  49
  50		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
  51							ccp->ksb_count,
  52							ccp->ksb_start,
  53							count, 0);
  54		if (start <= ccp->ksb_count) {
  55			bitmap_set(ccp->ksb, start, count);
  56
  57			mutex_unlock(&ccp->ksb_mutex);
  58			break;
  59		}
  60
  61		ccp->ksb_avail = 0;
  62
  63		mutex_unlock(&ccp->ksb_mutex);
  64
  65		/* Wait for KSB entries to become available */
  66		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
  67			return 0;
  68	}
  69
  70	return KSB_START + start;
  71}
  72
  73static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
  74			 unsigned int count)
  75{
  76	if (!start)
  77		return;
  78
  79	mutex_lock(&ccp->ksb_mutex);
  80
  81	bitmap_clear(ccp->ksb, start - KSB_START, count);
  82
  83	ccp->ksb_avail = 1;
  84
  85	mutex_unlock(&ccp->ksb_mutex);
  86
  87	wake_up_interruptible_all(&ccp->ksb_queue);
  88}
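/* Illustrative sketch, not part of the upstream file: the two helpers above
 * form a reserve/use/release pair.  ccp_run_rsa_cmd() below follows the same
 * pattern to claim a contiguous run of KSB entries, blocking in
 * ccp_alloc_ksb() until another queue frees enough of them.
 */
static int ccp_ksb_usage_sketch(struct ccp_device *ccp, unsigned int count)
{
	u32 ksb;

	ksb = ccp_alloc_ksb(ccp, count);	/* may sleep; 0 means the wait was interrupted */
	if (!ksb)
		return -EIO;

	/* ... point op.ksb_key at the reserved entries and run the job ... */

	ccp_free_ksb(ccp, ksb, count);	/* clear the bitmap and wake waiters */
	return 0;
}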
  89
  90static u32 ccp_gen_jobid(struct ccp_device *ccp)
  91{
  92	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
  93}
  94
  95static void ccp_sg_free(struct ccp_sg_workarea *wa)
  96{
  97	if (wa->dma_count)
  98		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
  99
 100	wa->dma_count = 0;
 101}
 102
 103static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
 104				struct scatterlist *sg, u64 len,
 105				enum dma_data_direction dma_dir)
 106{
 107	memset(wa, 0, sizeof(*wa));
 108
 109	wa->sg = sg;
 110	if (!sg)
 111		return 0;
 112
 113	wa->nents = sg_nents_for_len(sg, len);
 114	if (wa->nents < 0)
 115		return wa->nents;
 116
 117	wa->bytes_left = len;
 118	wa->sg_used = 0;
 119
 120	if (len == 0)
 121		return 0;
 122
 123	if (dma_dir == DMA_NONE)
 124		return 0;
 125
 126	wa->dma_sg = sg;
 127	wa->dma_dev = dev;
 128	wa->dma_dir = dma_dir;
 129	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
 130	if (!wa->dma_count)
 131		return -ENOMEM;
 132
 133	return 0;
 134}
 135
 136static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
 137{
 138	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
 139
 140	if (!wa->sg)
 141		return;
 142
 143	wa->sg_used += nbytes;
 144	wa->bytes_left -= nbytes;
 145	if (wa->sg_used == wa->sg->length) {
 146		wa->sg = sg_next(wa->sg);
 147		wa->sg_used = 0;
 148	}
 149}
 150
 151static void ccp_dm_free(struct ccp_dm_workarea *wa)
 152{
 153	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
 154		if (wa->address)
 155			dma_pool_free(wa->dma_pool, wa->address,
 156				      wa->dma.address);
 157	} else {
 158		if (wa->dma.address)
 159			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
 160					 wa->dma.dir);
 161		kfree(wa->address);
 162	}
 163
 164	wa->address = NULL;
 165	wa->dma.address = 0;
 166}
 167
 168static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
 169				struct ccp_cmd_queue *cmd_q,
 170				unsigned int len,
 171				enum dma_data_direction dir)
 172{
 173	memset(wa, 0, sizeof(*wa));
 174
 175	if (!len)
 176		return 0;
 177
 178	wa->dev = cmd_q->ccp->dev;
 179	wa->length = len;
 180
 181	if (len <= CCP_DMAPOOL_MAX_SIZE) {
 182		wa->dma_pool = cmd_q->dma_pool;
 183
 184		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
 185					     &wa->dma.address);
 186		if (!wa->address)
 187			return -ENOMEM;
 188
 189		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
 190
 191		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
 192	} else {
 193		wa->address = kzalloc(len, GFP_KERNEL);
 194		if (!wa->address)
 195			return -ENOMEM;
 196
 197		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
 198						 dir);
 199		if (!wa->dma.address)
 200			return -ENOMEM;
 201
 202		wa->dma.length = len;
 203	}
 204	wa->dma.dir = dir;
 205
 206	return 0;
 207}
 208
 209static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
 210			    struct scatterlist *sg, unsigned int sg_offset,
 211			    unsigned int len)
 212{
 213	WARN_ON(!wa->address);
 214
 215	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
 216				 0);
 217}
 218
 219static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
 220			    struct scatterlist *sg, unsigned int sg_offset,
 221			    unsigned int len)
 222{
 223	WARN_ON(!wa->address);
 224
 225	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
 226				 1);
 227}
 228
 229static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
 230				   struct scatterlist *sg,
 231				   unsigned int len, unsigned int se_len,
 232				   bool sign_extend)
 233{
 234	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
 235	u8 buffer[CCP_REVERSE_BUF_SIZE];
 236
 237	if (WARN_ON(se_len > sizeof(buffer)))
 238		return -EINVAL;
 239
 240	sg_offset = len;
 241	dm_offset = 0;
 242	nbytes = len;
 243	while (nbytes) {
 244		ksb_len = min_t(unsigned int, nbytes, se_len);
 245		sg_offset -= ksb_len;
 246
 247		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
 248		for (i = 0; i < ksb_len; i++)
 249			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
 250
 251		dm_offset += ksb_len;
 252		nbytes -= ksb_len;
 253
 254		if ((ksb_len != se_len) && sign_extend) {
 255			/* Must sign-extend to nearest sign-extend length */
 256			if (wa->address[dm_offset - 1] & 0x80)
 257				memset(wa->address + dm_offset, 0xff,
 258				       se_len - ksb_len);
 259		}
 260	}
 261
 262	return 0;
 263}
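/* Standalone illustration, not from the driver: the net effect of
 * ccp_reverse_set_dm_area() is a full byte reversal of the big-endian
 * operand into little-endian order (plus optional sign extension of the
 * final chunk), staged at most CCP_REVERSE_BUF_SIZE bytes at a time.
 * The same result on a plain buffer:
 */
#include <stdio.h>

static void be_to_le(unsigned char *dst, const unsigned char *src,
		     unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; i++)
		dst[i] = src[len - 1 - i];
}

int main(void)
{
	const unsigned char be[5] = { 0x01, 0x02, 0x03, 0x04, 0x05 };
	unsigned char le[5];
	unsigned int i;

	be_to_le(le, be, sizeof(be));
	for (i = 0; i < sizeof(le); i++)
		printf("%02x ", le[i]);	/* prints: 05 04 03 02 01 */
	printf("\n");
	return 0;
}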
 264
 265static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
 266				    struct scatterlist *sg,
 267				    unsigned int len)
 268{
 269	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
 270	u8 buffer[CCP_REVERSE_BUF_SIZE];
 271
 272	sg_offset = 0;
 273	dm_offset = len;
 274	nbytes = len;
 275	while (nbytes) {
 276		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
 277		dm_offset -= ksb_len;
 278
 279		for (i = 0; i < ksb_len; i++)
 280			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
 281		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
 282
 283		sg_offset += ksb_len;
 284		nbytes -= ksb_len;
 285	}
 286}
 287
 288static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
 289{
 290	ccp_dm_free(&data->dm_wa);
 291	ccp_sg_free(&data->sg_wa);
 292}
 293
 294static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
 295			 struct scatterlist *sg, u64 sg_len,
 296			 unsigned int dm_len,
 297			 enum dma_data_direction dir)
 298{
 299	int ret;
 300
 301	memset(data, 0, sizeof(*data));
 302
 303	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
 304				   dir);
 305	if (ret)
 306		goto e_err;
 307
 308	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
 309	if (ret)
 310		goto e_err;
 311
 312	return 0;
 313
 314e_err:
 315	ccp_free_data(data, cmd_q);
 316
 317	return ret;
 318}
 319
 320static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
 321{
 322	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
 323	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
 324	unsigned int buf_count, nbytes;
 325
 326	/* Clear the buffer if setting it */
 327	if (!from)
 328		memset(dm_wa->address, 0, dm_wa->length);
 329
 330	if (!sg_wa->sg)
 331		return 0;
 332
 333	/* Perform the copy operation
 334	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
 335	 *   an unsigned int
 336	 */
 337	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
 338	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
 339				 nbytes, from);
 340
 341	/* Update the structures and generate the count */
 342	buf_count = 0;
 343	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
 344		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
 345			     dm_wa->length - buf_count);
 346		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
 347
 348		buf_count += nbytes;
 349		ccp_update_sg_workarea(sg_wa, nbytes);
 350	}
 351
 352	return buf_count;
 353}
 354
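/* ccp_queue_buf() is used through the two wrappers below: ccp_fill_queue_buf()
 * (from == 0) zeroes the DMA buffer and copies the next chunk of the source
 * scatterlist into it, while ccp_empty_queue_buf() (from == 1) copies the
 * buffer back out to the destination scatterlist.
 */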
 355static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
 356{
 357	return ccp_queue_buf(data, 0);
 358}
 359
 360static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
 361{
 362	return ccp_queue_buf(data, 1);
 363}
 364
 365static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
 366			     struct ccp_op *op, unsigned int block_size,
 367			     bool blocksize_op)
 368{
 369	unsigned int sg_src_len, sg_dst_len, op_len;
 370
 371	/* The CCP can only DMA from/to one address each per operation. This
 372	 * requires that we find the smallest DMA area between the source
 373	 * and destination. The resulting len values will always be <= UINT_MAX
 374	 * because the dma length is an unsigned int.
 375	 */
 376	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
 377	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
 378
 379	if (dst) {
 380		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
 381		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
 382		op_len = min(sg_src_len, sg_dst_len);
 383	} else {
 384		op_len = sg_src_len;
 385	}
 386
 387	/* The data operation length will be at least block_size in length
 388	 * or the smaller of available sg room remaining for the source or
 389	 * the destination
 390	 */
 391	op_len = max(op_len, block_size);
 392
 393	/* Unless we have to buffer data, there's no reason to wait */
 394	op->soc = 0;
 395
 396	if (sg_src_len < block_size) {
 397		/* Not enough data in the sg element, so it
 398		 * needs to be buffered into a blocksize chunk
 399		 */
 400		int cp_len = ccp_fill_queue_buf(src);
 401
 402		op->soc = 1;
 403		op->src.u.dma.address = src->dm_wa.dma.address;
 404		op->src.u.dma.offset = 0;
 405		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
 406	} else {
 407		/* Enough data in the sg element, but we need to
 408		 * adjust for any previously copied data
 409		 */
 410		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
 411		op->src.u.dma.offset = src->sg_wa.sg_used;
 412		op->src.u.dma.length = op_len & ~(block_size - 1);
 413
 414		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
 415	}
 416
 417	if (dst) {
 418		if (sg_dst_len < block_size) {
 419			/* Not enough room in the sg element or we're on the
 420			 * last piece of data (when using padding), so the
 421			 * output needs to be buffered into a blocksize chunk
 422			 */
 423			op->soc = 1;
 424			op->dst.u.dma.address = dst->dm_wa.dma.address;
 425			op->dst.u.dma.offset = 0;
 426			op->dst.u.dma.length = op->src.u.dma.length;
 427		} else {
 428			/* Enough room in the sg element, but we need to
 429			 * adjust for any previously used area
 430			 */
 431			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
 432			op->dst.u.dma.offset = dst->sg_wa.sg_used;
 433			op->dst.u.dma.length = op->src.u.dma.length;
 434		}
 435	}
 436}
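/* Worked example (editorial): with block_size == 16, 24 bytes left in the
 * current source sg entry and ample room in the destination, op_len becomes
 * max(min(24, dst room), 16) = 24; since 24 >= block_size the source sg is
 * used directly and the DMA length is rounded down to whole blocks,
 * 24 & ~15 = 16.  The 8 leftover bytes are handled on the next pass, where
 * they fall below block_size and are buffered through dm_wa instead.
 */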
 437
 438static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
 439			     struct ccp_op *op)
 440{
 441	op->init = 0;
 442
 443	if (dst) {
 444		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
 445			ccp_empty_queue_buf(dst);
 446		else
 447			ccp_update_sg_workarea(&dst->sg_wa,
 448					       op->dst.u.dma.length);
 449	}
 450}
 451
 452static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
 453				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
 454				u32 byte_swap, bool from)
 455{
 456	struct ccp_op op;
 457
 458	memset(&op, 0, sizeof(op));
 459
 460	op.cmd_q = cmd_q;
 461	op.jobid = jobid;
 462	op.eom = 1;
 463
 464	if (from) {
 465		op.soc = 1;
 466		op.src.type = CCP_MEMTYPE_KSB;
 467		op.src.u.ksb = ksb;
 468		op.dst.type = CCP_MEMTYPE_SYSTEM;
 469		op.dst.u.dma.address = wa->dma.address;
 470		op.dst.u.dma.length = wa->length;
 471	} else {
 472		op.src.type = CCP_MEMTYPE_SYSTEM;
 473		op.src.u.dma.address = wa->dma.address;
 474		op.src.u.dma.length = wa->length;
 475		op.dst.type = CCP_MEMTYPE_KSB;
 476		op.dst.u.ksb = ksb;
 477	}
 478
 479	op.u.passthru.byte_swap = byte_swap;
 480
 481	return cmd_q->ccp->vdata->perform->perform_passthru(&op);
 482}
 483
 484static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
 485			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
 486			   u32 byte_swap)
 487{
 488	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
 489}
 490
 491static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
 492			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
 493			     u32 byte_swap)
 494{
 495	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
 496}
 497
 498static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 499				struct ccp_cmd *cmd)
 500{
 501	struct ccp_aes_engine *aes = &cmd->u.aes;
 502	struct ccp_dm_workarea key, ctx;
 503	struct ccp_data src;
 504	struct ccp_op op;
 505	unsigned int dm_offset;
 506	int ret;
 507
 508	if (!((aes->key_len == AES_KEYSIZE_128) ||
 509	      (aes->key_len == AES_KEYSIZE_192) ||
 510	      (aes->key_len == AES_KEYSIZE_256)))
 511		return -EINVAL;
 512
 513	if (aes->src_len & (AES_BLOCK_SIZE - 1))
 514		return -EINVAL;
 515
 516	if (aes->iv_len != AES_BLOCK_SIZE)
 517		return -EINVAL;
 518
 519	if (!aes->key || !aes->iv || !aes->src)
 520		return -EINVAL;
 521
 522	if (aes->cmac_final) {
 523		if (aes->cmac_key_len != AES_BLOCK_SIZE)
 524			return -EINVAL;
 525
 526		if (!aes->cmac_key)
 527			return -EINVAL;
 528	}
 529
 530	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
 531	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
 532
 533	ret = -EIO;
 534	memset(&op, 0, sizeof(op));
 535	op.cmd_q = cmd_q;
 536	op.jobid = ccp_gen_jobid(cmd_q->ccp);
 537	op.ksb_key = cmd_q->ksb_key;
 538	op.ksb_ctx = cmd_q->ksb_ctx;
 539	op.init = 1;
 540	op.u.aes.type = aes->type;
 541	op.u.aes.mode = aes->mode;
 542	op.u.aes.action = aes->action;
 543
 544	/* All supported key sizes fit in a single (32-byte) KSB entry
 545	 * and must be in little endian format. Use the 256-bit byte
 546	 * swap passthru option to convert from big endian to little
 547	 * endian.
 548	 */
 549	ret = ccp_init_dm_workarea(&key, cmd_q,
 550				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
 551				   DMA_TO_DEVICE);
 552	if (ret)
 553		return ret;
 554
 555	dm_offset = CCP_KSB_BYTES - aes->key_len;
 556	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
 557	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
 558			      CCP_PASSTHRU_BYTESWAP_256BIT);
 559	if (ret) {
 560		cmd->engine_error = cmd_q->cmd_error;
 561		goto e_key;
 562	}
 563
 564	/* The AES context fits in a single (32-byte) KSB entry and
 565	 * must be in little endian format. Use the 256-bit byte swap
 566	 * passthru option to convert from big endian to little endian.
 567	 */
 568	ret = ccp_init_dm_workarea(&ctx, cmd_q,
 569				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
 570				   DMA_BIDIRECTIONAL);
 571	if (ret)
 572		goto e_key;
 573
 574	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
 575	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 576	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 577			      CCP_PASSTHRU_BYTESWAP_256BIT);
 578	if (ret) {
 579		cmd->engine_error = cmd_q->cmd_error;
 580		goto e_ctx;
 581	}
 582
 583	/* Send data to the CCP AES engine */
 584	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
 585			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
 586	if (ret)
 587		goto e_ctx;
 588
 589	while (src.sg_wa.bytes_left) {
 590		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
 591		if (aes->cmac_final && !src.sg_wa.bytes_left) {
 592			op.eom = 1;
 593
 594			/* Push the K1/K2 key to the CCP now */
 595			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
 596						op.ksb_ctx,
 597						CCP_PASSTHRU_BYTESWAP_256BIT);
 598			if (ret) {
 599				cmd->engine_error = cmd_q->cmd_error;
 600				goto e_src;
 601			}
 602
 603			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
 604					aes->cmac_key_len);
 605			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 606					      CCP_PASSTHRU_BYTESWAP_256BIT);
 607			if (ret) {
 608				cmd->engine_error = cmd_q->cmd_error;
 609				goto e_src;
 610			}
 611		}
 612
 613		ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
 614		if (ret) {
 615			cmd->engine_error = cmd_q->cmd_error;
 616			goto e_src;
 617		}
 618
 619		ccp_process_data(&src, NULL, &op);
 620	}
 621
 622	/* Retrieve the AES context - convert from LE to BE using
 623	 * 32-byte (256-bit) byteswapping
 624	 */
 625	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 626				CCP_PASSTHRU_BYTESWAP_256BIT);
 627	if (ret) {
 628		cmd->engine_error = cmd_q->cmd_error;
 629		goto e_src;
 630	}
 631
 632	/* ...but we only need AES_BLOCK_SIZE bytes */
 633	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
 634	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 635
 636e_src:
 637	ccp_free_data(&src, cmd_q);
 638
 639e_ctx:
 640	ccp_dm_free(&ctx);
 641
 642e_key:
 643	ccp_dm_free(&key);
 644
 645	return ret;
 646}
 647
 648static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 649{
 650	struct ccp_aes_engine *aes = &cmd->u.aes;
 651	struct ccp_dm_workarea key, ctx;
 652	struct ccp_data src, dst;
 653	struct ccp_op op;
 654	unsigned int dm_offset;
 655	bool in_place = false;
 656	int ret;
 657
 658	if (aes->mode == CCP_AES_MODE_CMAC)
 659		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
 660
 661	if (!((aes->key_len == AES_KEYSIZE_128) ||
 662	      (aes->key_len == AES_KEYSIZE_192) ||
 663	      (aes->key_len == AES_KEYSIZE_256)))
 664		return -EINVAL;
 665
 666	if (((aes->mode == CCP_AES_MODE_ECB) ||
 667	     (aes->mode == CCP_AES_MODE_CBC) ||
 668	     (aes->mode == CCP_AES_MODE_CFB)) &&
 669	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
 670		return -EINVAL;
 671
 672	if (!aes->key || !aes->src || !aes->dst)
 673		return -EINVAL;
 674
 675	if (aes->mode != CCP_AES_MODE_ECB) {
 676		if (aes->iv_len != AES_BLOCK_SIZE)
 677			return -EINVAL;
 678
 679		if (!aes->iv)
 680			return -EINVAL;
 681	}
 682
 683	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
 684	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
 685
 686	ret = -EIO;
 687	memset(&op, 0, sizeof(op));
 688	op.cmd_q = cmd_q;
 689	op.jobid = ccp_gen_jobid(cmd_q->ccp);
 690	op.ksb_key = cmd_q->ksb_key;
 691	op.ksb_ctx = cmd_q->ksb_ctx;
 692	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
 693	op.u.aes.type = aes->type;
 694	op.u.aes.mode = aes->mode;
 695	op.u.aes.action = aes->action;
 696
 697	/* All supported key sizes fit in a single (32-byte) KSB entry
 698	 * and must be in little endian format. Use the 256-bit byte
 699	 * swap passthru option to convert from big endian to little
 700	 * endian.
 701	 */
 702	ret = ccp_init_dm_workarea(&key, cmd_q,
 703				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
 704				   DMA_TO_DEVICE);
 705	if (ret)
 706		return ret;
 707
 708	dm_offset = CCP_KSB_BYTES - aes->key_len;
 709	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
 710	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
 711			      CCP_PASSTHRU_BYTESWAP_256BIT);
 712	if (ret) {
 713		cmd->engine_error = cmd_q->cmd_error;
 714		goto e_key;
 715	}
 716
 717	/* The AES context fits in a single (32-byte) KSB entry and
 718	 * must be in little endian format. Use the 256-bit byte swap
 719	 * passthru option to convert from big endian to little endian.
 720	 */
 721	ret = ccp_init_dm_workarea(&ctx, cmd_q,
 722				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
 723				   DMA_BIDIRECTIONAL);
 724	if (ret)
 725		goto e_key;
 726
 727	if (aes->mode != CCP_AES_MODE_ECB) {
  728		/* Load the AES context - convert to LE */
 729		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
 730		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 731		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 732				      CCP_PASSTHRU_BYTESWAP_256BIT);
 733		if (ret) {
 734			cmd->engine_error = cmd_q->cmd_error;
 735			goto e_ctx;
 736		}
 737	}
 738
 739	/* Prepare the input and output data workareas. For in-place
 740	 * operations we need to set the dma direction to BIDIRECTIONAL
 741	 * and copy the src workarea to the dst workarea.
 742	 */
 743	if (sg_virt(aes->src) == sg_virt(aes->dst))
 744		in_place = true;
 745
 746	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
 747			    AES_BLOCK_SIZE,
 748			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
 749	if (ret)
 750		goto e_ctx;
 751
 752	if (in_place) {
 753		dst = src;
 754	} else {
 755		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
 756				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
 757		if (ret)
 758			goto e_src;
 759	}
 760
 761	/* Send data to the CCP AES engine */
 762	while (src.sg_wa.bytes_left) {
 763		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
 764		if (!src.sg_wa.bytes_left) {
 765			op.eom = 1;
 766
 767			/* Since we don't retrieve the AES context in ECB
 768			 * mode we have to wait for the operation to complete
 769			 * on the last piece of data
 770			 */
 771			if (aes->mode == CCP_AES_MODE_ECB)
 772				op.soc = 1;
 773		}
 774
 775		ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
 776		if (ret) {
 777			cmd->engine_error = cmd_q->cmd_error;
 778			goto e_dst;
 779		}
 780
 781		ccp_process_data(&src, &dst, &op);
 782	}
 783
 784	if (aes->mode != CCP_AES_MODE_ECB) {
 785		/* Retrieve the AES context - convert from LE to BE using
 786		 * 32-byte (256-bit) byteswapping
 787		 */
 788		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 789					CCP_PASSTHRU_BYTESWAP_256BIT);
 790		if (ret) {
 791			cmd->engine_error = cmd_q->cmd_error;
 792			goto e_dst;
 793		}
 794
 795		/* ...but we only need AES_BLOCK_SIZE bytes */
 796		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
 797		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 798	}
 799
 800e_dst:
 801	if (!in_place)
 802		ccp_free_data(&dst, cmd_q);
 803
 804e_src:
 805	ccp_free_data(&src, cmd_q);
 806
 807e_ctx:
 808	ccp_dm_free(&ctx);
 809
 810e_key:
 811	ccp_dm_free(&key);
 812
 813	return ret;
 814}
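/* Illustrative sketch, not part of the upstream file: what a caller-side
 * AES-128-CBC encrypt request looks like for ccp_run_aes_cmd().  The enum
 * values are those from <linux/ccp.h>, and the lengths must satisfy the
 * checks at the top of that function (block-aligned src_len, an
 * AES_BLOCK_SIZE iv, and so on).
 */
static void ccp_fill_aes_cbc_cmd_sketch(struct ccp_cmd *cmd,
					struct scatterlist *key,	/* 16 bytes */
					struct scatterlist *iv,		/* 16 bytes */
					struct scatterlist *src,
					struct scatterlist *dst,
					u64 len)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_AES;
	cmd->u.aes.type = CCP_AES_TYPE_128;
	cmd->u.aes.mode = CCP_AES_MODE_CBC;
	cmd->u.aes.action = CCP_AES_ACTION_ENCRYPT;
	cmd->u.aes.key = key;
	cmd->u.aes.key_len = AES_KEYSIZE_128;
	cmd->u.aes.iv = iv;
	cmd->u.aes.iv_len = AES_BLOCK_SIZE;
	cmd->u.aes.src = src;
	cmd->u.aes.src_len = len;	/* must be a multiple of AES_BLOCK_SIZE */
	cmd->u.aes.dst = dst;
}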
 815
 816static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 817			       struct ccp_cmd *cmd)
 818{
 819	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
 820	struct ccp_dm_workarea key, ctx;
 821	struct ccp_data src, dst;
 822	struct ccp_op op;
 823	unsigned int unit_size, dm_offset;
 824	bool in_place = false;
 825	int ret;
 826
 827	switch (xts->unit_size) {
 828	case CCP_XTS_AES_UNIT_SIZE_16:
 829		unit_size = 16;
 830		break;
 831	case CCP_XTS_AES_UNIT_SIZE_512:
 832		unit_size = 512;
 833		break;
 834	case CCP_XTS_AES_UNIT_SIZE_1024:
 835		unit_size = 1024;
 836		break;
 837	case CCP_XTS_AES_UNIT_SIZE_2048:
 838		unit_size = 2048;
 839		break;
 840	case CCP_XTS_AES_UNIT_SIZE_4096:
 841		unit_size = 4096;
 842		break;
 843
 844	default:
 845		return -EINVAL;
 846	}
 847
 848	if (xts->key_len != AES_KEYSIZE_128)
 849		return -EINVAL;
 850
 851	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
 852		return -EINVAL;
 853
 854	if (xts->iv_len != AES_BLOCK_SIZE)
 855		return -EINVAL;
 856
 857	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
 858		return -EINVAL;
 859
 860	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
 861	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
 862
 863	ret = -EIO;
 864	memset(&op, 0, sizeof(op));
 865	op.cmd_q = cmd_q;
 866	op.jobid = ccp_gen_jobid(cmd_q->ccp);
 867	op.ksb_key = cmd_q->ksb_key;
 868	op.ksb_ctx = cmd_q->ksb_ctx;
 869	op.init = 1;
 870	op.u.xts.action = xts->action;
 871	op.u.xts.unit_size = xts->unit_size;
 872
 873	/* All supported key sizes fit in a single (32-byte) KSB entry
 874	 * and must be in little endian format. Use the 256-bit byte
 875	 * swap passthru option to convert from big endian to little
 876	 * endian.
 877	 */
 878	ret = ccp_init_dm_workarea(&key, cmd_q,
 879				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
 880				   DMA_TO_DEVICE);
 881	if (ret)
 882		return ret;
 883
 884	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
 885	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
 886	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
 887	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
 888			      CCP_PASSTHRU_BYTESWAP_256BIT);
 889	if (ret) {
 890		cmd->engine_error = cmd_q->cmd_error;
 891		goto e_key;
 892	}
 893
 894	/* The AES context fits in a single (32-byte) KSB entry and
 895	 * for XTS is already in little endian format so no byte swapping
 896	 * is needed.
 897	 */
 898	ret = ccp_init_dm_workarea(&ctx, cmd_q,
 899				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
 900				   DMA_BIDIRECTIONAL);
 901	if (ret)
 902		goto e_key;
 903
 904	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
 905	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 906			      CCP_PASSTHRU_BYTESWAP_NOOP);
 907	if (ret) {
 908		cmd->engine_error = cmd_q->cmd_error;
 909		goto e_ctx;
 910	}
 911
 912	/* Prepare the input and output data workareas. For in-place
 913	 * operations we need to set the dma direction to BIDIRECTIONAL
 914	 * and copy the src workarea to the dst workarea.
 915	 */
 916	if (sg_virt(xts->src) == sg_virt(xts->dst))
 917		in_place = true;
 918
 919	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
 920			    unit_size,
 921			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
 922	if (ret)
 923		goto e_ctx;
 924
 925	if (in_place) {
 926		dst = src;
 927	} else {
 928		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
 929				    unit_size, DMA_FROM_DEVICE);
 930		if (ret)
 931			goto e_src;
 932	}
 933
 934	/* Send data to the CCP AES engine */
 935	while (src.sg_wa.bytes_left) {
 936		ccp_prepare_data(&src, &dst, &op, unit_size, true);
 937		if (!src.sg_wa.bytes_left)
 938			op.eom = 1;
 939
 940		ret = cmd_q->ccp->vdata->perform->perform_xts_aes(&op);
 941		if (ret) {
 942			cmd->engine_error = cmd_q->cmd_error;
 943			goto e_dst;
 944		}
 945
 946		ccp_process_data(&src, &dst, &op);
 947	}
 948
 949	/* Retrieve the AES context - convert from LE to BE using
 950	 * 32-byte (256-bit) byteswapping
 951	 */
 952	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 953				CCP_PASSTHRU_BYTESWAP_256BIT);
 954	if (ret) {
 955		cmd->engine_error = cmd_q->cmd_error;
 956		goto e_dst;
 957	}
 958
 959	/* ...but we only need AES_BLOCK_SIZE bytes */
 960	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
 961	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
 962
 963e_dst:
 964	if (!in_place)
 965		ccp_free_data(&dst, cmd_q);
 966
 967e_src:
 968	ccp_free_data(&src, cmd_q);
 969
 970e_ctx:
 971	ccp_dm_free(&ctx);
 972
 973e_key:
 974	ccp_dm_free(&key);
 975
 976	return ret;
 977}
 978
 979static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 980{
 981	struct ccp_sha_engine *sha = &cmd->u.sha;
 982	struct ccp_dm_workarea ctx;
 983	struct ccp_data src;
 984	struct ccp_op op;
 985	int ret;
 986
 987	if (sha->ctx_len != CCP_SHA_CTXSIZE)
 988		return -EINVAL;
 989
 990	if (!sha->ctx)
 991		return -EINVAL;
 992
 993	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
 994		return -EINVAL;
 995
 996	if (!sha->src_len) {
 997		const u8 *sha_zero;
 998
 999		/* Not final, just return */
1000		if (!sha->final)
1001			return 0;
1002
1003		/* CCP can't do a zero length sha operation so the caller
1004		 * must buffer the data.
1005		 */
1006		if (sha->msg_bits)
1007			return -EINVAL;
1008
1009		/* The CCP cannot perform zero-length sha operations so the
1010		 * caller is required to buffer data for the final operation.
1011		 * However, a sha operation for a message with a total length
1012		 * of zero is valid so known values are required to supply
1013		 * the result.
1014		 */
1015		switch (sha->type) {
1016		case CCP_SHA_TYPE_1:
1017			sha_zero = sha1_zero_message_hash;
1018			break;
1019		case CCP_SHA_TYPE_224:
1020			sha_zero = sha224_zero_message_hash;
1021			break;
1022		case CCP_SHA_TYPE_256:
1023			sha_zero = sha256_zero_message_hash;
1024			break;
1025		default:
1026			return -EINVAL;
1027		}
1028
1029		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1030					 sha->ctx_len, 1);
1031
1032		return 0;
1033	}
1034
1035	if (!sha->src)
1036		return -EINVAL;
1037
1038	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1039
1040	memset(&op, 0, sizeof(op));
1041	op.cmd_q = cmd_q;
1042	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1043	op.ksb_ctx = cmd_q->ksb_ctx;
1044	op.u.sha.type = sha->type;
1045	op.u.sha.msg_bits = sha->msg_bits;
1046
1047	/* The SHA context fits in a single (32-byte) KSB entry and
1048	 * must be in little endian format. Use the 256-bit byte swap
1049	 * passthru option to convert from big endian to little endian.
1050	 */
1051	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1052				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1053				   DMA_BIDIRECTIONAL);
1054	if (ret)
1055		return ret;
1056
1057	if (sha->first) {
1058		const __be32 *init;
1059
1060		switch (sha->type) {
1061		case CCP_SHA_TYPE_1:
1062			init = ccp_sha1_init;
1063			break;
1064		case CCP_SHA_TYPE_224:
1065			init = ccp_sha224_init;
1066			break;
1067		case CCP_SHA_TYPE_256:
1068			init = ccp_sha256_init;
1069			break;
1070		default:
1071			ret = -EINVAL;
1072			goto e_ctx;
1073		}
1074		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1075	} else {
1076		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1077	}
1078
1079	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1080			      CCP_PASSTHRU_BYTESWAP_256BIT);
1081	if (ret) {
1082		cmd->engine_error = cmd_q->cmd_error;
1083		goto e_ctx;
1084	}
1085
1086	/* Send data to the CCP SHA engine */
1087	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1088			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1089	if (ret)
1090		goto e_ctx;
1091
1092	while (src.sg_wa.bytes_left) {
1093		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1094		if (sha->final && !src.sg_wa.bytes_left)
1095			op.eom = 1;
1096
1097		ret = cmd_q->ccp->vdata->perform->perform_sha(&op);
1098		if (ret) {
1099			cmd->engine_error = cmd_q->cmd_error;
1100			goto e_data;
1101		}
1102
1103		ccp_process_data(&src, NULL, &op);
1104	}
1105
1106	/* Retrieve the SHA context - convert from LE to BE using
 1107	 * 32-byte (256-bit) byteswapping
1108	 */
1109	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1110				CCP_PASSTHRU_BYTESWAP_256BIT);
1111	if (ret) {
1112		cmd->engine_error = cmd_q->cmd_error;
1113		goto e_data;
1114	}
1115
1116	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1117
1118	if (sha->final && sha->opad) {
1119		/* HMAC operation, recursively perform final SHA */
1120		struct ccp_cmd hmac_cmd;
1121		struct scatterlist sg;
1122		u64 block_size, digest_size;
1123		u8 *hmac_buf;
1124
1125		switch (sha->type) {
1126		case CCP_SHA_TYPE_1:
1127			block_size = SHA1_BLOCK_SIZE;
1128			digest_size = SHA1_DIGEST_SIZE;
1129			break;
1130		case CCP_SHA_TYPE_224:
1131			block_size = SHA224_BLOCK_SIZE;
1132			digest_size = SHA224_DIGEST_SIZE;
1133			break;
1134		case CCP_SHA_TYPE_256:
1135			block_size = SHA256_BLOCK_SIZE;
1136			digest_size = SHA256_DIGEST_SIZE;
1137			break;
1138		default:
1139			ret = -EINVAL;
1140			goto e_data;
1141		}
1142
1143		if (sha->opad_len != block_size) {
1144			ret = -EINVAL;
1145			goto e_data;
1146		}
1147
1148		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1149		if (!hmac_buf) {
1150			ret = -ENOMEM;
1151			goto e_data;
1152		}
1153		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1154
1155		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1156		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1157
1158		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1159		hmac_cmd.engine = CCP_ENGINE_SHA;
1160		hmac_cmd.u.sha.type = sha->type;
1161		hmac_cmd.u.sha.ctx = sha->ctx;
1162		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1163		hmac_cmd.u.sha.src = &sg;
1164		hmac_cmd.u.sha.src_len = block_size + digest_size;
1165		hmac_cmd.u.sha.opad = NULL;
1166		hmac_cmd.u.sha.opad_len = 0;
1167		hmac_cmd.u.sha.first = 1;
1168		hmac_cmd.u.sha.final = 1;
1169		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1170
1171		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1172		if (ret)
1173			cmd->engine_error = hmac_cmd.engine_error;
1174
1175		kfree(hmac_buf);
1176	}
1177
1178e_data:
1179	ccp_free_data(&src, cmd_q);
1180
1181e_ctx:
1182	ccp_dm_free(&ctx);
1183
1184	return ret;
1185}
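/* Note on the HMAC path above (editorial): the caller supplies opad already
 * XORed with the padded key, and the main loop has left the inner digest
 * H((K ^ ipad) || msg) in ctx.  hmac_buf is laid out as
 *
 *	[ opad (block_size bytes) | inner digest (digest_size bytes) ]
 *
 * and the recursive ccp_run_sha_cmd() call hashes it as one
 * (block_size + digest_size)-byte message, giving
 * HMAC = H((K ^ opad) || H((K ^ ipad) || msg)).
 */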
1186
1187static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1188{
1189	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1190	struct ccp_dm_workarea exp, src;
1191	struct ccp_data dst;
1192	struct ccp_op op;
1193	unsigned int ksb_count, i_len, o_len;
1194	int ret;
1195
1196	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1197		return -EINVAL;
1198
1199	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1200		return -EINVAL;
1201
1202	/* The RSA modulus must precede the message being acted upon, so
1203	 * it must be copied to a DMA area where the message and the
1204	 * modulus can be concatenated.  Therefore the input buffer
1205	 * length required is twice the output buffer length (which
1206	 * must be a multiple of 256-bits).
1207	 */
1208	o_len = ((rsa->key_size + 255) / 256) * 32;
1209	i_len = o_len * 2;
1210
1211	ksb_count = o_len / CCP_KSB_BYTES;
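	/* Worked example (editorial): for a 2048-bit key,
	 * o_len = ((2048 + 255) / 256) * 32 = 8 * 32 = 256 bytes,
	 * i_len = 512 bytes (modulus followed by the message) and, with
	 * 32-byte KSB entries, ksb_count = 256 / 32 = 8 entries reserved
	 * below for the exponent.
	 */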
1212
1213	memset(&op, 0, sizeof(op));
1214	op.cmd_q = cmd_q;
1215	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1216	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1217	if (!op.ksb_key)
1218		return -EIO;
1219
1220	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1221	 * be in little endian format. Reverse copy each 32-byte chunk
1222	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1223	 * and each byte within that chunk and do not perform any byte swap
1224	 * operations on the passthru operation.
1225	 */
1226	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1227	if (ret)
1228		goto e_ksb;
1229
1230	ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
1231				      CCP_KSB_BYTES, false);
1232	if (ret)
1233		goto e_exp;
1234	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1235			      CCP_PASSTHRU_BYTESWAP_NOOP);
1236	if (ret) {
1237		cmd->engine_error = cmd_q->cmd_error;
1238		goto e_exp;
1239	}
1240
1241	/* Concatenate the modulus and the message. Both the modulus and
1242	 * the operands must be in little endian format.  Since the input
1243	 * is in big endian format it must be converted.
1244	 */
1245	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1246	if (ret)
1247		goto e_exp;
1248
1249	ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
1250				      CCP_KSB_BYTES, false);
1251	if (ret)
1252		goto e_src;
1253	src.address += o_len;	/* Adjust the address for the copy operation */
1254	ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
1255				      CCP_KSB_BYTES, false);
1256	if (ret)
1257		goto e_src;
1258	src.address -= o_len;	/* Reset the address to original value */
1259
1260	/* Prepare the output area for the operation */
1261	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1262			    o_len, DMA_FROM_DEVICE);
1263	if (ret)
1264		goto e_src;
1265
1266	op.soc = 1;
1267	op.src.u.dma.address = src.dma.address;
1268	op.src.u.dma.offset = 0;
1269	op.src.u.dma.length = i_len;
1270	op.dst.u.dma.address = dst.dm_wa.dma.address;
1271	op.dst.u.dma.offset = 0;
1272	op.dst.u.dma.length = o_len;
1273
1274	op.u.rsa.mod_size = rsa->key_size;
1275	op.u.rsa.input_len = i_len;
1276
1277	ret = cmd_q->ccp->vdata->perform->perform_rsa(&op);
1278	if (ret) {
1279		cmd->engine_error = cmd_q->cmd_error;
1280		goto e_dst;
1281	}
1282
1283	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1284
1285e_dst:
1286	ccp_free_data(&dst, cmd_q);
1287
1288e_src:
1289	ccp_dm_free(&src);
1290
1291e_exp:
1292	ccp_dm_free(&exp);
1293
1294e_ksb:
1295	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1296
1297	return ret;
1298}
1299
1300static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1301				struct ccp_cmd *cmd)
1302{
1303	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1304	struct ccp_dm_workarea mask;
1305	struct ccp_data src, dst;
1306	struct ccp_op op;
1307	bool in_place = false;
1308	unsigned int i;
1309	int ret;
1310
1311	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1312		return -EINVAL;
1313
1314	if (!pt->src || !pt->dst)
1315		return -EINVAL;
1316
1317	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1318		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1319			return -EINVAL;
1320		if (!pt->mask)
1321			return -EINVAL;
1322	}
1323
1324	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1325
1326	memset(&op, 0, sizeof(op));
1327	op.cmd_q = cmd_q;
1328	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1329
1330	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1331		/* Load the mask */
1332		op.ksb_key = cmd_q->ksb_key;
1333
1334		ret = ccp_init_dm_workarea(&mask, cmd_q,
1335					   CCP_PASSTHRU_KSB_COUNT *
1336					   CCP_KSB_BYTES,
1337					   DMA_TO_DEVICE);
1338		if (ret)
1339			return ret;
1340
1341		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1342		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1343				      CCP_PASSTHRU_BYTESWAP_NOOP);
1344		if (ret) {
1345			cmd->engine_error = cmd_q->cmd_error;
1346			goto e_mask;
1347		}
1348	}
1349
1350	/* Prepare the input and output data workareas. For in-place
1351	 * operations we need to set the dma direction to BIDIRECTIONAL
1352	 * and copy the src workarea to the dst workarea.
1353	 */
1354	if (sg_virt(pt->src) == sg_virt(pt->dst))
1355		in_place = true;
1356
1357	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1358			    CCP_PASSTHRU_MASKSIZE,
1359			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1360	if (ret)
1361		goto e_mask;
1362
1363	if (in_place) {
1364		dst = src;
1365	} else {
1366		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1367				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1368		if (ret)
1369			goto e_src;
1370	}
1371
1372	/* Send data to the CCP Passthru engine
1373	 *   Because the CCP engine works on a single source and destination
1374	 *   dma address at a time, each entry in the source scatterlist
1375	 *   (after the dma_map_sg call) must be less than or equal to the
1376	 *   (remaining) length in the destination scatterlist entry and the
1377	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1378	 */
1379	dst.sg_wa.sg_used = 0;
1380	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1381		if (!dst.sg_wa.sg ||
1382		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1383			ret = -EINVAL;
1384			goto e_dst;
1385		}
1386
1387		if (i == src.sg_wa.dma_count) {
1388			op.eom = 1;
1389			op.soc = 1;
1390		}
1391
1392		op.src.type = CCP_MEMTYPE_SYSTEM;
1393		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1394		op.src.u.dma.offset = 0;
1395		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1396
1397		op.dst.type = CCP_MEMTYPE_SYSTEM;
1398		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1399		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1400		op.dst.u.dma.length = op.src.u.dma.length;
1401
1402		ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
1403		if (ret) {
1404			cmd->engine_error = cmd_q->cmd_error;
1405			goto e_dst;
1406		}
1407
1408		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1409		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1410			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1411			dst.sg_wa.sg_used = 0;
1412		}
1413		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1414	}
1415
1416e_dst:
1417	if (!in_place)
1418		ccp_free_data(&dst, cmd_q);
1419
1420e_src:
1421	ccp_free_data(&src, cmd_q);
1422
1423e_mask:
1424	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1425		ccp_dm_free(&mask);
1426
1427	return ret;
1428}
1429
1430static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1431{
1432	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1433	struct ccp_dm_workarea src, dst;
1434	struct ccp_op op;
1435	int ret;
1436	u8 *save;
1437
1438	if (!ecc->u.mm.operand_1 ||
1439	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1440		return -EINVAL;
1441
1442	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1443		if (!ecc->u.mm.operand_2 ||
1444		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1445			return -EINVAL;
1446
1447	if (!ecc->u.mm.result ||
1448	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1449		return -EINVAL;
1450
1451	memset(&op, 0, sizeof(op));
1452	op.cmd_q = cmd_q;
1453	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1454
1455	/* Concatenate the modulus and the operands. Both the modulus and
1456	 * the operands must be in little endian format.  Since the input
1457	 * is in big endian format it must be converted and placed in a
1458	 * fixed length buffer.
1459	 */
1460	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1461				   DMA_TO_DEVICE);
1462	if (ret)
1463		return ret;
1464
1465	/* Save the workarea address since it is updated in order to perform
1466	 * the concatenation
1467	 */
1468	save = src.address;
1469
1470	/* Copy the ECC modulus */
1471	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1472				      CCP_ECC_OPERAND_SIZE, false);
1473	if (ret)
1474		goto e_src;
1475	src.address += CCP_ECC_OPERAND_SIZE;
1476
1477	/* Copy the first operand */
1478	ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1479				      ecc->u.mm.operand_1_len,
1480				      CCP_ECC_OPERAND_SIZE, false);
1481	if (ret)
1482		goto e_src;
1483	src.address += CCP_ECC_OPERAND_SIZE;
1484
1485	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1486		/* Copy the second operand */
1487		ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1488					      ecc->u.mm.operand_2_len,
1489					      CCP_ECC_OPERAND_SIZE, false);
1490		if (ret)
1491			goto e_src;
1492		src.address += CCP_ECC_OPERAND_SIZE;
1493	}
1494
1495	/* Restore the workarea address */
1496	src.address = save;
1497
1498	/* Prepare the output area for the operation */
1499	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1500				   DMA_FROM_DEVICE);
1501	if (ret)
1502		goto e_src;
1503
1504	op.soc = 1;
1505	op.src.u.dma.address = src.dma.address;
1506	op.src.u.dma.offset = 0;
1507	op.src.u.dma.length = src.length;
1508	op.dst.u.dma.address = dst.dma.address;
1509	op.dst.u.dma.offset = 0;
1510	op.dst.u.dma.length = dst.length;
1511
1512	op.u.ecc.function = cmd->u.ecc.function;
1513
1514	ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
1515	if (ret) {
1516		cmd->engine_error = cmd_q->cmd_error;
1517		goto e_dst;
1518	}
1519
1520	ecc->ecc_result = le16_to_cpup(
1521		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1522	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1523		ret = -EIO;
1524		goto e_dst;
1525	}
1526
1527	/* Save the ECC result */
1528	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1529
1530e_dst:
1531	ccp_dm_free(&dst);
1532
1533e_src:
1534	ccp_dm_free(&src);
1535
1536	return ret;
1537}
1538
1539static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1540{
1541	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1542	struct ccp_dm_workarea src, dst;
1543	struct ccp_op op;
1544	int ret;
1545	u8 *save;
1546
1547	if (!ecc->u.pm.point_1.x ||
1548	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1549	    !ecc->u.pm.point_1.y ||
1550	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1551		return -EINVAL;
1552
1553	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1554		if (!ecc->u.pm.point_2.x ||
1555		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1556		    !ecc->u.pm.point_2.y ||
1557		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1558			return -EINVAL;
1559	} else {
1560		if (!ecc->u.pm.domain_a ||
1561		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1562			return -EINVAL;
1563
1564		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1565			if (!ecc->u.pm.scalar ||
1566			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1567				return -EINVAL;
1568	}
1569
1570	if (!ecc->u.pm.result.x ||
1571	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1572	    !ecc->u.pm.result.y ||
1573	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1574		return -EINVAL;
1575
1576	memset(&op, 0, sizeof(op));
1577	op.cmd_q = cmd_q;
1578	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1579
1580	/* Concatenate the modulus and the operands. Both the modulus and
1581	 * the operands must be in little endian format.  Since the input
1582	 * is in big endian format it must be converted and placed in a
1583	 * fixed length buffer.
1584	 */
1585	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1586				   DMA_TO_DEVICE);
1587	if (ret)
1588		return ret;
1589
1590	/* Save the workarea address since it is updated in order to perform
1591	 * the concatenation
1592	 */
1593	save = src.address;
1594
1595	/* Copy the ECC modulus */
1596	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1597				      CCP_ECC_OPERAND_SIZE, false);
1598	if (ret)
1599		goto e_src;
1600	src.address += CCP_ECC_OPERAND_SIZE;
1601
1602	/* Copy the first point X and Y coordinate */
1603	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1604				      ecc->u.pm.point_1.x_len,
1605				      CCP_ECC_OPERAND_SIZE, false);
1606	if (ret)
1607		goto e_src;
1608	src.address += CCP_ECC_OPERAND_SIZE;
1609	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1610				      ecc->u.pm.point_1.y_len,
1611				      CCP_ECC_OPERAND_SIZE, false);
1612	if (ret)
1613		goto e_src;
1614	src.address += CCP_ECC_OPERAND_SIZE;
1615
 1616	/* Set the first point Z coordinate to 1 */
1617	*src.address = 0x01;
1618	src.address += CCP_ECC_OPERAND_SIZE;
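	/* Note (editorial): setting Z = 1 presents the affine point (x, y) in
	 * projective form (X, Y, Z) = (x, y, 1), which appears to be the form
	 * the ECC engine operates on; point_2 below is handled the same way.
	 */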
1619
1620	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1621		/* Copy the second point X and Y coordinate */
1622		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1623					      ecc->u.pm.point_2.x_len,
1624					      CCP_ECC_OPERAND_SIZE, false);
1625		if (ret)
1626			goto e_src;
1627		src.address += CCP_ECC_OPERAND_SIZE;
1628		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1629					      ecc->u.pm.point_2.y_len,
1630					      CCP_ECC_OPERAND_SIZE, false);
1631		if (ret)
1632			goto e_src;
1633		src.address += CCP_ECC_OPERAND_SIZE;
1634
 1635		/* Set the second point Z coordinate to 1 */
1636		*src.address = 0x01;
1637		src.address += CCP_ECC_OPERAND_SIZE;
1638	} else {
1639		/* Copy the Domain "a" parameter */
1640		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1641					      ecc->u.pm.domain_a_len,
1642					      CCP_ECC_OPERAND_SIZE, false);
1643		if (ret)
1644			goto e_src;
1645		src.address += CCP_ECC_OPERAND_SIZE;
1646
1647		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
1648			/* Copy the scalar value */
1649			ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
1650						      ecc->u.pm.scalar_len,
1651						      CCP_ECC_OPERAND_SIZE,
1652						      false);
1653			if (ret)
1654				goto e_src;
1655			src.address += CCP_ECC_OPERAND_SIZE;
1656		}
1657	}
1658
1659	/* Restore the workarea address */
1660	src.address = save;
1661
1662	/* Prepare the output area for the operation */
1663	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1664				   DMA_FROM_DEVICE);
1665	if (ret)
1666		goto e_src;
1667
1668	op.soc = 1;
1669	op.src.u.dma.address = src.dma.address;
1670	op.src.u.dma.offset = 0;
1671	op.src.u.dma.length = src.length;
1672	op.dst.u.dma.address = dst.dma.address;
1673	op.dst.u.dma.offset = 0;
1674	op.dst.u.dma.length = dst.length;
1675
1676	op.u.ecc.function = cmd->u.ecc.function;
1677
1678	ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
1679	if (ret) {
1680		cmd->engine_error = cmd_q->cmd_error;
1681		goto e_dst;
1682	}
1683
1684	ecc->ecc_result = le16_to_cpup(
1685		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1686	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1687		ret = -EIO;
1688		goto e_dst;
1689	}
1690
1691	/* Save the workarea address since it is updated as we walk through
1692	 * to copy the point math result
1693	 */
1694	save = dst.address;
1695
1696	/* Save the ECC result X and Y coordinates */
1697	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
1698				CCP_ECC_MODULUS_BYTES);
1699	dst.address += CCP_ECC_OUTPUT_SIZE;
1700	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
1701				CCP_ECC_MODULUS_BYTES);
1702	dst.address += CCP_ECC_OUTPUT_SIZE;
1703
1704	/* Restore the workarea address */
1705	dst.address = save;
1706
1707e_dst:
1708	ccp_dm_free(&dst);
1709
1710e_src:
1711	ccp_dm_free(&src);
1712
1713	return ret;
1714}
1715
1716static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1717{
1718	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1719
1720	ecc->ecc_result = 0;
1721
1722	if (!ecc->mod ||
1723	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1724		return -EINVAL;
1725
1726	switch (ecc->function) {
1727	case CCP_ECC_FUNCTION_MMUL_384BIT:
1728	case CCP_ECC_FUNCTION_MADD_384BIT:
1729	case CCP_ECC_FUNCTION_MINV_384BIT:
1730		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
1731
1732	case CCP_ECC_FUNCTION_PADD_384BIT:
1733	case CCP_ECC_FUNCTION_PMUL_384BIT:
1734	case CCP_ECC_FUNCTION_PDBL_384BIT:
1735		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1736
1737	default:
1738		return -EINVAL;
1739	}
1740}
1741
1742int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1743{
1744	int ret;
1745
1746	cmd->engine_error = 0;
1747	cmd_q->cmd_error = 0;
1748	cmd_q->int_rcvd = 0;
1749	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
1750
1751	switch (cmd->engine) {
1752	case CCP_ENGINE_AES:
1753		ret = ccp_run_aes_cmd(cmd_q, cmd);
1754		break;
1755	case CCP_ENGINE_XTS_AES_128:
1756		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
1757		break;
1758	case CCP_ENGINE_SHA:
1759		ret = ccp_run_sha_cmd(cmd_q, cmd);
1760		break;
1761	case CCP_ENGINE_RSA:
1762		ret = ccp_run_rsa_cmd(cmd_q, cmd);
1763		break;
1764	case CCP_ENGINE_PASSTHRU:
1765		ret = ccp_run_passthru_cmd(cmd_q, cmd);
1766		break;
1767	case CCP_ENGINE_ECC:
1768		ret = ccp_run_ecc_cmd(cmd_q, cmd);
1769		break;
1770	default:
1771		ret = -EINVAL;
1772	}
1773
1774	return ret;
1775}
v3.15
   1/*
   2 * AMD Cryptographic Coprocessor (CCP) driver
   3 *
   4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
   5 *
   6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License version 2 as
  10 * published by the Free Software Foundation.
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/kernel.h>
  15#include <linux/pci.h>
  16#include <linux/pci_ids.h>
  17#include <linux/kthread.h>
  18#include <linux/sched.h>
  19#include <linux/interrupt.h>
  20#include <linux/spinlock.h>
  21#include <linux/mutex.h>
  22#include <linux/delay.h>
  23#include <linux/ccp.h>
  24#include <linux/scatterlist.h>
  25#include <crypto/scatterwalk.h>
  26#include <crypto/sha.h>
  27
  28#include "ccp-dev.h"
  29
  30
  31enum ccp_memtype {
  32	CCP_MEMTYPE_SYSTEM = 0,
  33	CCP_MEMTYPE_KSB,
  34	CCP_MEMTYPE_LOCAL,
  35	CCP_MEMTYPE__LAST,
  36};
  37
  38struct ccp_dma_info {
  39	dma_addr_t address;
  40	unsigned int offset;
  41	unsigned int length;
  42	enum dma_data_direction dir;
  43};
  44
  45struct ccp_dm_workarea {
  46	struct device *dev;
  47	struct dma_pool *dma_pool;
  48	unsigned int length;
  49
  50	u8 *address;
  51	struct ccp_dma_info dma;
  52};
  53
  54struct ccp_sg_workarea {
  55	struct scatterlist *sg;
  56	unsigned int nents;
  57	unsigned int length;
  58
  59	struct scatterlist *dma_sg;
  60	struct device *dma_dev;
  61	unsigned int dma_count;
  62	enum dma_data_direction dma_dir;
  63
  64	unsigned int sg_used;
  65
  66	u64 bytes_left;
  67};
  68
  69struct ccp_data {
  70	struct ccp_sg_workarea sg_wa;
  71	struct ccp_dm_workarea dm_wa;
  72};
  73
  74struct ccp_mem {
  75	enum ccp_memtype type;
  76	union {
  77		struct ccp_dma_info dma;
  78		u32 ksb;
  79	} u;
  80};
  81
  82struct ccp_aes_op {
  83	enum ccp_aes_type type;
  84	enum ccp_aes_mode mode;
  85	enum ccp_aes_action action;
  86};
  87
  88struct ccp_xts_aes_op {
  89	enum ccp_aes_action action;
  90	enum ccp_xts_aes_unit_size unit_size;
  91};
  92
  93struct ccp_sha_op {
  94	enum ccp_sha_type type;
  95	u64 msg_bits;
  96};
  97
  98struct ccp_rsa_op {
  99	u32 mod_size;
 100	u32 input_len;
 101};
 102
 103struct ccp_passthru_op {
 104	enum ccp_passthru_bitwise bit_mod;
 105	enum ccp_passthru_byteswap byte_swap;
 106};
 107
 108struct ccp_ecc_op {
 109	enum ccp_ecc_function function;
 110};
 111
 112struct ccp_op {
 113	struct ccp_cmd_queue *cmd_q;
 114
 115	u32 jobid;
 116	u32 ioc;
 117	u32 soc;
 118	u32 ksb_key;
 119	u32 ksb_ctx;
 120	u32 init;
 121	u32 eom;
 122
 123	struct ccp_mem src;
 124	struct ccp_mem dst;
 125
 126	union {
 127		struct ccp_aes_op aes;
 128		struct ccp_xts_aes_op xts;
 129		struct ccp_sha_op sha;
 130		struct ccp_rsa_op rsa;
 131		struct ccp_passthru_op passthru;
 132		struct ccp_ecc_op ecc;
 133	} u;
 134};
 135
 136/* SHA initial context values */
 137static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
 138	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
 139	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
 140	cpu_to_be32(SHA1_H4), 0, 0, 0,
 141};
 142
 143static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
 144	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
 145	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
 146	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
 147	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
 148};
 149
 150static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
 151	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
 152	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
 153	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
 154	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
 155};
 156
 157/* The CCP cannot perform zero-length sha operations so the caller
 158 * is required to buffer data for the final operation.  However, a
 159 * sha operation for a message with a total length of zero is valid
 160 * so known values are required to supply the result.
 161 */
 162static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
 163	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
 164	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
 165	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
 166	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 167};
 168
 169static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
 170	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
 171	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
 172	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
 173	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
 174};
 175
 176static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
 177	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
 178	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
 179	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
 180	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
 181};
 182
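/* The CCP request registers take a DMA address split into a low
 * 32-bit word and an upper word; only 16 upper bits are kept, so the
 * engine presumably addresses at most 48 bits of system memory.
 */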
 183static u32 ccp_addr_lo(struct ccp_dma_info *info)
 184{
 185	return lower_32_bits(info->address + info->offset);
 186}
 187
 188static u32 ccp_addr_hi(struct ccp_dma_info *info)
 189{
 190	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
 191}
 192
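/* ccp_do_cmd() submits one request to the hardware: the REQ1..REQ6
 * register images in cr[] are written under req_mutex, then writing
 * REQ0 kicks off the command.  When an interrupt on completion was
 * requested, the caller sleeps on the queue's int_queue and deletes
 * the job(s) from the queue on error.
 */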
 193static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
 194{
 195	struct ccp_cmd_queue *cmd_q = op->cmd_q;
 196	struct ccp_device *ccp = cmd_q->ccp;
 197	void __iomem *cr_addr;
 198	u32 cr0, cmd;
 199	unsigned int i;
 200	int ret = 0;
 201
 202	/* We could read a status register to see how many free slots
 203	 * are actually available, but reading that register resets it
 204	 * and you could lose some error information.
 205	 */
 206	cmd_q->free_slots--;
 207
 208	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
 209	      | (op->jobid << REQ0_JOBID_SHIFT)
 210	      | REQ0_WAIT_FOR_WRITE;
 211
 212	if (op->soc)
 213		cr0 |= REQ0_STOP_ON_COMPLETE
 214		       | REQ0_INT_ON_COMPLETE;
 215
 216	if (op->ioc || !cmd_q->free_slots)
 217		cr0 |= REQ0_INT_ON_COMPLETE;
 218
 219	/* Start at CMD_REQ1 */
 220	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
 221
 222	mutex_lock(&ccp->req_mutex);
 223
 224	/* Write CMD_REQ1 through CMD_REQx first */
 225	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
 226		iowrite32(*(cr + i), cr_addr);
 227
 228	/* Tell the CCP to start */
 229	wmb();
 230	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
 231
 232	mutex_unlock(&ccp->req_mutex);
 233
 234	if (cr0 & REQ0_INT_ON_COMPLETE) {
 235		/* Wait for the job to complete */
 236		ret = wait_event_interruptible(cmd_q->int_queue,
 237					       cmd_q->int_rcvd);
 238		if (ret || cmd_q->cmd_error) {
 239			/* On error delete all related jobs from the queue */
 240			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
 241			      | op->jobid;
 242
 243			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
 244
 245			if (!ret)
 246				ret = -EIO;
 247		} else if (op->soc) {
 248			/* Delete just head job from the queue on SoC */
 249			cmd = DEL_Q_ACTIVE
 250			      | (cmd_q->id << DEL_Q_ID_SHIFT)
 251			      | op->jobid;
 252
 253			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
 254		}
 255
 256		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
 257
 258		cmd_q->int_rcvd = 0;
 259	}
 260
 261	return ret;
 262}
 263
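/* Each ccp_perform_*() helper below only packs the engine-specific
 * REQ1..REQ6 register image for one operation and hands it to
 * ccp_do_cmd(); all data movement is described by the ccp_op fields.
 */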
 264static int ccp_perform_aes(struct ccp_op *op)
 265{
 266	u32 cr[6];
 267
 268	/* Fill out the register contents for REQ1 through REQ6 */
 269	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
 270		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
 271		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
 272		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
 273		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
 274	cr[1] = op->src.u.dma.length - 1;
 275	cr[2] = ccp_addr_lo(&op->src.u.dma);
 276	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
 277		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 278		| ccp_addr_hi(&op->src.u.dma);
 279	cr[4] = ccp_addr_lo(&op->dst.u.dma);
 280	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
 281		| ccp_addr_hi(&op->dst.u.dma);
 282
 283	if (op->u.aes.mode == CCP_AES_MODE_CFB)
 284		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
 285
 286	if (op->eom)
 287		cr[0] |= REQ1_EOM;
 288
 289	if (op->init)
 290		cr[0] |= REQ1_INIT;
 291
 292	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 293}
 294
 295static int ccp_perform_xts_aes(struct ccp_op *op)
 296{
 297	u32 cr[6];
 298
 299	/* Fill out the register contents for REQ1 through REQ6 */
 300	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
 301		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
 302		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
 303		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
 304	cr[1] = op->src.u.dma.length - 1;
 305	cr[2] = ccp_addr_lo(&op->src.u.dma);
 306	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
 307		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 308		| ccp_addr_hi(&op->src.u.dma);
 309	cr[4] = ccp_addr_lo(&op->dst.u.dma);
 310	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
 311		| ccp_addr_hi(&op->dst.u.dma);
 312
 313	if (op->eom)
 314		cr[0] |= REQ1_EOM;
 315
 316	if (op->init)
 317		cr[0] |= REQ1_INIT;
 318
 319	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 320}
 321
 322static int ccp_perform_sha(struct ccp_op *op)
 323{
 324	u32 cr[6];
 325
 326	/* Fill out the register contents for REQ1 through REQ6 */
 327	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
 328		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
 329		| REQ1_INIT;
 330	cr[1] = op->src.u.dma.length - 1;
 331	cr[2] = ccp_addr_lo(&op->src.u.dma);
 332	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
 333		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 334		| ccp_addr_hi(&op->src.u.dma);
 335
 336	if (op->eom) {
 337		cr[0] |= REQ1_EOM;
 338		cr[4] = lower_32_bits(op->u.sha.msg_bits);
 339		cr[5] = upper_32_bits(op->u.sha.msg_bits);
 340	} else {
 341		cr[4] = 0;
 342		cr[5] = 0;
 343	}
 344
 345	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 346}
 347
 348static int ccp_perform_rsa(struct ccp_op *op)
 349{
 350	u32 cr[6];
 351
 352	/* Fill out the register contents for REQ1 through REQ6 */
 353	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
 354		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
 355		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
 356		| REQ1_EOM;
 357	cr[1] = op->u.rsa.input_len - 1;
 358	cr[2] = ccp_addr_lo(&op->src.u.dma);
 359	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
 360		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 361		| ccp_addr_hi(&op->src.u.dma);
 362	cr[4] = ccp_addr_lo(&op->dst.u.dma);
 363	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
 364		| ccp_addr_hi(&op->dst.u.dma);
 365
 366	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 367}
 368
 369static int ccp_perform_passthru(struct ccp_op *op)
 370{
 371	u32 cr[6];
 372
 373	/* Fill out the register contents for REQ1 through REQ6 */
 374	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
 375		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
 376		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
 377
 378	if (op->src.type == CCP_MEMTYPE_SYSTEM)
 379		cr[1] = op->src.u.dma.length - 1;
 380	else
 381		cr[1] = op->dst.u.dma.length - 1;
 382
 383	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
 384		cr[2] = ccp_addr_lo(&op->src.u.dma);
 385		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 386			| ccp_addr_hi(&op->src.u.dma);
 387
 388		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
 389			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
 390	} else {
 391		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
 392		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
 393	}
 394
 395	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
 396		cr[4] = ccp_addr_lo(&op->dst.u.dma);
 397		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
 398			| ccp_addr_hi(&op->dst.u.dma);
 399	} else {
 400		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
 401		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
 402	}
 403
 404	if (op->eom)
 405		cr[0] |= REQ1_EOM;
 406
 407	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 408}
 409
 410static int ccp_perform_ecc(struct ccp_op *op)
 411{
 412	u32 cr[6];
 413
 414	/* Fill out the register contents for REQ1 through REQ6 */
 415	cr[0] = REQ1_ECC_AFFINE_CONVERT
 416		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
 417		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
 418		| REQ1_EOM;
 419	cr[1] = op->src.u.dma.length - 1;
 420	cr[2] = ccp_addr_lo(&op->src.u.dma);
 421	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 422		| ccp_addr_hi(&op->src.u.dma);
 423	cr[4] = ccp_addr_lo(&op->dst.u.dma);
 424	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
 425		| ccp_addr_hi(&op->dst.u.dma);
 426
 427	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 428}
 429
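/* KSB (key storage block) entries are handed out from a shared bitmap.
 * Callers sleep until 'count' contiguous entries are free; a return of
 * 0 signals an interrupted wait, which is why ccp_free_ksb() treats a
 * start of 0 as nothing to free.
 */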
 430static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
 431{
 432	int start;
 433
 434	for (;;) {
 435		mutex_lock(&ccp->ksb_mutex);
 436
 437		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
 438							ccp->ksb_count,
 439							ccp->ksb_start,
 440							count, 0);
 441		if (start <= ccp->ksb_count) {
 442			bitmap_set(ccp->ksb, start, count);
 443
 444			mutex_unlock(&ccp->ksb_mutex);
 445			break;
 446		}
 447
 448		ccp->ksb_avail = 0;
 449
 450		mutex_unlock(&ccp->ksb_mutex);
 451
 452		/* Wait for KSB entries to become available */
 453		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
 454			return 0;
 455	}
 456
 457	return KSB_START + start;
 458}
 459
 460static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
 461			 unsigned int count)
 462{
 463	if (!start)
 464		return;
 465
 466	mutex_lock(&ccp->ksb_mutex);
 467
 468	bitmap_clear(ccp->ksb, start - KSB_START, count);
 469
 470	ccp->ksb_avail = 1;
 471
 472	mutex_unlock(&ccp->ksb_mutex);
 473
 474	wake_up_interruptible_all(&ccp->ksb_queue);
 475}
 476
 477static u32 ccp_gen_jobid(struct ccp_device *ccp)
 478{
 479	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
 480}
 481
 482static void ccp_sg_free(struct ccp_sg_workarea *wa)
 483{
 484	if (wa->dma_count)
 485		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
 486
 487	wa->dma_count = 0;
 488}
 489
 490static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
 491				struct scatterlist *sg, u64 len,
 492				enum dma_data_direction dma_dir)
 493{
 494	memset(wa, 0, sizeof(*wa));
 495
 496	wa->sg = sg;
 497	if (!sg)
 498		return 0;
 499
 500	wa->nents = sg_nents(sg);
 501	wa->length = sg->length;
 502	wa->bytes_left = len;
 503	wa->sg_used = 0;
 504
 505	if (len == 0)
 506		return 0;
 507
 508	if (dma_dir == DMA_NONE)
 509		return 0;
 510
 511	wa->dma_sg = sg;
 512	wa->dma_dev = dev;
 513	wa->dma_dir = dma_dir;
 514	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
 515	if (!wa->dma_count)
 516		return -ENOMEM;
 517
 518
 519	return 0;
 520}
 521
 522static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
 523{
 524	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
 525
 526	if (!wa->sg)
 527		return;
 528
 529	wa->sg_used += nbytes;
 530	wa->bytes_left -= nbytes;
 531	if (wa->sg_used == wa->sg->length) {
 532		wa->sg = sg_next(wa->sg);
 533		wa->sg_used = 0;
 534	}
 535}
 536
 537static void ccp_dm_free(struct ccp_dm_workarea *wa)
 538{
 539	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
 540		if (wa->address)
 541			dma_pool_free(wa->dma_pool, wa->address,
 542				      wa->dma.address);
 543	} else {
 544		if (wa->dma.address)
 545			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
 546					 wa->dma.dir);
 547		kfree(wa->address);
 548	}
 549
 550	wa->address = NULL;
 551	wa->dma.address = 0;
 552}
 553
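/* DM work areas provide a driver-owned bounce buffer: requests up to
 * CCP_DMAPOOL_MAX_SIZE come from the per-queue DMA pool, anything
 * larger is kzalloc'd and streaming-mapped for the given direction.
 */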
 554static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
 555				struct ccp_cmd_queue *cmd_q,
 556				unsigned int len,
 557				enum dma_data_direction dir)
 558{
 559	memset(wa, 0, sizeof(*wa));
 560
 561	if (!len)
 562		return 0;
 563
 564	wa->dev = cmd_q->ccp->dev;
 565	wa->length = len;
 566
 567	if (len <= CCP_DMAPOOL_MAX_SIZE) {
 568		wa->dma_pool = cmd_q->dma_pool;
 569
 570		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
 571					     &wa->dma.address);
 572		if (!wa->address)
 573			return -ENOMEM;
 574
 575		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
 576
 577		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
 578	} else {
 579		wa->address = kzalloc(len, GFP_KERNEL);
 580		if (!wa->address)
 581			return -ENOMEM;
 582
 583		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
 584						 dir);
 585		if (!wa->dma.address)
 586			return -ENOMEM;
 587
 588		wa->dma.length = len;
 589	}
 590	wa->dma.dir = dir;
 591
 592	return 0;
 593}
 594
 595static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
 596			    struct scatterlist *sg, unsigned int sg_offset,
 597			    unsigned int len)
 598{
 599	WARN_ON(!wa->address);
 600
 601	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
 602				 0);
 603}
 604
 605static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
 606			    struct scatterlist *sg, unsigned int sg_offset,
 607			    unsigned int len)
 608{
 609	WARN_ON(!wa->address);
 610
 611	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
 612				 1);
 613}
 614
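/* Reverse-copy helpers: the CCP expects multi-precision operands in
 * little endian, so the big-endian caller data is copied se_len bytes
 * at a time starting from the end of the scatterlist, byte-reversing
 * each chunk into the work area (and vice versa on the way out).
 */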
 615static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
 616				    struct scatterlist *sg,
 617				    unsigned int len, unsigned int se_len,
 618				    bool sign_extend)
 619{
 620	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
 621	u8 buffer[CCP_REVERSE_BUF_SIZE];
 622
 623	BUG_ON(se_len > sizeof(buffer));
 624
 625	sg_offset = len;
 626	dm_offset = 0;
 627	nbytes = len;
 628	while (nbytes) {
 629		ksb_len = min_t(unsigned int, nbytes, se_len);
 630		sg_offset -= ksb_len;
 631
 632		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
 633		for (i = 0; i < ksb_len; i++)
 634			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
 635
 636		dm_offset += ksb_len;
 637		nbytes -= ksb_len;
 638
 639		if ((ksb_len != se_len) && sign_extend) {
 640			/* Must sign-extend to nearest sign-extend length */
 641			if (wa->address[dm_offset - 1] & 0x80)
 642				memset(wa->address + dm_offset, 0xff,
 643				       se_len - ksb_len);
 644		}
 645	}
 646}
 647
 648static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
 649				    struct scatterlist *sg,
 650				    unsigned int len)
 651{
 652	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
 653	u8 buffer[CCP_REVERSE_BUF_SIZE];
 654
 655	sg_offset = 0;
 656	dm_offset = len;
 657	nbytes = len;
 658	while (nbytes) {
 659		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
 660		dm_offset -= ksb_len;
 661
 662		for (i = 0; i < ksb_len; i++)
 663			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
 664		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
 665
 666		sg_offset += ksb_len;
 667		nbytes -= ksb_len;
 668	}
 669}
 670
 671static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
 672{
 673	ccp_dm_free(&data->dm_wa);
 674	ccp_sg_free(&data->sg_wa);
 675}
 676
 677static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
 678			 struct scatterlist *sg, u64 sg_len,
 679			 unsigned int dm_len,
 680			 enum dma_data_direction dir)
 681{
 682	int ret;
 683
 684	memset(data, 0, sizeof(*data));
 685
 686	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
 687				   dir);
 688	if (ret)
 689		goto e_err;
 690
 691	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
 692	if (ret)
 693		goto e_err;
 694
 695	return 0;
 696
 697e_err:
 698	ccp_free_data(data, cmd_q);
 699
 700	return ret;
 701}
 702
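/* ccp_queue_buf() moves up to dm_wa->length bytes between the current
 * scatterlist position and the bounce buffer: from == 0 fills the
 * buffer with source data, from == 1 drains previously produced
 * output back to the destination scatterlist.
 */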
 703static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
 704{
 705	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
 706	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
 707	unsigned int buf_count, nbytes;
 708
 709	/* Clear the buffer if setting it */
 710	if (!from)
 711		memset(dm_wa->address, 0, dm_wa->length);
 712
 713	if (!sg_wa->sg)
 714		return 0;
 715
 716	/* Perform the copy operation
 717	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
 718	 *   an unsigned int
 719	 */
 720	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
 721	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
 722				 nbytes, from);
 723
 724	/* Update the structures and generate the count */
 725	buf_count = 0;
 726	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
 727		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
 728			     dm_wa->length - buf_count);
 729		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
 730
 731		buf_count += nbytes;
 732		ccp_update_sg_workarea(sg_wa, nbytes);
 733	}
 734
 735	return buf_count;
 736}
 737
 738static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
 739{
 740	return ccp_queue_buf(data, 0);
 741}
 742
 743static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
 744{
 745	return ccp_queue_buf(data, 1);
 746}
 747
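/* ccp_prepare_data() picks the source/destination DMA window for the
 * next operation.  If the current sg element holds less than one
 * block, the data is staged through the dm_wa bounce buffer and
 * op->soc is set so the engine stops on completion before the buffer
 * is reused.
 */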
 748static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
 749			     struct ccp_op *op, unsigned int block_size,
 750			     bool blocksize_op)
 751{
 752	unsigned int sg_src_len, sg_dst_len, op_len;
 753
 754	/* The CCP can only DMA from/to one address each per operation. This
 755	 * requires that we find the smallest DMA area between the source
 756	 * and destination. The resulting len values will always be <= UINT_MAX
 757	 * because the dma length is an unsigned int.
 758	 */
 759	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
 760	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
 761
 762	if (dst) {
 763		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
 764		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
 765		op_len = min(sg_src_len, sg_dst_len);
 766	} else
 767		op_len = sg_src_len;
 768
 769	/* The data operation length will be at least block_size in length
 770	 * or the smaller of available sg room remaining for the source or
 771	 * the destination
 772	 */
 773	op_len = max(op_len, block_size);
 774
 775	/* Unless we have to buffer data, there's no reason to wait */
 776	op->soc = 0;
 777
 778	if (sg_src_len < block_size) {
 779		/* Not enough data in the sg element, so it
 780		 * needs to be buffered into a blocksize chunk
 781		 */
 782		int cp_len = ccp_fill_queue_buf(src);
 783
 784		op->soc = 1;
 785		op->src.u.dma.address = src->dm_wa.dma.address;
 786		op->src.u.dma.offset = 0;
 787		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
 788	} else {
 789		/* Enough data in the sg element, but we need to
 790		 * adjust for any previously copied data
 791		 */
 792		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
 793		op->src.u.dma.offset = src->sg_wa.sg_used;
 794		op->src.u.dma.length = op_len & ~(block_size - 1);
 795
 796		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
 797	}
 798
 799	if (dst) {
 800		if (sg_dst_len < block_size) {
 801			/* Not enough room in the sg element or we're on the
 802			 * last piece of data (when using padding), so the
 803			 * output needs to be buffered into a blocksize chunk
 804			 */
 805			op->soc = 1;
 806			op->dst.u.dma.address = dst->dm_wa.dma.address;
 807			op->dst.u.dma.offset = 0;
 808			op->dst.u.dma.length = op->src.u.dma.length;
 809		} else {
 810			/* Enough room in the sg element, but we need to
 811			 * adjust for any previously used area
 812			 */
 813			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
 814			op->dst.u.dma.offset = dst->sg_wa.sg_used;
 815			op->dst.u.dma.length = op->src.u.dma.length;
 816		}
 817	}
 818}
 819
 820static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
 821			     struct ccp_op *op)
 822{
 823	op->init = 0;
 824
 825	if (dst) {
 826		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
 827			ccp_empty_queue_buf(dst);
 828		else
 829			ccp_update_sg_workarea(&dst->sg_wa,
 830					       op->dst.u.dma.length);
 831	}
 832}
 833
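/* Loading or retrieving key/context material is done with a small
 * passthru operation between a DM work area and a KSB entry; "from"
 * selects the direction and byte_swap optionally converts endianness
 * in flight.
 */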
 834static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
 835				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
 836				u32 byte_swap, bool from)
 837{
 838	struct ccp_op op;
 839
 840	memset(&op, 0, sizeof(op));
 841
 842	op.cmd_q = cmd_q;
 843	op.jobid = jobid;
 844	op.eom = 1;
 845
 846	if (from) {
 847		op.soc = 1;
 848		op.src.type = CCP_MEMTYPE_KSB;
 849		op.src.u.ksb = ksb;
 850		op.dst.type = CCP_MEMTYPE_SYSTEM;
 851		op.dst.u.dma.address = wa->dma.address;
 852		op.dst.u.dma.length = wa->length;
 853	} else {
 854		op.src.type = CCP_MEMTYPE_SYSTEM;
 855		op.src.u.dma.address = wa->dma.address;
 856		op.src.u.dma.length = wa->length;
 857		op.dst.type = CCP_MEMTYPE_KSB;
 858		op.dst.u.ksb = ksb;
 859	}
 860
 861	op.u.passthru.byte_swap = byte_swap;
 862
 863	return ccp_perform_passthru(&op);
 864}
 865
 866static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
 867			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
 868			   u32 byte_swap)
 869{
 870	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
 871}
 872
 873static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
 874			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
 875			     u32 byte_swap)
 876{
 877	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
 878}
 879
 880static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 881				struct ccp_cmd *cmd)
 882{
 883	struct ccp_aes_engine *aes = &cmd->u.aes;
 884	struct ccp_dm_workarea key, ctx;
 885	struct ccp_data src;
 886	struct ccp_op op;
 887	unsigned int dm_offset;
 888	int ret;
 889
 890	if (!((aes->key_len == AES_KEYSIZE_128) ||
 891	      (aes->key_len == AES_KEYSIZE_192) ||
 892	      (aes->key_len == AES_KEYSIZE_256)))
 893		return -EINVAL;
 894
 895	if (aes->src_len & (AES_BLOCK_SIZE - 1))
 896		return -EINVAL;
 897
 898	if (aes->iv_len != AES_BLOCK_SIZE)
 899		return -EINVAL;
 900
 901	if (!aes->key || !aes->iv || !aes->src)
 902		return -EINVAL;
 903
 904	if (aes->cmac_final) {
 905		if (aes->cmac_key_len != AES_BLOCK_SIZE)
 906			return -EINVAL;
 907
 908		if (!aes->cmac_key)
 909			return -EINVAL;
 910	}
 911
 912	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
 913	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
 914
 915	ret = -EIO;
 916	memset(&op, 0, sizeof(op));
 917	op.cmd_q = cmd_q;
 918	op.jobid = ccp_gen_jobid(cmd_q->ccp);
 919	op.ksb_key = cmd_q->ksb_key;
 920	op.ksb_ctx = cmd_q->ksb_ctx;
 921	op.init = 1;
 922	op.u.aes.type = aes->type;
 923	op.u.aes.mode = aes->mode;
 924	op.u.aes.action = aes->action;
 925
 926	/* All supported key sizes fit in a single (32-byte) KSB entry
 927	 * and must be in little endian format. Use the 256-bit byte
 928	 * swap passthru option to convert from big endian to little
 929	 * endian.
 930	 */
 931	ret = ccp_init_dm_workarea(&key, cmd_q,
 932				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
 933				   DMA_TO_DEVICE);
 934	if (ret)
 935		return ret;
 936
 937	dm_offset = CCP_KSB_BYTES - aes->key_len;
 938	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
 939	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
 940			      CCP_PASSTHRU_BYTESWAP_256BIT);
 941	if (ret) {
 942		cmd->engine_error = cmd_q->cmd_error;
 943		goto e_key;
 944	}
 945
 946	/* The AES context fits in a single (32-byte) KSB entry and
 947	 * must be in little endian format. Use the 256-bit byte swap
 948	 * passthru option to convert from big endian to little endian.
 949	 */
 950	ret = ccp_init_dm_workarea(&ctx, cmd_q,
 951				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
 952				   DMA_BIDIRECTIONAL);
 953	if (ret)
 954		goto e_key;
 955
 956	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
 957	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 958	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 959			      CCP_PASSTHRU_BYTESWAP_256BIT);
 960	if (ret) {
 961		cmd->engine_error = cmd_q->cmd_error;
 962		goto e_ctx;
 963	}
 964
 965	/* Send data to the CCP AES engine */
 966	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
 967			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
 968	if (ret)
 969		goto e_ctx;
 970
 971	while (src.sg_wa.bytes_left) {
 972		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
 973		if (aes->cmac_final && !src.sg_wa.bytes_left) {
 974			op.eom = 1;
 975
 976			/* Push the K1/K2 key to the CCP now */
 977			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
 978						op.ksb_ctx,
 979						CCP_PASSTHRU_BYTESWAP_256BIT);
 980			if (ret) {
 981				cmd->engine_error = cmd_q->cmd_error;
 982				goto e_src;
 983			}
 984
 985			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
 986					aes->cmac_key_len);
 987			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 988					      CCP_PASSTHRU_BYTESWAP_256BIT);
 989			if (ret) {
 990				cmd->engine_error = cmd_q->cmd_error;
 991				goto e_src;
 992			}
 993		}
 994
 995		ret = ccp_perform_aes(&op);
 996		if (ret) {
 997			cmd->engine_error = cmd_q->cmd_error;
 998			goto e_src;
 999		}
1000
1001		ccp_process_data(&src, NULL, &op);
1002	}
1003
1004	/* Retrieve the AES context - convert from LE to BE using
1005	 * 32-byte (256-bit) byteswapping
1006	 */
1007	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1008				CCP_PASSTHRU_BYTESWAP_256BIT);
1009	if (ret) {
1010		cmd->engine_error = cmd_q->cmd_error;
1011		goto e_src;
1012	}
1013
1014	/* ...but we only need AES_BLOCK_SIZE bytes */
1015	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1016	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1017
1018e_src:
1019	ccp_free_data(&src, cmd_q);
1020
1021e_ctx:
1022	ccp_dm_free(&ctx);
1023
1024e_key:
1025	ccp_dm_free(&key);
1026
1027	return ret;
1028}
1029
1030static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1031{
1032	struct ccp_aes_engine *aes = &cmd->u.aes;
1033	struct ccp_dm_workarea key, ctx;
1034	struct ccp_data src, dst;
1035	struct ccp_op op;
1036	unsigned int dm_offset;
1037	bool in_place = false;
1038	int ret;
1039
1040	if (aes->mode == CCP_AES_MODE_CMAC)
1041		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1042
1043	if (!((aes->key_len == AES_KEYSIZE_128) ||
1044	      (aes->key_len == AES_KEYSIZE_192) ||
1045	      (aes->key_len == AES_KEYSIZE_256)))
1046		return -EINVAL;
1047
1048	if (((aes->mode == CCP_AES_MODE_ECB) ||
1049	     (aes->mode == CCP_AES_MODE_CBC) ||
1050	     (aes->mode == CCP_AES_MODE_CFB)) &&
1051	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1052		return -EINVAL;
1053
1054	if (!aes->key || !aes->src || !aes->dst)
1055		return -EINVAL;
1056
1057	if (aes->mode != CCP_AES_MODE_ECB) {
1058		if (aes->iv_len != AES_BLOCK_SIZE)
1059			return -EINVAL;
1060
1061		if (!aes->iv)
1062			return -EINVAL;
1063	}
1064
1065	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1066	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1067
1068	ret = -EIO;
1069	memset(&op, 0, sizeof(op));
1070	op.cmd_q = cmd_q;
1071	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1072	op.ksb_key = cmd_q->ksb_key;
1073	op.ksb_ctx = cmd_q->ksb_ctx;
1074	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1075	op.u.aes.type = aes->type;
1076	op.u.aes.mode = aes->mode;
1077	op.u.aes.action = aes->action;
1078
1079	/* All supported key sizes fit in a single (32-byte) KSB entry
1080	 * and must be in little endian format. Use the 256-bit byte
1081	 * swap passthru option to convert from big endian to little
1082	 * endian.
1083	 */
1084	ret = ccp_init_dm_workarea(&key, cmd_q,
1085				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1086				   DMA_TO_DEVICE);
1087	if (ret)
1088		return ret;
1089
1090	dm_offset = CCP_KSB_BYTES - aes->key_len;
1091	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1092	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1093			      CCP_PASSTHRU_BYTESWAP_256BIT);
1094	if (ret) {
1095		cmd->engine_error = cmd_q->cmd_error;
1096		goto e_key;
1097	}
1098
1099	/* The AES context fits in a single (32-byte) KSB entry and
1100	 * must be in little endian format. Use the 256-bit byte swap
1101	 * passthru option to convert from big endian to little endian.
1102	 */
1103	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1104				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1105				   DMA_BIDIRECTIONAL);
1106	if (ret)
1107		goto e_key;
1108
1109	if (aes->mode != CCP_AES_MODE_ECB) {
 1110		/* Load the AES context - convert to LE */
1111		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1112		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1113		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1114				      CCP_PASSTHRU_BYTESWAP_256BIT);
1115		if (ret) {
1116			cmd->engine_error = cmd_q->cmd_error;
1117			goto e_ctx;
1118		}
1119	}
1120
1121	/* Prepare the input and output data workareas. For in-place
1122	 * operations we need to set the dma direction to BIDIRECTIONAL
1123	 * and copy the src workarea to the dst workarea.
1124	 */
1125	if (sg_virt(aes->src) == sg_virt(aes->dst))
1126		in_place = true;
1127
1128	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1129			    AES_BLOCK_SIZE,
1130			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1131	if (ret)
1132		goto e_ctx;
1133
1134	if (in_place)
1135		dst = src;
1136	else {
1137		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1138				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1139		if (ret)
1140			goto e_src;
1141	}
1142
1143	/* Send data to the CCP AES engine */
1144	while (src.sg_wa.bytes_left) {
1145		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1146		if (!src.sg_wa.bytes_left) {
1147			op.eom = 1;
1148
1149			/* Since we don't retrieve the AES context in ECB
1150			 * mode we have to wait for the operation to complete
1151			 * on the last piece of data
1152			 */
1153			if (aes->mode == CCP_AES_MODE_ECB)
1154				op.soc = 1;
1155		}
1156
1157		ret = ccp_perform_aes(&op);
1158		if (ret) {
1159			cmd->engine_error = cmd_q->cmd_error;
1160			goto e_dst;
1161		}
1162
1163		ccp_process_data(&src, &dst, &op);
1164	}
1165
1166	if (aes->mode != CCP_AES_MODE_ECB) {
1167		/* Retrieve the AES context - convert from LE to BE using
1168		 * 32-byte (256-bit) byteswapping
1169		 */
1170		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1171					CCP_PASSTHRU_BYTESWAP_256BIT);
1172		if (ret) {
1173			cmd->engine_error = cmd_q->cmd_error;
1174			goto e_dst;
1175		}
1176
1177		/* ...but we only need AES_BLOCK_SIZE bytes */
1178		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1179		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1180	}
1181
1182e_dst:
1183	if (!in_place)
1184		ccp_free_data(&dst, cmd_q);
1185
1186e_src:
1187	ccp_free_data(&src, cmd_q);
1188
1189e_ctx:
1190	ccp_dm_free(&ctx);
1191
1192e_key:
1193	ccp_dm_free(&key);
1194
1195	return ret;
1196}
1197
1198static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1199			       struct ccp_cmd *cmd)
1200{
1201	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1202	struct ccp_dm_workarea key, ctx;
1203	struct ccp_data src, dst;
1204	struct ccp_op op;
1205	unsigned int unit_size, dm_offset;
1206	bool in_place = false;
1207	int ret;
1208
1209	switch (xts->unit_size) {
1210	case CCP_XTS_AES_UNIT_SIZE_16:
1211		unit_size = 16;
1212		break;
1213	case CCP_XTS_AES_UNIT_SIZE_512:
1214		unit_size = 512;
1215		break;
1216	case CCP_XTS_AES_UNIT_SIZE_1024:
1217		unit_size = 1024;
1218		break;
1219	case CCP_XTS_AES_UNIT_SIZE_2048:
1220		unit_size = 2048;
1221		break;
1222	case CCP_XTS_AES_UNIT_SIZE_4096:
1223		unit_size = 4096;
1224		break;
1225
1226	default:
1227		return -EINVAL;
1228	}
1229
1230	if (xts->key_len != AES_KEYSIZE_128)
1231		return -EINVAL;
1232
1233	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1234		return -EINVAL;
1235
1236	if (xts->iv_len != AES_BLOCK_SIZE)
1237		return -EINVAL;
1238
1239	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1240		return -EINVAL;
1241
1242	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1243	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1244
1245	ret = -EIO;
1246	memset(&op, 0, sizeof(op));
1247	op.cmd_q = cmd_q;
1248	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1249	op.ksb_key = cmd_q->ksb_key;
1250	op.ksb_ctx = cmd_q->ksb_ctx;
1251	op.init = 1;
1252	op.u.xts.action = xts->action;
1253	op.u.xts.unit_size = xts->unit_size;
1254
1255	/* All supported key sizes fit in a single (32-byte) KSB entry
1256	 * and must be in little endian format. Use the 256-bit byte
1257	 * swap passthru option to convert from big endian to little
1258	 * endian.
1259	 */
1260	ret = ccp_init_dm_workarea(&key, cmd_q,
1261				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1262				   DMA_TO_DEVICE);
1263	if (ret)
1264		return ret;
1265
1266	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1267	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1268	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1269	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1270			      CCP_PASSTHRU_BYTESWAP_256BIT);
1271	if (ret) {
1272		cmd->engine_error = cmd_q->cmd_error;
1273		goto e_key;
1274	}
1275
1276	/* The AES context fits in a single (32-byte) KSB entry and
1277	 * for XTS is already in little endian format so no byte swapping
1278	 * is needed.
1279	 */
1280	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1281				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1282				   DMA_BIDIRECTIONAL);
1283	if (ret)
1284		goto e_key;
1285
1286	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1287	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1288			      CCP_PASSTHRU_BYTESWAP_NOOP);
1289	if (ret) {
1290		cmd->engine_error = cmd_q->cmd_error;
1291		goto e_ctx;
1292	}
1293
1294	/* Prepare the input and output data workareas. For in-place
1295	 * operations we need to set the dma direction to BIDIRECTIONAL
1296	 * and copy the src workarea to the dst workarea.
1297	 */
1298	if (sg_virt(xts->src) == sg_virt(xts->dst))
1299		in_place = true;
1300
1301	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1302			    unit_size,
1303			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1304	if (ret)
1305		goto e_ctx;
1306
1307	if (in_place)
1308		dst = src;
1309	else {
1310		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1311				    unit_size, DMA_FROM_DEVICE);
1312		if (ret)
1313			goto e_src;
1314	}
1315
1316	/* Send data to the CCP AES engine */
1317	while (src.sg_wa.bytes_left) {
1318		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1319		if (!src.sg_wa.bytes_left)
1320			op.eom = 1;
1321
1322		ret = ccp_perform_xts_aes(&op);
1323		if (ret) {
1324			cmd->engine_error = cmd_q->cmd_error;
1325			goto e_dst;
1326		}
1327
1328		ccp_process_data(&src, &dst, &op);
1329	}
1330
1331	/* Retrieve the AES context - convert from LE to BE using
1332	 * 32-byte (256-bit) byteswapping
1333	 */
1334	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1335				CCP_PASSTHRU_BYTESWAP_256BIT);
1336	if (ret) {
1337		cmd->engine_error = cmd_q->cmd_error;
1338		goto e_dst;
1339	}
1340
1341	/* ...but we only need AES_BLOCK_SIZE bytes */
1342	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1343	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1344
1345e_dst:
1346	if (!in_place)
1347		ccp_free_data(&dst, cmd_q);
1348
1349e_src:
1350	ccp_free_data(&src, cmd_q);
1351
1352e_ctx:
1353	ccp_dm_free(&ctx);
1354
1355e_key:
1356	ccp_dm_free(&key);
1357
1358	return ret;
1359}
1360
1361static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1362{
1363	struct ccp_sha_engine *sha = &cmd->u.sha;
1364	struct ccp_dm_workarea ctx;
1365	struct ccp_data src;
1366	struct ccp_op op;
1367	int ret;
1368
1369	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1370		return -EINVAL;
1371
1372	if (!sha->ctx)
1373		return -EINVAL;
1374
1375	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1376		return -EINVAL;
1377
1378	if (!sha->src_len) {
1379		const u8 *sha_zero;
1380
1381		/* Not final, just return */
1382		if (!sha->final)
1383			return 0;
1384
1385		/* CCP can't do a zero length sha operation so the caller
1386		 * must buffer the data.
1387		 */
1388		if (sha->msg_bits)
1389			return -EINVAL;
1390
1391		/* A sha operation for a message with a total length of zero,
1392		 * return known result.
1393		 */
1394		switch (sha->type) {
1395		case CCP_SHA_TYPE_1:
1396			sha_zero = ccp_sha1_zero;
1397			break;
1398		case CCP_SHA_TYPE_224:
1399			sha_zero = ccp_sha224_zero;
1400			break;
1401		case CCP_SHA_TYPE_256:
1402			sha_zero = ccp_sha256_zero;
1403			break;
1404		default:
1405			return -EINVAL;
1406		}
1407
1408		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1409					 sha->ctx_len, 1);
1410
1411		return 0;
1412	}
1413
1414	if (!sha->src)
1415		return -EINVAL;
1416
1417	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1418
1419	memset(&op, 0, sizeof(op));
1420	op.cmd_q = cmd_q;
1421	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1422	op.ksb_ctx = cmd_q->ksb_ctx;
1423	op.u.sha.type = sha->type;
1424	op.u.sha.msg_bits = sha->msg_bits;
1425
1426	/* The SHA context fits in a single (32-byte) KSB entry and
1427	 * must be in little endian format. Use the 256-bit byte swap
1428	 * passthru option to convert from big endian to little endian.
1429	 */
1430	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1431				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1432				   DMA_BIDIRECTIONAL);
1433	if (ret)
1434		return ret;
1435
1436	if (sha->first) {
1437		const __be32 *init;
1438
1439		switch (sha->type) {
1440		case CCP_SHA_TYPE_1:
1441			init = ccp_sha1_init;
1442			break;
1443		case CCP_SHA_TYPE_224:
1444			init = ccp_sha224_init;
1445			break;
1446		case CCP_SHA_TYPE_256:
1447			init = ccp_sha256_init;
1448			break;
1449		default:
1450			ret = -EINVAL;
1451			goto e_ctx;
1452		}
1453		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1454	} else
1455		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1456
1457	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1458			      CCP_PASSTHRU_BYTESWAP_256BIT);
1459	if (ret) {
1460		cmd->engine_error = cmd_q->cmd_error;
1461		goto e_ctx;
1462	}
1463
1464	/* Send data to the CCP SHA engine */
1465	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1466			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1467	if (ret)
1468		goto e_ctx;
1469
1470	while (src.sg_wa.bytes_left) {
1471		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1472		if (sha->final && !src.sg_wa.bytes_left)
1473			op.eom = 1;
1474
1475		ret = ccp_perform_sha(&op);
1476		if (ret) {
1477			cmd->engine_error = cmd_q->cmd_error;
1478			goto e_data;
1479		}
1480
1481		ccp_process_data(&src, NULL, &op);
1482	}
1483
1484	/* Retrieve the SHA context - convert from LE to BE using
 1485	 * 32-byte (256-bit) byteswapping
1486	 */
1487	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1488				CCP_PASSTHRU_BYTESWAP_256BIT);
1489	if (ret) {
1490		cmd->engine_error = cmd_q->cmd_error;
1491		goto e_data;
1492	}
1493
1494	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1495
1496	if (sha->final && sha->opad) {
1497		/* HMAC operation, recursively perform final SHA */
1498		struct ccp_cmd hmac_cmd;
1499		struct scatterlist sg;
1500		u64 block_size, digest_size;
1501		u8 *hmac_buf;
1502
1503		switch (sha->type) {
1504		case CCP_SHA_TYPE_1:
1505			block_size = SHA1_BLOCK_SIZE;
1506			digest_size = SHA1_DIGEST_SIZE;
1507			break;
1508		case CCP_SHA_TYPE_224:
1509			block_size = SHA224_BLOCK_SIZE;
1510			digest_size = SHA224_DIGEST_SIZE;
1511			break;
1512		case CCP_SHA_TYPE_256:
1513			block_size = SHA256_BLOCK_SIZE;
1514			digest_size = SHA256_DIGEST_SIZE;
1515			break;
1516		default:
1517			ret = -EINVAL;
1518			goto e_data;
1519		}
1520
1521		if (sha->opad_len != block_size) {
1522			ret = -EINVAL;
1523			goto e_data;
1524		}
1525
1526		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1527		if (!hmac_buf) {
1528			ret = -ENOMEM;
1529			goto e_data;
1530		}
1531		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1532
1533		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1534		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1535
1536		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1537		hmac_cmd.engine = CCP_ENGINE_SHA;
1538		hmac_cmd.u.sha.type = sha->type;
1539		hmac_cmd.u.sha.ctx = sha->ctx;
1540		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1541		hmac_cmd.u.sha.src = &sg;
1542		hmac_cmd.u.sha.src_len = block_size + digest_size;
1543		hmac_cmd.u.sha.opad = NULL;
1544		hmac_cmd.u.sha.opad_len = 0;
1545		hmac_cmd.u.sha.first = 1;
1546		hmac_cmd.u.sha.final = 1;
1547		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1548
1549		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1550		if (ret)
1551			cmd->engine_error = hmac_cmd.engine_error;
1552
1553		kfree(hmac_buf);
1554	}
1555
1556e_data:
1557	ccp_free_data(&src, cmd_q);
1558
1559e_ctx:
1560	ccp_dm_free(&ctx);
1561
1562	return ret;
1563}
1564
1565static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1566{
1567	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1568	struct ccp_dm_workarea exp, src;
1569	struct ccp_data dst;
1570	struct ccp_op op;
1571	unsigned int ksb_count, i_len, o_len;
1572	int ret;
1573
1574	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1575		return -EINVAL;
1576
1577	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1578		return -EINVAL;
1579
1580	/* The RSA modulus must precede the message being acted upon, so
1581	 * it must be copied to a DMA area where the message and the
1582	 * modulus can be concatenated.  Therefore the input buffer
1583	 * length required is twice the output buffer length (which
1584	 * must be a multiple of 256-bits).
1585	 */
1586	o_len = ((rsa->key_size + 255) / 256) * 32;
1587	i_len = o_len * 2;
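	/* For example, a 2048-bit key gives o_len = 256 bytes and
	 * i_len = 512 bytes (modulus followed by message).
	 */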
1588
1589	ksb_count = o_len / CCP_KSB_BYTES;
1590
1591	memset(&op, 0, sizeof(op));
1592	op.cmd_q = cmd_q;
1593	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1594	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1595	if (!op.ksb_key)
1596		return -EIO;
1597
1598	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1599	 * be in little endian format. Reverse copy each 32-byte chunk
1600	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
 1601	 * and each byte within that chunk, and do not perform any byte swap
1602	 * operations on the passthru operation.
1603	 */
1604	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1605	if (ret)
1606		goto e_ksb;
1607
1608	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1609				true);
1610	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1611			      CCP_PASSTHRU_BYTESWAP_NOOP);
1612	if (ret) {
1613		cmd->engine_error = cmd_q->cmd_error;
1614		goto e_exp;
1615	}
1616
1617	/* Concatenate the modulus and the message. Both the modulus and
1618	 * the operands must be in little endian format.  Since the input
1619	 * is in big endian format it must be converted.
1620	 */
1621	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1622	if (ret)
1623		goto e_exp;
1624
1625	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1626				true);
1627	src.address += o_len;	/* Adjust the address for the copy operation */
1628	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1629				true);
1630	src.address -= o_len;	/* Reset the address to original value */
1631
1632	/* Prepare the output area for the operation */
1633	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1634			    o_len, DMA_FROM_DEVICE);
1635	if (ret)
1636		goto e_src;
1637
1638	op.soc = 1;
1639	op.src.u.dma.address = src.dma.address;
1640	op.src.u.dma.offset = 0;
1641	op.src.u.dma.length = i_len;
1642	op.dst.u.dma.address = dst.dm_wa.dma.address;
1643	op.dst.u.dma.offset = 0;
1644	op.dst.u.dma.length = o_len;
1645
1646	op.u.rsa.mod_size = rsa->key_size;
1647	op.u.rsa.input_len = i_len;
1648
1649	ret = ccp_perform_rsa(&op);
1650	if (ret) {
1651		cmd->engine_error = cmd_q->cmd_error;
1652		goto e_dst;
1653	}
1654
1655	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1656
1657e_dst:
1658	ccp_free_data(&dst, cmd_q);
1659
1660e_src:
1661	ccp_dm_free(&src);
1662
1663e_exp:
1664	ccp_dm_free(&exp);
1665
1666e_ksb:
1667	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1668
1669	return ret;
1670}
1671
1672static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1673				struct ccp_cmd *cmd)
1674{
1675	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1676	struct ccp_dm_workarea mask;
1677	struct ccp_data src, dst;
1678	struct ccp_op op;
1679	bool in_place = false;
1680	unsigned int i;
1681	int ret;
1682
1683	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1684		return -EINVAL;
1685
1686	if (!pt->src || !pt->dst)
1687		return -EINVAL;
1688
1689	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1690		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1691			return -EINVAL;
1692		if (!pt->mask)
1693			return -EINVAL;
1694	}
1695
1696	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1697
1698	memset(&op, 0, sizeof(op));
1699	op.cmd_q = cmd_q;
1700	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1701
1702	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1703		/* Load the mask */
1704		op.ksb_key = cmd_q->ksb_key;
1705
1706		ret = ccp_init_dm_workarea(&mask, cmd_q,
1707					   CCP_PASSTHRU_KSB_COUNT *
1708					   CCP_KSB_BYTES,
1709					   DMA_TO_DEVICE);
1710		if (ret)
1711			return ret;
1712
1713		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1714		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1715				      CCP_PASSTHRU_BYTESWAP_NOOP);
1716		if (ret) {
1717			cmd->engine_error = cmd_q->cmd_error;
1718			goto e_mask;
1719		}
1720	}
1721
1722	/* Prepare the input and output data workareas. For in-place
1723	 * operations we need to set the dma direction to BIDIRECTIONAL
1724	 * and copy the src workarea to the dst workarea.
1725	 */
1726	if (sg_virt(pt->src) == sg_virt(pt->dst))
1727		in_place = true;
1728
1729	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1730			    CCP_PASSTHRU_MASKSIZE,
1731			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1732	if (ret)
1733		goto e_mask;
1734
1735	if (in_place)
1736		dst = src;
1737	else {
1738		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1739				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1740		if (ret)
1741			goto e_src;
1742	}
1743
1744	/* Send data to the CCP Passthru engine
1745	 *   Because the CCP engine works on a single source and destination
 1746	 *   dma address at a time, the length of each entry in the source
 1747	 *   scatterlist (after the dma_map_sg call) must be less than or
 1748	 *   equal to the (remaining) length in the destination scatterlist
 1749	 *   entry and the length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1750	 */
1751	dst.sg_wa.sg_used = 0;
1752	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1753		if (!dst.sg_wa.sg ||
1754		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1755			ret = -EINVAL;
1756			goto e_dst;
1757		}
1758
1759		if (i == src.sg_wa.dma_count) {
1760			op.eom = 1;
1761			op.soc = 1;
1762		}
1763
1764		op.src.type = CCP_MEMTYPE_SYSTEM;
1765		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1766		op.src.u.dma.offset = 0;
1767		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1768
1769		op.dst.type = CCP_MEMTYPE_SYSTEM;
1770		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1771		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1772		op.dst.u.dma.length = op.src.u.dma.length;
1773
1774		ret = ccp_perform_passthru(&op);
1775		if (ret) {
1776			cmd->engine_error = cmd_q->cmd_error;
1777			goto e_dst;
1778		}
1779
1780		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1781		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1782			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1783			dst.sg_wa.sg_used = 0;
1784		}
1785		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1786	}
1787
1788e_dst:
1789	if (!in_place)
1790		ccp_free_data(&dst, cmd_q);
1791
1792e_src:
1793	ccp_free_data(&src, cmd_q);
1794
1795e_mask:
1796	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1797		ccp_dm_free(&mask);
1798
1799	return ret;
1800}
1801
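/* ECC modular-math commands (MMUL/MADD/MINV): the modulus and one or
 * two operands are reverse-copied into fixed CCP_ECC_OPERAND_SIZE
 * slots of a single source buffer, little endian, modulus first.
 */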
1802static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1803{
1804	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1805	struct ccp_dm_workarea src, dst;
1806	struct ccp_op op;
1807	int ret;
1808	u8 *save;
1809
1810	if (!ecc->u.mm.operand_1 ||
1811	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1812		return -EINVAL;
1813
1814	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1815		if (!ecc->u.mm.operand_2 ||
1816		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1817			return -EINVAL;
1818
1819	if (!ecc->u.mm.result ||
1820	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1821		return -EINVAL;
1822
1823	memset(&op, 0, sizeof(op));
1824	op.cmd_q = cmd_q;
1825	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1826
1827	/* Concatenate the modulus and the operands. Both the modulus and
1828	 * the operands must be in little endian format.  Since the input
1829	 * is in big endian format it must be converted and placed in a
1830	 * fixed length buffer.
1831	 */
1832	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1833				   DMA_TO_DEVICE);
1834	if (ret)
1835		return ret;
1836
1837	/* Save the workarea address since it is updated in order to perform
1838	 * the concatenation
1839	 */
1840	save = src.address;
1841
1842	/* Copy the ECC modulus */
1843	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1844				CCP_ECC_OPERAND_SIZE, true);
1845	src.address += CCP_ECC_OPERAND_SIZE;
1846
1847	/* Copy the first operand */
1848	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1849				ecc->u.mm.operand_1_len,
1850				CCP_ECC_OPERAND_SIZE, true);
1851	src.address += CCP_ECC_OPERAND_SIZE;
1852
1853	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1854		/* Copy the second operand */
1855		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1856					ecc->u.mm.operand_2_len,
1857					CCP_ECC_OPERAND_SIZE, true);
1858		src.address += CCP_ECC_OPERAND_SIZE;
1859	}
1860
1861	/* Restore the workarea address */
1862	src.address = save;
1863
1864	/* Prepare the output area for the operation */
1865	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1866				   DMA_FROM_DEVICE);
1867	if (ret)
1868		goto e_src;
1869
1870	op.soc = 1;
1871	op.src.u.dma.address = src.dma.address;
1872	op.src.u.dma.offset = 0;
1873	op.src.u.dma.length = src.length;
1874	op.dst.u.dma.address = dst.dma.address;
1875	op.dst.u.dma.offset = 0;
1876	op.dst.u.dma.length = dst.length;
1877
1878	op.u.ecc.function = cmd->u.ecc.function;
1879
1880	ret = ccp_perform_ecc(&op);
1881	if (ret) {
1882		cmd->engine_error = cmd_q->cmd_error;
1883		goto e_dst;
1884	}
1885
1886	ecc->ecc_result = le16_to_cpup(
1887		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1888	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1889		ret = -EIO;
1890		goto e_dst;
1891	}
1892
1893	/* Save the ECC result */
1894	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1895
1896e_dst:
1897	ccp_dm_free(&dst);
1898
1899e_src:
1900	ccp_dm_free(&src);
1901
1902	return ret;
1903}
1904
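/* ECC point-math commands: the source buffer carries the modulus, the
 * first point's X/Y with an implicit Z = 1, and then either a second
 * point (PADD) or the domain "a" parameter plus an optional scalar
 * (PMUL), all reverse-copied into fixed-size little-endian slots.
 */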
1905static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1906{
1907	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1908	struct ccp_dm_workarea src, dst;
1909	struct ccp_op op;
1910	int ret;
1911	u8 *save;
1912
1913	if (!ecc->u.pm.point_1.x ||
1914	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1915	    !ecc->u.pm.point_1.y ||
1916	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1917		return -EINVAL;
1918
1919	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1920		if (!ecc->u.pm.point_2.x ||
1921		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1922		    !ecc->u.pm.point_2.y ||
1923		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1924			return -EINVAL;
1925	} else {
1926		if (!ecc->u.pm.domain_a ||
1927		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1928			return -EINVAL;
1929
1930		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1931			if (!ecc->u.pm.scalar ||
1932			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1933				return -EINVAL;
1934	}
1935
1936	if (!ecc->u.pm.result.x ||
1937	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1938	    !ecc->u.pm.result.y ||
1939	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1940		return -EINVAL;
1941
1942	memset(&op, 0, sizeof(op));
1943	op.cmd_q = cmd_q;
1944	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1945
1946	/* Concatenate the modulus and the operands. Both the modulus and
1947	 * the operands must be in little endian format.  Since the input
1948	 * is in big endian format it must be converted and placed in a
1949	 * fixed length buffer.
1950	 */
1951	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1952				   DMA_TO_DEVICE);
1953	if (ret)
1954		return ret;
1955
1956	/* Save the workarea address since it is updated in order to perform
1957	 * the concatenation
1958	 */
1959	save = src.address;
1960
1961	/* Copy the ECC modulus */
1962	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1963				CCP_ECC_OPERAND_SIZE, true);
1964	src.address += CCP_ECC_OPERAND_SIZE;
1965
1966	/* Copy the first point X and Y coordinate */
1967	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1968				ecc->u.pm.point_1.x_len,
1969				CCP_ECC_OPERAND_SIZE, true);
1970	src.address += CCP_ECC_OPERAND_SIZE;
1971	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1972				ecc->u.pm.point_1.y_len,
1973				CCP_ECC_OPERAND_SIZE, true);
1974	src.address += CCP_ECC_OPERAND_SIZE;
1975
 1976	/* Set the first point Z coordinate to 1 */
1977	*(src.address) = 0x01;
1978	src.address += CCP_ECC_OPERAND_SIZE;
1979
1980	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1981		/* Copy the second point X and Y coordinate */
1982		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1983					ecc->u.pm.point_2.x_len,
1984					CCP_ECC_OPERAND_SIZE, true);
1985		src.address += CCP_ECC_OPERAND_SIZE;
1986		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1987					ecc->u.pm.point_2.y_len,
1988					CCP_ECC_OPERAND_SIZE, true);
1989		src.address += CCP_ECC_OPERAND_SIZE;
1990
 1991		/* Set the second point Z coordinate to 1 */
1992		*(src.address) = 0x01;
1993		src.address += CCP_ECC_OPERAND_SIZE;
1994	} else {
1995		/* Copy the Domain "a" parameter */
1996		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1997					ecc->u.pm.domain_a_len,
1998					CCP_ECC_OPERAND_SIZE, true);
1999		src.address += CCP_ECC_OPERAND_SIZE;
2000
2001		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2002			/* Copy the scalar value */
2003			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
2004						ecc->u.pm.scalar_len,
2005						CCP_ECC_OPERAND_SIZE, true);
2006			src.address += CCP_ECC_OPERAND_SIZE;
2007		}
2008	}
2009
2010	/* Restore the workarea address */
2011	src.address = save;
2012
2013	/* Prepare the output area for the operation */
2014	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2015				   DMA_FROM_DEVICE);
2016	if (ret)
2017		goto e_src;
2018
2019	op.soc = 1;
2020	op.src.u.dma.address = src.dma.address;
2021	op.src.u.dma.offset = 0;
2022	op.src.u.dma.length = src.length;
2023	op.dst.u.dma.address = dst.dma.address;
2024	op.dst.u.dma.offset = 0;
2025	op.dst.u.dma.length = dst.length;
2026
2027	op.u.ecc.function = cmd->u.ecc.function;
2028
2029	ret = ccp_perform_ecc(&op);
2030	if (ret) {
2031		cmd->engine_error = cmd_q->cmd_error;
2032		goto e_dst;
2033	}
2034
2035	ecc->ecc_result = le16_to_cpup(
2036		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2037	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2038		ret = -EIO;
2039		goto e_dst;
2040	}
2041
2042	/* Save the workarea address since it is updated as we walk through
2043	 * to copy the point math result
2044	 */
2045	save = dst.address;
2046
2047	/* Save the ECC result X and Y coordinates */
2048	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
2049				CCP_ECC_MODULUS_BYTES);
2050	dst.address += CCP_ECC_OUTPUT_SIZE;
2051	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
2052				CCP_ECC_MODULUS_BYTES);
2053	dst.address += CCP_ECC_OUTPUT_SIZE;
2054
2055	/* Restore the workarea address */
2056	dst.address = save;
2057
2058e_dst:
2059	ccp_dm_free(&dst);
2060
2061e_src:
2062	ccp_dm_free(&src);
2063
2064	return ret;
2065}
2066
2067static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2068{
2069	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2070
2071	ecc->ecc_result = 0;
2072
2073	if (!ecc->mod ||
2074	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2075		return -EINVAL;
2076
2077	switch (ecc->function) {
2078	case CCP_ECC_FUNCTION_MMUL_384BIT:
2079	case CCP_ECC_FUNCTION_MADD_384BIT:
2080	case CCP_ECC_FUNCTION_MINV_384BIT:
2081		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2082
2083	case CCP_ECC_FUNCTION_PADD_384BIT:
2084	case CCP_ECC_FUNCTION_PMUL_384BIT:
2085	case CCP_ECC_FUNCTION_PDBL_384BIT:
2086		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2087
2088	default:
2089		return -EINVAL;
2090	}
2091}
2092
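/* Top-level dispatcher: reset the per-queue error state and free-slot
 * count from the status register, then route the command to the
 * engine-specific handler.
 */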
2093int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2094{
2095	int ret;
2096
2097	cmd->engine_error = 0;
2098	cmd_q->cmd_error = 0;
2099	cmd_q->int_rcvd = 0;
2100	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2101
2102	switch (cmd->engine) {
2103	case CCP_ENGINE_AES:
2104		ret = ccp_run_aes_cmd(cmd_q, cmd);
2105		break;
2106	case CCP_ENGINE_XTS_AES_128:
2107		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2108		break;
2109	case CCP_ENGINE_SHA:
2110		ret = ccp_run_sha_cmd(cmd_q, cmd);
2111		break;
2112	case CCP_ENGINE_RSA:
2113		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2114		break;
2115	case CCP_ENGINE_PASSTHRU:
2116		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2117		break;
2118	case CCP_ENGINE_ECC:
2119		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2120		break;
2121	default:
2122		ret = -EINVAL;
2123	}
2124
2125	return ret;
2126}