v5.9
  1// SPDX-License-Identifier: GPL-2.0-only
  2/* 
  3 * Cryptographic API.
  4 *
  5 * Support for VIA PadLock hardware crypto engine.
  6 *
  7 * Copyright (c) 2004  Michal Ludvig <michal@logix.cz>
  8 *
  9 */
 10
 11#include <crypto/algapi.h>
 12#include <crypto/aes.h>
 13#include <crypto/internal/skcipher.h>
 14#include <crypto/padlock.h>
 15#include <linux/module.h>
 16#include <linux/init.h>
 17#include <linux/types.h>
 18#include <linux/errno.h>
 19#include <linux/interrupt.h>
 20#include <linux/kernel.h>
 21#include <linux/percpu.h>
 22#include <linux/smp.h>
 23#include <linux/slab.h>
 24#include <asm/cpu_device_id.h>
 25#include <asm/byteorder.h>
 26#include <asm/processor.h>
 27#include <asm/fpu/api.h>
 28
 29/*
 30 * Number of data blocks actually fetched for each xcrypt insn.
 31 * Processors with prefetch errata will fetch extra blocks.
 32 */
 33static unsigned int ecb_fetch_blocks = 2;
 34#define MAX_ECB_FETCH_BLOCKS (8)
 35#define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)
 36
 37static unsigned int cbc_fetch_blocks = 1;
 38#define MAX_CBC_FETCH_BLOCKS (4)
 39#define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)
 40
 41/* Control word. */
 42struct cword {
 43	unsigned int __attribute__ ((__packed__))
 44		rounds:4,
 45		algo:3,
 46		keygen:1,
 47		interm:1,
 48		encdec:1,
 49		ksize:2;
 50} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
 51
 52/* Whenever making any changes to the following
 53 * structure *make sure* you keep E, d_data
 54 * and cword aligned on 16-byte boundaries and
 55 * that the hardware can access 16 * 16 bytes of E and
 56 * d_data (only the first 15 * 16 bytes matter but the
 57 * HW reads more).
 58 */
 59struct aes_ctx {
 60	u32 E[AES_MAX_KEYLENGTH_U32]
 61		__attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
 62	u32 d_data[AES_MAX_KEYLENGTH_U32]
 63		__attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
 64	struct {
 65		struct cword encrypt;
 66		struct cword decrypt;
 67	} cword;
 68	u32 *D;
 69};
 70
 71static DEFINE_PER_CPU(struct cword *, paes_last_cword);
 72
 73/* Tells whether the ACE is capable of generating
 74   the extended key for a given key_len. */
 75static inline int
 76aes_hw_extkey_available(uint8_t key_len)
 77{
 78	/* TODO: We should check the actual CPU model/stepping
 79	         as it's possible that the capability will be
 80	         added in the next CPU revisions. */
 81	if (key_len == 16)
 82		return 1;
 83	return 0;
 84}
 85
 86static inline struct aes_ctx *aes_ctx_common(void *ctx)
 87{
 88	unsigned long addr = (unsigned long)ctx;
 89	unsigned long align = PADLOCK_ALIGNMENT;
 90
 91	if (align <= crypto_tfm_ctx_alignment())
 92		align = 1;
 93	return (struct aes_ctx *)ALIGN(addr, align);
 94}
 95
 96static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
 97{
 98	return aes_ctx_common(crypto_tfm_ctx(tfm));
 99}
100
101static inline struct aes_ctx *skcipher_aes_ctx(struct crypto_skcipher *tfm)
102{
103	return aes_ctx_common(crypto_skcipher_ctx(tfm));
104}
105
106static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
107		       unsigned int key_len)
108{
109	struct aes_ctx *ctx = aes_ctx(tfm);
110	const __le32 *key = (const __le32 *)in_key;
111	struct crypto_aes_ctx gen_aes;
112	int cpu;
113
114	if (key_len % 8)
115		return -EINVAL;
116
117	/*
118	 * If the hardware is capable of generating the extended key
119	 * itself we must supply the plain key for both encryption
120	 * and decryption.
121	 */
122	ctx->D = ctx->E;
123
124	ctx->E[0] = le32_to_cpu(key[0]);
125	ctx->E[1] = le32_to_cpu(key[1]);
126	ctx->E[2] = le32_to_cpu(key[2]);
127	ctx->E[3] = le32_to_cpu(key[3]);
128
129	/* Prepare control words. */
130	memset(&ctx->cword, 0, sizeof(ctx->cword));
131
132	ctx->cword.decrypt.encdec = 1;
133	ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4;
134	ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds;
135	ctx->cword.encrypt.ksize = (key_len - 16) / 8;
136	ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize;
137
138	/* Don't generate extended keys if the hardware can do it. */
139	if (aes_hw_extkey_available(key_len))
140		goto ok;
141
142	ctx->D = ctx->d_data;
143	ctx->cword.encrypt.keygen = 1;
144	ctx->cword.decrypt.keygen = 1;
145
146	if (aes_expandkey(&gen_aes, in_key, key_len))
147		return -EINVAL;
148
149	memcpy(ctx->E, gen_aes.key_enc, AES_MAX_KEYLENGTH);
150	memcpy(ctx->D, gen_aes.key_dec, AES_MAX_KEYLENGTH);
151
152ok:
153	for_each_online_cpu(cpu)
154		if (&ctx->cword.encrypt == per_cpu(paes_last_cword, cpu) ||
155		    &ctx->cword.decrypt == per_cpu(paes_last_cword, cpu))
156			per_cpu(paes_last_cword, cpu) = NULL;
157
158	return 0;
159}
160
161static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key,
162				unsigned int key_len)
163{
164	return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
165}
166
167/* ====== Encryption/decryption routines ====== */
168
169/* These are the real calls to PadLock. */
170static inline void padlock_reset_key(struct cword *cword)
171{
172	int cpu = raw_smp_processor_id();
173
174	if (cword != per_cpu(paes_last_cword, cpu))
175#ifndef CONFIG_X86_64
176		asm volatile ("pushfl; popfl");
177#else
178		asm volatile ("pushfq; popfq");
179#endif
180}
181
182static inline void padlock_store_cword(struct cword *cword)
183{
184	per_cpu(paes_last_cword, raw_smp_processor_id()) = cword;
185}
186
187/*
188 * While the padlock instructions don't use FP/SSE registers, they
189 * generate a spurious DNA fault when CR0.TS is '1'.  Fortunately,
190 * the kernel doesn't use CR0.TS.
191 */
192
193static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
194				  struct cword *control_word, int count)
195{
196	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
197		      : "+S"(input), "+D"(output)
198		      : "d"(control_word), "b"(key), "c"(count));
199}
200
201static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key,
202				 u8 *iv, struct cword *control_word, int count)
203{
204	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
205		      : "+S" (input), "+D" (output), "+a" (iv)
206		      : "d" (control_word), "b" (key), "c" (count));
207	return iv;
208}
209
210static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key,
211			   struct cword *cword, int count)
212{
213	/*
214	 * Padlock prefetches extra data so we must provide mapped input buffers.
215	 * Assume there are at least 16 bytes of stack already in use.
216	 */
217	u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
218	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
219
220	memcpy(tmp, in, count * AES_BLOCK_SIZE);
221	rep_xcrypt_ecb(tmp, out, key, cword, count);
222}
223
224static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key,
225			   u8 *iv, struct cword *cword, int count)
226{
227	/*
228	 * Padlock prefetches extra data so we must provide mapped input buffers.
229	 * Assume there are at least 16 bytes of stack already in use.
230	 */
231	u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
232	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
233
234	memcpy(tmp, in, count * AES_BLOCK_SIZE);
235	return rep_xcrypt_cbc(tmp, out, key, iv, cword, count);
236}
237
238static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key,
239			     struct cword *cword, int count)
240{
241	/* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
242	 * We could avoid some copying here but it's probably not worth it.
243	 */
244	if (unlikely(offset_in_page(in) + ecb_fetch_bytes > PAGE_SIZE)) {
245		ecb_crypt_copy(in, out, key, cword, count);
246		return;
247	}
248
249	rep_xcrypt_ecb(in, out, key, cword, count);
250}
251
252static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key,
253			    u8 *iv, struct cword *cword, int count)
254{
255	/* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
256	if (unlikely(offset_in_page(in) + cbc_fetch_bytes > PAGE_SIZE))
257		return cbc_crypt_copy(in, out, key, iv, cword, count);
258
259	return rep_xcrypt_cbc(in, out, key, iv, cword, count);
260}
261
262static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
263				      void *control_word, u32 count)
264{
265	u32 initial = count & (ecb_fetch_blocks - 1);
266
267	if (count < ecb_fetch_blocks) {
268		ecb_crypt(input, output, key, control_word, count);
269		return;
270	}
271
272	count -= initial;
273
274	if (initial)
275		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
276			      : "+S"(input), "+D"(output)
277			      : "d"(control_word), "b"(key), "c"(initial));
278
279	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
280		      : "+S"(input), "+D"(output)
281		      : "d"(control_word), "b"(key), "c"(count));
282}
283
284static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
285				     u8 *iv, void *control_word, u32 count)
286{
287	u32 initial = count & (cbc_fetch_blocks - 1);
288
289	if (count < cbc_fetch_blocks)
290		return cbc_crypt(input, output, key, iv, control_word, count);
291
292	count -= initial;
293
294	if (initial)
295		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
296			      : "+S" (input), "+D" (output), "+a" (iv)
297			      : "d" (control_word), "b" (key), "c" (initial));
298
299	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
300		      : "+S" (input), "+D" (output), "+a" (iv)
301		      : "d" (control_word), "b" (key), "c" (count));
302	return iv;
303}
304
305static void padlock_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
306{
307	struct aes_ctx *ctx = aes_ctx(tfm);
308
309	padlock_reset_key(&ctx->cword.encrypt);
310	ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
311	padlock_store_cword(&ctx->cword.encrypt);
312}
313
314static void padlock_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
315{
316	struct aes_ctx *ctx = aes_ctx(tfm);
317
318	padlock_reset_key(&ctx->cword.encrypt);
319	ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
320	padlock_store_cword(&ctx->cword.encrypt);
321}
322
323static struct crypto_alg aes_alg = {
324	.cra_name		=	"aes",
325	.cra_driver_name	=	"aes-padlock",
326	.cra_priority		=	PADLOCK_CRA_PRIORITY,
327	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
328	.cra_blocksize		=	AES_BLOCK_SIZE,
329	.cra_ctxsize		=	sizeof(struct aes_ctx),
330	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
331	.cra_module		=	THIS_MODULE,
332	.cra_u			=	{
333		.cipher = {
334			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
335			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
336			.cia_setkey	   	= 	aes_set_key,
337			.cia_encrypt	 	=	padlock_aes_encrypt,
338			.cia_decrypt	  	=	padlock_aes_decrypt,
339		}
340	}
341};
342
343static int ecb_aes_encrypt(struct skcipher_request *req)
344{
345	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
346	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
347	struct skcipher_walk walk;
348	unsigned int nbytes;
349	int err;
350
351	padlock_reset_key(&ctx->cword.encrypt);
352
353	err = skcipher_walk_virt(&walk, req, false);
354
355	while ((nbytes = walk.nbytes) != 0) {
356		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
357				   ctx->E, &ctx->cword.encrypt,
358				   nbytes / AES_BLOCK_SIZE);
359		nbytes &= AES_BLOCK_SIZE - 1;
360		err = skcipher_walk_done(&walk, nbytes);
361	}
362
363	padlock_store_cword(&ctx->cword.encrypt);
364
365	return err;
366}
367
368static int ecb_aes_decrypt(struct skcipher_request *req)
369{
370	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
371	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
372	struct skcipher_walk walk;
373	unsigned int nbytes;
374	int err;
375
376	padlock_reset_key(&ctx->cword.decrypt);
377
378	err = skcipher_walk_virt(&walk, req, false);
379
380	while ((nbytes = walk.nbytes) != 0) {
381		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
382				   ctx->D, &ctx->cword.decrypt,
383				   nbytes / AES_BLOCK_SIZE);
384		nbytes &= AES_BLOCK_SIZE - 1;
385		err = skcipher_walk_done(&walk, nbytes);
386	}
387
388	padlock_store_cword(&ctx->cword.encrypt);
389
390	return err;
391}
392
393static struct skcipher_alg ecb_aes_alg = {
394	.base.cra_name		=	"ecb(aes)",
395	.base.cra_driver_name	=	"ecb-aes-padlock",
396	.base.cra_priority	=	PADLOCK_COMPOSITE_PRIORITY,
397	.base.cra_blocksize	=	AES_BLOCK_SIZE,
398	.base.cra_ctxsize	=	sizeof(struct aes_ctx),
399	.base.cra_alignmask	=	PADLOCK_ALIGNMENT - 1,
400	.base.cra_module	=	THIS_MODULE,
401	.min_keysize		=	AES_MIN_KEY_SIZE,
402	.max_keysize		=	AES_MAX_KEY_SIZE,
403	.setkey			=	aes_set_key_skcipher,
404	.encrypt		=	ecb_aes_encrypt,
405	.decrypt		=	ecb_aes_decrypt,
406};
407
408static int cbc_aes_encrypt(struct skcipher_request *req)
409{
410	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
411	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
412	struct skcipher_walk walk;
413	unsigned int nbytes;
414	int err;
415
416	padlock_reset_key(&ctx->cword.encrypt);
417
418	err = skcipher_walk_virt(&walk, req, false);
419
420	while ((nbytes = walk.nbytes) != 0) {
421		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
422					    walk.dst.virt.addr, ctx->E,
423					    walk.iv, &ctx->cword.encrypt,
424					    nbytes / AES_BLOCK_SIZE);
425		memcpy(walk.iv, iv, AES_BLOCK_SIZE);
426		nbytes &= AES_BLOCK_SIZE - 1;
427		err = skcipher_walk_done(&walk, nbytes);
428	}
429
430	padlock_store_cword(&ctx->cword.decrypt);
431
432	return err;
433}
434
435static int cbc_aes_decrypt(struct skcipher_request *req)
436{
437	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
438	struct aes_ctx *ctx = skcipher_aes_ctx(tfm);
439	struct skcipher_walk walk;
440	unsigned int nbytes;
441	int err;
442
443	padlock_reset_key(&ctx->cword.encrypt);
444
445	err = skcipher_walk_virt(&walk, req, false);
446
447	while ((nbytes = walk.nbytes) != 0) {
448		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
449				   ctx->D, walk.iv, &ctx->cword.decrypt,
450				   nbytes / AES_BLOCK_SIZE);
451		nbytes &= AES_BLOCK_SIZE - 1;
452		err = skcipher_walk_done(&walk, nbytes);
453	}
454
455	padlock_store_cword(&ctx->cword.encrypt);
456
457	return err;
458}
459
460static struct skcipher_alg cbc_aes_alg = {
461	.base.cra_name		=	"cbc(aes)",
462	.base.cra_driver_name	=	"cbc-aes-padlock",
463	.base.cra_priority	=	PADLOCK_COMPOSITE_PRIORITY,
464	.base.cra_blocksize	=	AES_BLOCK_SIZE,
465	.base.cra_ctxsize	=	sizeof(struct aes_ctx),
466	.base.cra_alignmask	=	PADLOCK_ALIGNMENT - 1,
467	.base.cra_module	=	THIS_MODULE,
468	.min_keysize		=	AES_MIN_KEY_SIZE,
469	.max_keysize		=	AES_MAX_KEY_SIZE,
470	.ivsize			=	AES_BLOCK_SIZE,
471	.setkey			=	aes_set_key_skcipher,
472	.encrypt		=	cbc_aes_encrypt,
473	.decrypt		=	cbc_aes_decrypt,
474};
475
476static const struct x86_cpu_id padlock_cpu_id[] = {
477	X86_MATCH_FEATURE(X86_FEATURE_XCRYPT, NULL),
478	{}
479};
480MODULE_DEVICE_TABLE(x86cpu, padlock_cpu_id);
481
482static int __init padlock_init(void)
483{
484	int ret;
485	struct cpuinfo_x86 *c = &cpu_data(0);
486
487	if (!x86_match_cpu(padlock_cpu_id))
488		return -ENODEV;
489
490	if (!boot_cpu_has(X86_FEATURE_XCRYPT_EN)) {
491		printk(KERN_NOTICE PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
492		return -ENODEV;
493	}
494
495	if ((ret = crypto_register_alg(&aes_alg)) != 0)
496		goto aes_err;
497
498	if ((ret = crypto_register_skcipher(&ecb_aes_alg)) != 0)
499		goto ecb_aes_err;
500
501	if ((ret = crypto_register_skcipher(&cbc_aes_alg)) != 0)
502		goto cbc_aes_err;
503
504	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
505
506	if (c->x86 == 6 && c->x86_model == 15 && c->x86_stepping == 2) {
507		ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
508		cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
509		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
510	}
511
512out:
513	return ret;
514
515cbc_aes_err:
516	crypto_unregister_skcipher(&ecb_aes_alg);
517ecb_aes_err:
518	crypto_unregister_alg(&aes_alg);
519aes_err:
520	printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
521	goto out;
522}
523
524static void __exit padlock_fini(void)
525{
526	crypto_unregister_skcipher(&cbc_aes_alg);
527	crypto_unregister_skcipher(&ecb_aes_alg);
528	crypto_unregister_alg(&aes_alg);
529}
530
531module_init(padlock_init);
532module_exit(padlock_fini);
533
534MODULE_DESCRIPTION("VIA PadLock AES algorithm support");
535MODULE_LICENSE("GPL");
536MODULE_AUTHOR("Michal Ludvig");
537
538MODULE_ALIAS_CRYPTO("aes");
v3.1
  1/* 
  2 * Cryptographic API.
  3 *
  4 * Support for VIA PadLock hardware crypto engine.
  5 *
  6 * Copyright (c) 2004  Michal Ludvig <michal@logix.cz>
  7 *
  8 */
  9
 10#include <crypto/algapi.h>
 11#include <crypto/aes.h>
 12#include <crypto/padlock.h>
 13#include <linux/module.h>
 14#include <linux/init.h>
 15#include <linux/types.h>
 16#include <linux/errno.h>
 17#include <linux/interrupt.h>
 18#include <linux/kernel.h>
 19#include <linux/percpu.h>
 20#include <linux/smp.h>
 21#include <linux/slab.h>
 22#include <asm/byteorder.h>
 23#include <asm/processor.h>
 24#include <asm/i387.h>
 25
 26/*
 27 * Number of data blocks actually fetched for each xcrypt insn.
 28 * Processors with prefetch errata will fetch extra blocks.
 29 */
 30static unsigned int ecb_fetch_blocks = 2;
 31#define MAX_ECB_FETCH_BLOCKS (8)
 32#define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)
 33
 34static unsigned int cbc_fetch_blocks = 1;
 35#define MAX_CBC_FETCH_BLOCKS (4)
 36#define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)
 37
 38/* Control word. */
 39struct cword {
 40	unsigned int __attribute__ ((__packed__))
 41		rounds:4,
 42		algo:3,
 43		keygen:1,
 44		interm:1,
 45		encdec:1,
 46		ksize:2;
 47} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
 48
 49/* Whenever making any changes to the following
 50 * structure *make sure* you keep E, d_data
 51 * and cword aligned on 16-byte boundaries and
 52 * that the hardware can access 16 * 16 bytes of E and
 53 * d_data (only the first 15 * 16 bytes matter but the
 54 * HW reads more).
 55 */
 56struct aes_ctx {
 57	u32 E[AES_MAX_KEYLENGTH_U32]
 58		__attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
 59	u32 d_data[AES_MAX_KEYLENGTH_U32]
 60		__attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
 61	struct {
 62		struct cword encrypt;
 63		struct cword decrypt;
 64	} cword;
 65	u32 *D;
 66};
 67
 68static DEFINE_PER_CPU(struct cword *, paes_last_cword);
 69
 70/* Tells whether the ACE is capable of generating
 71   the extended key for a given key_len. */
 72static inline int
 73aes_hw_extkey_available(uint8_t key_len)
 74{
 75	/* TODO: We should check the actual CPU model/stepping
 76	         as it's possible that the capability will be
 77	         added in the next CPU revisions. */
 78	if (key_len == 16)
 79		return 1;
 80	return 0;
 81}
 82
 83static inline struct aes_ctx *aes_ctx_common(void *ctx)
 84{
 85	unsigned long addr = (unsigned long)ctx;
 86	unsigned long align = PADLOCK_ALIGNMENT;
 87
 88	if (align <= crypto_tfm_ctx_alignment())
 89		align = 1;
 90	return (struct aes_ctx *)ALIGN(addr, align);
 91}
 92
 93static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
 94{
 95	return aes_ctx_common(crypto_tfm_ctx(tfm));
 96}
 97
 98static inline struct aes_ctx *blk_aes_ctx(struct crypto_blkcipher *tfm)
 99{
100	return aes_ctx_common(crypto_blkcipher_ctx(tfm));
101}
102
103static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
104		       unsigned int key_len)
105{
106	struct aes_ctx *ctx = aes_ctx(tfm);
107	const __le32 *key = (const __le32 *)in_key;
108	u32 *flags = &tfm->crt_flags;
109	struct crypto_aes_ctx gen_aes;
110	int cpu;
111
112	if (key_len % 8) {
113		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
114		return -EINVAL;
115	}
116
117	/*
118	 * If the hardware is capable of generating the extended key
119	 * itself we must supply the plain key for both encryption
120	 * and decryption.
121	 */
122	ctx->D = ctx->E;
123
124	ctx->E[0] = le32_to_cpu(key[0]);
125	ctx->E[1] = le32_to_cpu(key[1]);
126	ctx->E[2] = le32_to_cpu(key[2]);
127	ctx->E[3] = le32_to_cpu(key[3]);
128
129	/* Prepare control words. */
130	memset(&ctx->cword, 0, sizeof(ctx->cword));
131
132	ctx->cword.decrypt.encdec = 1;
133	ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4;
134	ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds;
135	ctx->cword.encrypt.ksize = (key_len - 16) / 8;
136	ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize;
137
138	/* Don't generate extended keys if the hardware can do it. */
139	if (aes_hw_extkey_available(key_len))
140		goto ok;
141
142	ctx->D = ctx->d_data;
143	ctx->cword.encrypt.keygen = 1;
144	ctx->cword.decrypt.keygen = 1;
145
146	if (crypto_aes_expand_key(&gen_aes, in_key, key_len)) {
147		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
148		return -EINVAL;
149	}
150
151	memcpy(ctx->E, gen_aes.key_enc, AES_MAX_KEYLENGTH);
152	memcpy(ctx->D, gen_aes.key_dec, AES_MAX_KEYLENGTH);
153
154ok:
155	for_each_online_cpu(cpu)
156		if (&ctx->cword.encrypt == per_cpu(paes_last_cword, cpu) ||
157		    &ctx->cword.decrypt == per_cpu(paes_last_cword, cpu))
158			per_cpu(paes_last_cword, cpu) = NULL;
159
160	return 0;
161}
162
163/* ====== Encryption/decryption routines ====== */
164
165/* These are the real calls to PadLock. */
166static inline void padlock_reset_key(struct cword *cword)
167{
168	int cpu = raw_smp_processor_id();
169
170	if (cword != per_cpu(paes_last_cword, cpu))
171#ifndef CONFIG_X86_64
172		asm volatile ("pushfl; popfl");
173#else
174		asm volatile ("pushfq; popfq");
175#endif
176}
177
178static inline void padlock_store_cword(struct cword *cword)
179{
180	per_cpu(paes_last_cword, raw_smp_processor_id()) = cword;
181}
182
183/*
184 * While the padlock instructions don't use FP/SSE registers, they
185 * generate a spurious DNA fault when cr0.ts is '1'. These instructions
186 * should be used only inside the irq_ts_save/restore() context
187 */
188
189static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
190				  struct cword *control_word, int count)
191{
192	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
193		      : "+S"(input), "+D"(output)
194		      : "d"(control_word), "b"(key), "c"(count));
195}
196
197static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key,
198				 u8 *iv, struct cword *control_word, int count)
199{
200	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
201		      : "+S" (input), "+D" (output), "+a" (iv)
202		      : "d" (control_word), "b" (key), "c" (count));
203	return iv;
204}
205
206static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key,
207			   struct cword *cword, int count)
208{
209	/*
210	 * Padlock prefetches extra data so we must provide mapped input buffers.
211	 * Assume there are at least 16 bytes of stack already in use.
212	 */
213	u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
214	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
215
216	memcpy(tmp, in, count * AES_BLOCK_SIZE);
217	rep_xcrypt_ecb(tmp, out, key, cword, count);
218}
219
220static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key,
221			   u8 *iv, struct cword *cword, int count)
222{
223	/*
224	 * Padlock prefetches extra data so we must provide mapped input buffers.
225	 * Assume there are at least 16 bytes of stack already in use.
226	 */
227	u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
228	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
229
230	memcpy(tmp, in, count * AES_BLOCK_SIZE);
231	return rep_xcrypt_cbc(tmp, out, key, iv, cword, count);
232}
233
234static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key,
235			     struct cword *cword, int count)
236{
237	/* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
238	 * We could avoid some copying here but it's probably not worth it.
239	 */
240	if (unlikely(((unsigned long)in & ~PAGE_MASK) + ecb_fetch_bytes > PAGE_SIZE)) {
241		ecb_crypt_copy(in, out, key, cword, count);
242		return;
243	}
244
245	rep_xcrypt_ecb(in, out, key, cword, count);
246}
247
248static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key,
249			    u8 *iv, struct cword *cword, int count)
250{
251	/* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
252	if (unlikely(((unsigned long)in & ~PAGE_MASK) + cbc_fetch_bytes > PAGE_SIZE))
253		return cbc_crypt_copy(in, out, key, iv, cword, count);
254
255	return rep_xcrypt_cbc(in, out, key, iv, cword, count);
256}
257
258static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
259				      void *control_word, u32 count)
260{
261	u32 initial = count & (ecb_fetch_blocks - 1);
262
263	if (count < ecb_fetch_blocks) {
264		ecb_crypt(input, output, key, control_word, count);
265		return;
266	}
267
268	if (initial)
269		asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
270			      : "+S"(input), "+D"(output)
271			      : "d"(control_word), "b"(key), "c"(initial));
272
273	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
274		      : "+S"(input), "+D"(output)
275		      : "d"(control_word), "b"(key), "c"(count - initial));
276}
277
278static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
279				     u8 *iv, void *control_word, u32 count)
280{
281	u32 initial = count & (cbc_fetch_blocks - 1);
282
283	if (count < cbc_fetch_blocks)
284		return cbc_crypt(input, output, key, iv, control_word, count);
285
286	if (initial)
287		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
288			      : "+S" (input), "+D" (output), "+a" (iv)
289			      : "d" (control_word), "b" (key), "c" (initial));
290
291	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
292		      : "+S" (input), "+D" (output), "+a" (iv)
293		      : "d" (control_word), "b" (key), "c" (count-initial));
294	return iv;
295}
296
297static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
298{
299	struct aes_ctx *ctx = aes_ctx(tfm);
300	int ts_state;
301
302	padlock_reset_key(&ctx->cword.encrypt);
303	ts_state = irq_ts_save();
304	ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
305	irq_ts_restore(ts_state);
306	padlock_store_cword(&ctx->cword.encrypt);
307}
308
309static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
310{
311	struct aes_ctx *ctx = aes_ctx(tfm);
312	int ts_state;
313
314	padlock_reset_key(&ctx->cword.encrypt);
315	ts_state = irq_ts_save();
316	ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
317	irq_ts_restore(ts_state);
318	padlock_store_cword(&ctx->cword.encrypt);
319}
320
321static struct crypto_alg aes_alg = {
322	.cra_name		=	"aes",
323	.cra_driver_name	=	"aes-padlock",
324	.cra_priority		=	PADLOCK_CRA_PRIORITY,
325	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
326	.cra_blocksize		=	AES_BLOCK_SIZE,
327	.cra_ctxsize		=	sizeof(struct aes_ctx),
328	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
329	.cra_module		=	THIS_MODULE,
330	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
331	.cra_u			=	{
332		.cipher = {
333			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
334			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
335			.cia_setkey	   	= 	aes_set_key,
336			.cia_encrypt	 	=	aes_encrypt,
337			.cia_decrypt	  	=	aes_decrypt,
338		}
339	}
340};
341
342static int ecb_aes_encrypt(struct blkcipher_desc *desc,
343			   struct scatterlist *dst, struct scatterlist *src,
344			   unsigned int nbytes)
345{
346	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
347	struct blkcipher_walk walk;
348	int err;
349	int ts_state;
350
351	padlock_reset_key(&ctx->cword.encrypt);
352
353	blkcipher_walk_init(&walk, dst, src, nbytes);
354	err = blkcipher_walk_virt(desc, &walk);
355
356	ts_state = irq_ts_save();
357	while ((nbytes = walk.nbytes)) {
358		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
359				   ctx->E, &ctx->cword.encrypt,
360				   nbytes / AES_BLOCK_SIZE);
361		nbytes &= AES_BLOCK_SIZE - 1;
362		err = blkcipher_walk_done(desc, &walk, nbytes);
363	}
364	irq_ts_restore(ts_state);
365
366	padlock_store_cword(&ctx->cword.encrypt);
367
368	return err;
369}
370
371static int ecb_aes_decrypt(struct blkcipher_desc *desc,
372			   struct scatterlist *dst, struct scatterlist *src,
373			   unsigned int nbytes)
374{
375	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
376	struct blkcipher_walk walk;
377	int err;
378	int ts_state;
379
380	padlock_reset_key(&ctx->cword.decrypt);
381
382	blkcipher_walk_init(&walk, dst, src, nbytes);
383	err = blkcipher_walk_virt(desc, &walk);
384
385	ts_state = irq_ts_save();
386	while ((nbytes = walk.nbytes)) {
387		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
388				   ctx->D, &ctx->cword.decrypt,
389				   nbytes / AES_BLOCK_SIZE);
390		nbytes &= AES_BLOCK_SIZE - 1;
391		err = blkcipher_walk_done(desc, &walk, nbytes);
392	}
393	irq_ts_restore(ts_state);
394
395	padlock_store_cword(&ctx->cword.encrypt);
396
397	return err;
398}
399
400static struct crypto_alg ecb_aes_alg = {
401	.cra_name		=	"ecb(aes)",
402	.cra_driver_name	=	"ecb-aes-padlock",
403	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
404	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
405	.cra_blocksize		=	AES_BLOCK_SIZE,
406	.cra_ctxsize		=	sizeof(struct aes_ctx),
407	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
408	.cra_type		=	&crypto_blkcipher_type,
409	.cra_module		=	THIS_MODULE,
410	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
411	.cra_u			=	{
412		.blkcipher = {
413			.min_keysize		=	AES_MIN_KEY_SIZE,
414			.max_keysize		=	AES_MAX_KEY_SIZE,
415			.setkey	   		= 	aes_set_key,
416			.encrypt		=	ecb_aes_encrypt,
417			.decrypt		=	ecb_aes_decrypt,
418		}
419	}
420};
421
422static int cbc_aes_encrypt(struct blkcipher_desc *desc,
423			   struct scatterlist *dst, struct scatterlist *src,
424			   unsigned int nbytes)
425{
426	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
427	struct blkcipher_walk walk;
428	int err;
429	int ts_state;
430
431	padlock_reset_key(&ctx->cword.encrypt);
432
433	blkcipher_walk_init(&walk, dst, src, nbytes);
434	err = blkcipher_walk_virt(desc, &walk);
435
436	ts_state = irq_ts_save();
437	while ((nbytes = walk.nbytes)) {
438		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
439					    walk.dst.virt.addr, ctx->E,
440					    walk.iv, &ctx->cword.encrypt,
441					    nbytes / AES_BLOCK_SIZE);
442		memcpy(walk.iv, iv, AES_BLOCK_SIZE);
443		nbytes &= AES_BLOCK_SIZE - 1;
444		err = blkcipher_walk_done(desc, &walk, nbytes);
445	}
446	irq_ts_restore(ts_state);
447
448	padlock_store_cword(&ctx->cword.decrypt);
449
450	return err;
451}
452
453static int cbc_aes_decrypt(struct blkcipher_desc *desc,
454			   struct scatterlist *dst, struct scatterlist *src,
455			   unsigned int nbytes)
456{
457	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
458	struct blkcipher_walk walk;
459	int err;
460	int ts_state;
461
462	padlock_reset_key(&ctx->cword.encrypt);
463
464	blkcipher_walk_init(&walk, dst, src, nbytes);
465	err = blkcipher_walk_virt(desc, &walk);
466
467	ts_state = irq_ts_save();
468	while ((nbytes = walk.nbytes)) {
469		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
470				   ctx->D, walk.iv, &ctx->cword.decrypt,
471				   nbytes / AES_BLOCK_SIZE);
472		nbytes &= AES_BLOCK_SIZE - 1;
473		err = blkcipher_walk_done(desc, &walk, nbytes);
474	}
475
476	irq_ts_restore(ts_state);
477
478	padlock_store_cword(&ctx->cword.encrypt);
479
480	return err;
481}
482
483static struct crypto_alg cbc_aes_alg = {
484	.cra_name		=	"cbc(aes)",
485	.cra_driver_name	=	"cbc-aes-padlock",
486	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
487	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
488	.cra_blocksize		=	AES_BLOCK_SIZE,
489	.cra_ctxsize		=	sizeof(struct aes_ctx),
490	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
491	.cra_type		=	&crypto_blkcipher_type,
492	.cra_module		=	THIS_MODULE,
493	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
494	.cra_u			=	{
495		.blkcipher = {
496			.min_keysize		=	AES_MIN_KEY_SIZE,
497			.max_keysize		=	AES_MAX_KEY_SIZE,
498			.ivsize			=	AES_BLOCK_SIZE,
499			.setkey	   		= 	aes_set_key,
500			.encrypt		=	cbc_aes_encrypt,
501			.decrypt		=	cbc_aes_decrypt,
502		}
503	}
504};
505
506static int __init padlock_init(void)
507{
508	int ret;
509	struct cpuinfo_x86 *c = &cpu_data(0);
510
511	if (!cpu_has_xcrypt) {
512		printk(KERN_NOTICE PFX "VIA PadLock not detected.\n");
513		return -ENODEV;
514	}
515
516	if (!cpu_has_xcrypt_enabled) {
517		printk(KERN_NOTICE PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
518		return -ENODEV;
519	}
520
521	if ((ret = crypto_register_alg(&aes_alg)))
522		goto aes_err;
523
524	if ((ret = crypto_register_alg(&ecb_aes_alg)))
525		goto ecb_aes_err;
526
527	if ((ret = crypto_register_alg(&cbc_aes_alg)))
528		goto cbc_aes_err;
529
530	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
531
532	if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
533		ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
534		cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
535		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
536	}
537
538out:
539	return ret;
540
541cbc_aes_err:
542	crypto_unregister_alg(&ecb_aes_alg);
543ecb_aes_err:
544	crypto_unregister_alg(&aes_alg);
545aes_err:
546	printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
547	goto out;
548}
549
550static void __exit padlock_fini(void)
551{
552	crypto_unregister_alg(&cbc_aes_alg);
553	crypto_unregister_alg(&ecb_aes_alg);
554	crypto_unregister_alg(&aes_alg);
555}
556
557module_init(padlock_init);
558module_exit(padlock_fini);
559
560MODULE_DESCRIPTION("VIA PadLock AES algorithm support");
561MODULE_LICENSE("GPL");
562MODULE_AUTHOR("Michal Ludvig");
563
564MODULE_ALIAS("aes");