twofish-i586-asm_32.S - arch/x86/crypto/twofish-i586-asm_32.S - Linux diff v3.5.6

 
  1/***************************************************************************
  2*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
  3*                                                                         *
  4*   This program is free software; you can redistribute it and/or modify  *
  5*   it under the terms of the GNU General Public License as published by  *
  6*   the Free Software Foundation; either version 2 of the License, or     *
  7*   (at your option) any later version.                                   *
  8*                                                                         *
  9*   This program is distributed in the hope that it will be useful,       *
 10*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 11*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 12*   GNU General Public License for more details.                          *
 13*                                                                         *
 14*   You should have received a copy of the GNU General Public License     *
 15*   along with this program; if not, write to the                         *
 16*   Free Software Foundation, Inc.,                                       *
 17*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 18***************************************************************************/
 19
 20.file "twofish-i586-asm.S"
 21.text
 22
 
 23#include <asm/asm-offsets.h>
 24
 25/*
 * Byte offsets of the stack parameters relative to %esp at function entry
 * (return address at 0). Both functions push four registers before reading
 * the parameters and therefore add 16 to these offsets.
 */
 26
 27#define in_blk    12  /* input byte array address parameter */
 28#define out_blk   8  /* output byte array address parameter */
 29#define ctx       4  /* Twofish context structure address parameter */
 30
/*
 * Byte offsets of the four 32-bit words a, b, c, d inside a block. The same
 * values are passed to the whitening macros to select the whitening key word.
 */
 31#define a_offset	0
 32#define b_offset	4
 33#define c_offset	8
 34#define d_offset	12
 35
 36/*
 * Structure of the crypto context struct: byte offsets of its members
 * relative to the context address (held in %ebp by the code below).
 * NOTE(review): the values are hard-coded and must match the layout of the
 * C structure, which is not visible in this file - confirm when changing.
 */
 37
 38#define s0	0	/* S0 Array 256 Words each */
 39#define s1	1024	/* S1 Array */
 40#define s2	2048	/* S2 Array */
 41#define s3	3072	/* S3 Array */
 42#define w	4096	/* 8 whitening key words: w..w+12 used by input_whitening, w+16..w+28 by output_whitening */
 43#define k	4128	/* 32 round key words; a round reads the pair at k+round and k+4+round */
 44
 45/*
 * define a few register aliases to allow macro substitution
 *
 * The round macros receive a register "family" (R0..R3) and paste a suffix
 * onto it with ##: D selects the 32-bit register, B its lowest byte and
 * H its second-lowest byte.
 */
 46
 47#define R0D    %eax
 48#define R0B    %al
 49#define R0H    %ah
 50
 51#define R1D    %ebx
 52#define R1B    %bl
 53#define R1H    %bh
 54
 55#define R2D    %ecx
 56#define R2B    %cl
 57#define R2H    %ch
 58
 59#define R3D    %edx
 60#define R3B    %dl
 61#define R3H    %dh
 62
 63
 64/*
 * performs input whitening: xors the register src with the whitening key
 * word at byte offset w+offset of the context addressed by the register
 * context (one of the first four whitening words for offsets 0..12)
 */
 65#define input_whitening(src,context,offset)\
 66	xor	w+offset(context),	src;
 67
 68/*
 * performs output whitening: xors the register src with the whitening key
 * word at byte offset w+16+offset of the context addressed by the register
 * context (one of the last four whitening words for offsets 0..12)
 */
 69#define output_whitening(src,context,offset)\
 70	xor	w+16+offset(context),	src;
 71
 72/*
 73 * a input register containing a (rotated 16)
 74 * b input register containing b
 75 * c input register containing c
 76 * d input register containing d (already rol $1)
 77 * operations on a and b are interleaved to increase performance
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and d is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of the
 * unrotated a and of b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in %esi)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in d)
 * and the macro leaves (32-bit wrapping additions)
 *   c = rol(c ^ (x + y + k[round]), 15)
 *   d = incoming d ^ (x + 2*y + k[round + 4])
 *   a = a with the rotation by 16 undone
 *   b = b rotated right by 31, i.e. rol $1
 * The call sites swap the (a,b) and (c,d) register pairs on every round.
 78 */
 79#define encrypt_round(a,b,c,d,round)\
 80	push	d ## D;			/* save incoming d */\
 81	movzx	b ## B,		%edi;\
 82	mov	s1(%ebp,%edi,4),d ## D;	/* y  = s1[b0] */\
 83	movzx	a ## B,		%edi;\
 84	mov	s2(%ebp,%edi,4),%esi;	/* x  = s2[a2] */\
 85	movzx	b ## H,		%edi;\
 86	ror	$16,		b ## D;	/* bring b2, b3 into the low bytes */\
 87	xor	s2(%ebp,%edi,4),d ## D;	/* y ^= s2[b1] */\
 88	movzx	a ## H,		%edi;\
 89	ror	$16,		a ## D;	/* undo rotation: a0, a1 in the low bytes */\
 90	xor	s3(%ebp,%edi,4),%esi;	/* x ^= s3[a3] */\
 91	movzx	b ## B,		%edi;\
 92	xor	s3(%ebp,%edi,4),d ## D;	/* y ^= s3[b2] */\
 93	movzx	a ## B,		%edi;\
 94	xor	(%ebp,%edi,4),	%esi;	/* x ^= s0[a0] */\
 95	movzx	b ## H,		%edi;\
 96	ror	$15,		b ## D;	/* 16 + 15 = 31: b is now rol $1 */\
 97	xor	(%ebp,%edi,4),	d ## D;	/* y ^= s0[b3] */\
 98	movzx	a ## H,		%edi;\
 99	xor	s1(%ebp,%edi,4),%esi;	/* x ^= s1[a1] */\
100	pop	%edi;			/* %edi = incoming d */\
101	add	d ## D,		%esi;	/* %esi = x + y */\
102	add	%esi,		d ## D;	/* d = x + 2*y */\
103	add	k+round(%ebp),	%esi;\
104	xor	%esi,		c ## D;\
105	rol	$15,		c ## D;\
106	add	k+4+round(%ebp),d ## D;\
107	xor	%edi,		d ## D;
108
109/*
110 * a input register containing a (rotated 16)
111 * b input register containing b
112 * c input register containing c
113 * d input register containing d (already rol $1)
114 * operations on a and b are interleaved to increase performance
115 * last round has different rotations for the output preparation
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and d is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of the
 * unrotated a and of b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in %esi)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in d)
 * and the macro leaves (32-bit wrapping additions)
 *   c = ror(c ^ (x + y + k[round]), 1)
 *   d = incoming d ^ (x + 2*y + k[round + 4])
 *   a = a with the rotation by 16 undone
 *   b = unchanged (rotated by 16 twice)
 * i.e. no register is left in a rotated form for a following round.
116 */
117#define encrypt_last_round(a,b,c,d,round)\
118	push	d ## D;			/* save incoming d */\
119	movzx	b ## B,		%edi;\
120	mov	s1(%ebp,%edi,4),d ## D;	/* y  = s1[b0] */\
121	movzx	a ## B,		%edi;\
122	mov	s2(%ebp,%edi,4),%esi;	/* x  = s2[a2] */\
123	movzx	b ## H,		%edi;\
124	ror	$16,		b ## D;	/* bring b2, b3 into the low bytes */\
125	xor	s2(%ebp,%edi,4),d ## D;	/* y ^= s2[b1] */\
126	movzx	a ## H,		%edi;\
127	ror	$16,		a ## D;	/* undo rotation: a0, a1 in the low bytes */\
128	xor	s3(%ebp,%edi,4),%esi;	/* x ^= s3[a3] */\
129	movzx	b ## B,		%edi;\
130	xor	s3(%ebp,%edi,4),d ## D;	/* y ^= s3[b2] */\
131	movzx	a ## B,		%edi;\
132	xor	(%ebp,%edi,4),	%esi;	/* x ^= s0[a0] */\
133	movzx	b ## H,		%edi;\
134	ror	$16,		b ## D;	/* second rotation by 16: b is restored */\
135	xor	(%ebp,%edi,4),	d ## D;	/* y ^= s0[b3] */\
136	movzx	a ## H,		%edi;\
137	xor	s1(%ebp,%edi,4),%esi;	/* x ^= s1[a1] */\
138	pop	%edi;			/* %edi = incoming d */\
139	add	d ## D,		%esi;	/* %esi = x + y */\
140	add	%esi,		d ## D;	/* d = x + 2*y */\
141	add	k+round(%ebp),	%esi;\
142	xor	%esi,		c ## D;\
143	ror	$1,		c ## D;\
144	add	k+4+round(%ebp),d ## D;\
145	xor	%edi,		d ## D;
146
147/*
148 * a input register containing a
149 * b input register containing b (rotated 16)
150 * c input register containing c (already rol $1)
151 * d input register containing d
152 * operations on a and b are interleaved to increase performance
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and c is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of a and of the
 * unrotated b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in c)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in %esi)
 * and the macro leaves (32-bit wrapping additions)
 *   c = incoming c ^ (x + y + k[round])
 *   d = rol(d ^ (x + 2*y + k[round + 4]), 15)
 *   a = a rotated right by 31, i.e. rol $1
 *   b = b with the rotation by 16 undone
 * The call sites swap the (a,b) and (c,d) register pairs on every round.
153 */
154#define decrypt_round(a,b,c,d,round)\
155	push	c ## D;			/* save incoming c */\
156	movzx	a ## B,		%edi;\
157	mov	(%ebp,%edi,4),	c ## D;	/* x  = s0[a0] */\
158	movzx	b ## B,		%edi;\
159	mov	s3(%ebp,%edi,4),%esi;	/* y  = s3[b2] */\
160	movzx	a ## H,		%edi;\
161	ror	$16,		a ## D;	/* bring a2, a3 into the low bytes */\
162	xor	s1(%ebp,%edi,4),c ## D;	/* x ^= s1[a1] */\
163	movzx	b ## H,		%edi;\
164	ror	$16,		b ## D;	/* undo rotation: b0, b1 in the low bytes */\
165	xor	(%ebp,%edi,4),	%esi;	/* y ^= s0[b3] */\
166	movzx	a ## B,		%edi;\
167	xor	s2(%ebp,%edi,4),c ## D;	/* x ^= s2[a2] */\
168	movzx	b ## B,		%edi;\
169	xor	s1(%ebp,%edi,4),%esi;	/* y ^= s1[b0] */\
170	movzx	a ## H,		%edi;\
171	ror	$15,		a ## D;	/* 16 + 15 = 31: a is now rol $1 */\
172	xor	s3(%ebp,%edi,4),c ## D;	/* x ^= s3[a3] */\
173	movzx	b ## H,		%edi;\
174	xor	s2(%ebp,%edi,4),%esi;	/* y ^= s2[b1] */\
175	pop	%edi;			/* %edi = incoming c */\
176	add	%esi,		c ## D;	/* c = x + y */\
177	add	c ## D,		%esi;	/* %esi = x + 2*y */\
178	add	k+round(%ebp),	c ## D;\
179	xor	%edi,		c ## D;\
180	add	k+4+round(%ebp),%esi;\
181	xor	%esi,		d ## D;\
182	rol	$15,		d ## D;
183
184/*
185 * a input register containing a
186 * b input register containing b (rotated 16)
187 * c input register containing c (already rol $1)
188 * d input register containing d
189 * operations on a and b are interleaved to increase performance
190 * last round has different rotations for the output preparation
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and c is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of a and of the
 * unrotated b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in c)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in %esi)
 * and the macro leaves (32-bit wrapping additions)
 *   c = incoming c ^ (x + y + k[round])
 *   d = ror(d ^ (x + 2*y + k[round + 4]), 1)
 *   a = unchanged (rotated by 16 twice)
 *   b = b with the rotation by 16 undone
 * i.e. no register is left in a rotated form for a following round.
191 */
192#define decrypt_last_round(a,b,c,d,round)\
193	push	c ## D;			/* save incoming c */\
194	movzx	a ## B,		%edi;\
195	mov	(%ebp,%edi,4),	c ## D;	/* x  = s0[a0] */\
196	movzx	b ## B,		%edi;\
197	mov	s3(%ebp,%edi,4),%esi;	/* y  = s3[b2] */\
198	movzx	a ## H,		%edi;\
199	ror	$16,		a ## D;	/* bring a2, a3 into the low bytes */\
200	xor	s1(%ebp,%edi,4),c ## D;	/* x ^= s1[a1] */\
201	movzx	b ## H,		%edi;\
202	ror	$16,		b ## D;	/* undo rotation: b0, b1 in the low bytes */\
203	xor	(%ebp,%edi,4),	%esi;	/* y ^= s0[b3] */\
204	movzx	a ## B,		%edi;\
205	xor	s2(%ebp,%edi,4),c ## D;	/* x ^= s2[a2] */\
206	movzx	b ## B,		%edi;\
207	xor	s1(%ebp,%edi,4),%esi;	/* y ^= s1[b0] */\
208	movzx	a ## H,		%edi;\
209	ror	$16,		a ## D;	/* second rotation by 16: a is restored */\
210	xor	s3(%ebp,%edi,4),c ## D;	/* x ^= s3[a3] */\
211	movzx	b ## H,		%edi;\
212	xor	s2(%ebp,%edi,4),%esi;	/* y ^= s2[b1] */\
213	pop	%edi;			/* %edi = incoming c */\
214	add	%esi,		c ## D;	/* c = x + y */\
215	add	c ## D,		%esi;	/* %esi = x + 2*y */\
216	add	k+round(%ebp),	c ## D;\
217	xor	%edi,		c ## D;\
218	add	k+4+round(%ebp),%esi;\
219	xor	%esi,		d ## D;\
220	ror	$1,		d ## D;
221
222.align 4
223.global twofish_enc_blk
224.global twofish_dec_blk
225
/*
 * twofish_enc_blk - encrypt one 16-byte block
 *
 * Stack parameters (offsets from %esp on entry): ctx at 4, out_blk at 8,
 * in_blk at 12.
 * Reads four 32-bit words from in_blk, xors them with the first four
 * whitening words, runs 15 encrypt_round and one encrypt_last_round, xors
 * the result with the last four whitening words and writes four 32-bit
 * words to out_blk.
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * clobbered; %eax is 1 on return.
 */
226twofish_enc_blk:
227	push	%ebp			/* save registers according to calling convention*/
228	push    %ebx
229	push    %esi
230	push    %edi
231
	/* four registers pushed: stack parameters are now 16 bytes further up */
232	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
233					 * pointer to the ctx address */
234	mov     in_blk+16(%esp),%edi	/* input address in edi */
235
	/* load the block: a = %eax, b = %ebx, c = %ecx, d = %edx */
236	mov	(%edi),		%eax
237	mov	b_offset(%edi),	%ebx
238	mov	c_offset(%edi),	%ecx
239	mov	d_offset(%edi),	%edx
240	input_whitening(%eax,%ebp,a_offset)
241	ror	$16,	%eax		/* encrypt_round expects a rotated by 16 */
242	input_whitening(%ebx,%ebp,b_offset)
243	input_whitening(%ecx,%ebp,c_offset)
244	input_whitening(%edx,%ebp,d_offset)
245	rol	$1,	%edx		/* encrypt_round expects d already rol $1 */
246
	/*
	 * 16 rounds; the (a,b) and (c,d) register pairs trade places on every
	 * round, the last argument is the byte offset of the round's key pair
	 */
247	encrypt_round(R0,R1,R2,R3,0);
248	encrypt_round(R2,R3,R0,R1,8);
249	encrypt_round(R0,R1,R2,R3,2*8);
250	encrypt_round(R2,R3,R0,R1,3*8);
251	encrypt_round(R0,R1,R2,R3,4*8);
252	encrypt_round(R2,R3,R0,R1,5*8);
253	encrypt_round(R0,R1,R2,R3,6*8);
254	encrypt_round(R2,R3,R0,R1,7*8);
255	encrypt_round(R0,R1,R2,R3,8*8);
256	encrypt_round(R2,R3,R0,R1,9*8);
257	encrypt_round(R0,R1,R2,R3,10*8);
258	encrypt_round(R2,R3,R0,R1,11*8);
259	encrypt_round(R0,R1,R2,R3,12*8);
260	encrypt_round(R2,R3,R0,R1,13*8);
261	encrypt_round(R0,R1,R2,R3,14*8);
262	encrypt_last_round(R2,R3,R0,R1,15*8);
263
	/*
	 * %eax/%ebx are whitened and stored as output words c/d,
	 * %ecx/%edx as output words a/b
	 */
264	output_whitening(%eax,%ebp,c_offset)
265	output_whitening(%ebx,%ebp,d_offset)
266	output_whitening(%ecx,%ebp,a_offset)
267	output_whitening(%edx,%ebp,b_offset)
268	mov	out_blk+16(%esp),%edi;
269	mov	%eax,		c_offset(%edi)
270	mov	%ebx,		d_offset(%edi)
271	mov	%ecx,		(%edi)
272	mov	%edx,		b_offset(%edi)
273
	/* restore the saved registers in reverse push order */
274	pop	%edi
275	pop	%esi
276	pop	%ebx
277	pop	%ebp
278	mov	$1,	%eax		/* always returns 1; NOTE(review): meaning to callers not visible here */
279	ret
 
280
/*
 * twofish_dec_blk - decrypt one 16-byte block
 *
 * Stack parameters (offsets from %esp on entry): ctx at 4, out_blk at 8,
 * in_blk at 12.
 * Reads four 32-bit words from in_blk, xors them with the last four
 * whitening words, runs 15 decrypt_round and one decrypt_last_round with
 * the round keys in descending order, xors the result with the first four
 * whitening words and writes four 32-bit words to out_blk.
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * clobbered; %eax is 1 on return.
 */
281twofish_dec_blk:
282	push	%ebp			/* save registers according to calling convention*/
283	push    %ebx
284	push    %esi
285	push    %edi
286
287
	/* four registers pushed: stack parameters are now 16 bytes further up */
288	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
289					 * pointer to the ctx address */
290	mov     in_blk+16(%esp),%edi	/* input address in edi */
291
	/* load the block: a = %eax, b = %ebx, c = %ecx, d = %edx */
292	mov	(%edi),		%eax
293	mov	b_offset(%edi),	%ebx
294	mov	c_offset(%edi),	%ecx
295	mov	d_offset(%edi),	%edx
296	output_whitening(%eax,%ebp,a_offset)
297	output_whitening(%ebx,%ebp,b_offset)
298	ror	$16,	%ebx		/* decrypt_round expects b rotated by 16 */
299	output_whitening(%ecx,%ebp,c_offset)
300	output_whitening(%edx,%ebp,d_offset)
301	rol	$1,	%ecx		/* decrypt_round expects c already rol $1 */
302
	/*
	 * 16 rounds, round keys in descending order; the (a,b) and (c,d)
	 * register pairs trade places on every round
	 */
303	decrypt_round(R0,R1,R2,R3,15*8);
304	decrypt_round(R2,R3,R0,R1,14*8);
305	decrypt_round(R0,R1,R2,R3,13*8);
306	decrypt_round(R2,R3,R0,R1,12*8);
307	decrypt_round(R0,R1,R2,R3,11*8);
308	decrypt_round(R2,R3,R0,R1,10*8);
309	decrypt_round(R0,R1,R2,R3,9*8);
310	decrypt_round(R2,R3,R0,R1,8*8);
311	decrypt_round(R0,R1,R2,R3,7*8);
312	decrypt_round(R2,R3,R0,R1,6*8);
313	decrypt_round(R0,R1,R2,R3,5*8);
314	decrypt_round(R2,R3,R0,R1,4*8);
315	decrypt_round(R0,R1,R2,R3,3*8);
316	decrypt_round(R2,R3,R0,R1,2*8);
317	decrypt_round(R0,R1,R2,R3,1*8);
318	decrypt_last_round(R2,R3,R0,R1,0);
319
	/*
	 * %eax/%ebx are whitened and stored as output words c/d,
	 * %ecx/%edx as output words a/b
	 */
320	input_whitening(%eax,%ebp,c_offset)
321	input_whitening(%ebx,%ebp,d_offset)
322	input_whitening(%ecx,%ebp,a_offset)
323	input_whitening(%edx,%ebp,b_offset)
324	mov	out_blk+16(%esp),%edi;
325	mov	%eax,		c_offset(%edi)
326	mov	%ebx,		d_offset(%edi)
327	mov	%ecx,		(%edi)
328	mov	%edx,		b_offset(%edi)
329
	/* restore the saved registers in reverse push order */
330	pop	%edi
331	pop	%esi
332	pop	%ebx
333	pop	%ebp
334	mov	$1,	%eax		/* always returns 1; NOTE(review): meaning to callers not visible here */
335	ret

  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/***************************************************************************
  3*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
  4*                                                                         *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  5***************************************************************************/
  6
  7.file "twofish-i586-asm.S"
  8.text
  9
 10#include <linux/linkage.h>
 11#include <asm/asm-offsets.h>
 12
 13/*
 * Byte offsets of the stack parameters relative to %esp at function entry
 * (return address at 0). Both functions push four registers before reading
 * the parameters and therefore add 16 to these offsets.
 */
 14
 15#define in_blk    12  /* input byte array address parameter */
 16#define out_blk   8  /* output byte array address parameter */
 17#define ctx       4  /* Twofish context structure address parameter */
 18
/*
 * Byte offsets of the four 32-bit words a, b, c, d inside a block. The same
 * values are passed to the whitening macros to select the whitening key word.
 */
 19#define a_offset	0
 20#define b_offset	4
 21#define c_offset	8
 22#define d_offset	12
 23
 24/*
 * Structure of the crypto context struct: byte offsets of its members
 * relative to the context address (held in %ebp by the code below).
 * NOTE(review): the values are hard-coded and must match the layout of the
 * C structure, which is not visible in this file - confirm when changing.
 */
 25
 26#define s0	0	/* S0 Array 256 Words each */
 27#define s1	1024	/* S1 Array */
 28#define s2	2048	/* S2 Array */
 29#define s3	3072	/* S3 Array */
 30#define w	4096	/* 8 whitening key words: w..w+12 used by input_whitening, w+16..w+28 by output_whitening */
 31#define k	4128	/* 32 round key words; a round reads the pair at k+round and k+4+round */
 32
 33/*
 * define a few register aliases to allow macro substitution
 *
 * The round macros receive a register "family" (R0..R3) and paste a suffix
 * onto it with ##: D selects the 32-bit register, B its lowest byte and
 * H its second-lowest byte.
 */
 34
 35#define R0D    %eax
 36#define R0B    %al
 37#define R0H    %ah
 38
 39#define R1D    %ebx
 40#define R1B    %bl
 41#define R1H    %bh
 42
 43#define R2D    %ecx
 44#define R2B    %cl
 45#define R2H    %ch
 46
 47#define R3D    %edx
 48#define R3B    %dl
 49#define R3H    %dh
 50
 51
 52/*
 * performs input whitening: xors the register src with the whitening key
 * word at byte offset w+offset of the context addressed by the register
 * context (one of the first four whitening words for offsets 0..12)
 */
 53#define input_whitening(src,context,offset)\
 54	xor	w+offset(context),	src;
 55
 56/*
 * performs output whitening: xors the register src with the whitening key
 * word at byte offset w+16+offset of the context addressed by the register
 * context (one of the last four whitening words for offsets 0..12)
 */
 57#define output_whitening(src,context,offset)\
 58	xor	w+16+offset(context),	src;
 59
 60/*
 61 * a input register containing a (rotated 16)
 62 * b input register containing b
 63 * c input register containing c
 64 * d input register containing d (already rol $1)
 65 * operations on a and b are interleaved to increase performance
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and d is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of the
 * unrotated a and of b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in %esi)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in d)
 * and the macro leaves (32-bit wrapping additions)
 *   c = rol(c ^ (x + y + k[round]), 15)
 *   d = incoming d ^ (x + 2*y + k[round + 4])
 *   a = a with the rotation by 16 undone
 *   b = b rotated right by 31, i.e. rol $1
 * The call sites swap the (a,b) and (c,d) register pairs on every round.
 66 */
 67#define encrypt_round(a,b,c,d,round)\
 68	push	d ## D;			/* save incoming d */\
 69	movzx	b ## B,		%edi;\
 70	mov	s1(%ebp,%edi,4),d ## D;	/* y  = s1[b0] */\
 71	movzx	a ## B,		%edi;\
 72	mov	s2(%ebp,%edi,4),%esi;	/* x  = s2[a2] */\
 73	movzx	b ## H,		%edi;\
 74	ror	$16,		b ## D;	/* bring b2, b3 into the low bytes */\
 75	xor	s2(%ebp,%edi,4),d ## D;	/* y ^= s2[b1] */\
 76	movzx	a ## H,		%edi;\
 77	ror	$16,		a ## D;	/* undo rotation: a0, a1 in the low bytes */\
 78	xor	s3(%ebp,%edi,4),%esi;	/* x ^= s3[a3] */\
 79	movzx	b ## B,		%edi;\
 80	xor	s3(%ebp,%edi,4),d ## D;	/* y ^= s3[b2] */\
 81	movzx	a ## B,		%edi;\
 82	xor	(%ebp,%edi,4),	%esi;	/* x ^= s0[a0] */\
 83	movzx	b ## H,		%edi;\
 84	ror	$15,		b ## D;	/* 16 + 15 = 31: b is now rol $1 */\
 85	xor	(%ebp,%edi,4),	d ## D;	/* y ^= s0[b3] */\
 86	movzx	a ## H,		%edi;\
 87	xor	s1(%ebp,%edi,4),%esi;	/* x ^= s1[a1] */\
 88	pop	%edi;			/* %edi = incoming d */\
 89	add	d ## D,		%esi;	/* %esi = x + y */\
 90	add	%esi,		d ## D;	/* d = x + 2*y */\
 91	add	k+round(%ebp),	%esi;\
 92	xor	%esi,		c ## D;\
 93	rol	$15,		c ## D;\
 94	add	k+4+round(%ebp),d ## D;\
 95	xor	%edi,		d ## D;
 96
 97/*
 98 * a input register containing a (rotated 16)
 99 * b input register containing b
100 * c input register containing c
101 * d input register containing d (already rol $1)
102 * operations on a and b are interleaved to increase performance
103 * last round has different rotations for the output preparation
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and d is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of the
 * unrotated a and of b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in %esi)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in d)
 * and the macro leaves (32-bit wrapping additions)
 *   c = ror(c ^ (x + y + k[round]), 1)
 *   d = incoming d ^ (x + 2*y + k[round + 4])
 *   a = a with the rotation by 16 undone
 *   b = unchanged (rotated by 16 twice)
 * i.e. no register is left in a rotated form for a following round.
104 */
105#define encrypt_last_round(a,b,c,d,round)\
106	push	d ## D;			/* save incoming d */\
107	movzx	b ## B,		%edi;\
108	mov	s1(%ebp,%edi,4),d ## D;	/* y  = s1[b0] */\
109	movzx	a ## B,		%edi;\
110	mov	s2(%ebp,%edi,4),%esi;	/* x  = s2[a2] */\
111	movzx	b ## H,		%edi;\
112	ror	$16,		b ## D;	/* bring b2, b3 into the low bytes */\
113	xor	s2(%ebp,%edi,4),d ## D;	/* y ^= s2[b1] */\
114	movzx	a ## H,		%edi;\
115	ror	$16,		a ## D;	/* undo rotation: a0, a1 in the low bytes */\
116	xor	s3(%ebp,%edi,4),%esi;	/* x ^= s3[a3] */\
117	movzx	b ## B,		%edi;\
118	xor	s3(%ebp,%edi,4),d ## D;	/* y ^= s3[b2] */\
119	movzx	a ## B,		%edi;\
120	xor	(%ebp,%edi,4),	%esi;	/* x ^= s0[a0] */\
121	movzx	b ## H,		%edi;\
122	ror	$16,		b ## D;	/* second rotation by 16: b is restored */\
123	xor	(%ebp,%edi,4),	d ## D;	/* y ^= s0[b3] */\
124	movzx	a ## H,		%edi;\
125	xor	s1(%ebp,%edi,4),%esi;	/* x ^= s1[a1] */\
126	pop	%edi;			/* %edi = incoming d */\
127	add	d ## D,		%esi;	/* %esi = x + y */\
128	add	%esi,		d ## D;	/* d = x + 2*y */\
129	add	k+round(%ebp),	%esi;\
130	xor	%esi,		c ## D;\
131	ror	$1,		c ## D;\
132	add	k+4+round(%ebp),d ## D;\
133	xor	%edi,		d ## D;
134
135/*
136 * a input register containing a
137 * b input register containing b (rotated 16)
138 * c input register containing c (already rol $1)
139 * d input register containing d
140 * operations on a and b are interleaved to increase performance
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and c is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of a and of the
 * unrotated b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in c)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in %esi)
 * and the macro leaves (32-bit wrapping additions)
 *   c = incoming c ^ (x + y + k[round])
 *   d = rol(d ^ (x + 2*y + k[round + 4]), 15)
 *   a = a rotated right by 31, i.e. rol $1
 *   b = b with the rotation by 16 undone
 * The call sites swap the (a,b) and (c,d) register pairs on every round.
141 */
142#define decrypt_round(a,b,c,d,round)\
143	push	c ## D;			/* save incoming c */\
144	movzx	a ## B,		%edi;\
145	mov	(%ebp,%edi,4),	c ## D;	/* x  = s0[a0] */\
146	movzx	b ## B,		%edi;\
147	mov	s3(%ebp,%edi,4),%esi;	/* y  = s3[b2] */\
148	movzx	a ## H,		%edi;\
149	ror	$16,		a ## D;	/* bring a2, a3 into the low bytes */\
150	xor	s1(%ebp,%edi,4),c ## D;	/* x ^= s1[a1] */\
151	movzx	b ## H,		%edi;\
152	ror	$16,		b ## D;	/* undo rotation: b0, b1 in the low bytes */\
153	xor	(%ebp,%edi,4),	%esi;	/* y ^= s0[b3] */\
154	movzx	a ## B,		%edi;\
155	xor	s2(%ebp,%edi,4),c ## D;	/* x ^= s2[a2] */\
156	movzx	b ## B,		%edi;\
157	xor	s1(%ebp,%edi,4),%esi;	/* y ^= s1[b0] */\
158	movzx	a ## H,		%edi;\
159	ror	$15,		a ## D;	/* 16 + 15 = 31: a is now rol $1 */\
160	xor	s3(%ebp,%edi,4),c ## D;	/* x ^= s3[a3] */\
161	movzx	b ## H,		%edi;\
162	xor	s2(%ebp,%edi,4),%esi;	/* y ^= s2[b1] */\
163	pop	%edi;			/* %edi = incoming c */\
164	add	%esi,		c ## D;	/* c = x + y */\
165	add	c ## D,		%esi;	/* %esi = x + 2*y */\
166	add	k+round(%ebp),	c ## D;\
167	xor	%edi,		c ## D;\
168	add	k+4+round(%ebp),%esi;\
169	xor	%esi,		d ## D;\
170	rol	$15,		d ## D;
171
172/*
173 * a input register containing a
174 * b input register containing b (rotated 16)
175 * c input register containing c (already rol $1)
176 * d input register containing d
177 * operations on a and b are interleaved to increase performance
178 * last round has different rotations for the output preparation
 *
 * a..d are register families (R0..R3); round is the byte offset, relative
 * to k, of the two round key words used. The context address must be in
 * %ebp. %esi and %edi are scratch and c is temporarily pushed on the stack.
 *
 * With a0..a3 / b0..b3 the bytes (least significant first) of a and of the
 * unrotated b, the table lookups compute
 *   x = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]	(in c)
 *   y = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]	(in %esi)
 * and the macro leaves (32-bit wrapping additions)
 *   c = incoming c ^ (x + y + k[round])
 *   d = ror(d ^ (x + 2*y + k[round + 4]), 1)
 *   a = unchanged (rotated by 16 twice)
 *   b = b with the rotation by 16 undone
 * i.e. no register is left in a rotated form for a following round.
179 */
180#define decrypt_last_round(a,b,c,d,round)\
181	push	c ## D;			/* save incoming c */\
182	movzx	a ## B,		%edi;\
183	mov	(%ebp,%edi,4),	c ## D;	/* x  = s0[a0] */\
184	movzx	b ## B,		%edi;\
185	mov	s3(%ebp,%edi,4),%esi;	/* y  = s3[b2] */\
186	movzx	a ## H,		%edi;\
187	ror	$16,		a ## D;	/* bring a2, a3 into the low bytes */\
188	xor	s1(%ebp,%edi,4),c ## D;	/* x ^= s1[a1] */\
189	movzx	b ## H,		%edi;\
190	ror	$16,		b ## D;	/* undo rotation: b0, b1 in the low bytes */\
191	xor	(%ebp,%edi,4),	%esi;	/* y ^= s0[b3] */\
192	movzx	a ## B,		%edi;\
193	xor	s2(%ebp,%edi,4),c ## D;	/* x ^= s2[a2] */\
194	movzx	b ## B,		%edi;\
195	xor	s1(%ebp,%edi,4),%esi;	/* y ^= s1[b0] */\
196	movzx	a ## H,		%edi;\
197	ror	$16,		a ## D;	/* second rotation by 16: a is restored */\
198	xor	s3(%ebp,%edi,4),c ## D;	/* x ^= s3[a3] */\
199	movzx	b ## H,		%edi;\
200	xor	s2(%ebp,%edi,4),%esi;	/* y ^= s2[b1] */\
201	pop	%edi;			/* %edi = incoming c */\
202	add	%esi,		c ## D;	/* c = x + y */\
203	add	c ## D,		%esi;	/* %esi = x + 2*y */\
204	add	k+round(%ebp),	c ## D;\
205	xor	%edi,		c ## D;\
206	add	k+4+round(%ebp),%esi;\
207	xor	%esi,		d ## D;\
208	ror	$1,		d ## D;
209
/*
 * twofish_enc_blk - encrypt one 16-byte block
 *
 * Stack parameters (offsets from %esp on entry): ctx at 4, out_blk at 8,
 * in_blk at 12.
 * Reads four 32-bit words from in_blk, xors them with the first four
 * whitening words, runs 15 encrypt_round and one encrypt_last_round, xors
 * the result with the last four whitening words and writes four 32-bit
 * words to out_blk.
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * clobbered; %eax is 1 on return.
 * NOTE(review): SYM_FUNC_START/SYM_FUNC_END are not defined in this file;
 * presumably they come from <linux/linkage.h> and mark the bounds of the
 * exported function symbol - confirm there.
 */
210SYM_FUNC_START(twofish_enc_blk)
 
 
 
 
211	push	%ebp			/* save registers according to calling convention*/
212	push    %ebx
213	push    %esi
214	push    %edi
215
	/* four registers pushed: stack parameters are now 16 bytes further up */
216	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
217					 * pointer to the ctx address */
218	mov     in_blk+16(%esp),%edi	/* input address in edi */
219
	/* load the block: a = %eax, b = %ebx, c = %ecx, d = %edx */
220	mov	(%edi),		%eax
221	mov	b_offset(%edi),	%ebx
222	mov	c_offset(%edi),	%ecx
223	mov	d_offset(%edi),	%edx
224	input_whitening(%eax,%ebp,a_offset)
225	ror	$16,	%eax		/* encrypt_round expects a rotated by 16 */
226	input_whitening(%ebx,%ebp,b_offset)
227	input_whitening(%ecx,%ebp,c_offset)
228	input_whitening(%edx,%ebp,d_offset)
229	rol	$1,	%edx		/* encrypt_round expects d already rol $1 */
230
	/*
	 * 16 rounds; the (a,b) and (c,d) register pairs trade places on every
	 * round, the last argument is the byte offset of the round's key pair
	 */
231	encrypt_round(R0,R1,R2,R3,0);
232	encrypt_round(R2,R3,R0,R1,8);
233	encrypt_round(R0,R1,R2,R3,2*8);
234	encrypt_round(R2,R3,R0,R1,3*8);
235	encrypt_round(R0,R1,R2,R3,4*8);
236	encrypt_round(R2,R3,R0,R1,5*8);
237	encrypt_round(R0,R1,R2,R3,6*8);
238	encrypt_round(R2,R3,R0,R1,7*8);
239	encrypt_round(R0,R1,R2,R3,8*8);
240	encrypt_round(R2,R3,R0,R1,9*8);
241	encrypt_round(R0,R1,R2,R3,10*8);
242	encrypt_round(R2,R3,R0,R1,11*8);
243	encrypt_round(R0,R1,R2,R3,12*8);
244	encrypt_round(R2,R3,R0,R1,13*8);
245	encrypt_round(R0,R1,R2,R3,14*8);
246	encrypt_last_round(R2,R3,R0,R1,15*8);
247
	/*
	 * %eax/%ebx are whitened and stored as output words c/d,
	 * %ecx/%edx as output words a/b
	 */
248	output_whitening(%eax,%ebp,c_offset)
249	output_whitening(%ebx,%ebp,d_offset)
250	output_whitening(%ecx,%ebp,a_offset)
251	output_whitening(%edx,%ebp,b_offset)
252	mov	out_blk+16(%esp),%edi;
253	mov	%eax,		c_offset(%edi)
254	mov	%ebx,		d_offset(%edi)
255	mov	%ecx,		(%edi)
256	mov	%edx,		b_offset(%edi)
257
	/* restore the saved registers in reverse push order */
258	pop	%edi
259	pop	%esi
260	pop	%ebx
261	pop	%ebp
262	mov	$1,	%eax		/* always returns 1; NOTE(review): meaning to callers not visible here */
263	ret
264SYM_FUNC_END(twofish_enc_blk)
265
/*
 * twofish_dec_blk - decrypt one 16-byte block
 *
 * Stack parameters (offsets from %esp on entry): ctx at 4, out_blk at 8,
 * in_blk at 12.
 * Reads four 32-bit words from in_blk, xors them with the last four
 * whitening words, runs 15 decrypt_round and one decrypt_last_round with
 * the round keys in descending order, xors the result with the first four
 * whitening words and writes four 32-bit words to out_blk.
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * clobbered; %eax is 1 on return.
 * NOTE(review): SYM_FUNC_START/SYM_FUNC_END are not defined in this file;
 * presumably they come from <linux/linkage.h> and mark the bounds of the
 * exported function symbol - confirm there.
 */
266SYM_FUNC_START(twofish_dec_blk)
267	push	%ebp			/* save registers according to calling convention*/
268	push    %ebx
269	push    %esi
270	push    %edi
271
272
	/* four registers pushed: stack parameters are now 16 bytes further up */
273	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
274					 * pointer to the ctx address */
275	mov     in_blk+16(%esp),%edi	/* input address in edi */
276
	/* load the block: a = %eax, b = %ebx, c = %ecx, d = %edx */
277	mov	(%edi),		%eax
278	mov	b_offset(%edi),	%ebx
279	mov	c_offset(%edi),	%ecx
280	mov	d_offset(%edi),	%edx
281	output_whitening(%eax,%ebp,a_offset)
282	output_whitening(%ebx,%ebp,b_offset)
283	ror	$16,	%ebx		/* decrypt_round expects b rotated by 16 */
284	output_whitening(%ecx,%ebp,c_offset)
285	output_whitening(%edx,%ebp,d_offset)
286	rol	$1,	%ecx		/* decrypt_round expects c already rol $1 */
287
	/*
	 * 16 rounds, round keys in descending order; the (a,b) and (c,d)
	 * register pairs trade places on every round
	 */
288	decrypt_round(R0,R1,R2,R3,15*8);
289	decrypt_round(R2,R3,R0,R1,14*8);
290	decrypt_round(R0,R1,R2,R3,13*8);
291	decrypt_round(R2,R3,R0,R1,12*8);
292	decrypt_round(R0,R1,R2,R3,11*8);
293	decrypt_round(R2,R3,R0,R1,10*8);
294	decrypt_round(R0,R1,R2,R3,9*8);
295	decrypt_round(R2,R3,R0,R1,8*8);
296	decrypt_round(R0,R1,R2,R3,7*8);
297	decrypt_round(R2,R3,R0,R1,6*8);
298	decrypt_round(R0,R1,R2,R3,5*8);
299	decrypt_round(R2,R3,R0,R1,4*8);
300	decrypt_round(R0,R1,R2,R3,3*8);
301	decrypt_round(R2,R3,R0,R1,2*8);
302	decrypt_round(R0,R1,R2,R3,1*8);
303	decrypt_last_round(R2,R3,R0,R1,0);
304
	/*
	 * %eax/%ebx are whitened and stored as output words c/d,
	 * %ecx/%edx as output words a/b
	 */
305	input_whitening(%eax,%ebp,c_offset)
306	input_whitening(%ebx,%ebp,d_offset)
307	input_whitening(%ecx,%ebp,a_offset)
308	input_whitening(%edx,%ebp,b_offset)
309	mov	out_blk+16(%esp),%edi;
310	mov	%eax,		c_offset(%edi)
311	mov	%ebx,		d_offset(%edi)
312	mov	%ecx,		(%edi)
313	mov	%edx,		b_offset(%edi)
314
	/* restore the saved registers in reverse push order */
315	pop	%edi
316	pop	%esi
317	pop	%ebx
318	pop	%ebp
319	mov	$1,	%eax		/* always returns 1; NOTE(review): meaning to callers not visible here */
320	ret
321SYM_FUNC_END(twofish_dec_blk)