# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.

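# This is the 32-bit x86 Salsa20 implementation that shipped in older
# Linux kernels as arch/x86/crypto/salsa20-i586-asm_32.S (the file has
# since been removed from the tree). The kernel's C glue code declared
# the three entry points below roughly as follows (a sketch from the
# old salsa20_glue.c, not part of this file):
#
#	asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx,
#					 const u8 *k, u32 keysize);
#	asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx,
#					const u8 *iv);
#	asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
#					      const u8 *src, u8 *dst,
#					      u32 bytes);
#
# x (arg1 everywhere below) points at the 16-word Salsa20 state matrix.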
#include <linux/linkage.h>

.text

# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
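	# The four instructions above reserve scratch space and align the
	# stack: eax = 256 + (esp mod 32), so the new esp is 32-byte
	# aligned with at least 256 bytes free. Keeping the adjustment in
	# eax lets the arguments be read at 4(%esp,%eax), 8(%esp,%eax),
	# ... and lets "add %eax,%esp" undo the whole thing on exit.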
	# eax_stack = eax
	movl	%eax,80(%esp)
	# ebx_stack = ebx
	movl	%ebx,84(%esp)
	# esi_stack = esi
	movl	%esi,88(%esp)
	# edi_stack = edi
	movl	%edi,92(%esp)
	# ebp_stack = ebp
	movl	%ebp,96(%esp)
	# x = arg1
	movl	4(%esp,%eax),%edx
	# m = arg2
	movl	8(%esp,%eax),%esi
	# out = arg3
	movl	12(%esp,%eax),%edi
	# bytes = arg4
	movl	16(%esp,%eax),%ebx
	# bytes -= 0
	sub	$0,%ebx
	# goto done if unsigned<=
	jbe	._done
._start:
	# in0 = *(uint32 *) (x + 0)
	movl	0(%edx),%eax
	# in1 = *(uint32 *) (x + 4)
	movl	4(%edx),%ecx
	# in2 = *(uint32 *) (x + 8)
	movl	8(%edx),%ebp
	# j0 = in0
	movl	%eax,164(%esp)
	# in3 = *(uint32 *) (x + 12)
	movl	12(%edx),%eax
	# j1 = in1
	movl	%ecx,168(%esp)
	# in4 = *(uint32 *) (x + 16)
	movl	16(%edx),%ecx
	# j2 = in2
	movl	%ebp,172(%esp)
	# in5 = *(uint32 *) (x + 20)
	movl	20(%edx),%ebp
	# j3 = in3
	movl	%eax,176(%esp)
	# in6 = *(uint32 *) (x + 24)
	movl	24(%edx),%eax
	# j4 = in4
	movl	%ecx,180(%esp)
	# in7 = *(uint32 *) (x + 28)
	movl	28(%edx),%ecx
	# j5 = in5
	movl	%ebp,184(%esp)
	# in8 = *(uint32 *) (x + 32)
	movl	32(%edx),%ebp
	# j6 = in6
	movl	%eax,188(%esp)
	# in9 = *(uint32 *) (x + 36)
	movl	36(%edx),%eax
	# j7 = in7
	movl	%ecx,192(%esp)
	# in10 = *(uint32 *) (x + 40)
	movl	40(%edx),%ecx
	# j8 = in8
	movl	%ebp,196(%esp)
	# in11 = *(uint32 *) (x + 44)
	movl	44(%edx),%ebp
	# j9 = in9
	movl	%eax,200(%esp)
	# in12 = *(uint32 *) (x + 48)
	movl	48(%edx),%eax
	# j10 = in10
	movl	%ecx,204(%esp)
	# in13 = *(uint32 *) (x + 52)
	movl	52(%edx),%ecx
	# j11 = in11
	movl	%ebp,208(%esp)
	# in14 = *(uint32 *) (x + 56)
	movl	56(%edx),%ebp
	# j12 = in12
	movl	%eax,212(%esp)
	# in15 = *(uint32 *) (x + 60)
	movl	60(%edx),%eax
	# j13 = in13
	movl	%ecx,216(%esp)
	# j14 = in14
	movl	%ebp,220(%esp)
	# j15 = in15
	movl	%eax,224(%esp)
	# x_backup = x
	movl	%edx,64(%esp)
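	# One 64-byte block per pass from here. If fewer than 64 bytes
	# remain, the input is first staged through the 64-byte tmp area
	# at the bottom of the frame (and the real destination is saved
	# in ctarget) so a full block can be read and written safely;
	# the partial result is copied back to ctarget at the end.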
._bytesatleast1:
	#   bytes - 64
	cmp	$64,%ebx
	#   goto nocopy if unsigned>=
	jae	._nocopy
	#     ctarget = out
	movl	%edi,228(%esp)
	#     out = &tmp
	leal	0(%esp),%edi
	#     i = bytes
	mov	%ebx,%ecx
	#     while (i) { *out++ = *m++; --i }
	rep	movsb
	#     out = &tmp
	leal	0(%esp),%edi
	#     m = &tmp
	leal	0(%esp),%esi
._nocopy:
	#   out_backup = out
	movl	%edi,72(%esp)
	#   m_backup = m
	movl	%esi,68(%esp)
	#   bytes_backup = bytes
	movl	%ebx,76(%esp)
	#   in0 = j0
	movl	164(%esp),%eax
	#   in1 = j1
	movl	168(%esp),%ecx
	#   in2 = j2
	movl	172(%esp),%edx
	#   in3 = j3
	movl	176(%esp),%ebx
	#   x0 = in0
	movl	%eax,100(%esp)
	#   x1 = in1
	movl	%ecx,104(%esp)
	#   x2 = in2
	movl	%edx,108(%esp)
	#   x3 = in3
	movl	%ebx,112(%esp)
	#   in4 = j4
	movl	180(%esp),%eax
	#   in5 = j5
	movl	184(%esp),%ecx
	#   in6 = j6
	movl	188(%esp),%edx
	#   in7 = j7
	movl	192(%esp),%ebx
	#   x4 = in4
	movl	%eax,116(%esp)
	#   x5 = in5
	movl	%ecx,120(%esp)
	#   x6 = in6
	movl	%edx,124(%esp)
	#   x7 = in7
	movl	%ebx,128(%esp)
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in10 = j10
	movl	204(%esp),%edx
	#   in11 = j11
	movl	208(%esp),%ebx
	#   x8 = in8
	movl	%eax,132(%esp)
	#   x9 = in9
	movl	%ecx,136(%esp)
	#   x10 = in10
	movl	%edx,140(%esp)
	#   x11 = in11
	movl	%ebx,144(%esp)
	#   in12 = j12
	movl	212(%esp),%eax
	#   in13 = j13
	movl	216(%esp),%ecx
	#   in14 = j14
	movl	220(%esp),%edx
	#   in15 = j15
	movl	224(%esp),%ebx
	#   x12 = in12
	movl	%eax,148(%esp)
	#   x13 = in13
	movl	%ecx,152(%esp)
	#   x14 = in14
	movl	%edx,156(%esp)
	#   x15 = in15
	movl	%ebx,160(%esp)
	#   i = 20
	mov	$20,%ebp
	# p = x0
	movl	100(%esp),%eax
	# s = x5
	movl	120(%esp),%ecx
	# t = x10
	movl	140(%esp),%edx
	# w = x15
	movl	160(%esp),%ebx
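	# Each pass through ._mainloop performs four Salsa20 rounds (two
	# column/row double-rounds); i counts down from 20 by 4, giving
	# the full 20 rounds of Salsa20/20. The four quarter-rounds of
	# each round are interleaved for instruction-level parallelism,
	# and the diagonal words x0, x5, x10, x15 stay live in registers
	# as p, s, t and w across iterations.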
._mainloop:
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# i -= 4
	sub	$4,%ebp
	# goto mainloop if unsigned >
	ja	._mainloop
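	# Rounds done. Store the live diagonal words back, then add the
	# saved input words j0..j15 to the state (the Salsa20
	# feedforward) and xor the resulting 64 bytes of keystream with
	# the message, two 32-bit words at a time via eax and ecx.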
	# x0 = p
	movl	%eax,100(%esp)
	# x5 = s
	movl	%ecx,120(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# x15 = w
	movl	%ebx,160(%esp)
	#   out = out_backup
	movl	72(%esp),%edi
	#   m = m_backup
	movl	68(%esp),%esi
	#   in0 = x0
	movl	100(%esp),%eax
	#   in1 = x1
	movl	104(%esp),%ecx
	#   in0 += j0
	addl	164(%esp),%eax
	#   in1 += j1
	addl	168(%esp),%ecx
	#   in0 ^= *(uint32 *) (m + 0)
	xorl	0(%esi),%eax
	#   in1 ^= *(uint32 *) (m + 4)
	xorl	4(%esi),%ecx
	#   *(uint32 *) (out + 0) = in0
	movl	%eax,0(%edi)
	#   *(uint32 *) (out + 4) = in1
	movl	%ecx,4(%edi)
	#   in2 = x2
	movl	108(%esp),%eax
	#   in3 = x3
	movl	112(%esp),%ecx
	#   in2 += j2
	addl	172(%esp),%eax
	#   in3 += j3
	addl	176(%esp),%ecx
	#   in2 ^= *(uint32 *) (m + 8)
	xorl	8(%esi),%eax
	#   in3 ^= *(uint32 *) (m + 12)
	xorl	12(%esi),%ecx
	#   *(uint32 *) (out + 8) = in2
	movl	%eax,8(%edi)
	#   *(uint32 *) (out + 12) = in3
	movl	%ecx,12(%edi)
	#   in4 = x4
	movl	116(%esp),%eax
	#   in5 = x5
	movl	120(%esp),%ecx
	#   in4 += j4
	addl	180(%esp),%eax
	#   in5 += j5
	addl	184(%esp),%ecx
	#   in4 ^= *(uint32 *) (m + 16)
	xorl	16(%esi),%eax
	#   in5 ^= *(uint32 *) (m + 20)
	xorl	20(%esi),%ecx
	#   *(uint32 *) (out + 16) = in4
	movl	%eax,16(%edi)
	#   *(uint32 *) (out + 20) = in5
	movl	%ecx,20(%edi)
	#   in6 = x6
	movl	124(%esp),%eax
	#   in7 = x7
	movl	128(%esp),%ecx
	#   in6 += j6
	addl	188(%esp),%eax
	#   in7 += j7
	addl	192(%esp),%ecx
	#   in6 ^= *(uint32 *) (m + 24)
	xorl	24(%esi),%eax
	#   in7 ^= *(uint32 *) (m + 28)
	xorl	28(%esi),%ecx
	#   *(uint32 *) (out + 24) = in6
	movl	%eax,24(%edi)
	#   *(uint32 *) (out + 28) = in7
	movl	%ecx,28(%edi)
	#   in8 = x8
	movl	132(%esp),%eax
	#   in9 = x9
	movl	136(%esp),%ecx
	#   in8 += j8
	addl	196(%esp),%eax
	#   in9 += j9
	addl	200(%esp),%ecx
	#   in8 ^= *(uint32 *) (m + 32)
	xorl	32(%esi),%eax
	#   in9 ^= *(uint32 *) (m + 36)
	xorl	36(%esi),%ecx
	#   *(uint32 *) (out + 32) = in8
	movl	%eax,32(%edi)
	#   *(uint32 *) (out + 36) = in9
	movl	%ecx,36(%edi)
	#   in10 = x10
	movl	140(%esp),%eax
	#   in11 = x11
	movl	144(%esp),%ecx
	#   in10 += j10
	addl	204(%esp),%eax
	#   in11 += j11
	addl	208(%esp),%ecx
	#   in10 ^= *(uint32 *) (m + 40)
	xorl	40(%esi),%eax
	#   in11 ^= *(uint32 *) (m + 44)
	xorl	44(%esi),%ecx
	#   *(uint32 *) (out + 40) = in10
	movl	%eax,40(%edi)
	#   *(uint32 *) (out + 44) = in11
	movl	%ecx,44(%edi)
	#   in12 = x12
	movl	148(%esp),%eax
	#   in13 = x13
	movl	152(%esp),%ecx
	#   in12 += j12
	addl	212(%esp),%eax
	#   in13 += j13
	addl	216(%esp),%ecx
	#   in12 ^= *(uint32 *) (m + 48)
	xorl	48(%esi),%eax
	#   in13 ^= *(uint32 *) (m + 52)
	xorl	52(%esi),%ecx
	#   *(uint32 *) (out + 48) = in12
	movl	%eax,48(%edi)
	#   *(uint32 *) (out + 52) = in13
	movl	%ecx,52(%edi)
	#   in14 = x14
	movl	156(%esp),%eax
	#   in15 = x15
	movl	160(%esp),%ecx
	#   in14 += j14
	addl	220(%esp),%eax
	#   in15 += j15
	addl	224(%esp),%ecx
	#   in14 ^= *(uint32 *) (m + 56)
	xorl	56(%esi),%eax
	#   in15 ^= *(uint32 *) (m + 60)
	xorl	60(%esi),%ecx
	#   *(uint32 *) (out + 56) = in14
	movl	%eax,56(%edi)
	#   *(uint32 *) (out + 60) = in15
	movl	%ecx,60(%edi)
	#   bytes = bytes_backup
	movl	76(%esp),%ebx
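	#   Words 8 and 9 of the state form a 64-bit little-endian block
	#   counter; the add/adc pair below increments it for the next
	#   block.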
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in8 += 1
	add	$1,%eax
	#   in9 += 0 + carry
	adc	$0,%ecx
	#   j8 = in8
	movl	%eax,196(%esp)
	#   j9 = in9
	movl	%ecx,200(%esp)
	#   bytes - 64
	cmp	$64,%ebx
	#   goto bytesatleast65 if unsigned>
	ja	._bytesatleast65
	#     goto bytesatleast64 if unsigned>=
	jae	._bytesatleast64
	#       m = out
	mov	%edi,%esi
	#       out = ctarget
	movl	228(%esp),%edi
	#       i = bytes
	mov	%ebx,%ecx
	#       while (i) { *out++ = *m++; --i }
	rep	movsb
._bytesatleast64:
	#     x = x_backup
	movl	64(%esp),%eax
	#     in8 = j8
	movl	196(%esp),%ecx
	#     in9 = j9
	movl	200(%esp),%edx
	#     *(uint32 *) (x + 32) = in8
	movl	%ecx,32(%eax)
	#     *(uint32 *) (x + 36) = in9
	movl	%edx,36(%eax)
._done:
	#     eax = eax_stack
	movl	80(%esp),%eax
	#     ebx = ebx_stack
	movl	84(%esp),%ebx
	#     esi = esi_stack
	movl	88(%esp),%esi
	#     edi = edi_stack
	movl	92(%esp),%edi
	#     ebp = ebp_stack
	movl	96(%esp),%ebp
	#     leave
	add	%eax,%esp
	ret
._bytesatleast65:
	#   bytes -= 64
	sub	$64,%ebx
	#   out += 64
	add	$64,%edi
	#   m += 64
	add	$64,%esi
	# goto bytesatleast1
	jmp	._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)

# enter salsa20_keysetup
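# salsa20_keysetup writes the key into state words 1..4 and 11..14 and
# the four diagonal constants into words 0, 5, 10 and 15: sigma
# ("expand 32-byte k") for 256-bit keys, tau ("expand 16-byte k") for
# 128-bit keys. A 128-bit key is simply stored twice.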
ENTRY(salsa20_keysetup)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	#   eax_stack = eax
	movl	%eax,64(%esp)
	#   ebx_stack = ebx
	movl	%ebx,68(%esp)
	#   esi_stack = esi
	movl	%esi,72(%esp)
	#   edi_stack = edi
	movl	%edi,76(%esp)
	#   ebp_stack = ebp
	movl	%ebp,80(%esp)
	#   k = arg2
	movl	8(%esp,%eax),%ecx
	#   kbits = arg3
	movl	12(%esp,%eax),%edx
	#   x = arg1
	movl	4(%esp,%eax),%eax
	#   in1 = *(uint32 *) (k + 0)
	movl	0(%ecx),%ebx
	#   in2 = *(uint32 *) (k + 4)
	movl	4(%ecx),%esi
	#   in3 = *(uint32 *) (k + 8)
	movl	8(%ecx),%edi
	#   in4 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ebp
	#   *(uint32 *) (x + 4) = in1
	movl	%ebx,4(%eax)
	#   *(uint32 *) (x + 8) = in2
	movl	%esi,8(%eax)
	#   *(uint32 *) (x + 12) = in3
	movl	%edi,12(%eax)
	#   *(uint32 *) (x + 16) = in4
	movl	%ebp,16(%eax)
	#   kbits - 256
	cmp	$256,%edx
	#   goto kbits128 if unsigned<
	jb	._kbits128
._kbits256:
	#     in11 = *(uint32 *) (k + 16)
	movl	16(%ecx),%edx
	#     in12 = *(uint32 *) (k + 20)
	movl	20(%ecx),%ebx
	#     in13 = *(uint32 *) (k + 24)
	movl	24(%ecx),%esi
	#     in14 = *(uint32 *) (k + 28)
	movl	28(%ecx),%ecx
	#     *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	#     *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	#     *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	#     *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
	#     in0 = 1634760805
	mov	$1634760805,%ecx
	#     in5 = 857760878
	mov	$857760878,%edx
	#     in10 = 2036477234
	mov	$2036477234,%ebx
	#     in15 = 1797285236
	mov	$1797285236,%esi
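	#     (1634760805, 857760878, 2036477234 and 1797285236 are the
	#     little-endian words of sigma, "expand 32-byte k")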
	#     *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	#     *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	#     *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	#     *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
	#   goto keysetupdone
	jmp	._keysetupdone
._kbits128:
	#     in11 = *(uint32 *) (k + 0)
	movl	0(%ecx),%edx
	#     in12 = *(uint32 *) (k + 4)
	movl	4(%ecx),%ebx
	#     in13 = *(uint32 *) (k + 8)
	movl	8(%ecx),%esi
	#     in14 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ecx
	#     *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	#     *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	#     *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	#     *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
	#     in0 = 1634760805
	mov	$1634760805,%ecx
	#     in5 = 824206446
	mov	$824206446,%edx
	#     in10 = 2036477238
	mov	$2036477238,%ebx
	#     in15 = 1797285236
	mov	$1797285236,%esi
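	#     (as above but with 824206446 and 2036477238 in the middle:
	#     the little-endian words of tau, "expand 16-byte k")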
	#     *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	#     *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	#     *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	#     *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
._keysetupdone:
	#   eax = eax_stack
	movl	64(%esp),%eax
	#   ebx = ebx_stack
	movl	68(%esp),%ebx
	#   esi = esi_stack
	movl	72(%esp),%esi
	#   edi = edi_stack
	movl	76(%esp),%edi
	#   ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret
ENDPROC(salsa20_keysetup)

# enter salsa20_ivsetup
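# salsa20_ivsetup stores the 8-byte IV into state words 6 and 7 and
# zeroes the 64-bit block counter in words 8 and 9.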
ENTRY(salsa20_ivsetup)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	#   eax_stack = eax
	movl	%eax,64(%esp)
	#   ebx_stack = ebx
	movl	%ebx,68(%esp)
	#   esi_stack = esi
	movl	%esi,72(%esp)
	#   edi_stack = edi
	movl	%edi,76(%esp)
	#   ebp_stack = ebp
	movl	%ebp,80(%esp)
	#   iv = arg2
	movl	8(%esp,%eax),%ecx
	#   x = arg1
	movl	4(%esp,%eax),%eax
	#   in6 = *(uint32 *) (iv + 0)
	movl	0(%ecx),%edx
	#   in7 = *(uint32 *) (iv + 4)
	movl	4(%ecx),%ecx
	#   in8 = 0
	mov	$0,%ebx
	#   in9 = 0
	mov	$0,%esi
	#   *(uint32 *) (x + 24) = in6
	movl	%edx,24(%eax)
	#   *(uint32 *) (x + 28) = in7
	movl	%ecx,28(%eax)
	#   *(uint32 *) (x + 32) = in8
	movl	%ebx,32(%eax)
	#   *(uint32 *) (x + 36) = in9
	movl	%esi,36(%eax)
	#   eax = eax_stack
	movl	64(%esp),%eax
	#   ebx = ebx_stack
	movl	68(%esp),%ebx
	#   esi = esi_stack
	movl	72(%esp),%esi
	#   edi = edi_stack
	movl	76(%esp),%edi
	#   ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret
ENDPROC(salsa20_ivsetup)