; Section setup: choose the code-section declaration that matches the
; assembler's output format (OMF "obj", COFF "win32", or anything else).
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
; Not Yasm: define the @feat.00 symbol by hand (see the safeseh link above).
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif
; The extern declaration is left disabled; this file declares
; _OPENSSL_ia32cap_P itself as a `common` symbol in its .bss segment.
;extern	_OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; _asm_RC4 -- RC4 keystream generation XORed over a buffer (32-bit x86).
;
; Arguments are read from the stack (offsets are after the four pushes):
;   [esp+20]  key state pointer   -> edi
;   [esp+24]  byte count          -> edx
;   [esp+28]  input pointer       -> esi
;   [esp+32]  output pointer      -> ebp
;
; Key state layout as used by this routine:
;   +0        x index (low byte is read/written)
;   +4        y index (low byte is read, a dword is written back)
;   +8        permutation table S: 256 dwords, or 256 bytes when the
;             dword at +8+256 equals -1 (byte-table marker)
;
; Code paths:
;   L$001RC4_CHAR   byte table, one byte per iteration
;   L$005loop_mmx   dword table, 8 bytes per iteration; taken when at
;                   least 8 bytes remain and bit 26 of the first dword of
;                   _OPENSSL_ia32cap_P is set
;   L$007loop4      dword table, 4 bytes per iteration
;   L$002loop1      dword table, one byte per iteration (short input/tail)
;
; Argument slots [esp+24], [esp+28] and [esp+32] are overwritten and used
; as scratch storage. ebp, ebx, esi and edi are saved and restored;
; eax, ecx, edx, flags and (on the MMX path) mm0-mm2 are clobbered.
; A zero byte count returns immediately without touching the key state.
;-----------------------------------------------------------------------
global	_asm_RC4
align	16
_asm_RC4:
L$_asm_RC4_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	edi,DWORD [20+esp]	; edi = key state
	mov	edx,DWORD [24+esp]	; edx = byte count
	mov	esi,DWORD [28+esp]	; esi = input
	mov	ebp,DWORD [32+esp]	; ebp = output
	xor	eax,eax			; eax will hold x, zero-extended
	xor	ebx,ebx			; ebx will hold y, zero-extended
	cmp	edx,0
	je	NEAR L$000abort		; zero count: nothing to do
	mov	al,BYTE [edi]		; x
	mov	bl,BYTE [4+edi]		; y
	add	edi,8			; edi -> table S
	lea	ecx,[edx*1+esi]		; ecx = input + count
	sub	ebp,esi			; ebp = output - input, so out = [esi+ebp]
	mov	DWORD [24+esp],ecx	; slot reused: end-of-input pointer
	inc	al			; x is pre-incremented for the first round
	cmp	DWORD [256+edi],-1	; byte-table marker present?
	je	NEAR L$001RC4_CHAR
	mov	ecx,DWORD [eax*4+edi]	; ecx = S[x]
	and	edx,-4			; edx = count rounded down to a multiple of 4
	jz	NEAR L$002loop1		; fewer than 4 bytes: byte loop only
	mov	DWORD [32+esp],ebp	; slot reused: output - input delta
	test	edx,-8
	jz	NEAR L$003go4loop4	; fewer than 8 bytes: 4-byte loop
	lea	ebp,[_OPENSSL_ia32cap_P]
	bt	DWORD [ebp],26		; NOTE(review): presumably the CPUID SSE2 bit -- confirm against the code that fills this variable
	jnc	NEAR L$003go4loop4
	; ---- MMX path: 8 bytes per iteration ----
	mov	ebp,DWORD [32+esp]	; restore output - input delta
	and	edx,-8
	lea	edx,[edx*1+esi-8]	; address of the last whole 8-byte input block
	mov	DWORD [edi-4],edx	; y slot of the key state used as scratch for the loop bound
	; Round for keystream byte 0 of the first block.
	add	bl,cl			; y += S[x]
	mov	edx,DWORD [ebx*4+edi]	; edx = S[y]
	mov	DWORD [ebx*4+edi],ecx	; S[y] = old S[x]
	mov	DWORD [eax*4+edi],edx	; S[x] = old S[y]
	inc	eax
	add	edx,ecx
	movzx	eax,al			; x = (x + 1) & 255
	movzx	edx,dl			; edx = (S[x] + S[y]) & 255
	movq	mm0,[esi]		; mm0 = 8 input bytes
	mov	ecx,DWORD [eax*4+edi]	; ecx = S[x] for the next round
	movd	mm2,DWORD [edx*4+edi]	; mm2 = keystream byte 0
	jmp	NEAR L$004loop_mmx_enter
align	16
L$005loop_mmx:
	; Round for byte 0 of the next block; also finishes and stores the
	; previous block (its byte 7 is still waiting in mm1).
	add	bl,cl
	psllq	mm1,56			; previous block: byte 7 into bits 56..63
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm1
	movq	mm0,[esi]		; next 8 input bytes (esi was already advanced)
	movq	[esi*1+ebp-8],mm2	; store the previous 8 output bytes
	mov	ecx,DWORD [eax*4+edi]
	movd	mm2,DWORD [edx*4+edi]	; mm2 = keystream byte 0 of this block
L$004loop_mmx_enter:
	; Round for keystream byte 1.
	add	bl,cl
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm0			; fold the input block into the accumulator
	mov	ecx,DWORD [eax*4+edi]
	movd	mm1,DWORD [edx*4+edi]	; mm1 = keystream byte 1
	; Round for keystream byte 2.
	add	bl,cl
	psllq	mm1,8			; byte 1 into bits 8..15
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm1
	mov	ecx,DWORD [eax*4+edi]
	movd	mm1,DWORD [edx*4+edi]	; mm1 = keystream byte 2
	; Round for keystream byte 3.
	add	bl,cl
	psllq	mm1,16			; byte 2 into bits 16..23
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm1
	mov	ecx,DWORD [eax*4+edi]
	movd	mm1,DWORD [edx*4+edi]	; mm1 = keystream byte 3
	; Round for keystream byte 4.
	add	bl,cl
	psllq	mm1,24			; byte 3 into bits 24..31
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm1
	mov	ecx,DWORD [eax*4+edi]
	movd	mm1,DWORD [edx*4+edi]	; mm1 = keystream byte 4
	; Round for keystream byte 5.
	add	bl,cl
	psllq	mm1,32			; byte 4 into bits 32..39
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm1
	mov	ecx,DWORD [eax*4+edi]
	movd	mm1,DWORD [edx*4+edi]	; mm1 = keystream byte 5
	; Round for keystream byte 6.
	add	bl,cl
	psllq	mm1,40			; byte 5 into bits 40..47
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm1
	mov	ecx,DWORD [eax*4+edi]
	movd	mm1,DWORD [edx*4+edi]	; mm1 = keystream byte 6
	; Round for keystream byte 7.
	add	bl,cl
	psllq	mm1,48			; byte 6 into bits 48..55
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	inc	eax
	add	edx,ecx
	movzx	eax,al
	movzx	edx,dl
	pxor	mm2,mm1
	mov	ecx,DWORD [eax*4+edi]
	movd	mm1,DWORD [edx*4+edi]	; mm1 = keystream byte 7 (merged at loop top or exit)
	; Rebuild ebx from its low byte; the value is unchanged while the
	; upper 24 bits are zero. NOTE(review): presumably done to avoid a
	; partial-register dependency on bl -- confirm.
	mov	edx,ebx
	xor	ebx,ebx
	mov	bl,dl
	cmp	esi,DWORD [edi-4]	; was this the last whole 8-byte block?
	lea	esi,[8+esi]		; advance input; lea leaves the cmp flags intact
	jb	NEAR L$005loop_mmx
	; Finish and store the final 8-byte block.
	psllq	mm1,56
	pxor	mm2,mm1
	movq	[esi*1+ebp-8],mm2
	emms				; leave MMX state before returning to x87-using code
	cmp	esi,DWORD [24+esp]
	je	NEAR L$006done		; no tail bytes left
	jmp	NEAR L$002loop1		; 1..7 tail bytes
align	16
L$003go4loop4:
	; ---- 4 bytes per iteration; edx = count rounded down to 4 ----
	lea	edx,[edx*1+esi-4]	; address of the last whole 4-byte input block
	mov	DWORD [28+esp],edx	; slot reused: loop4 bound
L$007loop4:
	; Round for keystream byte 0; ebp is used as the 32-bit accumulator.
	add	bl,cl
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	add	edx,ecx
	inc	al
	and	edx,255
	mov	ecx,DWORD [eax*4+edi]
	mov	ebp,DWORD [edx*4+edi]	; ebp = keystream byte 0
	; Round for keystream byte 1.
	add	bl,cl
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	add	edx,ecx
	inc	al
	and	edx,255
	ror	ebp,8			; make room in the low byte
	mov	ecx,DWORD [eax*4+edi]
	or	ebp,DWORD [edx*4+edi]
	; Round for keystream byte 2.
	add	bl,cl
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	add	edx,ecx
	inc	al
	and	edx,255
	ror	ebp,8
	mov	ecx,DWORD [eax*4+edi]
	or	ebp,DWORD [edx*4+edi]
	; Round for keystream byte 3.
	add	bl,cl
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx
	mov	DWORD [eax*4+edi],edx
	add	edx,ecx
	inc	al
	and	edx,255
	ror	ebp,8
	mov	ecx,DWORD [32+esp]	; ecx = output - input delta (saved earlier)
	or	ebp,DWORD [edx*4+edi]
	ror	ebp,8			; fourth rotate: byte 0 is back in bits 0..7
	xor	ebp,DWORD [esi]		; XOR with 4 input bytes
	cmp	esi,DWORD [28+esp]	; last whole 4-byte block? (mov/lea below keep flags)
	mov	DWORD [esi*1+ecx],ebp	; store 4 output bytes
	lea	esi,[4+esi]
	mov	ecx,DWORD [eax*4+edi]	; ecx = S[x] for the next round
	jb	NEAR L$007loop4
	cmp	esi,DWORD [24+esp]
	je	NEAR L$006done		; no tail bytes left
	mov	ebp,DWORD [32+esp]	; restore output - input delta for loop1
align	16
L$002loop1:
	; ---- one byte per iteration (dword table) ----
	add	bl,cl			; y += S[x]
	mov	edx,DWORD [ebx*4+edi]
	mov	DWORD [ebx*4+edi],ecx	; swap S[x] and S[y]
	mov	DWORD [eax*4+edi],edx
	add	edx,ecx
	inc	al
	and	edx,255
	mov	edx,DWORD [edx*4+edi]	; keystream byte
	xor	dl,BYTE [esi]
	lea	esi,[1+esi]
	mov	ecx,DWORD [eax*4+edi]
	cmp	esi,DWORD [24+esp]	; reached end of input?
	mov	BYTE [esi*1+ebp-1],dl	; store output byte (mov keeps the cmp flags)
	jb	NEAR L$002loop1
	jmp	NEAR L$006done
align	16
L$001RC4_CHAR:
	; ---- one byte per iteration (byte table) ----
	movzx	ecx,BYTE [eax*1+edi]	; ecx = S[x]
L$008cloop1:
	add	bl,cl			; y += S[x]
	movzx	edx,BYTE [ebx*1+edi]
	mov	BYTE [ebx*1+edi],cl	; swap S[x] and S[y]
	mov	BYTE [eax*1+edi],dl
	add	dl,cl
	movzx	edx,BYTE [edx*1+edi]	; keystream byte
	add	al,1
	xor	dl,BYTE [esi]
	lea	esi,[1+esi]
	movzx	ecx,BYTE [eax*1+edi]
	cmp	esi,DWORD [24+esp]	; reached end of input?
	mov	BYTE [esi*1+ebp-1],dl	; store output byte (mov keeps the cmp flags)
	jb	NEAR L$008cloop1
L$006done:
	dec	al			; undo the look-ahead increment of x
	mov	DWORD [edi-4],ebx	; write back y (also overwrites the MMX scratch bound)
	mov	BYTE [edi-8],al		; write back x
L$000abort:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _asm_RC4_set_key -- initialise an RC4 key state from key bytes.
;
; Arguments are read from the stack (offsets are after the four pushes):
;   [esp+20]  key state pointer   -> edi
;   [esp+24]  key length in bytes -> ebp
;   [esp+28]  key data pointer    -> esi
;
; The table at state+8 is filled with the identity permutation and then
; shuffled with the key bytes, which are reused cyclically. Two table
; formats exist:
;   * 256 dwords (L$010w1stloop / L$011w2ndloop)
;   * 256 bytes  (L$009c1stloop / L$014c2ndloop), chosen when bit 20 of
;     the first dword of _OPENSSL_ia32cap_P is set; the dword at
;     table+256 is then set to -1 as a marker.
; On exit the dwords at state+0 and state+4 are zeroed and eax is 0.
; ebp, ebx, esi and edi are saved and restored; ecx, edx and flags are
; clobbered.
;
; NOTE(review): a zero key length is not special-cased here; the wrap
; test (index reaching 0) would then not fire on the first pass. Callers
; presumably guarantee a non-zero length -- confirm.
;-----------------------------------------------------------------------
global	_asm_RC4_set_key
align	16
_asm_RC4_set_key:
L$_asm_RC4_set_key_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	mov	edi,DWORD [20+esp]	; edi = key state
	mov	ebp,DWORD [24+esp]	; ebp = key length
	mov	esi,DWORD [28+esp]	; esi = key data
	lea	edx,[_OPENSSL_ia32cap_P]
	lea	edi,[8+edi]		; edi -> table S
	lea	esi,[ebp*1+esi]		; esi = one past the last key byte
	neg	ebp			; ebp = -length: negative index counting up to 0
	xor	eax,eax
	mov	DWORD [edi-4],ebp	; keep -length in the y slot for wrap-around reloads
	bt	DWORD [edx],20		; NOTE(review): meaning of capability bit 20 is defined by whoever fills this variable -- confirm
	jc	NEAR L$009c1stloop	; bit set: use the byte-table format
align	16
L$010w1stloop:
	; Dword table: S[i] = i for i = 0..255.
	mov	DWORD [eax*4+edi],eax
	add	al,1
	jnc	NEAR L$010w1stloop	; carry set when al wraps from 255 to 0
	xor	ecx,ecx			; ecx = i
	xor	edx,edx			; edx = j
align	16
L$011w2ndloop:
	mov	eax,DWORD [ecx*4+edi]	; eax = S[i]
	add	dl,BYTE [ebp*1+esi]	; j += key[k]
	add	dl,al			; j += S[i]
	add	ebp,1			; next key byte; ZF set when the key is exhausted
	mov	ebx,DWORD [edx*4+edi]	; ebx = S[j] (mov keeps ZF)
	jnz	NEAR L$012wnowrap
	mov	ebp,DWORD [edi-4]	; wrap: restart from the first key byte
L$012wnowrap:
	mov	DWORD [edx*4+edi],eax	; swap S[i] and S[j]
	mov	DWORD [ecx*4+edi],ebx
	add	cl,1
	jnc	NEAR L$011w2ndloop	; stop after i wraps from 255 to 0
	jmp	NEAR L$013exit
align	16
L$009c1stloop:
	; Byte table: S[i] = i for i = 0..255.
	mov	BYTE [eax*1+edi],al
	add	al,1
	jnc	NEAR L$009c1stloop	; carry set when al wraps from 255 to 0
	xor	ecx,ecx			; ecx = i
	xor	edx,edx			; edx = j
	xor	ebx,ebx
align	16
L$014c2ndloop:
	mov	al,BYTE [ecx*1+edi]	; al = S[i]
	add	dl,BYTE [ebp*1+esi]	; j += key[k]
	add	dl,al			; j += S[i]
	add	ebp,1			; next key byte; ZF set when the key is exhausted
	mov	bl,BYTE [edx*1+edi]	; bl = S[j] (mov keeps ZF)
	jnz	NEAR L$015cnowrap
	mov	ebp,DWORD [edi-4]	; wrap: restart from the first key byte
L$015cnowrap:
	mov	BYTE [edx*1+edi],al	; swap S[i] and S[j]
	mov	BYTE [ecx*1+edi],bl
	add	cl,1
	jnc	NEAR L$014c2ndloop	; stop after i wraps from 255 to 0
	mov	DWORD [256+edi],-1	; mark the table as byte-format
L$013exit:
	xor	eax,eax
	mov	DWORD [edi-8],eax	; x = 0
	mov	DWORD [edi-4],eax	; y = 0 (also clears the saved -length)
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _RC4_options -- return in eax a pointer to a NUL-terminated string
; naming the RC4 code path selected by _OPENSSL_ia32cap_P:
;   bit 20 set               -> "rc4(1x,char)"  (table offset 12)
;   bit 20 clear, bit 26 set -> "rc4(8x,mmx)"   (table offset 25)
;   otherwise                -> "rc4(4x,int)"   (table offset 0)
; Takes no arguments. Clobbers eax, edx and flags.
;-----------------------------------------------------------------------
global	_RC4_options
align	16
_RC4_options:
L$_RC4_options_begin:
	; call/pop yields this code's own address so the string table can be
	; located relative to it.
	call	L$016pic_point
L$016pic_point:
	pop	eax
	lea	eax,[(L$017opts-L$016pic_point)+eax]	; eax = &opts[0]
	lea	edx,[_OPENSSL_ia32cap_P]
	mov	edx,DWORD [edx]		; edx = first capability dword
	bt	edx,20
	jc	NEAR L$0181xchar
	bt	edx,26
	jnc	NEAR L$019ret		; neither bit set: "rc4(4x,int)"
	add	eax,25			; skip the 12- and 13-byte strings: "rc4(8x,mmx)"
	ret
L$0181xchar:
	add	eax,12			; skip the first 12-byte string: "rc4(1x,char)"
L$019ret:
	ret
align	64
L$017opts:
; "rc4(4x,int)",0    -- 12 bytes, offset 0
db	114,99,52,40,52,120,44,105,110,116,41,0
; "rc4(1x,char)",0   -- 13 bytes, offset 12
db	114,99,52,40,49,120,44,99,104,97,114,41,0
; "rc4(8x,mmx)",0    -- 12 bytes, offset 25
db	114,99,52,40,56,120,44,109,109,120,41,0
; "RC4 for x86, CRYPTOGAMS by <author e-mail>",0 -- attribution string,
; not referenced by the code above.
db	82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
db	80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
db	111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
align	64
; Capability vector consulted by the routines in this file, declared as a
; 16-byte common symbol so that the linker merges it with any other
; definition of the same name.
segment	.bss
common	_OPENSSL_ia32cap_P 16
383