1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8
9%ifdef BORINGSSL_PREFIX
10%include "boringssl_prefix_symbols_nasm.inc"
11%endif
12section	.text code align=64
13
14
15
16
17
18
19
;-----------------------------------------------------------------------
; gcm_gmult_ssse3
;
; ABI:   Microsoft x64 (the $L$gmult_seh_* labels are consumed by the
;        unwind data elsewhere in this file).
; In:    rcx = 16-byte block, loaded with movdqu and overwritten with the
;              result (no alignment requirement)
;        rdx = table read as 16 consecutive 16-byte rows with movdqa, so
;              it must be 16-byte aligned
; Out:   [rcx] = updated 16-byte block
; Clobb: rax, rdx (left advanced by 256), xmm0-xmm5 (zeroed on exit), flags
; Saved: xmm6 and xmm10 are spilled to the 40-byte frame and restored
;
; NOTE(review): the C prototype is not visible here; presumably
;   void gcm_gmult_ssse3(uint8_t Xi[16], const u128 Htable[16]);
; confirm against the C header.
;-----------------------------------------------------------------------

; GMULT_SSSE3_ROW_PASS count
;   Consumes COUNT rows of the table at rdx, then folds the spill-over
;   register xmm3 back into the accumulator xmm2 and clears xmm3.
;   Uses rax as the row counter and xmm4-xmm6 as scratch.
;   Expects xmm0 = low nibbles and xmm1 = high nibbles of the input bytes.
%macro GMULT_SSSE3_ROW_PASS 1
	mov	rax,%1
%%row:
	movdqa	xmm4,[rdx]		; current table row
	lea	rdx,[16+rdx]		; step to the next row, flags untouched

	; Shift the 256-bit pair xmm2:xmm3 right by one byte.
	movdqa	xmm6,xmm2
	palignr	xmm6,xmm3,1
	movdqa	xmm3,xmm6
	psrldq	xmm2,1

	; Sixteen parallel 4-bit lookups into the row per nibble vector.
	movdqa	xmm5,xmm4
	pshufb	xmm4,xmm0		; lookup indexed by the low nibbles
	pshufb	xmm5,xmm1		; lookup indexed by the high nibbles

	pxor	xmm2,xmm5		; high-nibble lookup goes in unshifted

	; The low-nibble lookup goes in shifted right by four bits across the
	; whole xmm2:xmm3 pair, so the bits crossing each 64-bit lane boundary
	; are carried by hand.
	movdqa	xmm5,xmm4
	psllq	xmm5,60			; isolate the 4 bits leaving each qword
	movdqa	xmm6,xmm5
	pslldq	xmm6,8
	pxor	xmm3,xmm6		; bits leaving xmm4 entirely land in xmm3

	psrldq	xmm5,8
	pxor	xmm2,xmm5		; carry from the high qword into the low
	psrlq	xmm4,4
	pxor	xmm2,xmm4

	sub	rax,1
	jnz	NEAR %%row

	; Fold xmm3 into xmm2 at right-shift offsets 0, 1, 2 and 7 (1+1+5).
	; NOTE(review): presumably the GCM reduction polynomial; confirm
	; against the generating Perl script.
	pxor	xmm2,xmm3
	psrlq	xmm3,1
	pxor	xmm2,xmm3
	psrlq	xmm3,1
	pxor	xmm2,xmm3
	psrlq	xmm3,5
	pxor	xmm2,xmm3
	pxor	xmm3,xmm3
%endmacro

global	gcm_gmult_ssse3
ALIGN	16
gcm_gmult_ssse3:

$L$gmult_seh_begin:
	sub	rsp,40			; 8 (return address) + 40 keeps rsp 16-aligned
$L$gmult_seh_allocstack:
	movdqa	[rsp],xmm6
$L$gmult_seh_save_xmm6:
	movdqa	[16+rsp],xmm10
$L$gmult_seh_save_xmm10:
$L$gmult_seh_prolog_end:
	movdqu	xmm0,[rcx]
	movdqa	xmm10,[$L$reverse_bytes]
	movdqa	xmm2,[$L$low4_mask]

	; Byte-reverse the input block.
	pshufb	xmm0,xmm10

	; Split each byte: xmm0 keeps the low nibbles, xmm1 gets the high
	; nibbles moved down into the low four bits of each byte.
	movdqa	xmm1,xmm2
	pandn	xmm1,xmm0
	psrld	xmm1,4
	pand	xmm0,xmm2

	; xmm2 = accumulator, xmm3 = bits shifted out of the accumulator.
	pxor	xmm2,xmm2
	pxor	xmm3,xmm3

	; 5 + 5 + 6 = all 16 rows, with a fold after each group.
	GMULT_SSSE3_ROW_PASS	5
	GMULT_SSSE3_ROW_PASS	5
	GMULT_SSSE3_ROW_PASS	6

	; Undo the byte reversal and store the result over the input.
	pshufb	xmm2,xmm10
	movdqu	[rcx],xmm2

	; Clear every working register before returning (presumably so no
	; data-derived values are left behind), then restore the callee-saved
	; xmm6 and xmm10.
	pxor	xmm0,xmm0
	pxor	xmm1,xmm1
	pxor	xmm2,xmm2
	pxor	xmm3,xmm3
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	pxor	xmm6,xmm6
	movdqa	xmm6,[rsp]
	movdqa	xmm10,[16+rsp]
	add	rsp,40
	DB	0F3h,0C3h		;repret
$L$gmult_seh_end:
211
212
213
214
215
216
217
218
;-----------------------------------------------------------------------
; gcm_ghash_ssse3
;
; ABI:   Microsoft x64 (the $L$ghash_seh_* labels are consumed by the
;        unwind data elsewhere in this file).
; In:    rcx = 16-byte state block, loaded with movdqu and overwritten
;              with the result (no alignment requirement)
;        rdx = table read as 16 consecutive 16-byte rows with movdqa, so
;              it must be 16-byte aligned
;        r8  = input bytes, read 16 at a time with movdqu
;        r9  = input length in bytes; rounded DOWN to a multiple of 16
; Out:   [rcx] = state after absorbing every whole 16-byte input block
; Clobb: rax, r8 (advanced past the consumed input), r9 (ends at 0),
;        xmm0-xmm5 (zeroed on exit), flags. rdx is rewound after each block.
; Saved: xmm6, xmm10 and xmm11 are spilled to the 56-byte frame and restored
;
; If the rounded length is zero the routine returns immediately and leaves
; [rcx] untouched. Without that check the loop body would run once on a
; block the caller never supplied, and "sub r9,16" would then wrap r9 to a
; non-zero value so the loop kept reading far beyond the input.
;
; NOTE(review): the C prototype is not visible here; presumably
;   void gcm_ghash_ssse3(uint8_t Xi[16], const u128 Htable[16],
;                        const uint8_t *in, size_t len);
; confirm against the C header.
;-----------------------------------------------------------------------

; GHASH_SSSE3_ROW_PASS count
;   Consumes COUNT rows of the table at rdx, then folds the spill-over
;   register xmm3 back into the accumulator xmm2 and clears xmm3.
;   Uses rax as the row counter and xmm4-xmm6 as scratch.
;   Expects xmm0 = low nibbles and xmm1 = high nibbles of the current block.
%macro GHASH_SSSE3_ROW_PASS 1
	mov	rax,%1
%%row:
	movdqa	xmm4,[rdx]		; current table row
	lea	rdx,[16+rdx]		; step to the next row, flags untouched

	; Shift the 256-bit pair xmm2:xmm3 right by one byte.
	movdqa	xmm6,xmm2
	palignr	xmm6,xmm3,1
	movdqa	xmm3,xmm6
	psrldq	xmm2,1

	; Sixteen parallel 4-bit lookups into the row per nibble vector.
	movdqa	xmm5,xmm4
	pshufb	xmm4,xmm0		; lookup indexed by the low nibbles
	pshufb	xmm5,xmm1		; lookup indexed by the high nibbles

	pxor	xmm2,xmm5		; high-nibble lookup goes in unshifted

	; The low-nibble lookup goes in shifted right by four bits across the
	; whole xmm2:xmm3 pair, so the bits crossing each 64-bit lane boundary
	; are carried by hand.
	movdqa	xmm5,xmm4
	psllq	xmm5,60			; isolate the 4 bits leaving each qword
	movdqa	xmm6,xmm5
	pslldq	xmm6,8
	pxor	xmm3,xmm6		; bits leaving xmm4 entirely land in xmm3

	psrldq	xmm5,8
	pxor	xmm2,xmm5		; carry from the high qword into the low
	psrlq	xmm4,4
	pxor	xmm2,xmm4

	sub	rax,1
	jnz	NEAR %%row

	; Fold xmm3 into xmm2 at right-shift offsets 0, 1, 2 and 7 (1+1+5).
	; NOTE(review): presumably the GCM reduction polynomial; confirm
	; against the generating Perl script.
	pxor	xmm2,xmm3
	psrlq	xmm3,1
	pxor	xmm2,xmm3
	psrlq	xmm3,1
	pxor	xmm2,xmm3
	psrlq	xmm3,5
	pxor	xmm2,xmm3
	pxor	xmm3,xmm3
%endmacro

global	gcm_ghash_ssse3
ALIGN	16
gcm_ghash_ssse3:
$L$ghash_seh_begin:

	sub	rsp,56			; 8 (return address) + 56 keeps rsp 16-aligned
$L$ghash_seh_allocstack:
	movdqa	[rsp],xmm6
$L$ghash_seh_save_xmm6:
	movdqa	[16+rsp],xmm10
$L$ghash_seh_save_xmm10:
	movdqa	[32+rsp],xmm11
$L$ghash_seh_save_xmm11:
$L$ghash_seh_prolog_end:
	movdqu	xmm0,[rcx]
	movdqa	xmm10,[$L$reverse_bytes]
	movdqa	xmm11,[$L$low4_mask]

	; Only whole 16-byte blocks are processed. With no whole block there
	; is nothing to absorb, so skip straight to the register cleanup and
	; leave the state at [rcx] exactly as it was.
	and	r9,-16
	jz	NEAR $L$ghash_done

	; Byte-reverse the state once; it stays reversed across all blocks.
	pshufb	xmm0,xmm10

	; xmm3 collects bits shifted out of the accumulator; every row pass
	; leaves it cleared again.
	pxor	xmm3,xmm3
$L$oop_ghash:
	; Invariant: r9 = bytes still to absorb, a non-zero multiple of 16.

	; Load the next input block, byte-reverse it and XOR it into the state.
	movdqu	xmm1,[r8]
	pshufb	xmm1,xmm10
	pxor	xmm0,xmm1

	; Split each byte: xmm0 keeps the low nibbles, xmm1 gets the high
	; nibbles moved down into the low four bits of each byte.
	movdqa	xmm1,xmm11
	pandn	xmm1,xmm0
	psrld	xmm1,4
	pand	xmm0,xmm11

	; Fresh accumulator for this block.
	pxor	xmm2,xmm2

	; 5 + 5 + 6 = all 16 rows, with a fold after each group.
	GHASH_SSSE3_ROW_PASS	5
	GHASH_SSSE3_ROW_PASS	5
	GHASH_SSSE3_ROW_PASS	6
	movdqa	xmm0,xmm2		; accumulator becomes the new state

	; Rewind the table pointer by the 16 rows just consumed.
	lea	rdx,[((-256))+rdx]

	; Advance to the next input block.
	lea	r8,[16+r8]
	sub	r9,16
	jnz	NEAR $L$oop_ghash

	; Undo the byte reversal and store the state back.
	pshufb	xmm0,xmm10
	movdqu	[rcx],xmm0

$L$ghash_done:
	; Clear every working register before returning (presumably so no
	; data-derived values are left behind), then restore the callee-saved
	; xmm6, xmm10 and xmm11.
	pxor	xmm0,xmm0
	pxor	xmm1,xmm1
	pxor	xmm2,xmm2
	pxor	xmm3,xmm3
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	pxor	xmm6,xmm6
	movdqa	xmm6,[rsp]
	movdqa	xmm10,[16+rsp]
	movdqa	xmm11,[32+rsp]
	add	rsp,56
	DB	0F3h,0C3h		;repret
$L$ghash_seh_end:
435
436
437
; 16-byte constants loaded with movdqa by the routines in this file, hence
; the 16-byte alignment.
ALIGN	16

; pshufb control mask holding the byte indices 15,14,...,1,0: shuffling a
; register with it reverses the order of its 16 bytes.
$L$reverse_bytes:
%assign GHASH_SSSE3_REV_INDEX 15
%rep 16
	DB	GHASH_SSSE3_REV_INDEX
%assign GHASH_SSSE3_REV_INDEX GHASH_SSSE3_REV_INDEX-1
%endrep

; 0x0f in every byte: selects the low four bits of each byte lane.
$L$low4_mask:
	times 16 DB 0x0f
; Win64 structured-exception-handling tables for the two routines.
;
; .pdata holds one function-table entry per routine: image-relative start
; address, end address and the address of its unwind information.
section	.pdata rdata align=4
ALIGN	4
	; gcm_gmult_ssse3
	DD	$L$gmult_seh_begin wrt ..imagebase
	DD	$L$gmult_seh_end wrt ..imagebase
	DD	$L$gmult_seh_info wrt ..imagebase

	; gcm_ghash_ssse3
	DD	$L$ghash_seh_begin wrt ..imagebase
	DD	$L$ghash_seh_end wrt ..imagebase
	DD	$L$ghash_seh_info wrt ..imagebase

; .xdata holds the unwind information itself. Unwind codes are listed in
; reverse prologue order. Each starts with the prologue offset just past
; the instruction it describes, followed by a byte packing the operation
; info (high nibble) and the operation code (low nibble).

; Unwind code 2 (small stack allocation): info = (bytes - 8) / 8.
%define GHASH_SSSE3_SEH_ALLOC_SMALL(bytes)	(((((bytes)-8)/8)<<4)|2)
; Unwind code 8 (save a 128-bit XMM register): info = register number; the
; following 16-bit slot is the save offset from rsp divided by 16.
%define GHASH_SSSE3_SEH_SAVE_XMM128(reg)	(((reg)<<4)|8)

section	.xdata rdata align=8
ALIGN	8
$L$gmult_seh_info:
	DB	1			; version 1, no flags
	DB	$L$gmult_seh_prolog_end-$L$gmult_seh_begin	; prologue size
	DB	5			; unwind-code slots: 2 + 2 + 1
	DB	0			; no frame register

	DB	$L$gmult_seh_save_xmm10-$L$gmult_seh_begin
	DB	GHASH_SSSE3_SEH_SAVE_XMM128(10)
	DW	16/16			; xmm10 lives at [rsp+16]

	DB	$L$gmult_seh_save_xmm6-$L$gmult_seh_begin
	DB	GHASH_SSSE3_SEH_SAVE_XMM128(6)
	DW	0/16			; xmm6 lives at [rsp+0]

	DB	$L$gmult_seh_allocstack-$L$gmult_seh_begin
	DB	GHASH_SSSE3_SEH_ALLOC_SMALL(40)		; sub rsp,40

ALIGN	8
$L$ghash_seh_info:
	DB	1			; version 1, no flags
	DB	$L$ghash_seh_prolog_end-$L$ghash_seh_begin	; prologue size
	DB	7			; unwind-code slots: 2 + 2 + 2 + 1
	DB	0			; no frame register

	DB	$L$ghash_seh_save_xmm11-$L$ghash_seh_begin
	DB	GHASH_SSSE3_SEH_SAVE_XMM128(11)
	DW	32/16			; xmm11 lives at [rsp+32]

	DB	$L$ghash_seh_save_xmm10-$L$ghash_seh_begin
	DB	GHASH_SSSE3_SEH_SAVE_XMM128(10)
	DW	16/16			; xmm10 lives at [rsp+16]

	DB	$L$ghash_seh_save_xmm6-$L$ghash_seh_begin
	DB	GHASH_SSSE3_SEH_SAVE_XMM128(6)
	DW	0/16			; xmm6 lives at [rsp+0]

	DB	$L$ghash_seh_allocstack-$L$ghash_seh_begin
	DB	GHASH_SSSE3_SEH_ALLOC_SMALL(56)		; sub rsp,56
496