// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine

#include "textflag.h"

// POLY1305_ADD(msg, h0, h1, h2) accumulates one full 16-byte block:
//
//	h2:h1:h0 += (1 << 128) + (the 16 bytes at msg, loaded as two quadwords)
//	msg      += 16
//
// The $1 folded into the last ADCQ contributes the 2^128 term on top of the
// carry out of h1; ·poly1305 only expands this macro for complete 16-byte
// blocks. msg must be a register holding the block address. It is advanced
// with LEAQ, which leaves the flags as the final ADCQ set them.
//
// Do not place // comments inside the macro body: they would swallow the
// trailing backslash and end the definition early.
#define POLY1305_ADD(msg, h0, h1, h2) \
	ADDQ 0(msg), h0;  \
	ADCQ 8(msg), h1;  \
	ADCQ $1, h2;      \
	LEAQ 16(msg), msg

// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) replaces the accumulator
// h2:h1:h0 with h*r, folded back down using 2^130 = 5 (mod 2^130 - 5).
// The result is only partially reduced; the final conditional subtraction of
// 2^130 - 5 is done once, at the end of ·poly1305.
//
// The body has three sections, separated by the blank continuation lines:
//
//  1. r0 * (h2:h1:h0): t0 and t1 receive r0*h0, the low half of r0*h1 is
//     added into t1, and t2 = r0*h2 + high half of r0*h1 (with carry).
//  2. r1 * (h2:h1:h0), added in one limb higher: the low half of r1*h0 goes
//     into t1, its high half is parked in h0 (h0 is free by then), r1*h1 is
//     added into t3:t2 together with the parked value, and t3 also holds
//     r1*h2. After this section t3:t2:t1:t0 is the full product.
//  3. Fold at bit 130: h2:h1:h0 = (t2 & 3):t1:t0. With c = (t3:t2) >> 2, the
//     code first adds t3:(t2 with its low two bits cleared), which is 4*c,
//     and then adds c itself (the SHRQ pair), for a total of 5*c.
//
// Clobbers AX and DX (implicit MULQ operands), t0-t3 and the flags. h0-h2,
// r0, r1 and t0-t3 must be distinct registers other than AX and DX.
//
// NOTE(review): the h2 products use 64-bit IMULQ and the section-1 add into
// t2 drops any carry, so this presumably relies on h2 staying small and on r
// having been masked with poly1305Mask<> - confirm before reusing elsewhere.
//
// Do not place // comments inside the macro body: they would swallow the
// trailing backslash and end the definition early.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
	MOVQ  r0, AX;                  \
	MULQ  h0;                      \
	MOVQ  AX, t0;                  \
	MOVQ  DX, t1;                  \
	MOVQ  r0, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  r0, t2;                  \
	IMULQ h2, t2;                  \
	ADDQ  DX, t2;                  \
	                               \
	MOVQ  r1, AX;                  \
	MULQ  h0;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  DX, h0;                  \
	MOVQ  r1, t3;                  \
	IMULQ h2, t3;                  \
	MOVQ  r1, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t2;                  \
	ADCQ  DX, t3;                  \
	ADDQ  h0, t2;                  \
	ADCQ  $0, t3;                  \
	                               \
	MOVQ  t0, h0;                  \
	MOVQ  t1, h1;                  \
	MOVQ  t2, h2;                  \
	ANDQ  $3, h2;                  \
	MOVQ  t2, t0;                  \
	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
	ADDQ  t0, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2;                  \
	SHRQ  $2, t3, t2;              \
	SHRQ  $2, t3;                  \
	ADDQ  t2, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2

// poly1305Mask<> is the 16-byte read-only mask that ·poly1305 ANDs into the
// first 16 bytes of the key to obtain r (quadword +0 masks r0, +8 masks r1).
// Viewed as four 32-bit words, it clears the top four bits of every word and
// the low two bits of every word except the lowest.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16

// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
//
// Computes the Poly1305 authenticator of the mlen bytes starting at m and
// writes the 16-byte result to out. key[0:16] is masked with poly1305Mask<>
// to form the multiplier r; key[16:32] is added to the reduced accumulator
// just before the result is stored.
//
// Frame: no locals, 32 bytes of arguments.
//
// Register use:
//	DI                out
//	SI                cursor into the message
//	R15               bytes still to be processed
//	R11, R12          r0, r1 (masked key[0:16])
//	R8, R9, R10       h0, h1, h2 (accumulator)
//	BX, CX, R13, R14  t0-t3 scratch for POLY1305_MUL; BX, CX and R13 are
//	                  also used to assemble the final partial block
//	AX, DX            clobbered by MULQ inside POLY1305_MUL
TEXT ·poly1305(SB), $0-32
	MOVQ out+0(FP), DI
	MOVQ m+8(FP), SI
	MOVQ mlen+16(FP), R15
	MOVQ key+24(FP), AX

	MOVQ 0(AX), R11
	MOVQ 8(AX), R12
	ANDQ ·poly1305Mask<>(SB), R11   // r0
	ANDQ ·poly1305Mask<>+8(SB), R12 // r1
	XORQ R8, R8                    // h0
	XORQ R9, R9                    // h1
	XORQ R10, R10                  // h2

	// Fewer than 16 bytes in total: go straight to the partial-block path.
	CMPQ R15, $16
	JB   bytes_between_0_and_15

loop:
	// h += next 16 message bytes + 2^128; SI advances by 16.
	POLY1305_ADD(SI, R8, R9, R10)

multiply:
	// h = h * r, partially reduced. Also entered from the partial-block
	// path below, after the padded block has been added to h.
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE  loop

bytes_between_0_and_15:
	TESTQ R15, R15
	JZ    done
	MOVQ  $1, BX      // CX:BX starts as 1; this becomes the byte just above the data
	XORQ  CX, CX
	XORQ  R13, R13    // upper bits stay zero across the MOVB loads below
	ADDQ  R15, SI     // SI = one past the last message byte

flush_buffer:
	// Walk the remaining bytes from last to first, shifting CX:BX left by
	// one byte and inserting each byte at the bottom. On exit CX:BX holds
	// the remaining bytes in memory order with a single 1 byte above them.
	SHLQ $8, BX, CX
	SHLQ $8, BX
	MOVB -1(SI), R13
	XORQ R13, BX
	DECQ SI
	DECQ R15
	JNZ  flush_buffer

	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10
	MOVQ $16, R15     // so SUBQ $16 after the multiply leaves 0 and we reach done
	JMP  multiply

done:
	// Final reduction: compute h - (2^130 - 5) in R10:BX:AX. A borrow out of
	// the top limb (carry set) means h was already below 2^130 - 5, so the
	// original h0/h1 are restored; otherwise the difference is kept.
	MOVQ    R8, AX
	MOVQ    R9, BX
	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
	SBBQ    $3, R10
	CMOVQCS R8, AX
	CMOVQCS R9, BX
	MOVQ    key+24(FP), R8 // h0 is no longer needed; reuse R8 for the key pointer
	ADDQ    16(R8), AX     // tag = (h + key[16:32]) mod 2^128
	ADCQ    24(R8), BX

	MOVQ AX, 0(DI)
	MOVQ BX, 8(DI)
	RET
