1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package main
6
7// Based on original, public domain implementation from NaCl by D. J.
8// Bernstein.
9
10import (
11	"crypto/subtle"
12	"math"
13)
14
// Constants for the float64 formulation of Poly1305 (NaCl lineage). They are
// written as exact untyped constant expressions so that the power of two
// behind each value is visible; every expression evaluates to exactly the
// decimal literal it stands for.

// Rounding constants: alphaN (alphamN for a negative N) is 1.5 * 2^(52+N).
// NOTE(review): the arithmetic adds and then subtracts one of these to split
// a value at bit N; that presumably relies on float64 round-to-nearest with a
// 53-bit significand — confirm before reusing elsewhere.
const (
	alpham80 = 1.5 / (1 << 28)
	alpham48 = 1.5 * (1 << 4)
	alpham16 = 1.5 * (1 << 36)
	alpha0   = 1.5 * (1 << 52)
	alpha18  = 1.5 * (1 << 70)
	alpha32  = 1.5 * (1 << 84)
	alpha50  = 1.5 * (1 << 102)
	alpha64  = 1.5 * (1 << 116)
	alpha82  = 1.5 * (1 << 134)
	alpha96  = 1.5 * (1 << 148)
	alpha112 = 1.5 * (1 << 164)
	alpha130 = 1.5 * (1 << 182)
)

// scale is 5 * 2^-130: multiplying by it folds a multiple of 2^130 back into
// the low end of the accumulator, since 2^130 is congruent to 5 modulo
// 2^130 - 5.
const scale = 5.0 / (1 << 130)

// Output offsets: each is the alpha constant of its 32-bit lane plus a
// difference of two powers of two (minus 5 in the lowest lane).
const (
	offset0 = alpha0 + (1 << 33) - 5
	offset1 = alpha32 + (1 << 65) - (1 << 33)
	offset2 = alpha64 + (1 << 97) - (1 << 65)
	offset3 = alpha96 + (1 << 130) - (1 << 97)
)
34
35// poly1305Verify returns true if mac is a valid authenticator for m with the
36// given key.
37func poly1305Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
38	var tmp [16]byte
39	poly1305Sum(&tmp, m, key)
40	return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1
41}
42
// poly1305LimbMask selects the low 26 bits of a word (2^26 - 1), the width of
// one limb in the radix-2^26 representation used by poly1305Sum.
const poly1305LimbMask = math.MaxUint32 >> 6

// poly1305Sum generates an authenticator for m using a one-time key and puts
// the 16-byte result into out. Authenticating two different messages with the
// same key allows an attacker to forge messages at will.
//
// key[0:16] holds r (clamped here as Poly1305 requires) and key[16:32] holds
// s. The message is split into 16-byte blocks; each block, with a 1 bit
// appended above its top byte, is added to an accumulator which is then
// multiplied by r modulo 2^130 - 5. A final short block is padded with a
// single 0x01 byte followed by zeros. The tag is the fully reduced
// accumulator plus s, modulo 2^128, written little-endian.
//
// The accumulator and r are kept as five 26-bit limbs and all arithmetic is
// done on integers. The message is consumed by re-slicing m, so its length is
// never narrowed to 32 bits and inputs of any size are authenticated in full.
// Control flow depends only on len(m), never on key or message contents.
//
// This implementation does not reference the package's floating-point
// alpha*, scale or offset* constants.
func poly1305Sum(out *[16]byte, m []byte, key *[32]byte) {
	// Load r as five 26-bit limbs. The masks also perform the clamping of r:
	// they clear the top four bits of bytes 3, 7, 11 and 15 and the bottom two
	// bits of bytes 4, 8 and 12.
	r := [5]uint64{
		uint64(poly1305Load32(key[0:]) & 0x3ffffff),
		uint64((poly1305Load32(key[3:]) >> 2) & 0x3ffff03),
		uint64((poly1305Load32(key[6:]) >> 4) & 0x3ffc0ff),
		uint64((poly1305Load32(key[9:]) >> 6) & 0x3f03fff),
		uint64((poly1305Load32(key[12:]) >> 8) & 0x00fffff),
	}

	var h [5]uint32 // accumulator, starts at zero

	// Full blocks: the appended 1 bit is bit 128, i.e. bit 24 of limb 4.
	for len(m) >= 16 {
		poly1305Absorb(&h, &r, m[:16], 1<<24)
		m = m[16:]
	}

	// Trailing partial block: the 1 bit goes directly after the last message
	// byte, so it is part of the padded block rather than limb 4's top bit.
	if len(m) > 0 {
		var last [16]byte
		n := copy(last[:], m)
		last[n] = 1
		poly1305Absorb(&h, &r, last[:], 0)
	}

	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]

	// Propagate all pending carries so that every limb is below 2^26. A carry
	// out of limb 4 represents a multiple of 2^130 and re-enters at the bottom
	// multiplied by 5.
	h2 += h1 >> 26
	h1 &= poly1305LimbMask
	h3 += h2 >> 26
	h2 &= poly1305LimbMask
	h4 += h3 >> 26
	h3 &= poly1305LimbMask
	h0 += 5 * (h4 >> 26)
	h4 &= poly1305LimbMask
	h1 += h0 >> 26
	h0 &= poly1305LimbMask

	// Compute g = h - (2^130 - 5) as h + 5 - 2^130. g4 wraps around (top bit
	// set) exactly when h < 2^130 - 5.
	g0 := h0 + 5
	g1 := h1 + (g0 >> 26)
	g2 := h2 + (g1 >> 26)
	g3 := h3 + (g2 >> 26)
	g4 := h4 + (g3 >> 26) - (1 << 26)
	g0 &= poly1305LimbMask
	g1 &= poly1305LimbMask
	g2 &= poly1305LimbMask
	g3 &= poly1305LimbMask

	// Branch-free selection: takeG is all ones when g did not wrap (h was not
	// fully reduced) and zero otherwise.
	takeG := (g4 >> 31) - 1
	keepH := ^takeG
	h0 = (h0 & keepH) | (g0 & takeG)
	h1 = (h1 & keepH) | (g1 & takeG)
	h2 = (h2 & keepH) | (g2 & takeG)
	h3 = (h3 & keepH) | (g3 & takeG)
	h4 = (h4 & keepH) | (g4 & takeG)

	// Repack the low 128 bits of h from 26-bit limbs into four 32-bit words;
	// bits 128 and 129 are dropped by the uint32 truncation.
	w0 := h0 | (h1 << 26)
	w1 := (h1 >> 6) | (h2 << 20)
	w2 := (h2 >> 12) | (h3 << 14)
	w3 := (h3 >> 18) | (h4 << 8)

	// tag = (h + s) mod 2^128, with s = key[16:32], carrying between words.
	sum := uint64(w0) + uint64(poly1305Load32(key[16:]))
	poly1305Store32(out[0:], uint32(sum))
	sum = uint64(w1) + uint64(poly1305Load32(key[20:])) + (sum >> 32)
	poly1305Store32(out[4:], uint32(sum))
	sum = uint64(w2) + uint64(poly1305Load32(key[24:])) + (sum >> 32)
	poly1305Store32(out[8:], uint32(sum))
	sum = uint64(w3) + uint64(poly1305Load32(key[28:])) + (sum >> 32)
	poly1305Store32(out[12:], uint32(sum))
}

// poly1305Absorb adds one 16-byte block to the accumulator h and multiplies
// the result by r modulo 2^130 - 5, leaving h partially reduced (limb 1 may
// slightly exceed 26 bits). hibit is OR-ed into the top limb of the block:
// 1<<24 for a full message block, 0 for an already padded final block. block
// must hold at least 16 bytes.
func poly1305Absorb(h *[5]uint32, r *[5]uint64, block []byte, hibit uint32) {
	_ = block[15] // one bounds check for the whole block

	// h += block, with the block split on 26-bit boundaries.
	a0 := uint64(h[0] + (poly1305Load32(block[0:]) & poly1305LimbMask))
	a1 := uint64(h[1] + ((poly1305Load32(block[3:]) >> 2) & poly1305LimbMask))
	a2 := uint64(h[2] + ((poly1305Load32(block[6:]) >> 4) & poly1305LimbMask))
	a3 := uint64(h[3] + ((poly1305Load32(block[9:]) >> 6) & poly1305LimbMask))
	a4 := uint64(h[4] + ((poly1305Load32(block[12:]) >> 8) | hibit))

	// Limb products whose weight reaches 2^130 wrap around multiplied by 5,
	// so 5*r[i] is precomputed for those terms.
	r0, r1, r2, r3, r4 := r[0], r[1], r[2], r[3], r[4]
	s1, s2, s3, s4 := r1*5, r2*5, r3*5, r4*5

	// h *= r, schoolbook multiplication with the carry of each column folded
	// into the next one.
	d0 := a0*r0 + a1*s4 + a2*s3 + a3*s2 + a4*s1
	d1 := (d0 >> 26) + a0*r1 + a1*r0 + a2*s4 + a3*s3 + a4*s2
	d2 := (d1 >> 26) + a0*r2 + a1*r1 + a2*r0 + a3*s4 + a4*s3
	d3 := (d2 >> 26) + a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*s4
	d4 := (d3 >> 26) + a0*r4 + a1*r3 + a2*r2 + a3*r1 + a4*r0

	// Partial reduction back to 26-bit limbs; the carry out of the top column
	// wraps to the bottom multiplied by 5.
	n0 := uint32(d0) & poly1305LimbMask
	n1 := uint32(d1) & poly1305LimbMask
	n2 := uint32(d2) & poly1305LimbMask
	n3 := uint32(d3) & poly1305LimbMask
	n4 := uint32(d4) & poly1305LimbMask

	n0 += uint32(d4>>26) * 5
	n1 += n0 >> 26
	n0 &= poly1305LimbMask

	h[0], h[1], h[2], h[3], h[4] = n0, n1, n2, n3, n4
}

// poly1305Load32 decodes the first four bytes of b as a little-endian uint32.
func poly1305Load32(b []byte) uint32 {
	_ = b[3]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

// poly1305Store32 writes v into the first four bytes of b in little-endian
// order.
func poly1305Store32(b []byte, v uint32) {
	_ = b[3]
	b[0] = byte(v)
	b[1] = byte(v >> 8)
	b[2] = byte(v >> 16)
	b[3] = byte(v >> 24)
}
1541