1 /*
2  * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 
29 #include "mpdecimal.h"
30 #include "bits.h"
31 #include "constants.h"
32 #include "convolute.h"
33 #include "fnt.h"
34 #include "fourstep.h"
35 #include "numbertheory.h"
36 #include "sixstep.h"
37 #include "umodarith.h"
38 
39 
40 /* Bignum: Fast convolution using the Number Theoretic Transform. Used for
41    the multiplication of very large coefficients. */
42 
43 
44 /* Convolute the data in c1 and c2. Result is in c1. */
45 int
fnt_convolute(mpd_uint_t * c1,mpd_uint_t * c2,mpd_size_t n,int modnum)46 fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
47 {
48     int (*fnt)(mpd_uint_t *, mpd_size_t, int);
49     int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
50 #ifdef PPRO
51     double dmod;
52     uint32_t dinvmod[3];
53 #endif
54     mpd_uint_t n_inv, umod;
55     mpd_size_t i;
56 
57 
58     SETMODULUS(modnum);
59     n_inv = POWMOD(n, (umod-2));
60 
61     if (ispower2(n)) {
62         if (n > SIX_STEP_THRESHOLD) {
63             fnt = six_step_fnt;
64             inv_fnt = inv_six_step_fnt;
65         }
66         else {
67             fnt = std_fnt;
68             inv_fnt = std_inv_fnt;
69         }
70     }
71     else {
72         fnt = four_step_fnt;
73         inv_fnt = inv_four_step_fnt;
74     }
75 
76     if (!fnt(c1, n, modnum)) {
77         return 0;
78     }
79     if (!fnt(c2, n, modnum)) {
80         return 0;
81     }
82     for (i = 0; i < n-1; i += 2) {
83         mpd_uint_t x0 = c1[i];
84         mpd_uint_t y0 = c2[i];
85         mpd_uint_t x1 = c1[i+1];
86         mpd_uint_t y1 = c2[i+1];
87         MULMOD2(&x0, y0, &x1, y1);
88         c1[i] = x0;
89         c1[i+1] = x1;
90     }
91 
92     if (!inv_fnt(c1, n, modnum)) {
93         return 0;
94     }
95     for (i = 0; i < n-3; i += 4) {
96         mpd_uint_t x0 = c1[i];
97         mpd_uint_t x1 = c1[i+1];
98         mpd_uint_t x2 = c1[i+2];
99         mpd_uint_t x3 = c1[i+3];
100         MULMOD2C(&x0, &x1, n_inv);
101         MULMOD2C(&x2, &x3, n_inv);
102         c1[i] = x0;
103         c1[i+1] = x1;
104         c1[i+2] = x2;
105         c1[i+3] = x3;
106     }
107 
108     return 1;
109 }
110 
111 /* Autoconvolute the data in c1. Result is in c1. */
112 int
fnt_autoconvolute(mpd_uint_t * c1,mpd_size_t n,int modnum)113 fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
114 {
115     int (*fnt)(mpd_uint_t *, mpd_size_t, int);
116     int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
117 #ifdef PPRO
118     double dmod;
119     uint32_t dinvmod[3];
120 #endif
121     mpd_uint_t n_inv, umod;
122     mpd_size_t i;
123 
124 
125     SETMODULUS(modnum);
126     n_inv = POWMOD(n, (umod-2));
127 
128     if (ispower2(n)) {
129         if (n > SIX_STEP_THRESHOLD) {
130             fnt = six_step_fnt;
131             inv_fnt = inv_six_step_fnt;
132         }
133         else {
134             fnt = std_fnt;
135             inv_fnt = std_inv_fnt;
136         }
137     }
138     else {
139         fnt = four_step_fnt;
140         inv_fnt = inv_four_step_fnt;
141     }
142 
143     if (!fnt(c1, n, modnum)) {
144         return 0;
145     }
146     for (i = 0; i < n-1; i += 2) {
147         mpd_uint_t x0 = c1[i];
148         mpd_uint_t x1 = c1[i+1];
149         MULMOD2(&x0, x0, &x1, x1);
150         c1[i] = x0;
151         c1[i+1] = x1;
152     }
153 
154     if (!inv_fnt(c1, n, modnum)) {
155         return 0;
156     }
157     for (i = 0; i < n-3; i += 4) {
158         mpd_uint_t x0 = c1[i];
159         mpd_uint_t x1 = c1[i+1];
160         mpd_uint_t x2 = c1[i+2];
161         mpd_uint_t x3 = c1[i+3];
162         MULMOD2C(&x0, &x1, n_inv);
163         MULMOD2C(&x2, &x3, n_inv);
164         c1[i] = x0;
165         c1[i+1] = x1;
166         c1[i+2] = x2;
167         c1[i+3] = x3;
168     }
169 
170     return 1;
171 }
172