1 /*
2  * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 
29 #include "mpdecimal.h"
30 #include <stdio.h>
31 #include "bits.h"
32 #include "constants.h"
33 #include "fnt.h"
34 #include "fourstep.h"
35 #include "numbertheory.h"
36 #include "sixstep.h"
37 #include "umodarith.h"
38 #include "convolute.h"
39 
40 
41 /* Bignum: Fast convolution using the Number Theoretic Transform. Used for
42    the multiplication of very large coefficients. */
43 
44 
45 /* Convolute the data in c1 and c2. Result is in c1. */
46 int
fnt_convolute(mpd_uint_t * c1,mpd_uint_t * c2,mpd_size_t n,int modnum)47 fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
48 {
49     int (*fnt)(mpd_uint_t *, mpd_size_t, int);
50     int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
51 #ifdef PPRO
52     double dmod;
53     uint32_t dinvmod[3];
54 #endif
55     mpd_uint_t n_inv, umod;
56     mpd_size_t i;
57 
58 
59     SETMODULUS(modnum);
60     n_inv = POWMOD(n, (umod-2));
61 
62     if (ispower2(n)) {
63         if (n > SIX_STEP_THRESHOLD) {
64             fnt = six_step_fnt;
65             inv_fnt = inv_six_step_fnt;
66         }
67         else {
68             fnt = std_fnt;
69             inv_fnt = std_inv_fnt;
70         }
71     }
72     else {
73         fnt = four_step_fnt;
74         inv_fnt = inv_four_step_fnt;
75     }
76 
77     if (!fnt(c1, n, modnum)) {
78         return 0;
79     }
80     if (!fnt(c2, n, modnum)) {
81         return 0;
82     }
83     for (i = 0; i < n-1; i += 2) {
84         mpd_uint_t x0 = c1[i];
85         mpd_uint_t y0 = c2[i];
86         mpd_uint_t x1 = c1[i+1];
87         mpd_uint_t y1 = c2[i+1];
88         MULMOD2(&x0, y0, &x1, y1);
89         c1[i] = x0;
90         c1[i+1] = x1;
91     }
92 
93     if (!inv_fnt(c1, n, modnum)) {
94         return 0;
95     }
96     for (i = 0; i < n-3; i += 4) {
97         mpd_uint_t x0 = c1[i];
98         mpd_uint_t x1 = c1[i+1];
99         mpd_uint_t x2 = c1[i+2];
100         mpd_uint_t x3 = c1[i+3];
101         MULMOD2C(&x0, &x1, n_inv);
102         MULMOD2C(&x2, &x3, n_inv);
103         c1[i] = x0;
104         c1[i+1] = x1;
105         c1[i+2] = x2;
106         c1[i+3] = x3;
107     }
108 
109     return 1;
110 }
111 
112 /* Autoconvolute the data in c1. Result is in c1. */
113 int
fnt_autoconvolute(mpd_uint_t * c1,mpd_size_t n,int modnum)114 fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
115 {
116     int (*fnt)(mpd_uint_t *, mpd_size_t, int);
117     int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
118 #ifdef PPRO
119     double dmod;
120     uint32_t dinvmod[3];
121 #endif
122     mpd_uint_t n_inv, umod;
123     mpd_size_t i;
124 
125 
126     SETMODULUS(modnum);
127     n_inv = POWMOD(n, (umod-2));
128 
129     if (ispower2(n)) {
130         if (n > SIX_STEP_THRESHOLD) {
131             fnt = six_step_fnt;
132             inv_fnt = inv_six_step_fnt;
133         }
134         else {
135             fnt = std_fnt;
136             inv_fnt = std_inv_fnt;
137         }
138     }
139     else {
140         fnt = four_step_fnt;
141         inv_fnt = inv_four_step_fnt;
142     }
143 
144     if (!fnt(c1, n, modnum)) {
145         return 0;
146     }
147     for (i = 0; i < n-1; i += 2) {
148         mpd_uint_t x0 = c1[i];
149         mpd_uint_t x1 = c1[i+1];
150         MULMOD2(&x0, x0, &x1, x1);
151         c1[i] = x0;
152         c1[i+1] = x1;
153     }
154 
155     if (!inv_fnt(c1, n, modnum)) {
156         return 0;
157     }
158     for (i = 0; i < n-3; i += 4) {
159         mpd_uint_t x0 = c1[i];
160         mpd_uint_t x1 = c1[i+1];
161         mpd_uint_t x2 = c1[i+2];
162         mpd_uint_t x3 = c1[i+3];
163         MULMOD2C(&x0, &x1, n_inv);
164         MULMOD2C(&x2, &x3, n_inv);
165         c1[i] = x0;
166         c1[i+1] = x1;
167         c1[i+2] = x2;
168         c1[i+3] = x3;
169     }
170 
171     return 1;
172 }
173 
174 
175