1 /* crc32.c -- compute the CRC-32 of a data stream
2  * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  *
5  * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
6  * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
7  * tables for updating the shift register in one step with three exclusive-ors
8  * instead of four steps with four exclusive-ors.  This results in about a
9  * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
10  */
11 
12 #include "zbuild.h"
13 #include "zendian.h"
14 #include <inttypes.h>
15 #include "deflate.h"
16 #include "functable.h"
17 #include "crc32_p.h"
18 #include "crc32_tbl.h"
19 
20 
21 /* Local functions for crc concatenation */
22 static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2);
23 static void crc32_combine_gen_(uint32_t *op, z_off64_t len2);
24 
25 /* =========================================================================
26  * This function can be used by asm versions of crc32()
27  */
PREFIX(get_crc_table)28 const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) {
29     return (const uint32_t *)crc_table;
30 }
31 
32 #ifdef ZLIB_COMPAT
PREFIX(crc32_z)33 unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) {
34     if (buf == NULL) return 0;
35 
36     return (unsigned long)functable.crc32((uint32_t)crc, buf, len);
37 }
38 #else
PREFIX(crc32_z)39 uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
40     if (buf == NULL) return 0;
41 
42     return functable.crc32(crc, buf, len);
43 }
44 #endif
45 /* ========================================================================= */
46 #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
47 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
48 #define DO4 DO1; DO1; DO1; DO1
49 
50 /* ========================================================================= */
crc32_generic(uint32_t crc,const unsigned char * buf,uint64_t len)51 Z_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uint64_t len) {
52     crc = crc ^ 0xffffffff;
53 
54 #ifdef UNROLL_MORE
55     while (len >= 8) {
56         DO8;
57         len -= 8;
58     }
59 #else
60     while (len >= 4) {
61         DO4;
62         len -= 4;
63     }
64 #endif
65 
66     if (len) do {
67         DO1;
68     } while (--len);
69     return crc ^ 0xffffffff;
70 }
71 
72 #ifdef ZLIB_COMPAT
PREFIX(crc32)73 unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) {
74     return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len);
75 }
76 #else
PREFIX(crc32)77 uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
78     return PREFIX(crc32_z)(crc, buf, len);
79 }
80 #endif
81 
82 /*
83    This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
84    integer pointer type. This violates the strict aliasing rule, where a
85    compiler can assume, for optimization purposes, that two pointers to
86    fundamentally different types won't ever point to the same memory. This can
87    manifest as a problem only if one of the pointers is written to. This code
88    only reads from those pointers. So long as this code remains isolated in
89    this compilation unit, there won't be a problem. For this reason, this code
90    should not be copied and pasted into a compilation unit in which other code
91    writes to the buffer that is passed to these routines.
92  */
93 
94 /* ========================================================================= */
95 #if BYTE_ORDER == LITTLE_ENDIAN
96 #define DOLIT4 c ^= *buf4++; \
97         c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
98             crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
99 #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
100 
101 /* ========================================================================= */
crc32_little(uint32_t crc,const unsigned char * buf,uint64_t len)102 Z_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint64_t len) {
103     Z_REGISTER uint32_t c;
104     Z_REGISTER const uint32_t *buf4;
105 
106     c = crc;
107     c = ~c;
108     while (len && ((ptrdiff_t)buf & 3)) {
109         c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
110         len--;
111     }
112 
113     buf4 = (const uint32_t *)(const void *)buf;
114 
115 #ifdef UNROLL_MORE
116     while (len >= 32) {
117         DOLIT32;
118         len -= 32;
119     }
120 #endif
121 
122     while (len >= 4) {
123         DOLIT4;
124         len -= 4;
125     }
126     buf = (const unsigned char *)buf4;
127 
128     if (len) do {
129         c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
130     } while (--len);
131     c = ~c;
132     return c;
133 }
134 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
135 
136 /* ========================================================================= */
137 #if BYTE_ORDER == BIG_ENDIAN
138 #define DOBIG4 c ^= *buf4++; \
139         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
140             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
141 #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
142 
143 /* ========================================================================= */
crc32_big(uint32_t crc,const unsigned char * buf,uint64_t len)144 Z_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t len) {
145     Z_REGISTER uint32_t c;
146     Z_REGISTER const uint32_t *buf4;
147 
148     c = ZSWAP32(crc);
149     c = ~c;
150     while (len && ((ptrdiff_t)buf & 3)) {
151         c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
152         len--;
153     }
154 
155     buf4 = (const uint32_t *)(const void *)buf;
156 
157 #ifdef UNROLL_MORE
158     while (len >= 32) {
159         DOBIG32;
160         len -= 32;
161     }
162 #endif
163 
164     while (len >= 4) {
165         DOBIG4;
166         len -= 4;
167     }
168     buf = (const unsigned char *)buf4;
169 
170     if (len) do {
171         c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
172     } while (--len);
173     c = ~c;
174     return ZSWAP32(c);
175 }
176 #endif /* BYTE_ORDER == BIG_ENDIAN */
177 
178 
179 /* ========================================================================= */
crc32_combine_(uint32_t crc1,uint32_t crc2,z_off64_t len2)180 static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
181     int n;
182 
183     if (len2 > 0)
184         /* operator for 2^n zeros repeats every GF2_DIM n values */
185         for (n = 0; len2; n = (n + 1) % GF2_DIM, len2 >>= 1)
186             if (len2 & 1)
187                 crc1 = gf2_matrix_times(crc_comb[n], crc1);
188     return crc1 ^ crc2;
189 }
190 
191 /* ========================================================================= */
192 #ifdef ZLIB_COMPAT
PREFIX(crc32_combine)193 unsigned long Z_EXPORT PREFIX(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off_t len2) {
194     return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2);
195 }
196 
PREFIX4(crc32_combine)197 unsigned long Z_EXPORT PREFIX4(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off64_t len2) {
198     return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2);
199 }
200 #else
PREFIX4(crc32_combine)201 uint32_t Z_EXPORT PREFIX4(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
202     return crc32_combine_(crc1, crc2, len2);
203 }
204 #endif
205 
206 #ifdef X86_PCLMULQDQ_CRC
207 #include "arch/x86/x86.h"
208 #include "arch/x86/crc_folding.h"
209 
crc_finalize(deflate_state * const s)210 Z_INTERNAL void crc_finalize(deflate_state *const s) {
211     if (x86_cpu_has_pclmulqdq)
212         s->strm->adler = crc_fold_512to32(s);
213 }
214 #endif
215 
crc_reset(deflate_state * const s)216 Z_INTERNAL void crc_reset(deflate_state *const s) {
217 #ifdef X86_PCLMULQDQ_CRC
218     x86_check_features();
219     if (x86_cpu_has_pclmulqdq) {
220         crc_fold_init(s);
221         return;
222     }
223 #endif
224     s->strm->adler = PREFIX(crc32)(0L, NULL, 0);
225 }
226 
copy_with_crc(PREFIX3 (stream)* strm,unsigned char * dst,unsigned long size)227 Z_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
228 #ifdef X86_PCLMULQDQ_CRC
229     if (x86_cpu_has_pclmulqdq) {
230         crc_fold_copy(strm->state, dst, strm->next_in, size);
231         return;
232     }
233 #endif
234     memcpy(dst, strm->next_in, size);
235     strm->adler = PREFIX(crc32)(strm->adler, dst, size);
236 }
237 
238 /* ========================================================================= */
239 
crc32_combine_gen_(uint32_t * op,z_off64_t len2)240 static void crc32_combine_gen_(uint32_t *op, z_off64_t len2) {
241     uint32_t row;
242     int j;
243     unsigned i;
244 
245     /* if len2 is zero or negative, return the identity matrix */
246     if (len2 <= 0) {
247         row = 1;
248         for (j = 0; j < GF2_DIM; j++) {
249             op[j] = row;
250             row <<= 1;
251         }
252         return;
253     }
254 
255     /* at least one bit in len2 is set -- find it, and copy the operator
256        corresponding to that position into op */
257     i = 0;
258     for (;;) {
259         if (len2 & 1) {
260             for (j = 0; j < GF2_DIM; j++)
261                 op[j] = crc_comb[i][j];
262             break;
263         }
264         len2 >>= 1;
265         i = (i + 1) % GF2_DIM;
266     }
267 
268     /* for each remaining bit set in len2 (if any), multiply op by the operator
269        corresponding to that position */
270     for (;;) {
271         len2 >>= 1;
272         i = (i + 1) % GF2_DIM;
273         if (len2 == 0)
274             break;
275         if (len2 & 1)
276             for (j = 0; j < GF2_DIM; j++)
277                 op[j] = gf2_matrix_times(crc_comb[i], op[j]);
278     }
279 }
280 
281 /* ========================================================================= */
282 
283 #ifdef ZLIB_COMPAT
PREFIX(crc32_combine_gen)284 void Z_EXPORT PREFIX(crc32_combine_gen)(uint32_t *op, z_off_t len2) {
285     crc32_combine_gen_(op, len2);
286 }
287 #endif
288 
PREFIX4(crc32_combine_gen)289 void Z_EXPORT PREFIX4(crc32_combine_gen)(uint32_t *op, z_off64_t len2) {
290     crc32_combine_gen_(op, len2);
291 }
292 
293 /* ========================================================================= */
PREFIX(crc32_combine_op)294 uint32_t Z_EXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t *op) {
295     return gf2_matrix_times(op, crc1) ^ crc2;
296 }
297