1 #include "crc32c.h"
2 
/*
 * CRC32C3X8(ITR) - advance three CRC accumulators by one 64-bit word each.
 *
 * Must be expanded in a scope that provides `data` (a byte pointer) and the
 * uint32_t accumulators crc0, crc1 and crc2. Treating `data` as an array of
 * 64-bit words, accumulator n consumes the word at index 42*n + ITR.
 * crc0 is deliberately updated last.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement, including when it is the body of an unbraced if/else/loop.
 */
#define CRC32C3X8(ITR) do {\
	crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\
	crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\
	crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR)));\
	} while (0)

/*
 * CRC32C7X3X8(ITR) - seven consecutive CRC32C3X8 steps.
 *
 * Covers word indices ITR*7 .. ITR*7+6 of each of the three streams, so
 * invoking it with ITR = 0..5 walks all 42 words of every stream.
 */
#define CRC32C7X3X8(ITR) do {\
	CRC32C3X8((ITR)*7+0);\
	CRC32C3X8((ITR)*7+1);\
	CRC32C3X8((ITR)*7+2);\
	CRC32C3X8((ITR)*7+3);\
	CRC32C3X8((ITR)*7+4);\
	CRC32C3X8((ITR)*7+5);\
	CRC32C3X8((ITR)*7+6);\
	} while (0)
17 
/* Bit tested in the AT_HWCAP word; defined here only if nothing earlier
   has already provided it */
#if !defined(HWCAP_CRC32)
# define HWCAP_CRC32 (1 << 7)
#endif

/* Set to 1 by crc32c_arm64_probe() when its hardware capability check
   passes; stays 0 otherwise */
int crc32c_arm64_available = 0;
23 
24 #ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
25 
26 #include <sys/auxv.h>
27 #include <arm_acle.h>
28 #include <arm_neon.h>
29 
/* Set to 1 by crc32c_arm64_probe() after its first run so that later calls
   skip the getauxval() query */
static int crc32c_probed;
31 
/*
 * crc32c_arm64 - reflected CRC32C of a byte buffer using the ARMv8 CRC32C
 * and PMULL instructions.
 *
 * @data:   first byte of the input
 * @length: number of input bytes
 *
 * While at least 1024 bytes remain, the input is consumed in 1024-byte
 * blocks. Inside a block three CRC streams (crc0, crc1, crc2) are advanced
 * side by side ("by 3") and then folded into a single value with two
 * polynomial multiplies. What is left after the last full block is handled
 * 8 bytes at a time, followed by at most one 4-, one 2- and one 1-byte step.
 *
 * The running value starts at ~0 and is returned as computed; no final
 * inversion is applied here.
 */
uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
{
	/* Multipliers used when folding crc0 (k1) and crc1 (k2) into crc2 */
	const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
	/* crc0..crc2 and data are referenced by name inside CRC32C7X3X8() */
	uint32_t crc0, crc1, crc2;
	uint32_t crc = ~0;
	uint64_t fold0, fold1;
	signed long remaining = length;

	for (remaining -= 1024; remaining >= 0; remaining -= 1024) {
		/* The block's first word goes into stream 0 right away, on
		   top of the running CRC, for better pipelining */
		crc0 = __crc32cd(crc, *(const uint64_t *)data);
		crc1 = 0;
		crc2 = 0;
		data += sizeof(uint64_t);

		/* 6 x 7 = 42 words per stream, expanded inline; within each
		   step crc0 is updated last so it does not wait on the
		   load above */
		CRC32C7X3X8(0);
		CRC32C7X3X8(1);
		CRC32C7X3X8(2);
		CRC32C7X3X8(3);
		CRC32C7X3X8(4);
		CRC32C7X3X8(5);

		data += 42*3*sizeof(uint64_t);

		/* Fold the streams together: crc1 * K2 and crc0 * K1 are each
		   reduced through a CRC step and XORed into crc2, which also
		   absorbs the block's last word */
		fold1 = (uint64_t)vmull_p64(crc1, k2);
		fold0 = (uint64_t)vmull_p64(crc0, k1);
		crc = __crc32cd(crc2, *(const uint64_t *)data);
		crc ^= __crc32cd(0, fold1);
		crc ^= __crc32cd(0, fold0);

		data += sizeof(uint64_t);
	}

	/* Undo the final, failed block subtraction */
	remaining += 1024;
	if (remaining == 0)
		return crc;

	/* Whole 64-bit words of the tail */
	for (remaining -= sizeof(uint64_t); remaining >= 0;
	     remaining -= sizeof(uint64_t)) {
		crc = __crc32cd(crc, *(const uint64_t *)data);
		data += sizeof(uint64_t);
	}

	/* remaining is negative now, but only multiples of 8 were taken off,
	   so in two's complement its low three bits still match the leftover
	   byte count; testing them beats a byte-by-byte loop */
	if (remaining & sizeof(uint32_t)) {
		crc = __crc32cw(crc, *(const uint32_t *)data);
		data += sizeof(uint32_t);
	}
	if (remaining & sizeof(uint16_t)) {
		crc = __crc32ch(crc, *(const uint16_t *)data);
		data += sizeof(uint16_t);
	}
	if (remaining & sizeof(uint8_t))
		crc = __crc32cb(crc, *(const uint8_t *)data);

	return crc;
}
102 
crc32c_arm64_probe(void)103 void crc32c_arm64_probe(void)
104 {
105 	unsigned long hwcap;
106 
107 	if (!crc32c_probed) {
108 		hwcap = getauxval(AT_HWCAP);
109 		if (hwcap & HWCAP_CRC32)
110 			crc32c_arm64_available = 1;
111 		crc32c_probed = 1;
112 	}
113 }
114 
115 #endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */
116