1; XzCrc64Opt.asm -- CRC64 calculation : optimized version
2; 2011-06-28 : Igor Pavlov : Public domain
3
4include 7zAsm.asm
5
6MY_ASM_START
7
8ifdef x64
9
; Register aliases for the x64 build.
;   rD, rN    : working registers; their sum is the data address (see SRCDAT).
;   num_VAR   : register MY_PROLOG reads the byte count from, and later reuses
;               to hold the end-of-data pointer.
;   table_VAR : register MY_PROLOG reads the table base from.
; rD and table_VAR are both r9, so MY_PROLOG copies table_VAR into r5 before
; it loads rD.
; NOTE(review): r8 / r9 as 3rd / 4th argument registers matches the Microsoft
; x64 convention; the actual calling convention is set by 7zAsm.asm - confirm.
10    rD   equ  r9
11    rN   equ  r10
12
13    num_VAR     equ  r8
14    table_VAR   equ  r9
15
; Address expression (used inside brackets) for the 4-byte data loads.
16    SRCDAT  equ  rN + rD
17
; CRC_XOR dest, src, t  (x64)
;   XORs into dest the 64-bit table entry selected by index register src in
;   sub-table number t. r5 is the table base; entries are 8 bytes wide and
;   consecutive sub-tables are 0800h bytes (256 entries * 8 bytes) apart.
18CRC_XOR macro dest:req, src:req, t:req
19    xor     dest, QWORD PTR [r5 + src * 8 + 0800h * t]
20endm
21
; CRC1b  (x64): fold one data byte into the CRC held in r0.
;   Reads the byte at [rD], advances rD by one and decrements rN.
;   Overwrites x6 and x3.
;   The closing dec leaves ZF set exactly when rN reaches zero; MY_PROLOG
;   depends on that, because its jnz follows this macro directly.
; NOTE(review): comments assume xN is the 32-bit view of rN and x0_L is the low
; byte of r0 (aliases come from 7zAsm.asm, not shown here) - confirm.
22CRC1b macro
23    movzx   x6, BYTE PTR [rD]         ; x6 = next data byte
24    inc     rD
25    movzx   x3, x0_L                  ; x3 = low byte of the current CRC
26    xor     x6, x3                    ; x6 = table index (data byte xor CRC byte)
27    shr     r0, 8                     ; drop the CRC byte that was just consumed
    ; r0 is XORed with entry x6 of sub-table 0
28    CRC_XOR r0, r6, 0
29    dec     rN                        ; one byte fewer remaining; ZF is used by the caller
30endm
31
; MY_PROLOG crc_end  (x64)
;   Entry sequence shared with MY_EPILOG. The argument is the label to jump to
;   when the 4-byte main loop must be skipped (no data, or fewer than 8 bytes
;   left after alignment).
;   Values read on entry: r1 = starting CRC, r2 = data pointer,
;   num_VAR = byte count, table_VAR = table base.
;   State on fall-through into the main loop:
;     r0      = CRC, already XORed with the first 4 data bytes
;     r5      = table base
;     num_VAR = end-of-data pointer
;     rN + rD = address of the next 4 bytes to load
;     rD      = negative multiple of 4 that reaches zero when the loop is done
; NOTE(review): MY_PUSH_4_REGS is defined outside this file; presumably it
; saves the four non-volatile registers used below - confirm in 7zAsm.asm.
32MY_PROLOG macro crc_end:req
33    MY_PUSH_4_REGS
34
35    mov     r0, r1
36    mov     rN, num_VAR
    ; table_VAR must be copied before rD is written: both alias r9
37    mov     r5, table_VAR
38    mov     rD, r2
39    test    rN, rN
40    jz      crc_end
    ; Byte-wise loop until the data pointer is 4-byte aligned or the data runs out
41  @@:
42    test    rD, 3
43    jz      @F
44    CRC1b
    ; ZF here comes from the dec at the end of CRC1b
45    jnz     @B
46  @@:
    ; Fewer than 8 bytes left: let the byte-wise tail in MY_EPILOG handle them
47    cmp     rN, 8
48    jb      crc_end
49    add     rN, rD                    ; rN = end-of-data pointer
50    mov     num_VAR, rN               ; keep it for MY_EPILOG
51    sub     rN, 4
52    and     rN, NOT 3                 ; rN = address of the last aligned dword that fits
53    sub     rD, rN                    ; rD = data pointer minus that address (negative)
54    mov     x1, [SRCDAT]              ; first 4 data bytes
55    xor     r0, r1                    ; fold them into the CRC ahead of the loop
56    add     rN, 4                     ; SRCDAT now addresses the following dword
57endm
58
; MY_EPILOG crc_end  (x64)
;   Exit sequence paired with MY_PROLOG. Defines the label passed as argument.
;   The first part runs only when falling out of the main loop (rD is zero
;   there): it removes the 4 data bytes that were XORed into r0 in advance but
;   not yet processed, then rebuilds rD as a plain pointer and rN as the number
;   of bytes still to do. From the label on, the remaining bytes are processed
;   one at a time; the CRC is left in r0.
; NOTE(review): MY_POP_4_REGS is defined outside this file; presumably it
; restores what MY_PUSH_4_REGS saved - confirm in 7zAsm.asm.
59MY_EPILOG macro crc_end:req
60    sub     rN, 4
61    mov     x1, [SRCDAT]              ; the dword that was folded in ahead of time
62    xor     r0, r1                    ; XOR again to undo that fold
63    mov     rD, rN                    ; rD = pointer to the first unprocessed byte
64    mov     rN, num_VAR               ; end-of-data pointer saved by MY_PROLOG
65    sub     rN, rD                    ; rN = bytes remaining
66  crc_end:
67    test    rN, rN
68    jz      @F
69    CRC1b
70    jmp     crc_end
71  @@:
72    MY_POP_4_REGS
73endm
74
; XzCrc64UpdateT4  (x64)
;   Updates a 64-bit CRC over a byte buffer using four 256-entry tables.
;   MY_PROLOG handles argument setup and alignment; the loop below consumes
;   4 bytes per iteration; MY_EPILOG handles the tail and leaves the CRC in r0.
;   Loop invariant: r0 already contains the 4 data bytes being processed
;   (XORed in by MY_PROLOG or by the previous iteration).
; NOTE(review): MY_PROC / MY_ENDP and the meaning of the numeric argument are
; defined in 7zAsm.asm (not shown); x0_L / x0_H are assumed to be the two low
; bytes of r0 - confirm.
75MY_PROC XzCrc64UpdateT4, 4
76    MY_PROLOG crc_end_4
77    align 16
78  main_loop_4:
79    mov     x1, [SRCDAT]              ; next 4 data bytes, merged into r0 at the end
80    movzx   x2, x0_L                  ; CRC byte 0
81    movzx   x3, x0_H                  ; CRC byte 1
82    shr     r0, 16
83    movzx   x6, x0_L                  ; CRC byte 2
84    movzx   x7, x0_H                  ; CRC byte 3
85    shr     r0, 16                    ; r0 = upper 32 bits of the old CRC
    ; Lookups alternate between r1 and r0 as accumulators; byte 0 uses
    ; sub-table 3 and byte 3 uses sub-table 0
86    CRC_XOR r1, r2, 3
87    CRC_XOR r0, r3, 2
88    CRC_XOR r1, r6, 1
89    CRC_XOR r0, r7, 0
90    xor     r0, r1                    ; combine both accumulators
91
92    add     rD, 4                     ; rD counts up toward zero
93    jnz     main_loop_4
94
95    MY_EPILOG crc_end_4
96MY_ENDP
97
98else
99
; Register and argument aliases for the non-x64 build.
;   rD, rN    : working registers; their sum is the data address (see SRCDAT).
;               MY_PROLOG never loads rD, so the data pointer is expected in r1
;               on entry.
;   crc_val   : r4-relative offset of the 64-bit starting CRC (MY_PROLOG reads
;               it as two dwords).
;   crc_table : offset of the table pointer, 8 bytes past crc_val.
;   num_VAR   : same memory slot as table_VAR. MY_PROLOG first loads the table
;               pointer into r5 and only afterwards stores the end-of-data
;               pointer into this slot.
; NOTE(review): REG_SIZE * 5 presumably skips the return address plus the four
; registers pushed by MY_PUSH_4_REGS, with r4 as the stack pointer; both are
; defined in 7zAsm.asm - confirm.
100    rD   equ  r1
101    rN   equ  r7
102
103    crc_val     equ (REG_SIZE * 5)
104    crc_table   equ (8 + crc_val)
105    table_VAR   equ [r4 + crc_table]
106    num_VAR     equ table_VAR
107
108
; Address expression (used inside brackets) for the 4-byte data loads.
109    SRCDAT  equ  rN + rD
110
; CRC op0, op1, dest0, dest1, src, t  (32-bit build)
;   Accesses the 8-byte table entry selected by index register src in sub-table
;   number t (base r5, sub-tables 0800h bytes apart) as two dwords:
;   op0 is applied to dest0 with the dword at offset 0,
;   op1 is applied to dest1 with the dword at offset 4.
;   Passing the instructions as parameters lets a caller use mov instead of xor
;   for either half.
111CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
112    op0     dest0, DWORD PTR [r5 + src * 8 + 0800h * t]
113    op1     dest1, DWORD PTR [r5 + src * 8 + 0800h * t + 4]
114endm
115
; CRC_XOR dest0, dest1, src, t  (32-bit build)
;   XORs both dword halves of a table entry into the register pair
;   dest0 (offset 0) and dest1 (offset 4); a thin wrapper over the CRC macro.
116CRC_XOR macro dest0:req, dest1:req, src:req, t:req
117    CRC xor, xor, dest0, dest1, src, t
118endm
119
120
; CRC1b  (32-bit build): fold one data byte into the CRC held in r2:r0
;   (r2 = upper dword, r0 = lower dword).
;   Reads the byte at [rD], advances rD by one and decrements rN.
;   Overwrites x6 and x3.
;   The closing dec leaves ZF set exactly when rN reaches zero; MY_PROLOG
;   depends on that, because its jnz follows this macro directly.
; NOTE(review): x0_L is assumed to be the low byte of r0 (alias from 7zAsm.asm,
; not shown here) - confirm.
121CRC1b macro
122    movzx   x6, BYTE PTR [rD]         ; x6 = next data byte
123    inc     rD
124    movzx   x3, x0_L                  ; x3 = low byte of the current CRC
125    xor     x6, x3                    ; x6 = table index (data byte xor CRC byte)
126    shrd    r0, r2, 8                 ; 64-bit shift right by 8, lower half
127    shr     r2, 8                     ; 64-bit shift right by 8, upper half
    ; r2:r0 is XORed with entry x6 of sub-table 0
128    CRC_XOR r0, r2, r6, 0
129    dec     rN                        ; one byte fewer remaining; ZF is used by the caller
130endm
131
; MY_PROLOG crc_end  (32-bit build)
;   Entry sequence shared with MY_EPILOG. The argument is the label to jump to
;   when the 4-byte main loop must be skipped (no data, or fewer than 8 bytes
;   left after alignment).
;   Values read on entry: rD (r1) = data pointer, r2 = byte count,
;   [r4 + crc_val] = 64-bit starting CRC, table_VAR = table base.
;   State on fall-through into the main loop:
;     r2:r0   = CRC, lower dword already XORed with the first 4 data bytes
;     r5      = table base
;     num_VAR = end-of-data pointer (overwrites the table_VAR slot)
;     rN + rD = address of the next 4 bytes to load
;     rD      = negative multiple of 4 that reaches zero when the loop is done
; NOTE(review): MY_PUSH_4_REGS is defined outside this file; presumably it
; saves the four registers used below - confirm in 7zAsm.asm.
132MY_PROLOG macro crc_end:req
133    MY_PUSH_4_REGS
134
    ; The byte count must leave r2 before r2 receives the upper CRC dword
135    mov     rN, r2
136
137    mov     x0, [r4 + crc_val]        ; lower dword of the starting CRC
138    mov     x2, [r4 + crc_val + 4]    ; upper dword of the starting CRC
    ; Load the table base now: its slot is reused as num_VAR further down
139    mov     r5, table_VAR
140    test    rN, rN
141    jz      crc_end
    ; Byte-wise loop until the data pointer is 4-byte aligned or the data runs out
142  @@:
143    test    rD, 3
144    jz      @F
145    CRC1b
    ; ZF here comes from the dec at the end of CRC1b
146    jnz     @B
147  @@:
    ; Fewer than 8 bytes left: let the byte-wise tail in MY_EPILOG handle them
148    cmp     rN, 8
149    jb      crc_end
150    add     rN, rD                    ; rN = end-of-data pointer
151
152    mov     num_VAR, rN               ; keep it for MY_EPILOG
153
154    sub     rN, 4
155    and     rN, NOT 3                 ; rN = address of the last aligned dword that fits
156    sub     rD, rN                    ; rD = data pointer minus that address (negative)
157    xor     r0, [SRCDAT]              ; fold the first 4 data bytes into the CRC
158    add     rN, 4                     ; SRCDAT now addresses the following dword
159endm
160
; MY_EPILOG crc_end  (32-bit build)
;   Exit sequence paired with MY_PROLOG. Defines the label passed as argument.
;   The first part runs only when falling out of the main loop (rD is zero
;   there): it removes the 4 data bytes that were XORed into r0 in advance but
;   not yet processed, then rebuilds rD as a plain pointer and rN as the number
;   of bytes still to do. From the label on, the remaining bytes are processed
;   one at a time; the CRC is left in r2:r0.
; NOTE(review): MY_POP_4_REGS is defined outside this file; presumably it
; restores what MY_PUSH_4_REGS saved - confirm in 7zAsm.asm.
161MY_EPILOG macro crc_end:req
162    sub     rN, 4
163    xor     r0, [SRCDAT]              ; XOR the pre-folded dword again to undo it
164
165    mov     rD, rN                    ; rD = pointer to the first unprocessed byte
166    mov     rN, num_VAR               ; end-of-data pointer saved by MY_PROLOG
167    sub     rN, rD                    ; rN = bytes remaining
168  crc_end:
169    test    rN, rN
170    jz      @F
171    CRC1b
172    jmp     crc_end
173  @@:
174    MY_POP_4_REGS
175endm
176
; XzCrc64UpdateT4  (32-bit build)
;   Updates a 64-bit CRC over a byte buffer using four 256-entry tables.
;   MY_PROLOG handles argument setup and alignment; the loop below consumes
;   4 bytes per iteration; MY_EPILOG handles the tail and leaves the CRC in
;   r2:r0.
;   Loop invariants at main_loop_4: r0 (lower CRC dword) already contains the
;   4 data bytes being processed, r2 is the upper CRC dword, and x6 holds the
;   low byte of r0 as the first table index.
;   Inside the loop r3 accumulates the next lower dword and r2 the next upper
;   dword.
; NOTE(review): MY_PROC / MY_ENDP and the meaning of the numeric argument are
; defined in 7zAsm.asm (not shown); xN_L / xN_H are assumed to be the two low
; bytes of rN - confirm.
177MY_PROC XzCrc64UpdateT4, 5
178    MY_PROLOG crc_end_4
179    movzx   x6, x0_L                  ; prime the first table index for iteration one
180    align 16
181  main_loop_4:
182    mov     r3, [SRCDAT]              ; next 4 data bytes
183    xor     r3, r2                    ; old upper dword shifts down into the new lower dword
184
    ; First lookup: xor the lower half into r3, but mov the upper half into r2,
    ; because the old r2 has just been consumed into r3
185    CRC xor, mov, r3, r2, r6, 3
186    movzx   x6, x0_H                  ; CRC byte 1
187    shr     r0, 16
188    CRC_XOR r3, r2, r6, 2
189
190    movzx   x6, x0_L                  ; CRC byte 2
191    movzx   x0, x0_H                  ; CRC byte 3; r0 itself becomes the index
192    CRC_XOR r3, r2, r6, 1
193    CRC_XOR r3, r2, r0, 0
194    movzx   x6, x3_L                  ; first table index for the next iteration
195    mov     r0, r3                    ; r0 = new lower CRC dword
196
197    add     rD, 4                     ; rD counts up toward zero
198    jnz     main_loop_4
199
200    MY_EPILOG crc_end_4
201MY_ENDP
202
203endif
204
205end
206