1/* lzo1x_d.ash -- assembler implementation of the LZO1X decompression algorithm
2
3   This file is part of the LZO real-time data compression library.
4
5   Copyright (C) 1996-2014 Markus Franz Xaver Johannes Oberhumer
6   All Rights Reserved.
7
8   The LZO library is free software; you can redistribute it and/or
9   modify it under the terms of the GNU General Public License as
10   published by the Free Software Foundation; either version 2 of
11   the License, or (at your option) any later version.
12
13   The LZO library is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with the LZO library; see the file COPYING.
20   If not, write to the Free Software Foundation, Inc.,
21   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22
23   Markus F.X.J. Oberhumer
24   <markus@oberhumer.com>
25   http://www.oberhumer.com/opensource/lzo/
26 */
27
28
/* Variant selection: when neither LZO1X nor LZO1Y was defined beforehand, LZO1X is selected. */
29#if !defined(LZO1X) && !defined(LZO1Y)
30#  define LZO1X
31#endif
32
/* NN is a bias folded into the length counts computed below: 3 when LZO_FAST is defined, 0 otherwise. */
/* The bounds-test operands compensate for it with -NN displacements. */
33#if defined(LZO_FAST)
34#  define NN    3
35#else
36#  define NN    0
37#endif
38
39
40/***********************************************************************
41// init
42************************************************************************/
43
/* Decoder entry: read the first input byte and dispatch on it. */
/* Register use visible in this file: %esi = input pointer, %edi = output pointer, */
/* %eax = current token / scratch, %bl = scratch byte, %ecx = copy count, %edx = match source / scratch. */
/* TEST_OP and TEST_IP are macros defined outside this file; presumably output/input bounds checks - TODO confirm. */
44        xorl    %eax,%eax
45        xorl    %ebx,%ebx       /* high bits 9-32 stay 0 */
46        lodsb
        /* first byte <= 17: treat it as an ordinary token and decode it at .L01 */
47        cmpb    $17,%al
48        jbe     .L01
        /* first byte > 17: %eax = (first byte - 17) + NN, the length of an initial literal run */
49        subb    $17-NN,%al
50#if defined(LZO_FAST)
51        jmp     .LFLR
52#else
        /* 4 or more literals: use the general literal copy at .LFLR */
53        cmpb    $4,%al
54        jae     .LFLR
55#if 1
        /* fewer than 4 literals: test both pointers, then byte-copy %ecx bytes with the rep movsb at .LFLR2. */
        /* After that copy a following token < 16 is decoded by the R1 code, whereas the disabled */
        /* .LFLR3 alternative below would send every following token to .LMATCH. */
56        TEST_OP((%edi,%eax),%edx)
57        TEST_IP((%esi,%eax),%edx)
58        movl    %eax,%ecx
59        jmp     .LFLR2
60#else
61        jmp     .LFLR3
62#endif
63#endif
64
65
66/***********************************************************************
67// literal run
68************************************************************************/
69
/* Long literal run: reached through "jz 1b" below when the token byte is 0. */
/* Every zero byte read adds N_255 to %eax; the first non-zero byte ends the loop and */
/* the run length becomes %eax + that byte + 18 (+NN). N_255 is defined elsewhere, presumably the constant 255. */
700:      addl    N_255,%eax
71        TEST_IP(18(%esi,%eax),%edx)     /* minimum */
721:      movb    (%esi),%bl
73        incl    %esi
        /* orb sets ZF when the byte just read is zero */
74        orb     %bl,%bl
75        jz      0b
76        leal    18+NN(%eax,%ebx),%eax
77        jmp     3f
78
79
80        ALIGN3
        /* .L00: top of the main decode loop - fetch the next token byte. */
81.L00:
82#ifdef LZO_DEBUG
    /* debug build only: assert that bits 8-31 of %eax and %ebx are zero, then clear all four work registers */
83    andl $0xffffff00,%eax ; jnz .L_assert_fail
84    andl $0xffffff00,%ebx ; jnz .L_assert_fail
85    xorl %eax,%eax ; xorl %ebx,%ebx
86    xorl %ecx,%ecx ; xorl %edx,%edx
87#endif
        /* TEST_IP_R and LODSB are macros defined elsewhere; the code below expects the token in %al afterwards. */
88        TEST_IP_R(%esi)
89        LODSB
        /* .L01: also entered from init with the first input byte in %al. */
90.L01:
        /* token >= 16 is a match; anything smaller is a literal run */
91        cmpb    $16,%al
92        jae     .LMATCH
93
94/* a literal run */
        /* token 0 selects the long-run encoding above; otherwise the length is token + 3 (+NN) */
95        orb     %al,%al
96        jz      1b
97        addl    $3+NN,%eax
983:
        /* .LFLR: copy %eax (biased by NN) literal bytes from (%esi) to (%edi). */
99.LFLR:
100        TEST_OP(-NN(%edi,%eax),%edx)
101        TEST_IP(-NN(%esi,%eax),%edx)
102#if defined(LZO_FAST)
        /* %ecx = count / 4 is handed to COPYL; NOTL_3 and COPYL are defined elsewhere. */
        /* The masked value left in %eax is subtracted from both pointers after the copy, */
        /* presumably to back off the bytes copied beyond the run - TODO confirm against NOTL_3. */
103        movl    %eax,%ecx
104        NOTL_3(%eax)
105        shrl    $2,%ecx
106        andl    N_3,%eax
107        COPYL(%esi,%edi,%edx)
108        subl    %eax,%esi
109        subl    %eax,%edi
110#else
        /* %eax = count / 4 is handed to COPYL_C, %ecx = count & N_3 is left for the byte copy below */
111        movl    %eax,%ecx
112        shrl    $2,%eax
113        andl    N_3,%ecx
114        COPYL_C(%esi,%edi,%edx,%eax)
        /* .LFLR2: byte-copy the remaining %ecx bytes; init also jumps here for a short first run. */
115.LFLR2:
116        rep
117        movsb
118#endif
119
120#ifdef LZO_DEBUG
    /* debug build only: same register sanity check and reset as at .L00 */
121    andl $0xffffff00,%eax ; jnz .L_assert_fail
122    andl $0xffffff00,%ebx ; jnz .L_assert_fail
123    xorl %eax,%eax ; xorl %ebx,%ebx
124    xorl %ecx,%ecx ; xorl %edx,%edx
125#endif
        /* token following a literal run: >= 16 is a normal match, < 16 falls through into the R1 code */
126        LODSB
127        cmpb    $16,%al
128        jae     .LMATCH
129
130
131/***********************************************************************
132// R1
133************************************************************************/
134
/* R1: token < 16 directly after a literal run. Copies 3 bytes from a match source at */
/* %edi - 0x801 (LZO1X) or %edi - 0x401 (LZO1Y), minus ((token >> 2) + 4 * next input byte). */
135        TEST_OP(3(%edi),%edx)
136        shrl    $2,%eax
137        movb    (%esi),%bl
138#if defined(LZO1X)
139        leal    -0x801(%edi),%edx
140#elif defined(LZO1Y)
141        leal    -0x401(%edi),%edx
142#endif
        /* %eax = (token >> 2) + 4 * %ebx; relies on bits 8-31 of %ebx being zero */
143        leal    (%eax,%ebx,4),%eax
144        incl    %esi
145        subl    %eax,%edx
        /* TEST_LOOKBEHIND is defined elsewhere; presumably rejects a match source before the output start - TODO confirm */
146        TEST_LOOKBEHIND(%edx)
147#if defined(LZO_FAST)
        /* one 4-byte move; %edi is advanced by N_3 below, so only part of the store is kept */
148        movl    (%edx),%ecx
149        movl    %ecx,(%edi)
150#else
        /* three single-byte moves through %al */
151        movb    (%edx),%al
152        movb    %al,(%edi)
153        movb    1(%edx),%al
154        movb    %al,1(%edi)
155        movb    2(%edx),%al
156        movb    %al,2(%edi)
157#endif
        /* N_3 is defined elsewhere, presumably the constant 3 */
158        addl    N_3,%edi
159        jmp     .LMDONE
160
161
162/***********************************************************************
163// M2
164************************************************************************/
165
/* .LMATCH: dispatch on a match token in %al. Tokens >= 64 are M2 matches handled here; */
/* smaller tokens go to .LM3MATCH. */
166        ALIGN3
167.LMATCH:
168        cmpb    $64,%al
169        jb      .LM3MATCH
170
171/* a M2 match */
        /* %ecx will hold the length field, %eax the offset, %edx the match source (starting from %edi - 1) */
172        movl    %eax,%ecx
173        shrl    $2,%eax
174        leal    -1(%edi),%edx
175#if defined(LZO1X)
        /* LZO1X: offset = ((token >> 2) & 7) + 8 * next input byte; length field = token >> 5 */
176        andl    $7,%eax
177        movb    (%esi),%bl
178        shrl    $5,%ecx
179        leal    (%eax,%ebx,8),%eax
180#elif defined(LZO1Y)
        /* LZO1Y: offset = ((token >> 2) & N_3) + 4 * next input byte; length field = token >> 4 */
181        andl    N_3,%eax
182        movb    (%esi),%bl
183        shrl    $4,%ecx
184        leal    (%eax,%ebx,4),%eax
185#endif
186        incl    %esi
        /* match source = %edi - 1 - offset */
187        subl    %eax,%edx
188
        /* Turn the length field into a byte count: +1 (LZO1X) or -1 (LZO1Y); */
        /* the LZO_FAST variants add a further 3, matching NN. */
189#if defined(LZO_FAST)
190#if defined(LZO1X)
191        addl    $1+3,%ecx
192#elif defined(LZO1Y)
193        addl    $2,%ecx
194#endif
195#else
196#if defined(LZO1X)
197        incl    %ecx
198#elif defined(LZO1Y)
199        decl    %ecx
200#endif
201#endif
202
        /* offset >= N_3 uses the longword copy, a smaller offset the byte-wise copy */
203        cmpl    N_3,%eax
204        jae     .LCOPYLONG
205        jmp     .LCOPYBYTE
206
207
208/***********************************************************************
209// M3
210************************************************************************/
211
/* Long M3 length: reached through "jz 1b" below when the 5-bit length field is 0. */
/* Every zero byte read adds N_255 to %eax; the first non-zero byte ends the loop and */
/* the count becomes %eax + that byte + 33 (+NN) in %ecx. */
2120:      addl    N_255,%eax
213        TEST_IP(3(%esi),%edx)       /* minimum */
2141:      movb    (%esi),%bl
215        incl    %esi
216        orb     %bl,%bl
217        jz      0b
218        leal    33+NN(%eax,%ebx),%ecx
        /* clear %eax so only the 16-bit load at 3: contributes to the offset */
219        xorl    %eax,%eax
220        jmp     3f
221
222
223        ALIGN3
        /* .LM3MATCH: token < 64. Tokens >= 32 are M3 matches handled here; smaller ones go to .LM4MATCH. */
224.LM3MATCH:
225        cmpb    $32,%al
226        jb      .LM4MATCH
227
228/* a M3 match */
        /* length field = token & 31; 0 selects the long encoding above, otherwise count = field + 2 (+NN) */
229        andl    $31,%eax
230        jz      1b
231        lea     2+NN(%eax),%ecx
2323:
233#ifdef LZO_DEBUG
    /* debug build only: bits 16-31 of %eax must be zero before the 16-bit load */
234    andl $0xffff0000,%eax ; jnz .L_assert_fail
235#endif
        /* offset = (16-bit word at the input pointer) >> 2; match source = %edi - 1 - offset */
236        movw    (%esi),%ax
237        leal    -1(%edi),%edx
238        shrl    $2,%eax
239        addl    $2,%esi
240        subl    %eax,%edx
241
        /* offset < N_3 needs the byte-wise copy; otherwise fall through into .LCOPYLONG */
242        cmpl    N_3,%eax
243        jb      .LCOPYBYTE
244
245
246/***********************************************************************
247// copy match
248************************************************************************/
249
/* Match copy. On entry %edx = match source, %edi = output pointer, %ecx = byte count (biased by NN). */
/* COPYL, COPYL_C, COPYB_C, TEST_OP, TEST_OP_R and TEST_LOOKBEHIND are macros defined outside this file. */
250        ALIGN1
251.LCOPYLONG:                      /* copy match using longwords */
252        TEST_LOOKBEHIND(%edx)
253#if defined(LZO_FAST)
        /* %eax = %edi + %ecx - 3 becomes the new output pointer after the copy; %ecx = count / 4 for COPYL */
254        leal    -3(%edi,%ecx),%eax
255        shrl    $2,%ecx
256        TEST_OP_R(%eax)
257        COPYL(%edx,%edi,%ebx)
258        movl    %eax,%edi
        /* %ebx was handed to COPYL; clear it again so its upper bits are zero for later use */
259        xorl    %ebx,%ebx
260#else
261        TEST_OP((%edi,%ecx),%eax)
        /* %ebx = count / 4; when that is zero skip straight to the byte copy at 2: */
262        movl    %ecx,%ebx
263        shrl    $2,%ebx
264        jz      2f
265        COPYL_C(%edx,%edi,%eax,%ebx)
        /* remaining count & N_3 bytes, if any */
266        andl    N_3,%ecx
267        jz      1f
2682:      COPYB_C(%edx,%edi,%al,%ecx)
2691:
270#endif
271
/* .LMDONE: common tail of every match. The low 2 bits of the byte at -2(%esi) give the */
/* number of literal bytes that directly follow the match; zero returns to the main loop. */
272.LMDONE:
273        movb    -2(%esi),%al
274        andl    N_3,%eax
275        jz      .L00
/* .LFLR3: copy %eax trailing literal bytes from input to output. */
276.LFLR3:
277        TEST_OP((%edi,%eax),%edx)
278        TEST_IP((%esi,%eax),%edx)
279#if defined(LZO_FAST)
        /* move 4 bytes at once but advance both pointers by only %eax */
280        movl    (%esi),%edx
281        addl    %eax,%esi
282        movl    %edx,(%edi)
283        addl    %eax,%edi
284#else
285        COPYB_C(%esi,%edi,%cl,%eax)
286#endif
287
288#ifdef LZO_DEBUG
    /* debug build only: assert that bits 8-31 of %eax and %ebx are zero, then clear all four work registers */
289    andl $0xffffff00,%eax ; jnz .L_assert_fail
290    andl $0xffffff00,%ebx ; jnz .L_assert_fail
291    xorl %eax,%eax ; xorl %ebx,%ebx
292    xorl %ecx,%ecx ; xorl %edx,%edx
293#endif
        /* the token after these literals is always decoded as a match */
294        LODSB
295        jmp     .LMATCH
296
297
298        ALIGN3
299.LCOPYBYTE:                      /* copy match using bytes */
300        TEST_LOOKBEHIND(%edx)
301        TEST_OP(-NN(%edi,%ecx),%eax)
        /* rep movsb reads from %esi: swap in the match source and keep the input pointer in %edx */
302        xchgl   %edx,%esi
303#if defined(LZO_FAST)
        /* subtract N_3 from the count before the exact byte copy (NN is 3 in this configuration) */
304        subl    N_3,%ecx
305#endif
306        rep
307        movsb
        /* restore the input pointer */
308        movl    %edx,%esi
309        jmp     .LMDONE
310
311
312/***********************************************************************
313// M4
314************************************************************************/
315
/* Long M4 length: reached through "jz 1b" below when the 3-bit length field is 0. */
/* Every zero byte read adds N_255 to %ecx; the first non-zero byte ends the loop and */
/* the count becomes %ecx + that byte + 9 (+NN). */
3160:      addl    N_255,%ecx
317        TEST_IP(3(%esi),%edx)       /* minimum */
3181:      movb    (%esi),%bl
319        incl    %esi
320        orb     %bl,%bl
321        jz      0b
322        leal    9+NN(%ebx,%ecx),%ecx
323        jmp     3f
324
325
326        ALIGN3
        /* .LM4MATCH: token < 32. Tokens >= 16 are M4 matches handled here; smaller ones go to .LM1MATCH. */
327.LM4MATCH:
328        cmpb    $16,%al
329        jb      .LM1MATCH
330
331/* a M4 match */
        /* token bit 3 is moved to bit 16 of %eax so it survives the 16-bit load at 3: */
332        movl    %eax,%ecx
333        andl    $8,%eax
334        shll    $13,%eax        /* save in bit 16 */
        /* length field = token & 7; 0 selects the long encoding above, otherwise count = field + 2 (+NN) */
335        andl    $7,%ecx
336        jz      1b
337        addl    $2+NN,%ecx
3383:
339#ifdef LZO_DEBUG
    /* debug build only: bits 17-31 of %eax must be zero here */
340    movl %eax,%edx ; andl $0xfffe0000,%edx ; jnz .L_assert_fail
341#endif
        /* offset = (saved bit 16 | 16-bit word at the input pointer) >> 2, so the saved bit lands on bit 14 */
342        movw    (%esi),%ax
343        addl    $2,%esi
344        leal    -0x4000(%edi),%edx
345        shrl    $2,%eax
        /* an all-zero offset marks the end of the stream */
346        jz      .LEOF
        /* match source = %edi - 0x4000 - offset; always copied with the longword path */
347        subl    %eax,%edx
348        jmp     .LCOPYLONG
349
350
351/***********************************************************************
352// M1
353************************************************************************/
354
/* .LM1MATCH: token < 16 reached through the match dispatch. Copies 2 bytes from */
/* %edi - 1 - ((token >> 2) + 4 * next input byte). */
355        ALIGN3
356.LM1MATCH:
357/* a M1 match */
358        TEST_OP(2(%edi),%edx)
359        shrl    $2,%eax
360        movb    (%esi),%bl
361        leal    -1(%edi),%edx
        /* %eax = (token >> 2) + 4 * %ebx; relies on bits 8-31 of %ebx being zero */
362        leal    (%eax,%ebx,4),%eax
363        incl    %esi
364        subl    %eax,%edx
365        TEST_LOOKBEHIND(%edx)
366
        /* byte-by-byte so that the first store is visible to the second load when source and destination overlap */
367        movb    (%edx),%al      /* we must use this because edx can be edi-1 */
368        movb    %al,(%edi)
369        movb    1(%edx),%bl
370        movb    %bl,1(%edi)
371        addl    $2,%edi
372        jmp     .LMDONE
373
374
375/***********************************************************************
376//
377************************************************************************/
378
/* .LEOF: reached from the M4 code when the decoded offset is zero. */
/* Leaves %eax = 0 when %ecx equals 3+NN and %eax = 1 otherwise; the code that */
/* consumes this value is outside this part of the file. */
379.LEOF:
380/****   xorl    %eax,%eax          eax=0 from above */
381
382        cmpl    $3+NN,%ecx      /* ecx must be 3/6 */
        /* %al = 1 when the compare above found a mismatch; bits 8-31 of %eax are already zero */
383        setnz   %al
384
385
386/*
387vi:ts=4
388*/
389
390