Lines Matching +full:k +full:- +full:block

2 ; jcphuff-sse2.asm - prepare data for progressive Huffman encoding
3 ; (64-bit SSE2)
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
22 ; --------------------------------------------------------------------------
26 ; --------------------------------------------------------------------------
36 pinsrw X0, word [BLOCK + T0 * 2], 0
37 pinsrw X1, word [BLOCK + T1 * 2], 0
41 pinsrw X0, word [BLOCK + T0 * 2], 1
42 pinsrw X1, word [BLOCK + T1 * 2], 1
46 pinsrw X0, word [BLOCK + T0 * 2], 2
47 pinsrw X1, word [BLOCK + T1 * 2], 2
51 pinsrw X0, word [BLOCK + T0 * 2], 3
52 pinsrw X1, word [BLOCK + T1 * 2], 3
56 pinsrw X0, word [BLOCK + T0 * 2], 4
57 pinsrw X1, word [BLOCK + T1 * 2], 4
61 pinsrw X0, word [BLOCK + T0 * 2], 5
62 pinsrw X1, word [BLOCK + T1 * 2], 5
66 pinsrw X0, word [BLOCK + T0 * 2], 6
67 pinsrw X1, word [BLOCK + T1 * 2], 6
71 pinsrw X0, word [BLOCK + T0 * 2], 7
72 pinsrw X1, word [BLOCK + T1 * 2], 7
82 pinsrw X0, word [BLOCK + T0 * 2], 0
83 pinsrw X1, word [BLOCK + T1 * 2], 0
86 pinsrw X0, word [BLOCK + T0 * 2], 1
89 pinsrw X0, word [BLOCK + T0 * 2], 2
92 pinsrw X0, word [BLOCK + T0 * 2], 3
95 pinsrw X0, word [BLOCK + T0 * 2], 4
98 pinsrw X0, word [BLOCK + T0 * 2], 5
101 pinsrw X0, word [BLOCK + T0 * 2], 6
104 pinsrw X0, word [BLOCK + T0 * 2], 7
109 pinsrw X1, word [BLOCK + T1 * 2], 1
114 pinsrw X1, word [BLOCK + T1 * 2], 2
119 pinsrw X1, word [BLOCK + T1 * 2], 3
124 pinsrw X1, word [BLOCK + T1 * 2], 4
129 pinsrw X1, word [BLOCK + T1 * 2], 5
134 pinsrw X1, word [BLOCK + T1 * 2], 6
142 pinsrw X0, word [BLOCK + T0 * 2], 0
145 pinsrw X0, word [BLOCK + T0 * 2], 1
148 pinsrw X0, word [BLOCK + T0 * 2], 2
151 pinsrw X0, word [BLOCK + T0 * 2], 3
154 pinsrw X0, word [BLOCK + T0 * 2], 4
157 pinsrw X0, word [BLOCK + T0 * 2], 5
160 pinsrw X0, word [BLOCK + T0 * 2], 6
163 pinsrw X0, word [BLOCK + T0 * 2], 7
171 pinsrw X0, word [BLOCK + T1 * 2], 0
176 pinsrw X0, word [BLOCK + T1 * 2], 1
181 pinsrw X0, word [BLOCK + T1 * 2], 2
186 pinsrw X0, word [BLOCK + T1 * 2], 3
191 pinsrw X0, word [BLOCK + T1 * 2], 4
196 pinsrw X0, word [BLOCK + T1 * 2], 5
201 pinsrw X0, word [BLOCK + T1 * 2], 6
251 ; jsimd_encode_mcu_AC_first_prepare_sse2(const JCOEF *block,
256 ; r10 = const JCOEF *block
269 %define K eax
275 %define BLOCK r10
287 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
290 lea rsp, [rbp - 16]
293 movdqa XMMWORD [rbp - 16], ZERO
297 mov K, LEN
299 and K, -16
301 shr K, 4
321 dec K
370 mov K, LEN
371 add K, 7
372 and K, -8
373 shr K, 3
374 sub K, DCTSIZE2/8
380 inc K
387 movdqa ZERO, XMMWORD [rbp - 16]
389 mov rsp, rbp ; rsp <- aligned rbp
390 pop rsp ; rsp <- original rbp
400 %undef K
406 %undef BLOCK
415 ; jsimd_encode_mcu_AC_refine_prepare_sse2(const JCOEF *block,
420 ; r10 = const JCOEF *block
434 %define K eax
443 %define BLOCK r10
455 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
458 lea rsp, [rbp - 16]
461 movdqa XMMWORD [rbp - 16], ZERO
470 mov K, LEN
472 and K, -16
474 shr K, 4
492 pmovmskb T0d, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
497 bsr T1d, T1d ; idx = 16 - (__builtin_clz(idx)>>1);
500 add EOB, T1d ; EOB = k + idx;
505 dec K
530 pmovmskb T0d, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
535 bsr T1d, T1d ; idx = 16 - (__builtin_clz(idx)>>1);
538 add EOB, T1d ; EOB = k + idx;
553 pmovmskb T0d, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
558 bsr T1d, T1d ; idx = 16 - (__builtin_clz(idx)>>1);
561 add EOB, T1d ; EOB = k + idx;
576 pmovmskb T0d, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
581 bsr T1d, T1d ; idx = 16 - (__builtin_clz(idx)>>1);
584 add EOB, T1d ; EOB = k + idx;
588 mov K, LEN
589 add K, 7
590 and K, -8
591 shr K, 3
592 sub K, DCTSIZE2/8
599 inc K
609 movdqa ZERO, XMMWORD [rbp - 16]
611 mov rsp, rbp ; rsp <- aligned rbp
612 pop rsp ; rsp <- original rbp
623 %undef K
632 %undef BLOCK