Lines Matching +full:k +full:- +full:block
2 ; jcphuff-sse2.asm - prepare data for progressive Huffman encoding (SSE2)
7 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
21 ; --------------------------------------------------------------------------
25 ; --------------------------------------------------------------------------
35 pinsrw X0, word [BLOCK + T0 * 2], 0
36 pinsrw X1, word [BLOCK + T1 * 2], 0
40 pinsrw X0, word [BLOCK + T0 * 2], 1
41 pinsrw X1, word [BLOCK + T1 * 2], 1
45 pinsrw X0, word [BLOCK + T0 * 2], 2
46 pinsrw X1, word [BLOCK + T1 * 2], 2
50 pinsrw X0, word [BLOCK + T0 * 2], 3
51 pinsrw X1, word [BLOCK + T1 * 2], 3
55 pinsrw X0, word [BLOCK + T0 * 2], 4
56 pinsrw X1, word [BLOCK + T1 * 2], 4
60 pinsrw X0, word [BLOCK + T0 * 2], 5
61 pinsrw X1, word [BLOCK + T1 * 2], 5
65 pinsrw X0, word [BLOCK + T0 * 2], 6
66 pinsrw X1, word [BLOCK + T1 * 2], 6
70 pinsrw X0, word [BLOCK + T0 * 2], 7
71 pinsrw X1, word [BLOCK + T1 * 2], 7
81 pinsrw X0, word [BLOCK + T0 * 2], 0
82 pinsrw X1, word [BLOCK + T1 * 2], 0
85 pinsrw X0, word [BLOCK + T0 * 2], 1
88 pinsrw X0, word [BLOCK + T0 * 2], 2
91 pinsrw X0, word [BLOCK + T0 * 2], 3
94 pinsrw X0, word [BLOCK + T0 * 2], 4
97 pinsrw X0, word [BLOCK + T0 * 2], 5
100 pinsrw X0, word [BLOCK + T0 * 2], 6
103 pinsrw X0, word [BLOCK + T0 * 2], 7
108 pinsrw X1, word [BLOCK + T1 * 2], 1
113 pinsrw X1, word [BLOCK + T1 * 2], 2
118 pinsrw X1, word [BLOCK + T1 * 2], 3
123 pinsrw X1, word [BLOCK + T1 * 2], 4
128 pinsrw X1, word [BLOCK + T1 * 2], 5
133 pinsrw X1, word [BLOCK + T1 * 2], 6
141 pinsrw X0, word [BLOCK + T0 * 2], 0
144 pinsrw X0, word [BLOCK + T0 * 2], 1
147 pinsrw X0, word [BLOCK + T0 * 2], 2
150 pinsrw X0, word [BLOCK + T0 * 2], 3
153 pinsrw X0, word [BLOCK + T0 * 2], 4
156 pinsrw X0, word [BLOCK + T0 * 2], 5
159 pinsrw X0, word [BLOCK + T0 * 2], 6
162 pinsrw X0, word [BLOCK + T0 * 2], 7
170 pinsrw X0, word [BLOCK + T1 * 2], 0
175 pinsrw X0, word [BLOCK + T1 * 2], 1
180 pinsrw X0, word [BLOCK + T1 * 2], 2
185 pinsrw X0, word [BLOCK + T1 * 2], 3
190 pinsrw X0, word [BLOCK + T1 * 2], 4
195 pinsrw X0, word [BLOCK + T1 * 2], 5
200 pinsrw X0, word [BLOCK + T1 * 2], 6
251 ; jsimd_encode_mcu_AC_first_prepare_sse2(const JCOEF *block,
256 ; eax + 8 = const JCOEF *block
269 %define K eax
274 %define BLOCK esi
287 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
298 mov BLOCK, INT [eax + 8]
306 mov K, LEN
307 and K, -16
308 shr K, 4
328 dec K
380 mov K, LEN
381 add K, 7
382 and K, -8
383 shr K, 3
384 sub K, DCTSIZE2/8
390 inc K
403 mov esp, ebp ; esp <- aligned ebp
404 pop esp ; esp <- original ebp
414 %undef K
418 %undef BLOCK
426 ; jsimd_encode_mcu_AC_refine_prepare_sse2(const JCOEF *block,
431 ; eax + 8 = const JCOEF *block
445 %define K eax
451 %define BLOCK esi
466 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
479 mov BLOCK, INT [eax + 8]
484 mov K, INT [eax + 16]
485 mov INT [T0 + 2 * SIZEOF_INT], -1
486 mov INT [T0 + 3 * SIZEOF_INT], -1
488 mov LEN, K
490 and K, -16
493 shr K, 4
511 pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
516 bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
519 mov EOB, T1 ; EOB = k + idx;
524 dec K
552 pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
557 bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
560 mov EOB, T1 ; EOB = k + idx;
575 pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
580 bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
583 mov EOB, T1 ; EOB = k + idx;
599 pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
604 bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1);
607 mov EOB, T1 ; EOB = k + idx;
611 mov K, LEN
612 add K, 7
613 and K, -8
614 shr K, 3
615 sub K, DCTSIZE2/8
621 inc K
636 mov esp, ebp ; esp <- aligned ebp
637 pop esp ; esp <- original ebp
648 %undef K
655 %undef BLOCK