
Searched refs: __m128i (Results 1 – 25 of 985), sorted by relevance


/external/llvm-project/clang/test/CodeGen/X86/
xop-builtins-cmp.c
9 __m128i test_mm_comlt_epu8(__m128i a, __m128i b) { in test_mm_comlt_epu8()
16 __m128i test_mm_comlt_epu16(__m128i a, __m128i b) { in test_mm_comlt_epu16()
23 __m128i test_mm_comlt_epu32(__m128i a, __m128i b) { in test_mm_comlt_epu32()
30 __m128i test_mm_comlt_epu64(__m128i a, __m128i b) { in test_mm_comlt_epu64()
37 __m128i test_mm_comlt_epi8(__m128i a, __m128i b) { in test_mm_comlt_epi8()
44 __m128i test_mm_comlt_epi16(__m128i a, __m128i b) { in test_mm_comlt_epi16()
51 __m128i test_mm_comlt_epi32(__m128i a, __m128i b) { in test_mm_comlt_epi32()
58 __m128i test_mm_comlt_epi64(__m128i a, __m128i b) { in test_mm_comlt_epi64()
67 __m128i test_mm_comle_epu8(__m128i a, __m128i b) { in test_mm_comle_epu8()
74 __m128i test_mm_comle_epu16(__m128i a, __m128i b) { in test_mm_comle_epu16()
[all …]
xop-builtins.c
9 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi16()
15 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi16()
21 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccsd_epi16()
27 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccd_epi16()
33 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi32()
39 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi32()
45 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccslo_epi32()
51 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macclo_epi32()
57 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccshi_epi32()
63 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macchi_epi32()
[all …]
sse2-builtins.c
10 __m128i test_mm_add_epi8(__m128i A, __m128i B) { in test_mm_add_epi8()
16 __m128i test_mm_add_epi16(__m128i A, __m128i B) { in test_mm_add_epi16()
22 __m128i test_mm_add_epi32(__m128i A, __m128i B) { in test_mm_add_epi32()
28 __m128i test_mm_add_epi64(__m128i A, __m128i B) { in test_mm_add_epi64()
49 __m128i test_mm_adds_epi8(__m128i A, __m128i B) { in test_mm_adds_epi8()
55 __m128i test_mm_adds_epi16(__m128i A, __m128i B) { in test_mm_adds_epi16()
61 __m128i test_mm_adds_epu8(__m128i A, __m128i B) { in test_mm_adds_epu8()
68 __m128i test_mm_adds_epu16(__m128i A, __m128i B) { in test_mm_adds_epu16()
81 __m128i test_mm_and_si128(__m128i A, __m128i B) { in test_mm_and_si128()
94 __m128i test_mm_andnot_si128(__m128i A, __m128i B) { in test_mm_andnot_si128()
[all …]
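These sse2-builtins.c tests exercise the basic SSE2 integer intrinsics over __m128i. For orientation, a minimal self-contained usage sketch (hypothetical values, built with -msse2; not taken from the test file):

#include <emmintrin.h>  /* SSE2: __m128i, _mm_add_epi8, _mm_and_si128, ... */
#include <stdio.h>
#include <stdint.h>

int main(void) {
  /* Two vectors of sixteen 8-bit lanes. */
  const __m128i a = _mm_set1_epi8(100);
  const __m128i b = _mm_set1_epi8(60);

  const __m128i sum_wrap = _mm_add_epi8(a, b);     /* wraps: 160 -> -96 as int8 */
  const __m128i sum_sat  = _mm_adds_epi8(a, b);    /* saturates to 127 */
  const __m128i masked   = _mm_and_si128(a, b);
  const __m128i andnot   = _mm_andnot_si128(a, b); /* (~a) & b */

  int8_t out[16];
  _mm_storeu_si128((__m128i *)out, sum_sat);
  printf("saturated lane 0: %d\n", out[0]);  /* prints 127 */
  (void)sum_wrap; (void)masked; (void)andnot;
  return 0;
}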
/external/clang/lib/Headers/
xopintrin.h
36 static __inline__ __m128i __DEFAULT_FN_ATTRS
37 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccs_epi16()
39 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_maccs_epi16()
42 static __inline__ __m128i __DEFAULT_FN_ATTRS
43 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_macc_epi16()
45 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_macc_epi16()
48 static __inline__ __m128i __DEFAULT_FN_ATTRS
49 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccsd_epi16()
51 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); in _mm_maccsd_epi16()
54 static __inline__ __m128i __DEFAULT_FN_ATTRS
[all …]
/external/llvm-project/clang/lib/Headers/
xopintrin.h
23 static __inline__ __m128i __DEFAULT_FN_ATTRS
24 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccs_epi16()
26 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_maccs_epi16()
29 static __inline__ __m128i __DEFAULT_FN_ATTRS
30 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_macc_epi16()
32 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_macc_epi16()
35 static __inline__ __m128i __DEFAULT_FN_ATTRS
36 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccsd_epi16()
38 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); in _mm_maccsd_epi16()
41 static __inline__ __m128i __DEFAULT_FN_ATTRS
[all …]
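Both copies of xopintrin.h above wrap the AMD XOP vpmacs* builtins in the _mm_macc*_epi16 family. A minimal usage sketch follows; it assumes an XOP-capable target (e.g. built with -mxop) and is illustrative only, not taken from the header:

#include <x86intrin.h>  /* pulls in xopintrin.h when XOP is enabled */
#include <stdio.h>
#include <stdint.h>

#if defined(__XOP__)
int main(void) {
  const __m128i a = _mm_set1_epi16(30000);
  const __m128i b = _mm_set1_epi16(2);
  const __m128i c = _mm_set1_epi16(10000);

  /* Signed multiply-accumulate with saturation: each 16-bit lane computes
     saturate(a*b + c) = saturate(70000) = 32767. */
  const __m128i r = _mm_maccs_epi16(a, b, c);

  int16_t out[8];
  _mm_storeu_si128((__m128i *)out, r);
  printf("lane 0: %d\n", out[0]);
  return 0;
}
#else
int main(void) { puts("XOP not enabled; build with -mxop on a supporting CPU"); return 0; }
#endif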
/external/clang/test/CodeGen/
xop-builtins.c
11 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi16()
17 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi16()
23 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccsd_epi16()
29 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccd_epi16()
35 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi32()
41 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi32()
47 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccslo_epi32()
53 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macclo_epi32()
59 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccshi_epi32()
65 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macchi_epi32()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
fwd_txfm_sse2.h
21 static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { in k_madd_epi32()
22 __m128i buf0, buf1; in k_madd_epi32()
30 static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) { in k_packs_epi64()
31 __m128i buf0 = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64()
32 __m128i buf1 = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64()
36 static INLINE int check_epi16_overflow_x2(const __m128i *preg0, in check_epi16_overflow_x2()
37 const __m128i *preg1) { in check_epi16_overflow_x2()
38 const __m128i max_overflow = _mm_set1_epi16(0x7fff); in check_epi16_overflow_x2()
39 const __m128i min_overflow = _mm_set1_epi16((short)0x8000); in check_epi16_overflow_x2()
40 __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow), in check_epi16_overflow_x2()
[all …]
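In fwd_txfm_sse2.h, check_epi16_overflow_x2 compares each 16-bit lane against 0x7fff and 0x8000 and ORs the results to flag saturation after a transform stage. A minimal sketch of that comparison pattern, assuming plain SSE2 (illustrative, not the library's exact code):

#include <emmintrin.h>
#include <stdio.h>

/* Returns nonzero if any 16-bit lane of v equals INT16_MAX or INT16_MIN. */
static int any_epi16_extreme(__m128i v) {
  const __m128i max16 = _mm_set1_epi16(0x7fff);
  const __m128i min16 = _mm_set1_epi16((short)0x8000);
  const __m128i hit = _mm_or_si128(_mm_cmpeq_epi16(v, max16),
                                   _mm_cmpeq_epi16(v, min16));
  return _mm_movemask_epi8(hit);  /* 0 when no lane matched */
}

int main(void) {
  const __m128i ok  = _mm_set1_epi16(1234);
  const __m128i bad = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0x7fff);
  printf("%d %d\n", any_epi16_extreme(ok) != 0, any_epi16_extreme(bad) != 0);  /* 0 1 */
  return 0;
}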
highbd_intrapred_intrin_ssse3.c
29 static INLINE __m128i avg3_epu16(const __m128i *x, const __m128i *y, in avg3_epu16()
30 const __m128i *z) { in avg3_epu16()
31 const __m128i one = _mm_set1_epi16(1); in avg3_epu16()
32 const __m128i a = _mm_avg_epu16(*x, *z); in avg3_epu16()
33 const __m128i b = in avg3_epu16()
41 const __m128i ABCDEFGH = _mm_loadu_si128((const __m128i *)above); in vpx_highbd_d45_predictor_4x4_ssse3()
42 const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 2); in vpx_highbd_d45_predictor_4x4_ssse3()
43 const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 4); in vpx_highbd_d45_predictor_4x4_ssse3()
44 const __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGH0, &CDEFGH00); in vpx_highbd_d45_predictor_4x4_ssse3()
47 _mm_storel_epi64((__m128i *)dst, avg3); in vpx_highbd_d45_predictor_4x4_ssse3()
[all …]
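avg3_epu16 feeds the high-bit-depth D45 predictor with a rounded three-tap average, conventionally (x + 2*y + z + 2) >> 2 per 16-bit lane. The sketch below computes the same quantity by widening to 32 bits instead of the library's _mm_avg_epu16 trick; it assumes SSE4.1 for the widen/pack and handles only the low four lanes (illustrative only):

#include <smmintrin.h>  /* SSE4.1: _mm_cvtepu16_epi32, _mm_packus_epi32 */
#include <stdio.h>
#include <stdint.h>

/* Rounded three-tap average of unsigned 16-bit lanes: (x + 2y + z + 2) >> 2. */
static __m128i avg3_u16_widen(__m128i x, __m128i y, __m128i z) {
  const __m128i two = _mm_set1_epi32(2);
  /* Only the low four lanes are widened here; a full version would also
     process the high half (e.g. via _mm_unpackhi_epi16). */
  const __m128i xl = _mm_cvtepu16_epi32(x);
  const __m128i yl = _mm_cvtepu16_epi32(y);
  const __m128i zl = _mm_cvtepu16_epi32(z);
  __m128i sum = _mm_add_epi32(xl, _mm_add_epi32(yl, yl));  /* x + 2y */
  sum = _mm_add_epi32(sum, _mm_add_epi32(zl, two));        /* + z + 2 */
  return _mm_packus_epi32(_mm_srli_epi32(sum, 2), _mm_setzero_si128());
}

int main(void) {
  const __m128i x = _mm_set1_epi16(100);
  const __m128i y = _mm_set1_epi16(200);
  const __m128i z = _mm_set1_epi16(300);
  uint16_t out[8];
  _mm_storeu_si128((__m128i *)out, avg3_u16_widen(x, y, z));
  printf("%u\n", out[0]);  /* (100 + 400 + 300 + 2) >> 2 = 200 */
  return 0;
}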
/external/XNNPACK/src/qs8-gavgpool/gen/
7p7x-minmax-sse2-c24-acc2.c
40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
46 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
48 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
49 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
50 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
52 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
53 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
54 const __m128i vi2xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i2 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2()
[all …]
7p7x-minmax-ssse3-c24-acc2.c
40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
46 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
48 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
49 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
50 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
52 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
53 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
54 const __m128i vi2xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i2 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2()
[all …]
7p7x-minmax-sse41-c24-acc2.c
40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
44 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
45 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
46 const __m128i vxi0xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
48 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
49 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
50 const __m128i vxi1xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
52 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
53 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
54 const __m128i vxi2xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
[all …]
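These XNNPACK global-average-pool kernels load each input row eight signed bytes at a time, widen to 16 bits, sum the rows named by the "7p7x"/"7x" prefix, and add a preloaded 32-bit bias before requantization. A much-simplified sketch of that accumulation for one group of eight channels, assuming SSE4.1 (hypothetical names, not XNNPACK's code):

#include <smmintrin.h>
#include <stdint.h>

/* Sum 7 rows of 8 int8 inputs per channel and add a per-channel int32 bias.
   7 * 127 fits comfortably in int16, so the 16-bit intermediate cannot overflow. */
static void gavgpool_acc8(const int8_t *rows[7], const int32_t bias[8],
                          int32_t out[8]) {
  __m128i vsum = _mm_setzero_si128();
  for (int r = 0; r < 7; r++) {
    const __m128i vrow = _mm_loadl_epi64((const __m128i *)rows[r]);
    vsum = _mm_add_epi16(vsum, _mm_cvtepi8_epi16(vrow));  /* widen, accumulate */
  }
  const __m128i vbias_lo = _mm_loadu_si128((const __m128i *)bias);
  const __m128i vbias_hi = _mm_loadu_si128((const __m128i *)(bias + 4));
  const __m128i vacc_lo = _mm_add_epi32(vbias_lo, _mm_cvtepi16_epi32(vsum));
  const __m128i vacc_hi = _mm_add_epi32(vbias_hi,
      _mm_cvtepi16_epi32(_mm_srli_si128(vsum, 8)));
  _mm_storeu_si128((__m128i *)out, vacc_lo);
  _mm_storeu_si128((__m128i *)(out + 4), vacc_hi);
}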
7p7x-minmax-ssse3-c16-acc2.c
40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
47 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
48 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
50 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
51 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
53 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
54 const __m128i vi3x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i3 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
56 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
[all …]
7p7x-minmax-sse41-c16-acc2.c
40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
44 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
45 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
47 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
48 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
50 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
51 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
53 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
54 const __m128i vxi3x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i3 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
56 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
[all …]
7x-minmax-sse2-c24-acc2.c
56 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
57 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
59 const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
61 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
62 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
63 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
65 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
66 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
67 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
[all …]
7p7x-minmax-sse2-c16-acc2.c
40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
47 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
48 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
50 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
51 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
53 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
54 const __m128i vi3x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i3 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
56 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
[all …]
/external/XNNPACK/src/qs8-dwconv/gen/
up24x9-minmax-sse41-mul16.c
83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
87 __m128i vaccGHIJ = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
88 __m128i vaccKLMN = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 20 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
91 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
92 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(vi0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
93 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
94 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
[all …]
up16x9-minmax-ssse3-mul16.c
83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
89 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
90 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
91 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
92 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
95 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
96 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
[all …]
up16x9-minmax-sse2-mul16.c
83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
89 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
90 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
91 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
92 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
95 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
96 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16()
[all …]
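The sse41 and sse2/ssse3 dwconv variants above differ mainly in how eight signed bytes are widened to 16-bit lanes: SSE4.1 has _mm_cvtepi8_epi16, while the older paths interleave the bytes with a computed sign mask. A small side-by-side sketch of the two widening idioms (illustrative):

#include <smmintrin.h>  /* SSE4.1 for _mm_cvtepi8_epi16; the rest is SSE2 */
#include <stdio.h>
#include <stdint.h>

int main(void) {
  const int8_t bytes[8] = { -1, 2, -3, 4, -5, 6, -7, 8 };
  const __m128i v = _mm_loadl_epi64((const __m128i *)bytes);

  /* SSE4.1: single sign-extending move. */
  const __m128i wide_sse41 = _mm_cvtepi8_epi16(v);

  /* SSE2/SSSE3: build the sign bytes with a compare, then interleave.
     _mm_cmpgt_epi8(0, v) yields 0xFF for negative lanes, 0x00 otherwise. */
  const __m128i sign = _mm_cmpgt_epi8(_mm_setzero_si128(), v);
  const __m128i wide_sse2 = _mm_unpacklo_epi8(v, sign);

  int16_t a[8], b[8];
  _mm_storeu_si128((__m128i *)a, wide_sse41);
  _mm_storeu_si128((__m128i *)b, wide_sse2);
  printf("%d %d  %d %d\n", a[0], b[0], a[7], b[7]);  /* -1 -1  8 8 */
  return 0;
}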
up16x9-minmax-sse41-mul16.c
83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
89 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
90 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(vi0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
91 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
92 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
93 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
94 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(vi0x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
[all …]
up24x9-minmax-ssse3-mul16.c
83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
87 __m128i vaccGHIJ = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
88 __m128i vaccKLMN = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 20 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
91 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
92 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
93 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
94 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16()
[all …]
up24x9-minmax-sse2-mul16.c
83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
87 __m128i vaccGHIJ = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
88 __m128i vaccKLMN = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 20 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
91 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
92 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
93 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
94 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16()
[all …]
/external/libaom/libaom/aom_dsp/x86/
Dfwd_txfm_sse2.h19 static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { in k_madd_epi32()
20 __m128i buf0, buf1; in k_madd_epi32()
28 static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) { in k_packs_epi64()
29 __m128i buf0 = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64()
30 __m128i buf1 = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64()
34 static INLINE int check_epi16_overflow_x2(const __m128i *preg0, in check_epi16_overflow_x2()
35 const __m128i *preg1) { in check_epi16_overflow_x2()
36 const __m128i max_overflow = _mm_set1_epi16(0x7fff); in check_epi16_overflow_x2()
37 const __m128i min_overflow = _mm_set1_epi16(0x8000); in check_epi16_overflow_x2()
38 __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow), in check_epi16_overflow_x2()
[all …]
blend_sse4.h
26 static INLINE __m128i blend_4(const uint8_t *src0, const uint8_t *src1, in blend_4()
27 const __m128i *v_m0_w, const __m128i *v_m1_w) { in blend_4()
28 const __m128i v_s0_b = xx_loadl_32(src0); in blend_4()
29 const __m128i v_s1_b = xx_loadl_32(src1); in blend_4()
30 const __m128i v_s0_w = _mm_cvtepu8_epi16(v_s0_b); in blend_4()
31 const __m128i v_s1_w = _mm_cvtepu8_epi16(v_s1_b); in blend_4()
33 const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, *v_m0_w); in blend_4()
34 const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, *v_m1_w); in blend_4()
35 const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w); in blend_4()
36 const __m128i v_res_w = xx_roundn_epu16(v_sum_w, AOM_BLEND_A64_ROUND_BITS); in blend_4()
[all …]
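blend_4 widens two 4-pixel loads to 16 bits, multiplies each by its mask weight, adds, and round-shifts back down. A hedged sketch of that arithmetic: the 6-bit rounding shift is an assumption (A64 blend weights sum to 64), and the library's xx_loadl_32/xx_roundn_epu16 helpers are not reproduced here:

#include <smmintrin.h>  /* SSE4.1: _mm_cvtepu8_epi16 */
#include <stdio.h>
#include <stdint.h>

#define ROUND_BITS 6  /* assumed: weights m0 + m1 == 64 == 1 << 6 */

/* result = (src0 * m0 + src1 * m1 + 32) >> 6, per 16-bit lane. */
static __m128i blend8_u8(__m128i s0_b, __m128i s1_b, __m128i m0_w, __m128i m1_w) {
  const __m128i s0_w = _mm_cvtepu8_epi16(s0_b);
  const __m128i s1_w = _mm_cvtepu8_epi16(s1_b);
  const __m128i p0 = _mm_mullo_epi16(s0_w, m0_w);
  const __m128i p1 = _mm_mullo_epi16(s1_w, m1_w);
  const __m128i sum = _mm_add_epi16(p0, p1);
  const __m128i rnd = _mm_set1_epi16(1 << (ROUND_BITS - 1));
  return _mm_srli_epi16(_mm_add_epi16(sum, rnd), ROUND_BITS);
}

int main(void) {
  const __m128i s0 = _mm_set1_epi8(100);
  const __m128i s1 = _mm_set1_epi8(-56);  /* -56 == 200 as an unsigned byte */
  const __m128i m0 = _mm_set1_epi16(48), m1 = _mm_set1_epi16(16);
  uint16_t out[8];
  _mm_storeu_si128((__m128i *)out, blend8_u8(s0, s1, m0, m1));
  printf("%u\n", out[0]);  /* (100*48 + 200*16 + 32) >> 6 = 125 */
  return 0;
}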
/external/libgav1/libgav1/src/dsp/x86/
loop_filter_sse4.cc
35 inline __m128i FilterAdd2Sub2(const __m128i& total, const __m128i& a1, in FilterAdd2Sub2()
36 const __m128i& a2, const __m128i& s1, in FilterAdd2Sub2()
37 const __m128i& s2) { in FilterAdd2Sub2()
38 __m128i x = _mm_add_epi16(a1, total); in FilterAdd2Sub2()
48 inline __m128i AbsDiff(const __m128i& a, const __m128i& b) { in AbsDiff()
52 inline __m128i CheckOuterThreshF4(const __m128i& q1q0, const __m128i& p1p0, in CheckOuterThreshF4()
53 const __m128i& outer_thresh) { in CheckOuterThreshF4()
54 const __m128i fe = _mm_set1_epi8(static_cast<int8_t>(0xfe)); in CheckOuterThreshF4()
56 const __m128i abs_pmq = AbsDiff(p1p0, q1q0); in CheckOuterThreshF4()
57 const __m128i a = _mm_adds_epu8(abs_pmq, abs_pmq); in CheckOuterThreshF4()
[all …]
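AbsDiff and CheckOuterThreshF4 build the |p - q| terms that drive the loop-filter decisions; for unsigned lanes the absolute difference is the OR of the two saturating subtractions. A minimal SSE2 sketch of that idiom (illustrative, not libgav1's exact code):

#include <emmintrin.h>
#include <stdio.h>
#include <stdint.h>

/* |a - b| per unsigned 8-bit lane: one of the two saturating subtractions
   is zero, the other is the true difference, so OR-ing them gives |a - b|. */
static __m128i abs_diff_u8(__m128i a, __m128i b) {
  return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
}

int main(void) {
  const __m128i a = _mm_set1_epi8(30);
  const __m128i b = _mm_set1_epi8(70);
  uint8_t out[16];
  _mm_storeu_si128((__m128i *)out, abs_diff_u8(a, b));
  printf("%u\n", out[0]);  /* 40 */
  return 0;
}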
/external/XNNPACK/src/qu8-dwconv/
up8x9-minmax-sse2.c
26 …const __m128i vkernel_zero_point = _mm_load_si128((const __m128i*) params->sse2.kernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
27 const __m128i vzero = _mm_setzero_si128(); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
72 __m128i vacc_lo = _mm_loadu_si128((const __m128i*) w); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
73 __m128i vacc_hi = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
75 const __m128i vi0 = _mm_loadl_epi64((const __m128i*) i0); i0 += 8; in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
76 const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
77 const __m128i vk0 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
78 const __m128i vxk0 = _mm_sub_epi16(_mm_unpacklo_epi8(vk0, vzero), vkernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
79 const __m128i vprod0_odd = _mm_mullo_epi16(vxi0, vxk0); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
80 const __m128i vprod0_even = _mm_mulhi_epi16(vxi0, vxk0); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2()
[all …]
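This qu8 depthwise kernel zero-extends the uint8 inputs by interleaving with a zero vector, subtracts the kernel zero point, and rebuilds 32-bit products from the 16-bit mullo/mulhi halves. A condensed sketch of that widening-multiply step, assuming SSE2 only (hypothetical values; not XNNPACK's code):

#include <emmintrin.h>
#include <stdio.h>
#include <stdint.h>

int main(void) {
  const uint8_t in[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };
  const uint8_t wt[8] = { 200, 1, 2, 3, 4, 5, 6, 7 };
  const __m128i vzero = _mm_setzero_si128();
  const __m128i vzp = _mm_set1_epi16(128);  /* hypothetical kernel zero point */

  /* Zero-extend uint8 -> int16 by interleaving with zero bytes. */
  const __m128i vxi = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)in), vzero);
  const __m128i vxk = _mm_sub_epi16(
      _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)wt), vzero), vzp);

  /* 16x16 -> 32-bit products: mullo holds the low halves, mulhi the high
     halves; interleaving them reassembles the full 32-bit results. */
  const __m128i lo = _mm_mullo_epi16(vxi, vxk);
  const __m128i hi = _mm_mulhi_epi16(vxi, vxk);
  int32_t prod[8];
  _mm_storeu_si128((__m128i *)prod,       _mm_unpacklo_epi16(lo, hi));
  _mm_storeu_si128((__m128i *)(prod + 4), _mm_unpackhi_epi16(lo, hi));
  printf("%d %d\n", (int)prod[0], (int)prod[1]);  /* 10*(200-128)=720, 20*(1-128)=-2540 */
  return 0;
}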
