/external/llvm-project/clang/test/CodeGen/X86/ |
D | xop-builtins-cmp.c | 9 __m128i test_mm_comlt_epu8(__m128i a, __m128i b) { in test_mm_comlt_epu8() 16 __m128i test_mm_comlt_epu16(__m128i a, __m128i b) { in test_mm_comlt_epu16() 23 __m128i test_mm_comlt_epu32(__m128i a, __m128i b) { in test_mm_comlt_epu32() 30 __m128i test_mm_comlt_epu64(__m128i a, __m128i b) { in test_mm_comlt_epu64() 37 __m128i test_mm_comlt_epi8(__m128i a, __m128i b) { in test_mm_comlt_epi8() 44 __m128i test_mm_comlt_epi16(__m128i a, __m128i b) { in test_mm_comlt_epi16() 51 __m128i test_mm_comlt_epi32(__m128i a, __m128i b) { in test_mm_comlt_epi32() 58 __m128i test_mm_comlt_epi64(__m128i a, __m128i b) { in test_mm_comlt_epi64() 67 __m128i test_mm_comle_epu8(__m128i a, __m128i b) { in test_mm_comle_epu8() 74 __m128i test_mm_comle_epu16(__m128i a, __m128i b) { in test_mm_comle_epu16() [all …]
|
D | xop-builtins.c | 9 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi16() 15 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi16() 21 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccsd_epi16() 27 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccd_epi16() 33 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi32() 39 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi32() 45 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccslo_epi32() 51 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macclo_epi32() 57 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccshi_epi32() 63 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macchi_epi32() [all …]
|
D | sse2-builtins.c | 10 __m128i test_mm_add_epi8(__m128i A, __m128i B) { in test_mm_add_epi8() 16 __m128i test_mm_add_epi16(__m128i A, __m128i B) { in test_mm_add_epi16() 22 __m128i test_mm_add_epi32(__m128i A, __m128i B) { in test_mm_add_epi32() 28 __m128i test_mm_add_epi64(__m128i A, __m128i B) { in test_mm_add_epi64() 49 __m128i test_mm_adds_epi8(__m128i A, __m128i B) { in test_mm_adds_epi8() 55 __m128i test_mm_adds_epi16(__m128i A, __m128i B) { in test_mm_adds_epi16() 61 __m128i test_mm_adds_epu8(__m128i A, __m128i B) { in test_mm_adds_epu8() 68 __m128i test_mm_adds_epu16(__m128i A, __m128i B) { in test_mm_adds_epu16() 81 __m128i test_mm_and_si128(__m128i A, __m128i B) { in test_mm_and_si128() 94 __m128i test_mm_andnot_si128(__m128i A, __m128i B) { in test_mm_andnot_si128() [all …]
|
/external/clang/lib/Headers/ |
D | xopintrin.h | 36 static __inline__ __m128i __DEFAULT_FN_ATTRS 37 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccs_epi16() 39 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_maccs_epi16() 42 static __inline__ __m128i __DEFAULT_FN_ATTRS 43 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_macc_epi16() 45 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_macc_epi16() 48 static __inline__ __m128i __DEFAULT_FN_ATTRS 49 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccsd_epi16() 51 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); in _mm_maccsd_epi16() 54 static __inline__ __m128i __DEFAULT_FN_ATTRS [all …]
|
/external/llvm-project/clang/lib/Headers/ |
D | xopintrin.h | 23 static __inline__ __m128i __DEFAULT_FN_ATTRS 24 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccs_epi16() 26 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_maccs_epi16() 29 static __inline__ __m128i __DEFAULT_FN_ATTRS 30 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_macc_epi16() 32 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); in _mm_macc_epi16() 35 static __inline__ __m128i __DEFAULT_FN_ATTRS 36 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) in _mm_maccsd_epi16() 38 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); in _mm_maccsd_epi16() 41 static __inline__ __m128i __DEFAULT_FN_ATTRS [all …]
|
/external/clang/test/CodeGen/ |
D | xop-builtins.c | 11 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi16() 17 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi16() 23 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccsd_epi16() 29 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) { in test_mm_maccd_epi16() 35 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccs_epi32() 41 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macc_epi32() 47 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccslo_epi32() 53 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macclo_epi32() 59 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_maccshi_epi32() 65 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) { in test_mm_macchi_epi32() [all …]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | fwd_txfm_sse2.h | 21 static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { in k_madd_epi32() 22 __m128i buf0, buf1; in k_madd_epi32() 30 static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) { in k_packs_epi64() 31 __m128i buf0 = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64() 32 __m128i buf1 = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64() 36 static INLINE int check_epi16_overflow_x2(const __m128i *preg0, in check_epi16_overflow_x2() 37 const __m128i *preg1) { in check_epi16_overflow_x2() 38 const __m128i max_overflow = _mm_set1_epi16(0x7fff); in check_epi16_overflow_x2() 39 const __m128i min_overflow = _mm_set1_epi16((short)0x8000); in check_epi16_overflow_x2() 40 __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow), in check_epi16_overflow_x2() [all …]
|
D | highbd_intrapred_intrin_ssse3.c | 29 static INLINE __m128i avg3_epu16(const __m128i *x, const __m128i *y, in avg3_epu16() 30 const __m128i *z) { in avg3_epu16() 31 const __m128i one = _mm_set1_epi16(1); in avg3_epu16() 32 const __m128i a = _mm_avg_epu16(*x, *z); in avg3_epu16() 33 const __m128i b = in avg3_epu16() 41 const __m128i ABCDEFGH = _mm_loadu_si128((const __m128i *)above); in vpx_highbd_d45_predictor_4x4_ssse3() 42 const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 2); in vpx_highbd_d45_predictor_4x4_ssse3() 43 const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 4); in vpx_highbd_d45_predictor_4x4_ssse3() 44 const __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGH0, &CDEFGH00); in vpx_highbd_d45_predictor_4x4_ssse3() 47 _mm_storel_epi64((__m128i *)dst, avg3); in vpx_highbd_d45_predictor_4x4_ssse3() [all …]
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-sse2-c24-acc2.c | 40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 46 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 48 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 49 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 50 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 52 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 53 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 54 const __m128i vi2xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i2 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 46 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 48 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 49 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 50 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 52 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 53 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 54 const __m128i vi2xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i2 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() [all …]
|
D | 7p7x-minmax-sse41-c24-acc2.c | 40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 44 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 45 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 46 const __m128i vxi0xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 48 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 49 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 50 const __m128i vxi1xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 52 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 53 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 54 const __m128i vxi2xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 47 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 48 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 50 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 51 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 53 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 54 const __m128i vi3x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i3 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 56 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() [all …]
|
D | 7p7x-minmax-sse41-c16-acc2.c | 40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 44 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 45 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 47 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 48 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 50 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 51 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 53 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 54 const __m128i vxi3x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i3 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 56 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() [all …]
|
D | 7x-minmax-sse2-c24-acc2.c | 56 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 57 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 58 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 59 const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 61 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 62 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 63 const __m128i vi0xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i0 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 65 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 66 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 67 const __m128i vi1xGHIJKLMN = _mm_loadl_epi64((const __m128i*) (i1 + 16)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() [all …]
|
D | 7p7x-minmax-sse2-c16-acc2.c | 40 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 45 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 47 const __m128i vi1x01234567 = _mm_loadl_epi64((const __m128i*) i1); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 48 const __m128i vi1x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i1 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 50 const __m128i vi2x01234567 = _mm_loadl_epi64((const __m128i*) i2); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 51 const __m128i vi2x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i2 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 53 const __m128i vi3x01234567 = _mm_loadl_epi64((const __m128i*) i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 54 const __m128i vi3x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i3 + 8)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 56 const __m128i vi4x01234567 = _mm_loadl_epi64((const __m128i*) i4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() [all …]
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x9-minmax-sse41-mul16.c | 83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 87 __m128i vaccGHIJ = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 88 __m128i vaccKLMN = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 20 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 91 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 92 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(vi0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 93 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() 94 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16() [all …]
|
D | up16x9-minmax-ssse3-mul16.c | 83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 89 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 90 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 91 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 92 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 95 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 96 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() [all …]
|
D | up16x9-minmax-sse2-mul16.c | 83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 89 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 90 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 91 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 92 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 95 …const __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() 96 …const __m128i vxk0x01234567 = _mm_unpacklo_epi8(vk0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16() [all …]
|
D | up16x9-minmax-sse41-mul16.c | 83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 89 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 90 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(vi0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 91 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 92 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 93 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() 94 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(vi0x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16() [all …]
|
D | up24x9-minmax-ssse3-mul16.c | 83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 87 __m128i vaccGHIJ = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 88 __m128i vaccKLMN = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 20 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 91 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 92 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 93 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() 94 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16() [all …]
|
D | up24x9-minmax-sse2-mul16.c | 83 __m128i vacc0123 = _mm_loadu_si128((const __m128i*) w); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 85 __m128i vacc89AB = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 86 __m128i vaccCDEF = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 12 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 87 __m128i vaccGHIJ = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 88 __m128i vaccKLMN = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 20 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 91 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 92 …const __m128i vk0x01234567 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 93 const __m128i vi0x89ABCDEF = _mm_loadl_epi64((const __m128i*) (i0 + 8)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() 94 …const __m128i vk0x89ABCDEF = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t… in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | fwd_txfm_sse2.h | 19 static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { in k_madd_epi32() 20 __m128i buf0, buf1; in k_madd_epi32() 28 static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) { in k_packs_epi64() 29 __m128i buf0 = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64() 30 __m128i buf1 = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); in k_packs_epi64() 34 static INLINE int check_epi16_overflow_x2(const __m128i *preg0, in check_epi16_overflow_x2() 35 const __m128i *preg1) { in check_epi16_overflow_x2() 36 const __m128i max_overflow = _mm_set1_epi16(0x7fff); in check_epi16_overflow_x2() 37 const __m128i min_overflow = _mm_set1_epi16(0x8000); in check_epi16_overflow_x2() 38 __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow), in check_epi16_overflow_x2() [all …]
|
D | blend_sse4.h | 26 static INLINE __m128i blend_4(const uint8_t *src0, const uint8_t *src1, in blend_4() 27 const __m128i *v_m0_w, const __m128i *v_m1_w) { in blend_4() 28 const __m128i v_s0_b = xx_loadl_32(src0); in blend_4() 29 const __m128i v_s1_b = xx_loadl_32(src1); in blend_4() 30 const __m128i v_s0_w = _mm_cvtepu8_epi16(v_s0_b); in blend_4() 31 const __m128i v_s1_w = _mm_cvtepu8_epi16(v_s1_b); in blend_4() 33 const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, *v_m0_w); in blend_4() 34 const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, *v_m1_w); in blend_4() 35 const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w); in blend_4() 36 const __m128i v_res_w = xx_roundn_epu16(v_sum_w, AOM_BLEND_A64_ROUND_BITS); in blend_4() [all …]
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | loop_filter_sse4.cc | 35 inline __m128i FilterAdd2Sub2(const __m128i& total, const __m128i& a1, in FilterAdd2Sub2() 36 const __m128i& a2, const __m128i& s1, in FilterAdd2Sub2() 37 const __m128i& s2) { in FilterAdd2Sub2() 38 __m128i x = _mm_add_epi16(a1, total); in FilterAdd2Sub2() 48 inline __m128i AbsDiff(const __m128i& a, const __m128i& b) { in AbsDiff() 52 inline __m128i CheckOuterThreshF4(const __m128i& q1q0, const __m128i& p1p0, in CheckOuterThreshF4() 53 const __m128i& outer_thresh) { in CheckOuterThreshF4() 54 const __m128i fe = _mm_set1_epi8(static_cast<int8_t>(0xfe)); in CheckOuterThreshF4() 56 const __m128i abs_pmq = AbsDiff(p1p0, q1q0); in CheckOuterThreshF4() 57 const __m128i a = _mm_adds_epu8(abs_pmq, abs_pmq); in CheckOuterThreshF4() [all …]
|
/external/XNNPACK/src/qu8-dwconv/ |
D | up8x9-minmax-sse2.c | 26 …const __m128i vkernel_zero_point = _mm_load_si128((const __m128i*) params->sse2.kernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 27 const __m128i vzero = _mm_setzero_si128(); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 72 __m128i vacc_lo = _mm_loadu_si128((const __m128i*) w); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 73 __m128i vacc_hi = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 75 const __m128i vi0 = _mm_loadl_epi64((const __m128i*) i0); i0 += 8; in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 76 const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 77 const __m128i vk0 = _mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32)); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 78 const __m128i vxk0 = _mm_sub_epi16(_mm_unpacklo_epi8(vk0, vzero), vkernel_zero_point); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 79 const __m128i vprod0_odd = _mm_mullo_epi16(vxi0, vxk0); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() 80 const __m128i vprod0_even = _mm_mulhi_epi16(vxi0, vxk0); in xnn_qu8_dwconv_minmax_ukernel_up8x9__sse2() [all …]
|