/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup():
    105  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    106  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    108  vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    109  vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    111  vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    112  vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    114  vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0));
    115  vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0));
    119  vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    120  vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    [all …]

D | 3x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup():
    92   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    93   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    95   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    96   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    98   vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    99   vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    103  vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    104  vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    106  vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0));
    107  vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0));
    [all …]

D | 4x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup():
    97   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    98   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    100  vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    101  vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    103  vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    104  vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    106  vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0));
    107  vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0));
    111  vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    112  vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    [all …]

D | 2x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup():
    79   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    80   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    82   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    83   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    87   vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    88   vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    90   vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0));
    91   vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0));
    95   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    96   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    [all …]

D | 3x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup():
    86   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    87   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    89   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    90   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    92   vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    93   vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    97   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    98   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    100  vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1));
    101  vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1));
    [all …]

D | 2x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup():
    75   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    76   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    78   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    79   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    83   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    84   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    86   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1));
    87   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1));
    91   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
    92   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
    [all …]

D | 1x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup():
    66   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    67   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    71   vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    72   vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    76   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    77   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    81   vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc1));
    82   vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc1));
    86   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
    87   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
    [all …]

D | 1x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup():
    64   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    65   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    69   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    70   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    74   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
    75   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
    79   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c3));
    80   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3));
    84   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c4));
    85   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4));
    [all …]

/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup():
    88   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    89   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    91   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    92   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    94   vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    95   vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    97   vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0));
    98   vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0));
    102  vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    103  vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    [all …]

D | 3x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup():
    77   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    78   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    80   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    81   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    83   vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    84   vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    88   vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    89   vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    91   vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0));
    92   vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0));
    [all …]

D | 4x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup():
    80   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    81   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    83   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    84   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    86   vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    87   vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    89   vacc3x0123 = vaddw_s16(vacc3x0123, vget_low_s16(vprod3x01234567c0));
    90   vacc3x4567 = vaddw_s16(vacc3x4567, vget_high_s16(vprod3x01234567c0));
    94   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    95   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    [all …]

D | 2x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup():
    66   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    67   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    69   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    70   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    74   vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    75   vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    77   vacc1x89AB = vaddw_s16(vacc1x89AB, vget_low_s16(vprod1x89ABCDEFc0));
    78   vacc1xCDEF = vaddw_s16(vacc1xCDEF, vget_high_s16(vprod1x89ABCDEFc0));
    82   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    83   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    [all …]

D | 3x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup():
    71   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    72   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    74   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    75   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    77   vacc2x0123 = vaddw_s16(vacc2x0123, vget_low_s16(vprod2x01234567c0));
    78   vacc2x4567 = vaddw_s16(vacc2x4567, vget_high_s16(vprod2x01234567c0));
    82   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    83   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    85   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1));
    86   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1));
    [all …]

D | 2x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup():
    62   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    63   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    65   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c0));
    66   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c0));
    70   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    71   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    73   vacc1x0123 = vaddw_s16(vacc1x0123, vget_low_s16(vprod1x01234567c1));
    74   vacc1x4567 = vaddw_s16(vacc1x4567, vget_high_s16(vprod1x01234567c1));
    78   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
    79   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
    [all …]

D | 1x16-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup():
    55   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    56   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    60   vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc0));
    61   vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc0));
    65   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    66   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    70   vacc0x89AB = vaddw_s16(vacc0x89AB, vget_low_s16(vprod0x89ABCDEFc1));
    71   vacc0xCDEF = vaddw_s16(vacc0xCDEF, vget_high_s16(vprod0x89ABCDEFc1));
    75   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
    76   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
    [all …]

D | 1x8-minmax-neon-mull-addw-dup.c | matches in xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup():
    53   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c0));
    54   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c0));
    58   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c1));
    59   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c1));
    63   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c2));
    64   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c2));
    68   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c3));
    69   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c3));
    73   vacc0x0123 = vaddw_s16(vacc0x0123, vget_low_s16(vprod0x01234567c4));
    74   vacc0x4567 = vaddw_s16(vacc0x4567, vget_high_s16(vprod0x01234567c4));
    [all …]

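All of the qs8-gemm and qs8-igemm kernels listed above share the same mull/addw-dup inner step: one activation byte is broadcast with vdup_lane_s8, multiplied against eight int8 weights with vmull_s8, and the resulting int16x8 product is widened into two int32x4 accumulators with vaddw_s16. The sketch below isolates that step; the helper name and simplified signature are illustrative assumptions, not part of the XNNPACK source.

    #include <arm_neon.h>

    // Illustrative sketch of one k-step for a 1x8 tile (hypothetical helper):
    // vb01234567c0 holds 8 int8 weights for columns 0..7 at step c0, va0 holds
    // the current 8 int8 activations of row 0.
    static inline void qs8_accumulate_c0(
        int8x8_t va0, int8x8_t vb01234567c0,
        int32x4_t* vacc0x0123, int32x4_t* vacc0x4567)
    {
      // "dup": broadcast activation lane 0, then do a widening 8-bit multiply.
      const int16x8_t vprod0x01234567c0 =
          vmull_s8(vb01234567c0, vdup_lane_s8(va0, 0));
      // "addw": widen each int16 half and add it to the int32 accumulators.
      *vacc0x0123 = vaddw_s16(*vacc0x0123, vget_low_s16(vprod0x01234567c0));
      *vacc0x4567 = vaddw_s16(*vacc0x4567, vget_high_s16(vprod0x01234567c0));
    }
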
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-neon-c32-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c32_acc2():
    101  const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    102  const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
    103  const int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF));
    104  const int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF));
    105  const int32x4_t vaccGHIJ = vaddw_s16(vbias, vget_low_s16(vacc0xGHIJKLMN));
    106  const int32x4_t vaccKLMN = vaddw_s16(vbias, vget_high_s16(vacc0xGHIJKLMN));
    107  const int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV));
    108  const int32x4_t vaccSTUV = vaddw_s16(vbias, vget_high_s16(vacc0xOPQRSTUV));
    139  const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    140  const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
    [all …]

D | 7p7x-minmax-neon-c24-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c24_acc2():
    88   const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    89   const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
    90   const int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF));
    91   const int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF));
    92   const int32x4_t vaccGHIJ = vaddw_s16(vbias, vget_low_s16(vacc0xGHIJKLMN));
    93   const int32x4_t vaccKLMN = vaddw_s16(vbias, vget_high_s16(vacc0xGHIJKLMN));
    122  const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    123  const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
    195  vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567));
    196  vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567));
    [all …]

D | 7p7x-minmax-neon-c16-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c16_acc2():
    75   const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    76   const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
    77   const int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF));
    78   const int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF));
    134  vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567));
    135  vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567));
    136  vacc89AB = vaddw_s16(vacc89AB, vget_low_s16(vacc0x89ABCDEF));
    137  vaccCDEF = vaddw_s16(vaccCDEF, vget_high_s16(vacc0x89ABCDEF));
    218  vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567));
    219  vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567));
    [all …]

D | 7p7x-minmax-neon-c8-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2():
    62   const int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    63   const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
    102  vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567));
    103  vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567));
    167  vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567));
    168  vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567));
    247  vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567));
    248  vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567));

D | 7x-minmax-neon-c32-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__neon_c32_acc2():
    124  int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    125  int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));
    126  int32x4_t vacc89AB = vaddw_s16(vbias, vget_low_s16(vacc0x89ABCDEF));
    127  int32x4_t vaccCDEF = vaddw_s16(vbias, vget_high_s16(vacc0x89ABCDEF));
    128  int32x4_t vaccGHIJ = vaddw_s16(vbias, vget_low_s16(vacc0xGHIJKLMN));
    129  int32x4_t vaccKLMN = vaddw_s16(vbias, vget_high_s16(vacc0xGHIJKLMN));
    130  int32x4_t vaccOPQR = vaddw_s16(vbias, vget_low_s16(vacc0xOPQRSTUV));
    131  int32x4_t vaccSTUV = vaddw_s16(vbias, vget_high_s16(vacc0xOPQRSTUV));
    296  int32x4_t vacc0123 = vaddw_s16(vbias, vget_low_s16(vacc0x01234567));
    297  int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567));

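The qs8-gavgpool kernels use vaddw_s16 differently: the input rows are first summed into an int16x8 per group of 8 channels, and that sum is then widened onto the broadcast int32 bias (first pass) or onto the int32 accumulators carried in the scratch buffer (later passes of the 7p7x variants). A minimal sketch of both steps follows; the helper names and signatures are assumptions for illustration only.

    #include <arm_neon.h>

    // Hedged sketch of the gavgpool accumulation (not the generated code):
    // vsum01234567 is the int16x8 sum of up to seven int8 rows for one
    // 8-channel group.
    static inline void gavgpool_first_pass(
        int16x8_t vsum01234567, int32x4_t vbias,
        int32x4_t* vacc0123, int32x4_t* vacc4567)
    {
      // First pass: start the int32 accumulators from the bias.
      *vacc0123 = vaddw_s16(vbias, vget_low_s16(vsum01234567));
      *vacc4567 = vaddw_s16(vbias, vget_high_s16(vsum01234567));
    }

    static inline void gavgpool_later_pass(
        int16x8_t vsum01234567, int32x4_t* vacc0123, int32x4_t* vacc4567)
    {
      // Later passes: widen the new row sum onto the running accumulators.
      *vacc0123 = vaddw_s16(*vacc0123, vget_low_s16(vsum01234567));
      *vacc4567 = vaddw_s16(*vacc4567, vget_high_s16(vsum01234567));
    }
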
/external/XNNPACK/src/qs8-igemm/ |
D | neon-mull-addw-dup.c.in | matched template lines:
    85   …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c${…
    86   …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    100  …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c0)…
    101  …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    110  …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c1)…
    111  …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    120  …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c2)…
    121  …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    130  …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c3)…
    131  …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    [all …]

/external/XNNPACK/src/qs8-gemm/ |
D | neon-mull-addw-dup.c.in | matched template lines:
    77   …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c${…
    78   …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    92   …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c0)…
    93   …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    102  …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c1)…
    103  …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    112  …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c2)…
    113  …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    122  …vacc${M}x${ABC[N:N+4]} = vaddw_s16(vacc${M}x${ABC[N:N+4]}, vget_low_s16(vprod${M}x${ABC[N:N+8]}c3)…
    123  …vacc${M}x${ABC[N+4:N+8]} = vaddw_s16(vacc${M}x${ABC[N+4:N+8]}, vget_high_s16(vprod${M}x${ABC[N:N+8…
    [all …]

/external/XNNPACK/src/qu8-gavgpool/ |
D | 7p7x-minmax-neon-c8.c | matches in xnn_qu8_gavgpool_minmax_ukernel_7p7x__neon_c8():
    61   const int32x4_t vacc_lo = vaddw_s16(vbias, vget_low_s16(vsum));
    62   const int32x4_t vacc_hi = vaddw_s16(vbias, vget_high_s16(vsum));
    98   vst1q_s32(acc, vaddw_s16(vacc_lo, vget_low_s16(vsum))); acc += 4;
    99   vst1q_s32(acc, vaddw_s16(vacc_hi, vget_high_s16(vsum))); acc += 4;
    159  vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum));
    160  vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum));
    231  vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum));
    232  vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum));

/external/XNNPACK/src/qu8-avgpool/ |
D | 9p8x-minmax-neon-c8.c | matches in xnn_qu8_avgpool_minmax_ukernel_9p8x__neon_c8():
    114  const int32x4_t vacc_lo = vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum)));
    115  const int32x4_t vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum)));
    187  vacc_lo = vaddw_s16(vacc_lo, vreinterpret_s16_u16(vget_low_u16(vsum)));
    188  vacc_hi = vaddw_s16(vacc_hi, vreinterpret_s16_u16(vget_high_u16(vsum)));
    282  vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum));
    283  vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum));
    356  vacc_lo = vaddw_s16(vacc_lo, vget_low_s16(vsum));
    357  vacc_hi = vaddw_s16(vacc_hi, vget_high_s16(vsum));

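In the unsigned (QU8) pools the per-channel row sum is held as uint16x8, so the code above reinterprets each half as int16 before the widening add against the signed bias; that is exact because at most 9 rows of uint8 data sum to 9 × 255 = 2295, well within int16 range. A sketch of that step (helper name and signature assumed, not the XNNPACK source):

    #include <arm_neon.h>

    // Hedged sketch: widen an unsigned row sum onto the signed int32 bias,
    // in the style of the qu8 avgpool/gavgpool kernels listed above.
    static inline void qu8_widen_sum(
        uint16x8_t vsum, int32x4_t vbias,
        int32x4_t* vacc_lo, int32x4_t* vacc_hi)
    {
      // Reinterpreting is lossless here: the sum of <= 9 uint8 values fits in int16.
      *vacc_lo = vaddw_s16(vbias, vreinterpret_s16_u16(vget_low_u16(vsum)));
      *vacc_hi = vaddw_s16(vbias, vreinterpret_s16_u16(vget_high_u16(vsum)));
    }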