/external/llvm-project/llvm/test/CodeGen/PowerPC/ |
D | fp-strict-minmax.ll | 12 define <4 x float> @fmaxnum_v4f32(<4 x float> %vf0, <4 x float> %vf1) #0 { 18 <4 x float> %vf0, <4 x float> %vf1, 23 define <2 x double> @fmaxnum_v2f64(<2 x double> %vf0, <2 x double> %vf1) #0 { 29 <2 x double> %vf0, <2 x double> %vf1, 35 define <4 x float> @fminnum_v4f32(<4 x float> %vf0, <4 x float> %vf1) #0 { 41 <4 x float> %vf0, <4 x float> %vf1, 46 define <2 x double> @fminnum_v2f64(<2 x double> %vf0, <2 x double> %vf1) #0 { 52 <2 x double> %vf0, <2 x double> %vf1,
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | scalar-lut2048-p1-div-x2.c | 76 float vf0 = vy0 / vd0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2() local 80 vf0 = 0.0f; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2() 87 vf0 = vone - vf0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2() 93 y[0] = vf0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2()
|
D | scalar-p5-div-x2.c | 83 float vf0 = ve0 / vd0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2() local 87 vf0 = 0.0f; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2() 94 vf0 = vone - vf0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2() 100 y[0] = vf0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2()
|
D | scalar-lut64-p2-div-x2.c | 79 float vf0 = vy0 / vd0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2() local 83 vf0 = 0.0f; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2() 90 vf0 = vone - vf0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2() 96 y[0] = vf0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2()
|
D | avx2-rr1-p5-div-x16.c | 81 __m256 vf0 = _mm256_div_ps(ve0, vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16() local 84 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16() 87 vf0 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf0), vf0, vx0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16() 90 _mm256_storeu_ps(y, vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16()
|
D | avx2-rr1-p5-nr1fma-x16.c | 88 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() local 91 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() 94 vf0 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf0), vf0, vx0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16() 97 _mm256_storeu_ps(y, vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16()
|
D | avx2-rr1-p5-div-x24.c | 94 __m256 vf0 = _mm256_div_ps(ve0, vd0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24() local 98 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24() 102 vf0 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf0), vf0, vx0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24() 106 _mm256_storeu_ps(y, vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24()
|
D | avx2-rr1-p5-nr2fma-x16.c | 90 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() local 93 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() 96 vf0 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf0), vf0, vx0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16() 99 _mm256_storeu_ps(y, vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16()
|
D | avx2-rr1-p5-nr1fma-x24.c | 103 __m256 vf0 = _mm256_mul_ps(ve0, vr0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24() local 107 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24() 111 vf0 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf0), vf0, vx0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24() 115 _mm256_storeu_ps(y, vf0); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24()
|
D | avx-rr2-p5-div-x16.c | 89 __m256 vf0 = _mm256_div_ps(ve0, vd0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16() local 92 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16() 95 vf0 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf0), vf0, vx0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16() 98 _mm256_storeu_ps(y, vf0); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16()
|
D | scalar-lut2048-p1-div-x4.c | 100 float vf0 = vy0 / vd0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() local 106 vf0 = 0.0f; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() 119 vf0 = vone - vf0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() 131 y[0] = vf0; in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
|
D | scalar-p5-div-x4.c | 111 float vf0 = ve0 / vd0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() local 117 vf0 = 0.0f; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() 130 vf0 = vone - vf0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() 142 y[0] = vf0; in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
|
D | scalar-lut64-p2-div-x4.c | 105 float vf0 = vy0 / vd0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() local 111 vf0 = 0.0f; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() 124 vf0 = vone - vf0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() 136 y[0] = vf0; in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x8.c | 84 __m256 vf0 = _mm256_fmadd_ps(vt0, vp0, vs0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8() local 88 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8() 91 vf0 = _mm256_mul_ps(vf0, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8() 94 _mm256_storeu_ps(output, vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x8()
|
D | avx512f-p5-scalef-x16.c | 72 __m512 vf0 = _mm512_scalef_ps(vp0, vn0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() local 75 vf0 = _mm512_mul_ps(vf0, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 78 _mm512_storeu_ps(output, vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16() 79 _mm512_storeu_ps(output + 0, vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x16()
|
D | avx2-p5-x16.c | 96 __m256 vf0 = _mm256_fmadd_ps(vt0, vp0, vs0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16() local 101 vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vx0, vdenorm_cutoff, _CMP_LT_OS), vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16() 105 vf0 = _mm256_mul_ps(vf0, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16() 109 _mm256_storeu_ps(output, vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x16()
|
D | avx512f-p5-scalef-x32.c | 82 __m512 vf0 = _mm512_scalef_ps(vp0, vn0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() local 86 vf0 = _mm512_mul_ps(vf0, vscale); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 90 _mm512_storeu_ps(output, vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32() 91 _mm512_storeu_ps(output + 0, vf0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x32()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | wasmsimd-p5-x4.c | 190 const float vf0 = wasm_f32x4_extract_lane(vf, 0); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x4() local 191 output[0] = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x4() 192 vsum += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x4() 203 const float vf0 = wasm_f32x4_extract_lane(vf, 0); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x4() local 204 *output = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x4() 205 vsum += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x4()
|
D | scalar-p5-x2.c | 97 float vf0 = vt0 * vp0 + vs0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2() local 103 vf0 = 0.0f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2() 110 output[0] = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2() 115 vacc0 += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2()
|
D | scalar-p5-x2-acc2.c | 98 float vf0 = vt0 * vp0 + vs0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2() local 104 vf0 = 0.0f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2() 111 output[0] = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2() 116 vacc0 += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x2_acc2()
|
D | scalar-lut64-p2-x2-acc2.c | 108 float vf0 = vp0 * vs0 + vs0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2() local 114 vf0 = 0.0f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2() 121 output[0] = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2() 126 vacc0 += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2()
|
D | scalar-lut64-p2-x2.c | 107 float vf0 = vp0 * vs0 + vs0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2() local 113 vf0 = 0.0f; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2() 120 output[0] = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2() 125 vacc0 += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2()
|
D | wasmsimd-p5-x8-acc2.c | 209 const float vf0 = wasm_f32x4_extract_lane(vf, 0); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8_acc2() local 210 output[0] = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8_acc2() 211 vsum += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8_acc2() 222 const float vf0 = wasm_f32x4_extract_lane(vf, 0); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8_acc2() local 223 *output = vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8_acc2() 224 vsum += vf0; in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x8_acc2()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x16.c | 73 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() local 78 vf0 = _mm512_scalef_ps(vf0, ve0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 81 _mm512_storeu_ps(y, vf0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16() 82 _mm512_storeu_ps(y + 0, vf0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x16()
|
D | avx512f-p5-scalef-x32.c | 82 __m512 vf0 = _mm512_mul_ps(vp0, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() local 89 vf0 = _mm512_scalef_ps(vf0, ve0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 93 _mm512_storeu_ps(y, vf0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32() 94 _mm512_storeu_ps(y + 0, vf0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x32()
|