/external/deqp/external/vulkancts/framework/vulkan/ |
D | vkDeviceProperties.inl | 87 propertyType.driverID = allPropertiesBlobs.vk12.driverID; 88 …memcpy(propertyType.driverName, allPropertiesBlobs.vk12.driverName, sizeof(char) * VK_MAX_DRIVER_N… 89 …memcpy(propertyType.driverInfo, allPropertiesBlobs.vk12.driverInfo, sizeof(char) * VK_MAX_DRIVER_I… 90 propertyType.conformanceVersion = allPropertiesBlobs.vk12.conformanceVersion; 94 propertyType.denormBehaviorIndependence = allPropertiesBlobs.vk12.denormBehaviorIndependence; 95 propertyType.roundingModeIndependence = allPropertiesBlobs.vk12.roundingModeIndependence; 96 …propertyType.shaderSignedZeroInfNanPreserveFloat16 = allPropertiesBlobs.vk12.shaderSignedZeroInfNa… 97 …propertyType.shaderSignedZeroInfNanPreserveFloat32 = allPropertiesBlobs.vk12.shaderSignedZeroInfNa… 98 …propertyType.shaderSignedZeroInfNanPreserveFloat64 = allPropertiesBlobs.vk12.shaderSignedZeroInfNa… 99 propertyType.shaderDenormPreserveFloat16 = allPropertiesBlobs.vk12.shaderDenormPreserveFloat16; [all …]
|
D | vkDeviceFeatures.inl | 125 featureType.storageBuffer8BitAccess = allFeaturesBlobs.vk12.storageBuffer8BitAccess; 126 …featureType.uniformAndStorageBuffer8BitAccess = allFeaturesBlobs.vk12.uniformAndStorageBuffer8BitA… 127 featureType.storagePushConstant8 = allFeaturesBlobs.vk12.storagePushConstant8; 131 featureType.shaderBufferInt64Atomics = allFeaturesBlobs.vk12.shaderBufferInt64Atomics; 132 featureType.shaderSharedInt64Atomics = allFeaturesBlobs.vk12.shaderSharedInt64Atomics; 136 featureType.shaderFloat16 = allFeaturesBlobs.vk12.shaderFloat16; 137 featureType.shaderInt8 = allFeaturesBlobs.vk12.shaderInt8; 141 …featureType.shaderInputAttachmentArrayDynamicIndexing = allFeaturesBlobs.vk12.shaderInputAttachmen… 142 …featureType.shaderUniformTexelBufferArrayDynamicIndexing = allFeaturesBlobs.vk12.shaderUniformTexe… 143 …featureType.shaderStorageTexelBufferArrayDynamicIndexing = allFeaturesBlobs.vk12.shaderStorageTexe… [all …]
|
/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-3x1.c | 42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 184 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 185 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 186 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 311 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 312 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 313 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 400 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 401 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() 402 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 184 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 185 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 186 vo2p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 314 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 315 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 316 vo2p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 406 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 407 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() 408 vo2p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
|
D | 5x5s2p2-minmax-scalar-3x1.c | 43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 223 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 224 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 225 vo2p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 337 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 338 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 339 vo2p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 414 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 415 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() 416 vo2p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 223 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 224 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 225 vo2p1 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 340 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 341 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 342 vo2p1 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 420 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 421 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() 422 vo2p1 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
|
D | 3x3p1-minmax-scalar-5x1.c | 40 const float vk12 = weights[6]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 160 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 161 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 162 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 163 vo3p0 += vi4x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() 164 vo4p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
|
D | 5x5p2-minmax-scalar-2x1.c | 42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local 160 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 161 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 255 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 256 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 320 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() 321 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
|
D | 3x3p1-minmax-scalar-6x1.c | 40 const float vk12 = weights[6]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 178 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 179 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 180 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 181 vo3p0 += vi4x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 182 vo4p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() 183 vo5p0 += vi6x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c | 48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local 194 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 195 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 196 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 197 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 198 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 301 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 302 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 303 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() 304 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c | 48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local 215 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 216 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 217 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 218 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 219 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 220 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 339 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 340 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() 341 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c | 48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local 215 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 216 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 217 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 218 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 219 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 220 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 339 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 340 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() 341 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() [all …]
|
D | 3x3p1-minmax-ssse3-6x4.c | 43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local 208 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 209 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 210 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 211 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 212 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 213 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 334 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 335 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() 336 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() [all …]
|
D | 5x5s2p2-minmax-scalar-2x1.c | 43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local 186 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 187 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 268 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 269 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 322 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() 323 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
|
D | 5x5p2-minmax-scalar-2x1-acc3.c | 42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local 160 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 161 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 259 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 260 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 328 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() 329 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
|
D | 5x5p2-minmax-scalar-2x1-acc2.c | 42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local 160 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 161 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 257 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 258 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 324 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() 325 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1-acc2.c | 43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local 186 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 187 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 270 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 271 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 326 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() 327 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
|
D | 5x5s2p2-minmax-scalar-2x1-acc3.c | 43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local 186 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 187 vo1p2 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 272 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 273 vo1p2 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 330 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() 331 vo1p2 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
|
D | 3x3p1-minmax-sse-6x4.c | 43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 274 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 275 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 276 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 277 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 278 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 279 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 450 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 451 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 452 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() [all …]
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c | 48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local 194 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 195 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 196 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 197 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 198 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 301 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 302 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 303 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() 304 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() [all …]
|
D | 3x3p1-minmax-ssse3-5x4.c | 43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local 187 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 188 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 189 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 190 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 191 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 296 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 297 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 298 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() 299 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() [all …]
|
D | 3x3p1-minmax-sse-5x4.c | 43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 245 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 246 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 247 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 248 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 249 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 398 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 399 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 400 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 401 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() [all …]
|
D | 3x3p1-minmax-scalar-4x1.c | 40 const float vk12 = weights[6]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 142 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 143 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 144 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() 145 vo3p0 += vi4x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-4x4.c | 48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local 173 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() 174 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() 175 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() 176 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() 263 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() 264 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() 265 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() 266 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
|
D | 3x3p1-minmax-ssse3-4x4.c | 43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local 166 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 167 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 168 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 169 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 258 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 259 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 260 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() 261 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
|