Home
last modified time | relevance | path

Searched refs:vk12 (Results 1 – 25 of 202) sorted by relevance

123456789

/external/deqp/external/vulkancts/framework/vulkan/
DvkDeviceProperties.inl87 propertyType.driverID = allPropertiesBlobs.vk12.driverID;
88 …memcpy(propertyType.driverName, allPropertiesBlobs.vk12.driverName, sizeof(char) * VK_MAX_DRIVER_N…
89 …memcpy(propertyType.driverInfo, allPropertiesBlobs.vk12.driverInfo, sizeof(char) * VK_MAX_DRIVER_I…
90 propertyType.conformanceVersion = allPropertiesBlobs.vk12.conformanceVersion;
94 propertyType.denormBehaviorIndependence = allPropertiesBlobs.vk12.denormBehaviorIndependence;
95 propertyType.roundingModeIndependence = allPropertiesBlobs.vk12.roundingModeIndependence;
96 …propertyType.shaderSignedZeroInfNanPreserveFloat16 = allPropertiesBlobs.vk12.shaderSignedZeroInfNa…
97 …propertyType.shaderSignedZeroInfNanPreserveFloat32 = allPropertiesBlobs.vk12.shaderSignedZeroInfNa…
98 …propertyType.shaderSignedZeroInfNanPreserveFloat64 = allPropertiesBlobs.vk12.shaderSignedZeroInfNa…
99 propertyType.shaderDenormPreserveFloat16 = allPropertiesBlobs.vk12.shaderDenormPreserveFloat16;
[all …]
DvkDeviceFeatures.inl125 featureType.storageBuffer8BitAccess = allFeaturesBlobs.vk12.storageBuffer8BitAccess;
126 …featureType.uniformAndStorageBuffer8BitAccess = allFeaturesBlobs.vk12.uniformAndStorageBuffer8BitA…
127 featureType.storagePushConstant8 = allFeaturesBlobs.vk12.storagePushConstant8;
131 featureType.shaderBufferInt64Atomics = allFeaturesBlobs.vk12.shaderBufferInt64Atomics;
132 featureType.shaderSharedInt64Atomics = allFeaturesBlobs.vk12.shaderSharedInt64Atomics;
136 featureType.shaderFloat16 = allFeaturesBlobs.vk12.shaderFloat16;
137 featureType.shaderInt8 = allFeaturesBlobs.vk12.shaderInt8;
141 …featureType.shaderInputAttachmentArrayDynamicIndexing = allFeaturesBlobs.vk12.shaderInputAttachmen…
142 …featureType.shaderUniformTexelBufferArrayDynamicIndexing = allFeaturesBlobs.vk12.shaderUniformTexe…
143 …featureType.shaderStorageTexelBufferArrayDynamicIndexing = allFeaturesBlobs.vk12.shaderStorageTexe…
[all …]
/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-3x1.c42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
184 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
185 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
186 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
311 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
312 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
313 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
400 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
401 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
402 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5p2-minmax-scalar-3x1-acc2.c42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
184 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
185 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
186 vo2p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
314 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
315 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
316 vo2p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
406 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
407 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
408 vo2p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5s2p2-minmax-scalar-3x1.c43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
223 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
224 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
225 vo2p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
337 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
338 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
339 vo2p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
414 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
415 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
416 vo2p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
223 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
224 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
225 vo2p1 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
340 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
341 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
342 vo2p1 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
420 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
421 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
422 vo2p1 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D3x3p1-minmax-scalar-5x1.c40 const float vk12 = weights[6]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
160 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
161 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
162 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
163 vo3p0 += vi4x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
164 vo4p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
D5x5p2-minmax-scalar-2x1.c42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
160 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
161 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
255 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
256 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
320 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
321 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D3x3p1-minmax-scalar-6x1.c40 const float vk12 = weights[6]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
178 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
179 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
180 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
181 vo3p0 += vi4x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
182 vo4p0 += vi5x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
183 vo5p0 += vi6x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
D3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
194 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
195 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
196 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
197 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
198 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
301 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
302 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
303 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
304 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
[all …]
D3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
215 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
216 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
217 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
218 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
219 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
220 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
339 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
340 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
341 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
[all …]
D3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
215 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
216 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
217 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
218 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
219 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
220 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
339 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
340 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
341 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
[all …]
D3x3p1-minmax-ssse3-6x4.c43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
208 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
209 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
210 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
211 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
212 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
213 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
334 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
335 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
336 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
[all …]
D5x5s2p2-minmax-scalar-2x1.c43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
186 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
187 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
268 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
269 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
322 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
323 vo1p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5p2-minmax-scalar-2x1-acc3.c42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
160 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
161 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
259 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
260 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
328 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
329 vo1p2 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc2.c42 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
160 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
161 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
257 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
258 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
324 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
325 vo1p1 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1-acc2.c43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
186 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
187 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
270 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
271 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
326 vo0p1 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
327 vo1p1 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1-acc3.c43 const float vk12 = weights[8]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
186 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
187 vo1p2 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
272 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
273 vo1p2 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
330 vo0p2 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
331 vo1p2 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D3x3p1-minmax-sse-6x4.c43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
274 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
275 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
276 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
277 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
278 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
279 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi6x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
450 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
451 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
452 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
[all …]
D3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local
194 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
195 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
196 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
197 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
198 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
301 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
302 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
303 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
304 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
[all …]
D3x3p1-minmax-ssse3-5x4.c43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local
187 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
188 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
189 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
190 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
191 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
296 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
297 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
298 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
299 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
[all …]
D3x3p1-minmax-sse-5x4.c43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
245 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
246 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
247 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
248 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
249 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi5x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
398 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
399 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
400 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
401 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
[all …]
D3x3p1-minmax-scalar-4x1.c40 const float vk12 = weights[6]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
142 vo0p0 += vi1x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
143 vo1p0 += vi2x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
144 vo2p0 += vi3x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
145 vo3p0 += vi4x2 * vk12; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D3x3p1-minmax-wasmsimd-x86-loadsplat-4x4.c48 const v128_t vk12 = wasm_v32x4_shuffle(vw4567, vw4567, 2, 2, 2, 2); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local
173 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
174 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
175 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
176 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
263 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
264 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
265 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
266 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
D3x3p1-minmax-ssse3-4x4.c43 const __m128 vk12 = _mm_load1_ps(weights + 6); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
166 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
167 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
168 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
169 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
258 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi1x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
259 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi2x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
260 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi3x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
261 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi4x5678, vk12)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()

123456789