1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "SpirvShader.hpp"
16 
17 #include "System/Types.hpp"
18 
19 #include "Vulkan/VkDescriptorSetLayout.hpp"
20 #include "Vulkan/VkPipelineLayout.hpp"
21 
22 #include <spirv/unified1/spirv.hpp>
23 
24 namespace {
25 
SpirvFormatToVulkanFormat(spv::ImageFormat format)26 VkFormat SpirvFormatToVulkanFormat(spv::ImageFormat format)
27 {
28 	switch(format)
29 	{
30 		case spv::ImageFormatRgba32f: return VK_FORMAT_R32G32B32A32_SFLOAT;
31 		case spv::ImageFormatRgba16f: return VK_FORMAT_R16G16B16A16_SFLOAT;
32 		case spv::ImageFormatR32f: return VK_FORMAT_R32_SFLOAT;
33 		case spv::ImageFormatRgba8: return VK_FORMAT_R8G8B8A8_UNORM;
34 		case spv::ImageFormatRgba8Snorm: return VK_FORMAT_R8G8B8A8_SNORM;
35 		case spv::ImageFormatRg32f: return VK_FORMAT_R32G32_SFLOAT;
36 		case spv::ImageFormatRg16f: return VK_FORMAT_R16G16_SFLOAT;
37 		case spv::ImageFormatR11fG11fB10f: return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
38 		case spv::ImageFormatR16f: return VK_FORMAT_R16_SFLOAT;
39 		case spv::ImageFormatRgba16: return VK_FORMAT_R16G16B16A16_UNORM;
40 		case spv::ImageFormatRgb10A2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
41 		case spv::ImageFormatRg16: return VK_FORMAT_R16G16_UNORM;
42 		case spv::ImageFormatRg8: return VK_FORMAT_R8G8_UNORM;
43 		case spv::ImageFormatR16: return VK_FORMAT_R16_UNORM;
44 		case spv::ImageFormatR8: return VK_FORMAT_R8_UNORM;
45 		case spv::ImageFormatRgba16Snorm: return VK_FORMAT_R16G16B16A16_SNORM;
46 		case spv::ImageFormatRg16Snorm: return VK_FORMAT_R16G16_SNORM;
47 		case spv::ImageFormatRg8Snorm: return VK_FORMAT_R8G8_SNORM;
48 		case spv::ImageFormatR16Snorm: return VK_FORMAT_R16_SNORM;
49 		case spv::ImageFormatR8Snorm: return VK_FORMAT_R8_SNORM;
50 		case spv::ImageFormatRgba32i: return VK_FORMAT_R32G32B32A32_SINT;
51 		case spv::ImageFormatRgba16i: return VK_FORMAT_R16G16B16A16_SINT;
52 		case spv::ImageFormatRgba8i: return VK_FORMAT_R8G8B8A8_SINT;
53 		case spv::ImageFormatR32i: return VK_FORMAT_R32_SINT;
54 		case spv::ImageFormatRg32i: return VK_FORMAT_R32G32_SINT;
55 		case spv::ImageFormatRg16i: return VK_FORMAT_R16G16_SINT;
56 		case spv::ImageFormatRg8i: return VK_FORMAT_R8G8_SINT;
57 		case spv::ImageFormatR16i: return VK_FORMAT_R16_SINT;
58 		case spv::ImageFormatR8i: return VK_FORMAT_R8_SINT;
59 		case spv::ImageFormatRgba32ui: return VK_FORMAT_R32G32B32A32_UINT;
60 		case spv::ImageFormatRgba16ui: return VK_FORMAT_R16G16B16A16_UINT;
61 		case spv::ImageFormatRgba8ui: return VK_FORMAT_R8G8B8A8_UINT;
62 		case spv::ImageFormatR32ui: return VK_FORMAT_R32_UINT;
63 		case spv::ImageFormatRgb10a2ui: return VK_FORMAT_A2B10G10R10_UINT_PACK32;
64 		case spv::ImageFormatRg32ui: return VK_FORMAT_R32G32_UINT;
65 		case spv::ImageFormatRg16ui: return VK_FORMAT_R16G16_UINT;
66 		case spv::ImageFormatRg8ui: return VK_FORMAT_R8G8_UINT;
67 		case spv::ImageFormatR16ui: return VK_FORMAT_R16_UINT;
68 		case spv::ImageFormatR8ui: return VK_FORMAT_R8_UINT;
69 
70 		default:
71 			UNSUPPORTED("SPIR-V ImageFormat %u", format);
72 			return VK_FORMAT_UNDEFINED;
73 	}
74 }
75 
sRGBtoLinear(sw::SIMD::Float c)76 sw::SIMD::Float sRGBtoLinear(sw::SIMD::Float c)
77 {
78 	sw::SIMD::Float lc = c * sw::SIMD::Float(1.0f / 12.92f);
79 	sw::SIMD::Float ec = sw::power((c + sw::SIMD::Float(0.055f)) * sw::SIMD::Float(1.0f / 1.055f), sw::SIMD::Float(2.4f));
80 
81 	sw::SIMD::Int linear = CmpLT(c, sw::SIMD::Float(0.04045f));
82 
83 	return rr::As<sw::SIMD::Float>((linear & rr::As<sw::SIMD::Int>(lc)) | (~linear & rr::As<sw::SIMD::Int>(ec)));  // TODO: IfThenElse()
84 }
85 
86 }  // anonymous namespace
87 
88 namespace sw {
89 
EmitImageSampleImplicitLod(Variant variant,InsnIterator insn,EmitState * state) const90 SpirvShader::EmitResult SpirvShader::EmitImageSampleImplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
91 {
92 	return EmitImageSample({ variant, Implicit }, insn, state);
93 }
94 
EmitImageGather(Variant variant,InsnIterator insn,EmitState * state) const95 SpirvShader::EmitResult SpirvShader::EmitImageGather(Variant variant, InsnIterator insn, EmitState *state) const
96 {
97 	ImageInstruction instruction = { variant, Gather };
98 	instruction.gatherComponent = !instruction.isDref() ? getObject(insn.word(5)).constantValue[0] : 0;
99 
100 	return EmitImageSample(instruction, insn, state);
101 }
102 
EmitImageSampleExplicitLod(Variant variant,InsnIterator insn,EmitState * state) const103 SpirvShader::EmitResult SpirvShader::EmitImageSampleExplicitLod(Variant variant, InsnIterator insn, EmitState *state) const
104 {
105 	auto isDref = (variant == Dref) || (variant == ProjDref);
106 	uint32_t imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(isDref ? 6 : 5));
107 	imageOperands &= ~spv::ImageOperandsConstOffsetMask;  // Dealt with later.
108 
109 	if((imageOperands & spv::ImageOperandsLodMask) == imageOperands)
110 	{
111 		return EmitImageSample({ variant, Lod }, insn, state);
112 	}
113 	else if((imageOperands & spv::ImageOperandsGradMask) == imageOperands)
114 	{
115 		return EmitImageSample({ variant, Grad }, insn, state);
116 	}
117 	else
118 		UNSUPPORTED("Image operands 0x%08X", imageOperands);
119 
120 	return EmitResult::Continue;
121 }
122 
EmitImageFetch(InsnIterator insn,EmitState * state) const123 SpirvShader::EmitResult SpirvShader::EmitImageFetch(InsnIterator insn, EmitState *state) const
124 {
125 	return EmitImageSample({ None, Fetch }, insn, state);
126 }
127 
EmitImageSample(ImageInstruction instruction,InsnIterator insn,EmitState * state) const128 SpirvShader::EmitResult SpirvShader::EmitImageSample(ImageInstruction instruction, InsnIterator insn, EmitState *state) const
129 {
130 	auto &resultType = getType(insn.resultTypeId());
131 	auto &result = state->createIntermediate(insn.resultId(), resultType.componentCount);
132 	Array<SIMD::Float> out(4);
133 
134 	// TODO(b/153380916): When we're in a code path that is always executed,
135 	// i.e. post-dominators of the entry block, we don't have to dynamically
136 	// check whether any lanes are active, and can elide the jump.
137 	If(AnyTrue(state->activeLaneMask()))
138 	{
139 		EmitImageSampleUnconditional(out, instruction, insn, state);
140 	}
141 
142 	for(auto i = 0u; i < resultType.componentCount; i++) { result.move(i, out[i]); }
143 
144 	return EmitResult::Continue;
145 }
146 
EmitImageSampleUnconditional(Array<SIMD::Float> & out,ImageInstruction instruction,InsnIterator insn,EmitState * state) const147 void SpirvShader::EmitImageSampleUnconditional(Array<SIMD::Float> &out, ImageInstruction instruction, InsnIterator insn, EmitState *state) const
148 {
149 	Object::ID sampledImageId = insn.word(3);  // For OpImageFetch this is just an Image, not a SampledImage.
150 	Object::ID coordinateId = insn.word(4);
151 
152 	auto imageDescriptor = state->getPointer(sampledImageId).base;  // vk::SampledImageDescriptor*
153 
154 	// If using a separate sampler, look through the OpSampledImage instruction to find the sampler descriptor
155 	auto &sampledImage = getObject(sampledImageId);
156 	auto samplerDescriptor = (sampledImage.opcode() == spv::OpSampledImage) ? state->getPointer(sampledImage.definition.word(4)).base : imageDescriptor;
157 
158 	auto coordinate = Operand(this, state, coordinateId);
159 
160 	Pointer<Byte> sampler = samplerDescriptor + OFFSET(vk::SampledImageDescriptor, sampler);  // vk::Sampler*
161 	Pointer<Byte> texture = imageDescriptor + OFFSET(vk::SampledImageDescriptor, texture);    // sw::Texture*
162 
163 	// Above we assumed that if the SampledImage operand is not the result of an OpSampledImage,
164 	// it must be a combined image sampler loaded straight from the descriptor set. For OpImageFetch
165 	// it's just an Image operand, so there's no sampler descriptor data.
166 	if(getType(sampledImage).opcode() != spv::OpTypeSampledImage)
167 	{
168 		sampler = Pointer<Byte>(nullptr);
169 	}
170 
171 	uint32_t imageOperands = spv::ImageOperandsMaskNone;
172 	bool lodOrBias = false;
173 	Object::ID lodOrBiasId = 0;
174 	bool grad = false;
175 	Object::ID gradDxId = 0;
176 	Object::ID gradDyId = 0;
177 	bool constOffset = false;
178 	Object::ID offsetId = 0;
179 	bool sample = false;
180 	Object::ID sampleId = 0;
181 
182 	uint32_t operand = (instruction.isDref() || instruction.samplerMethod == Gather) ? 6 : 5;
183 
184 	if(insn.wordCount() > operand)
185 	{
186 		imageOperands = static_cast<spv::ImageOperandsMask>(insn.word(operand++));
187 
188 		if(imageOperands & spv::ImageOperandsBiasMask)
189 		{
190 			lodOrBias = true;
191 			lodOrBiasId = insn.word(operand);
192 			operand++;
193 			imageOperands &= ~spv::ImageOperandsBiasMask;
194 
195 			ASSERT(instruction.samplerMethod == Implicit);
196 			instruction.samplerMethod = Bias;
197 		}
198 
199 		if(imageOperands & spv::ImageOperandsLodMask)
200 		{
201 			lodOrBias = true;
202 			lodOrBiasId = insn.word(operand);
203 			operand++;
204 			imageOperands &= ~spv::ImageOperandsLodMask;
205 		}
206 
207 		if(imageOperands & spv::ImageOperandsGradMask)
208 		{
209 			ASSERT(!lodOrBias);  // SPIR-V 1.3: "It is invalid to set both the Lod and Grad bits." Bias is for ImplicitLod, Grad for ExplicitLod.
210 			grad = true;
211 			gradDxId = insn.word(operand + 0);
212 			gradDyId = insn.word(operand + 1);
213 			operand += 2;
214 			imageOperands &= ~spv::ImageOperandsGradMask;
215 		}
216 
217 		if(imageOperands & spv::ImageOperandsConstOffsetMask)
218 		{
219 			constOffset = true;
220 			offsetId = insn.word(operand);
221 			operand++;
222 			imageOperands &= ~spv::ImageOperandsConstOffsetMask;
223 		}
224 
225 		if(imageOperands & spv::ImageOperandsSampleMask)
226 		{
227 			sample = true;
228 			sampleId = insn.word(operand);
229 			imageOperands &= ~spv::ImageOperandsSampleMask;
230 
231 			ASSERT(instruction.samplerMethod == Fetch);
232 			instruction.sample = true;
233 		}
234 
235 		if(imageOperands != 0)
236 		{
237 			UNSUPPORTED("Image operands 0x%08X", imageOperands);
238 		}
239 	}
240 
241 	Array<SIMD::Float> in(16);  // Maximum 16 input parameter components.
242 
243 	uint32_t coordinates = coordinate.componentCount - instruction.isProj();
244 	instruction.coordinates = coordinates;
245 
246 	uint32_t i = 0;
247 	for(; i < coordinates; i++)
248 	{
249 		if(instruction.isProj())
250 		{
251 			in[i] = coordinate.Float(i) / coordinate.Float(coordinates);  // TODO(b/129523279): Optimize using reciprocal.
252 		}
253 		else
254 		{
255 			in[i] = coordinate.Float(i);
256 		}
257 	}
258 
259 	if(instruction.isDref())
260 	{
261 		auto drefValue = Operand(this, state, insn.word(5));
262 
263 		if(instruction.isProj())
264 		{
265 			in[i] = drefValue.Float(0) / coordinate.Float(coordinates);  // TODO(b/129523279): Optimize using reciprocal.
266 		}
267 		else
268 		{
269 			in[i] = drefValue.Float(0);
270 		}
271 
272 		i++;
273 	}
274 
275 	if(lodOrBias)
276 	{
277 		auto lodValue = Operand(this, state, lodOrBiasId);
278 		in[i] = lodValue.Float(0);
279 		i++;
280 	}
281 	else if(grad)
282 	{
283 		auto dxValue = Operand(this, state, gradDxId);
284 		auto dyValue = Operand(this, state, gradDyId);
285 		ASSERT(dxValue.componentCount == dxValue.componentCount);
286 
287 		instruction.grad = dxValue.componentCount;
288 
289 		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
290 		{
291 			in[i] = dxValue.Float(j);
292 		}
293 
294 		for(uint32_t j = 0; j < dxValue.componentCount; j++, i++)
295 		{
296 			in[i] = dyValue.Float(j);
297 		}
298 	}
299 	else if(instruction.samplerMethod == Fetch)
300 	{
301 		// The instruction didn't provide a lod operand, but the sampler's Fetch
302 		// function requires one to be present. If no lod is supplied, the default
303 		// is zero.
304 		in[i] = As<SIMD::Float>(SIMD::Int(0));
305 		i++;
306 	}
307 
308 	if(constOffset)
309 	{
310 		auto offsetValue = Operand(this, state, offsetId);
311 		instruction.offset = offsetValue.componentCount;
312 
313 		for(uint32_t j = 0; j < offsetValue.componentCount; j++, i++)
314 		{
315 			in[i] = As<SIMD::Float>(offsetValue.Int(j));  // Integer values, but transfered as float.
316 		}
317 	}
318 
319 	if(sample)
320 	{
321 		auto sampleValue = Operand(this, state, sampleId);
322 		in[i] = As<SIMD::Float>(sampleValue.Int(0));
323 	}
324 
325 	auto cacheIt = state->routine->samplerCache.find(insn.resultId());
326 	ASSERT(cacheIt != state->routine->samplerCache.end());
327 	auto &cache = cacheIt->second;
328 	auto cacheHit = cache.imageDescriptor == imageDescriptor && cache.sampler == sampler;
329 
330 	If(!cacheHit)
331 	{
332 		cache.function = Call(getImageSampler, instruction.parameters, imageDescriptor, sampler);
333 		cache.imageDescriptor = imageDescriptor;
334 		cache.sampler = sampler;
335 	}
336 
337 	Call<ImageSampler>(cache.function, texture, &in[0], &out[0], state->routine->constants);
338 }
339 
EmitImageQuerySizeLod(InsnIterator insn,EmitState * state) const340 SpirvShader::EmitResult SpirvShader::EmitImageQuerySizeLod(InsnIterator insn, EmitState *state) const
341 {
342 	auto &resultTy = getType(Type::ID(insn.resultTypeId()));
343 	auto imageId = Object::ID(insn.word(3));
344 	auto lodId = Object::ID(insn.word(4));
345 
346 	auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
347 	GetImageDimensions(state, resultTy, imageId, lodId, dst);
348 
349 	return EmitResult::Continue;
350 }
351 
EmitImageQuerySize(InsnIterator insn,EmitState * state) const352 SpirvShader::EmitResult SpirvShader::EmitImageQuerySize(InsnIterator insn, EmitState *state) const
353 {
354 	auto &resultTy = getType(Type::ID(insn.resultTypeId()));
355 	auto imageId = Object::ID(insn.word(3));
356 	auto lodId = Object::ID(0);
357 
358 	auto &dst = state->createIntermediate(insn.resultId(), resultTy.componentCount);
359 	GetImageDimensions(state, resultTy, imageId, lodId, dst);
360 
361 	return EmitResult::Continue;
362 }
363 
EmitImageQueryLod(InsnIterator insn,EmitState * state) const364 SpirvShader::EmitResult SpirvShader::EmitImageQueryLod(InsnIterator insn, EmitState *state) const
365 {
366 	return EmitImageSample({ None, Query }, insn, state);
367 }
368 
GetImageDimensions(EmitState const * state,Type const & resultTy,Object::ID imageId,Object::ID lodId,Intermediate & dst) const369 void SpirvShader::GetImageDimensions(EmitState const *state, Type const &resultTy, Object::ID imageId, Object::ID lodId, Intermediate &dst) const
370 {
371 	auto routine = state->routine;
372 	auto &image = getObject(imageId);
373 	auto &imageType = getType(image);
374 
375 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
376 	bool isArrayed = imageType.definition.word(5) != 0;
377 	uint32_t dimensions = resultTy.componentCount - (isArrayed ? 1 : 0);
378 
379 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
380 	auto descriptorType = routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
381 
382 	Pointer<Byte> descriptor = state->getPointer(imageId).base;
383 
384 	Int width;
385 	Int height;
386 	Int depth;
387 
388 	switch(descriptorType)
389 	{
390 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
391 		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
392 			width = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
393 			height = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
394 			depth = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
395 			break;
396 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
397 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
398 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
399 			width = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, width));
400 			height = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, height));
401 			depth = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, depth));
402 			break;
403 		default:
404 			UNREACHABLE("Image descriptorType: %d", int(descriptorType));
405 	}
406 
407 	if(lodId != 0)
408 	{
409 		auto lodVal = Operand(this, state, lodId);
410 		ASSERT(lodVal.componentCount == 1);
411 		auto lod = lodVal.Int(0);
412 		auto one = SIMD::Int(1);
413 
414 		if(dimensions >= 1) dst.move(0, Max(SIMD::Int(width) >> lod, one));
415 		if(dimensions >= 2) dst.move(1, Max(SIMD::Int(height) >> lod, one));
416 		if(dimensions >= 3) dst.move(2, Max(SIMD::Int(depth) >> lod, one));
417 	}
418 	else
419 	{
420 
421 		if(dimensions >= 1) dst.move(0, SIMD::Int(width));
422 		if(dimensions >= 2) dst.move(1, SIMD::Int(height));
423 		if(dimensions >= 3) dst.move(2, SIMD::Int(depth));
424 	}
425 
426 	if(isArrayed)
427 	{
428 		dst.move(dimensions, SIMD::Int(depth));
429 	}
430 }
431 
EmitImageQueryLevels(InsnIterator insn,EmitState * state) const432 SpirvShader::EmitResult SpirvShader::EmitImageQueryLevels(InsnIterator insn, EmitState *state) const
433 {
434 	auto &resultTy = getType(Type::ID(insn.resultTypeId()));
435 	ASSERT(resultTy.componentCount == 1);
436 	auto imageId = Object::ID(insn.word(3));
437 
438 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
439 	auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
440 
441 	Pointer<Byte> descriptor = state->getPointer(imageId).base;
442 	Int mipLevels = 0;
443 	switch(descriptorType)
444 	{
445 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
446 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
447 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
448 			mipLevels = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, mipLevels));  // uint32_t
449 			break;
450 		default:
451 			UNREACHABLE("Image descriptorType: %d", int(descriptorType));
452 	}
453 
454 	auto &dst = state->createIntermediate(insn.resultId(), 1);
455 	dst.move(0, SIMD::Int(mipLevels));
456 
457 	return EmitResult::Continue;
458 }
459 
EmitImageQuerySamples(InsnIterator insn,EmitState * state) const460 SpirvShader::EmitResult SpirvShader::EmitImageQuerySamples(InsnIterator insn, EmitState *state) const
461 {
462 	auto &resultTy = getType(Type::ID(insn.resultTypeId()));
463 	ASSERT(resultTy.componentCount == 1);
464 	auto imageId = Object::ID(insn.word(3));
465 	auto imageTy = getType(getObject(imageId));
466 	ASSERT(imageTy.definition.opcode() == spv::OpTypeImage);
467 	ASSERT(imageTy.definition.word(3) == spv::Dim2D);
468 	ASSERT(imageTy.definition.word(6 /* MS */) == 1);
469 
470 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
471 	auto descriptorType = state->routine->pipelineLayout->getDescriptorType(d.DescriptorSet, d.Binding);
472 
473 	Pointer<Byte> descriptor = state->getPointer(imageId).base;
474 	Int sampleCount = 0;
475 	switch(descriptorType)
476 	{
477 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
478 			sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));  // uint32_t
479 			break;
480 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
481 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
482 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
483 			sampleCount = *Pointer<Int>(descriptor + OFFSET(vk::SampledImageDescriptor, sampleCount));  // uint32_t
484 			break;
485 		default:
486 			UNREACHABLE("Image descriptorType: %d", int(descriptorType));
487 	}
488 
489 	auto &dst = state->createIntermediate(insn.resultId(), 1);
490 	dst.move(0, SIMD::Int(sampleCount));
491 
492 	return EmitResult::Continue;
493 }
494 
GetTexelAddress(EmitState const * state,Pointer<Byte> imageBase,Int imageSizeInBytes,Operand const & coordinate,Type const & imageType,Pointer<Byte> descriptor,int texelSize,Object::ID sampleId,bool useStencilAspect,OutOfBoundsBehavior outOfBoundsBehavior) const495 SIMD::Pointer SpirvShader::GetTexelAddress(EmitState const *state, Pointer<Byte> imageBase, Int imageSizeInBytes, Operand const &coordinate, Type const &imageType, Pointer<Byte> descriptor, int texelSize, Object::ID sampleId, bool useStencilAspect, OutOfBoundsBehavior outOfBoundsBehavior) const
496 {
497 	auto routine = state->routine;
498 	bool isArrayed = imageType.definition.word(5) != 0;
499 	auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
500 	int dims = coordinate.componentCount - (isArrayed ? 1 : 0);
501 
502 	SIMD::Int u = coordinate.Int(0);
503 	SIMD::Int v = SIMD::Int(0);
504 
505 	if(coordinate.componentCount > 1)
506 	{
507 		v = coordinate.Int(1);
508 	}
509 
510 	if(dim == spv::DimSubpassData)
511 	{
512 		u += routine->windowSpacePosition[0];
513 		v += routine->windowSpacePosition[1];
514 	}
515 
516 	auto rowPitch = SIMD::Int(*Pointer<Int>(descriptor + (useStencilAspect
517 	                                                          ? OFFSET(vk::StorageImageDescriptor, stencilRowPitchBytes)
518 	                                                          : OFFSET(vk::StorageImageDescriptor, rowPitchBytes))));
519 	auto slicePitch = SIMD::Int(
520 	    *Pointer<Int>(descriptor + (useStencilAspect
521 	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSlicePitchBytes)
522 	                                    : OFFSET(vk::StorageImageDescriptor, slicePitchBytes))));
523 	auto samplePitch = SIMD::Int(
524 	    *Pointer<Int>(descriptor + (useStencilAspect
525 	                                    ? OFFSET(vk::StorageImageDescriptor, stencilSamplePitchBytes)
526 	                                    : OFFSET(vk::StorageImageDescriptor, samplePitchBytes))));
527 
528 	SIMD::Int ptrOffset = u * SIMD::Int(texelSize);
529 
530 	if(dims > 1)
531 	{
532 		ptrOffset += v * rowPitch;
533 	}
534 
535 	SIMD::Int w = 0;
536 	if((dims > 2) || isArrayed)
537 	{
538 		if(dims > 2)
539 		{
540 			w += coordinate.Int(2);
541 		}
542 
543 		if(isArrayed)
544 		{
545 			w += coordinate.Int(dims);
546 		}
547 
548 		ptrOffset += w * slicePitch;
549 	}
550 
551 	if(dim == spv::DimSubpassData)
552 	{
553 		// Multiview input attachment access is to the layer corresponding to the current view
554 		ptrOffset += SIMD::Int(routine->viewID) * slicePitch;
555 	}
556 
557 	SIMD::Int n = 0;
558 	if(sampleId.value())
559 	{
560 		Operand sample(this, state, sampleId);
561 		if(!sample.isConstantZero())
562 		{
563 			n = sample.Int(0);
564 			ptrOffset += n * samplePitch;
565 		}
566 	}
567 
568 	// If the out-of-bounds behavior is set to nullify, then each coordinate must be tested individually.
569 	// Other out-of-bounds behaviors work properly by just comparing the offset against the total size.
570 	if(outOfBoundsBehavior == OutOfBoundsBehavior::Nullify)
571 	{
572 		SIMD::UInt width = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, width));
573 		SIMD::Int oobMask = As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(u), width));
574 
575 		if(dims > 1)
576 		{
577 			SIMD::UInt height = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, height));
578 			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(v), height));
579 		}
580 
581 		if((dims > 2) || isArrayed)
582 		{
583 			UInt depth = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, depth));
584 			if(dim == spv::DimCube) { depth *= 6; }
585 			oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(w), SIMD::UInt(depth)));
586 		}
587 
588 		if(sampleId.value())
589 		{
590 			Operand sample(this, state, sampleId);
591 			if(!sample.isConstantZero())
592 			{
593 				SIMD::UInt sampleCount = *Pointer<UInt>(descriptor + OFFSET(vk::StorageImageDescriptor, sampleCount));
594 				oobMask |= As<SIMD::Int>(CmpNLT(As<SIMD::UInt>(n), sampleCount));
595 			}
596 		}
597 
598 		constexpr int32_t OOB_OFFSET = 0x7FFFFFFF - 16;  // SIMD pointer offsets are signed 32-bit, so this is the largest offset (for 16-byte texels).
599 		static_assert(OOB_OFFSET >= MAX_MEMORY_ALLOCATION_SIZE, "the largest offset must be guaranteed to be out-of-bounds");
600 
601 		ptrOffset = (ptrOffset & ~oobMask) | (oobMask & SIMD::Int(OOB_OFFSET));  // oob ? OOB_OFFSET : ptrOffset  // TODO: IfThenElse()
602 	}
603 
604 	return SIMD::Pointer(imageBase, imageSizeInBytes, ptrOffset);
605 }
606 
EmitImageRead(InsnIterator insn,EmitState * state) const607 SpirvShader::EmitResult SpirvShader::EmitImageRead(InsnIterator insn, EmitState *state) const
608 {
609 	auto &resultType = getType(Type::ID(insn.word(1)));
610 	auto imageId = Object::ID(insn.word(3));
611 	auto &image = getObject(imageId);
612 	auto &imageType = getType(image);
613 
614 	Object::ID sampleId = 0;
615 
616 	if(insn.wordCount() > 5)
617 	{
618 		int operand = 6;
619 		uint32_t imageOperands = insn.word(5);
620 		if(imageOperands & spv::ImageOperandsSampleMask)
621 		{
622 			sampleId = insn.word(operand++);
623 			imageOperands &= ~spv::ImageOperandsSampleMask;
624 		}
625 		// TODO(b/174475384)
626 		if(imageOperands & spv::ImageOperandsZeroExtendMask)
627 		{
628 			imageOperands &= ~spv::ImageOperandsZeroExtendMask;
629 		}
630 		else if(imageOperands & spv::ImageOperandsSignExtendMask)
631 		{
632 			imageOperands &= ~spv::ImageOperandsSignExtendMask;
633 		}
634 
635 		// Should be no remaining image operands.
636 		if(imageOperands != 0)
637 		{
638 			UNSUPPORTED("Image operands 0x%08X", imageOperands);
639 		}
640 	}
641 
642 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
643 	auto dim = static_cast<spv::Dim>(imageType.definition.word(3));
644 
645 	auto coordinate = Operand(this, state, insn.word(4));
646 	const DescriptorDecorations &d = descriptorDecorations.at(imageId);
647 
648 	// For subpass data, format in the instruction is spv::ImageFormatUnknown. Get it from
649 	// the renderpass data instead. In all other cases, we can use the format in the instruction.
650 	auto vkFormat = (dim == spv::DimSubpassData)
651 	                    ? inputAttachmentFormats[d.InputAttachmentIndex]
652 	                    : SpirvFormatToVulkanFormat(static_cast<spv::ImageFormat>(imageType.definition.word(8)));
653 
654 	// Depth+Stencil image attachments select aspect based on the Sampled Type of the
655 	// OpTypeImage. If float, then we want the depth aspect. If int, we want the stencil aspect.
656 	auto useStencilAspect = (vkFormat == VK_FORMAT_D32_SFLOAT_S8_UINT &&
657 	                         getType(imageType.definition.word(2)).opcode() == spv::OpTypeInt);
658 
659 	if(useStencilAspect)
660 	{
661 		vkFormat = VK_FORMAT_S8_UINT;
662 	}
663 
664 	auto pointer = state->getPointer(imageId);
665 	Pointer<Byte> binding = pointer.base;
666 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + (useStencilAspect
667 	                                                                 ? OFFSET(vk::StorageImageDescriptor, stencilPtr)
668 	                                                                 : OFFSET(vk::StorageImageDescriptor, ptr)));
669 
670 	auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
671 
672 	auto &dst = state->createIntermediate(insn.resultId(), resultType.componentCount);
673 
674 	// VK_EXT_image_robustness requires replacing out-of-bounds access with zero.
675 	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
676 	auto robustness = OutOfBoundsBehavior::Nullify;
677 
678 	auto texelSize = vk::Format(vkFormat).bytes();
679 	auto texelPtr = GetTexelAddress(state, imageBase, imageSizeInBytes, coordinate, imageType, binding, texelSize, sampleId, useStencilAspect, robustness);
680 
681 	// Gather packed texel data. Texels larger than 4 bytes occupy multiple SIMD::Int elements.
682 	// TODO(b/160531165): Provide gather abstractions for various element sizes.
683 	SIMD::Int packed[4];
684 	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
685 	{
686 		for(auto i = 0; i < texelSize / 4; i++)
687 		{
688 			packed[i] = texelPtr.Load<SIMD::Int>(robustness, state->activeLaneMask());
689 			texelPtr += sizeof(float);
690 		}
691 	}
692 	else if(texelSize == 2)
693 	{
694 		SIMD::Int offsets = texelPtr.offsets();
695 		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);
696 
697 		for(int i = 0; i < SIMD::Width; i++)
698 		{
699 			If(Extract(mask, i) != 0)
700 			{
701 				packed[0] = Insert(packed[0], Int(*Pointer<Short>(texelPtr.base + Extract(offsets, i))), i);
702 			}
703 		}
704 	}
705 	else if(texelSize == 1)
706 	{
707 		SIMD::Int offsets = texelPtr.offsets();
708 		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);
709 
710 		for(int i = 0; i < SIMD::Width; i++)
711 		{
712 			If(Extract(mask, i) != 0)
713 			{
714 				packed[0] = Insert(packed[0], Int(*Pointer<Byte>(texelPtr.base + Extract(offsets, i))), i);
715 			}
716 		}
717 	}
718 	else
719 		UNREACHABLE("texelSize: %d", int(texelSize));
720 
721 	// Format support requirements here come from two sources:
722 	// - Minimum required set of formats for loads from storage images
723 	// - Any format supported as a color or depth/stencil attachment, for input attachments
724 	switch(vkFormat)
725 	{
726 		case VK_FORMAT_R32G32B32A32_SFLOAT:
727 		case VK_FORMAT_R32G32B32A32_SINT:
728 		case VK_FORMAT_R32G32B32A32_UINT:
729 			dst.move(0, packed[0]);
730 			dst.move(1, packed[1]);
731 			dst.move(2, packed[2]);
732 			dst.move(3, packed[3]);
733 			break;
734 		case VK_FORMAT_R32_SINT:
735 		case VK_FORMAT_R32_UINT:
736 			dst.move(0, packed[0]);
737 			// Fill remaining channels with 0,0,1 (of the correct type)
738 			dst.move(1, SIMD::Int(0));
739 			dst.move(2, SIMD::Int(0));
740 			dst.move(3, SIMD::Int(1));
741 			break;
742 		case VK_FORMAT_R32_SFLOAT:
743 		case VK_FORMAT_D32_SFLOAT:
744 		case VK_FORMAT_D32_SFLOAT_S8_UINT:
745 			dst.move(0, packed[0]);
746 			// Fill remaining channels with 0,0,1 (of the correct type)
747 			dst.move(1, SIMD::Float(0.0f));
748 			dst.move(2, SIMD::Float(0.0f));
749 			dst.move(3, SIMD::Float(1.0f));
750 			break;
751 		case VK_FORMAT_D16_UNORM:
752 			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
753 			dst.move(1, SIMD::Float(0.0f));
754 			dst.move(2, SIMD::Float(0.0f));
755 			dst.move(3, SIMD::Float(1.0f));
756 			break;
757 		case VK_FORMAT_R16G16B16A16_UNORM:
758 			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
759 			dst.move(1, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
760 			dst.move(2, SIMD::Float(packed[1] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
761 			dst.move(3, SIMD::Float((packed[1] >> 16) & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
762 			break;
763 		case VK_FORMAT_R16G16B16A16_SNORM:
764 			dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
765 			dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
766 			dst.move(2, Max(SIMD::Float((packed[1] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
767 			dst.move(3, Max(SIMD::Float(packed[1] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
768 			break;
769 		case VK_FORMAT_R16G16B16A16_SINT:
770 			dst.move(0, (packed[0] << 16) >> 16);
771 			dst.move(1, packed[0] >> 16);
772 			dst.move(2, (packed[1] << 16) >> 16);
773 			dst.move(3, packed[1] >> 16);
774 			break;
775 		case VK_FORMAT_R16G16B16A16_UINT:
776 			dst.move(0, packed[0] & SIMD::Int(0xFFFF));
777 			dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
778 			dst.move(2, packed[1] & SIMD::Int(0xFFFF));
779 			dst.move(3, (packed[1] >> 16) & SIMD::Int(0xFFFF));
780 			break;
781 		case VK_FORMAT_R16G16B16A16_SFLOAT:
782 			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
783 			dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
784 			dst.move(2, halfToFloatBits(As<SIMD::UInt>(packed[1]) & SIMD::UInt(0x0000FFFF)));
785 			dst.move(3, halfToFloatBits((As<SIMD::UInt>(packed[1]) & SIMD::UInt(0xFFFF0000)) >> 16));
786 			break;
787 		case VK_FORMAT_R8G8B8A8_SNORM:
788 		case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
789 			dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
790 			dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
791 			dst.move(2, Max(SIMD::Float((packed[0] << 8) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
792 			dst.move(3, Max(SIMD::Float((packed[0]) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
793 			break;
794 		case VK_FORMAT_R8G8B8A8_UNORM:
795 		case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
796 			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
797 			dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
798 			dst.move(2, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
799 			dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
800 			break;
801 		case VK_FORMAT_R8G8B8A8_SRGB:
802 		case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
803 			dst.move(0, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
804 			dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
805 			dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
806 			dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
807 			break;
808 		case VK_FORMAT_B8G8R8A8_UNORM:
809 			dst.move(0, SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
810 			dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
811 			dst.move(2, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
812 			dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
813 			break;
814 		case VK_FORMAT_B8G8R8A8_SRGB:
815 			dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
816 			dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
817 			dst.move(2, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
818 			dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
819 			break;
820 		case VK_FORMAT_R8G8B8A8_UINT:
821 		case VK_FORMAT_A8B8G8R8_UINT_PACK32:
822 			dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
823 			dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
824 			dst.move(2, (As<SIMD::UInt>(packed[0]) >> 16) & SIMD::UInt(0xFF));
825 			dst.move(3, (As<SIMD::UInt>(packed[0]) >> 24) & SIMD::UInt(0xFF));
826 			break;
827 		case VK_FORMAT_R8G8B8A8_SINT:
828 		case VK_FORMAT_A8B8G8R8_SINT_PACK32:
829 			dst.move(0, (packed[0] << 24) >> 24);
830 			dst.move(1, (packed[0] << 16) >> 24);
831 			dst.move(2, (packed[0] << 8) >> 24);
832 			dst.move(3, packed[0] >> 24);
833 			break;
834 		case VK_FORMAT_R8_UNORM:
835 			dst.move(0, SIMD::Float((packed[0] & SIMD::Int(0xFF))) * SIMD::Float(1.0f / 0xFF));
836 			dst.move(1, SIMD::Float(0.0f));
837 			dst.move(2, SIMD::Float(0.0f));
838 			dst.move(3, SIMD::Float(1.0f));
839 			break;
840 		case VK_FORMAT_R8_SNORM:
841 			dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
842 			dst.move(1, SIMD::Float(0.0f));
843 			dst.move(2, SIMD::Float(0.0f));
844 			dst.move(3, SIMD::Float(1.0f));
845 			break;
846 		case VK_FORMAT_R8_UINT:
847 		case VK_FORMAT_S8_UINT:
848 			dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
849 			dst.move(1, SIMD::UInt(0));
850 			dst.move(2, SIMD::UInt(0));
851 			dst.move(3, SIMD::UInt(1));
852 			break;
853 		case VK_FORMAT_R8_SINT:
854 			dst.move(0, (packed[0] << 24) >> 24);
855 			dst.move(1, SIMD::Int(0));
856 			dst.move(2, SIMD::Int(0));
857 			dst.move(3, SIMD::Int(1));
858 			break;
859 		case VK_FORMAT_R8G8_UNORM:
860 			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
861 			dst.move(1, SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
862 			dst.move(2, SIMD::Float(0.0f));
863 			dst.move(3, SIMD::Float(1.0f));
864 			break;
865 		case VK_FORMAT_R8G8_SNORM:
866 			dst.move(0, Max(SIMD::Float((packed[0] << 24) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
867 			dst.move(1, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFF000000)) * SIMD::Float(1.0f / 0x7F000000), SIMD::Float(-1.0f)));
868 			dst.move(2, SIMD::Float(0.0f));
869 			dst.move(3, SIMD::Float(1.0f));
870 			break;
871 		case VK_FORMAT_R8G8_UINT:
872 			dst.move(0, As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFF));
873 			dst.move(1, (As<SIMD::UInt>(packed[0]) >> 8) & SIMD::UInt(0xFF));
874 			dst.move(2, SIMD::UInt(0));
875 			dst.move(3, SIMD::UInt(1));
876 			break;
877 		case VK_FORMAT_R8G8_SINT:
878 			dst.move(0, (packed[0] << 24) >> 24);
879 			dst.move(1, (packed[0] << 16) >> 24);
880 			dst.move(2, SIMD::Int(0));
881 			dst.move(3, SIMD::Int(1));
882 			break;
883 		case VK_FORMAT_R16_SFLOAT:
884 			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
885 			dst.move(1, SIMD::Float(0.0f));
886 			dst.move(2, SIMD::Float(0.0f));
887 			dst.move(3, SIMD::Float(1.0f));
888 			break;
889 		case VK_FORMAT_R16_UNORM:
890 			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
891 			dst.move(1, SIMD::Float(0.0f));
892 			dst.move(2, SIMD::Float(0.0f));
893 			dst.move(3, SIMD::Float(1.0f));
894 			break;
895 		case VK_FORMAT_R16_SNORM:
896 			dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
897 			dst.move(1, SIMD::Float(0.0f));
898 			dst.move(2, SIMD::Float(0.0f));
899 			dst.move(3, SIMD::Float(1.0f));
900 			break;
901 		case VK_FORMAT_R16_UINT:
902 			dst.move(0, packed[0] & SIMD::Int(0xFFFF));
903 			dst.move(1, SIMD::UInt(0));
904 			dst.move(2, SIMD::UInt(0));
905 			dst.move(3, SIMD::UInt(1));
906 			break;
907 		case VK_FORMAT_R16_SINT:
908 			dst.move(0, (packed[0] << 16) >> 16);
909 			dst.move(1, SIMD::Int(0));
910 			dst.move(2, SIMD::Int(0));
911 			dst.move(3, SIMD::Int(1));
912 			break;
913 		case VK_FORMAT_R16G16_SFLOAT:
914 			dst.move(0, halfToFloatBits(As<SIMD::UInt>(packed[0]) & SIMD::UInt(0x0000FFFF)));
915 			dst.move(1, halfToFloatBits((As<SIMD::UInt>(packed[0]) & SIMD::UInt(0xFFFF0000)) >> 16));
916 			dst.move(2, SIMD::Float(0.0f));
917 			dst.move(3, SIMD::Float(1.0f));
918 			break;
919 		case VK_FORMAT_R16G16_UNORM:
920 			dst.move(0, SIMD::Float(packed[0] & SIMD::Int(0xFFFF)) * SIMD::Float(1.0f / 0xFFFF));
921 			dst.move(1, SIMD::Float(As<SIMD::UInt>(packed[0]) >> 16) * SIMD::Float(1.0f / 0xFFFF));
922 			dst.move(2, SIMD::Float(0.0f));
923 			dst.move(3, SIMD::Float(1.0f));
924 			break;
925 		case VK_FORMAT_R16G16_SNORM:
926 			dst.move(0, Max(SIMD::Float((packed[0] << 16) & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
927 			dst.move(1, Max(SIMD::Float(packed[0] & SIMD::Int(0xFFFF0000)) * SIMD::Float(1.0f / 0x7FFF0000), SIMD::Float(-1.0f)));
928 			dst.move(2, SIMD::Float(0.0f));
929 			dst.move(3, SIMD::Float(1.0f));
930 			break;
931 		case VK_FORMAT_R16G16_UINT:
932 			dst.move(0, packed[0] & SIMD::Int(0xFFFF));
933 			dst.move(1, (packed[0] >> 16) & SIMD::Int(0xFFFF));
934 			dst.move(2, SIMD::UInt(0));
935 			dst.move(3, SIMD::UInt(1));
936 			break;
937 		case VK_FORMAT_R16G16_SINT:
938 			dst.move(0, (packed[0] << 16) >> 16);
939 			dst.move(1, packed[0] >> 16);
940 			dst.move(2, SIMD::Int(0));
941 			dst.move(3, SIMD::Int(1));
942 			break;
943 		case VK_FORMAT_R32G32_SINT:
944 		case VK_FORMAT_R32G32_UINT:
945 			dst.move(0, packed[0]);
946 			dst.move(1, packed[1]);
947 			dst.move(2, SIMD::Int(0));
948 			dst.move(3, SIMD::Int(1));
949 			break;
950 		case VK_FORMAT_R32G32_SFLOAT:
951 			dst.move(0, packed[0]);
952 			dst.move(1, packed[1]);
953 			dst.move(2, SIMD::Float(0.0f));
954 			dst.move(3, SIMD::Float(1.0f));
955 			break;
956 		case VK_FORMAT_A2B10G10R10_UINT_PACK32:
957 			dst.move(0, packed[0] & SIMD::Int(0x3FF));
958 			dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
959 			dst.move(2, (packed[0] >> 20) & SIMD::Int(0x3FF));
960 			dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
961 			break;
962 		case VK_FORMAT_A2R10G10B10_UINT_PACK32:
963 			dst.move(2, packed[0] & SIMD::Int(0x3FF));
964 			dst.move(1, (packed[0] >> 10) & SIMD::Int(0x3FF));
965 			dst.move(0, (packed[0] >> 20) & SIMD::Int(0x3FF));
966 			dst.move(3, (packed[0] >> 30) & SIMD::Int(0x3));
967 			break;
968 		case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
969 			dst.move(0, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
970 			dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
971 			dst.move(2, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
972 			dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
973 			break;
974 		case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
975 			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
976 			dst.move(1, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
977 			dst.move(0, SIMD::Float((packed[0] >> 20) & SIMD::Int(0x3FF)) * SIMD::Float(1.0f / 0x3FF));
978 			dst.move(3, SIMD::Float((packed[0] >> 30) & SIMD::Int(0x3)) * SIMD::Float(1.0f / 0x3));
979 			break;
980 		case VK_FORMAT_R5G6B5_UNORM_PACK16:
981 			dst.move(0, SIMD::Float((packed[0] >> 11) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
982 			dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x3F)) * SIMD::Float(1.0f / 0x3F));
983 			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
984 			dst.move(3, SIMD::Float(1.0f));
985 			break;
986 		case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
987 			dst.move(0, SIMD::Float((packed[0] >> 10) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
988 			dst.move(1, SIMD::Float((packed[0] >> 5) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
989 			dst.move(2, SIMD::Float((packed[0]) & SIMD::Int(0x1F)) * SIMD::Float(1.0f / 0x1F));
990 			dst.move(3, SIMD::Float((packed[0] >> 15) & SIMD::Int(0x1)));
991 			break;
992 		case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
993 			dst.move(0, halfToFloatBits((packed[0] << 4) & SIMD::Int(0x7FF0)));
994 			dst.move(1, halfToFloatBits((packed[0] >> 7) & SIMD::Int(0x7FF0)));
995 			dst.move(2, halfToFloatBits((packed[0] >> 17) & SIMD::Int(0x7FE0)));
996 			dst.move(3, SIMD::Float(1.0f));
997 			break;
998 		default:
999 			UNSUPPORTED("VkFormat %d", int(vkFormat));
1000 			break;
1001 	}
1002 
1003 	return EmitResult::Continue;
1004 }
1005 
EmitImageWrite(InsnIterator insn,EmitState * state) const1006 SpirvShader::EmitResult SpirvShader::EmitImageWrite(InsnIterator insn, EmitState *state) const
1007 {
1008 	imageWriteEmitted = true;
1009 
1010 	auto imageId = Object::ID(insn.word(1));
1011 	auto &image = getObject(imageId);
1012 	auto &imageType = getType(image);
1013 
1014 	ASSERT(imageType.definition.opcode() == spv::OpTypeImage);
1015 
1016 	Object::ID sampleId = 0;
1017 
1018 	if(insn.wordCount() > 4)
1019 	{
1020 		int operand = 5;
1021 		uint32_t imageOperands = insn.word(4);
1022 		if(imageOperands & spv::ImageOperandsSampleMask)
1023 		{
1024 			sampleId = insn.word(operand++);
1025 			imageOperands &= ~spv::ImageOperandsSampleMask;
1026 		}
1027 		// TODO(b/174475384)
1028 		if(imageOperands & spv::ImageOperandsZeroExtendMask)
1029 		{
1030 			imageOperands &= ~spv::ImageOperandsZeroExtendMask;
1031 		}
1032 		else if(imageOperands & spv::ImageOperandsSignExtendMask)
1033 		{
1034 			imageOperands &= ~spv::ImageOperandsSignExtendMask;
1035 		}
1036 
1037 		// Should be no remaining image operands.
1038 		if(imageOperands != 0)
1039 		{
1040 			UNSUPPORTED("Image operands 0x%08X", (int)imageOperands);
1041 		}
1042 	}
1043 
1044 	auto coordinate = Operand(this, state, insn.word(2));
1045 	auto texel = Operand(this, state, insn.word(3));
1046 
1047 	Pointer<Byte> binding = state->getPointer(imageId).base;
1048 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
1049 	auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
1050 
1051 	SIMD::Int packed[4];
1052 	int texelSize = 0;
1053 	auto format = static_cast<spv::ImageFormat>(imageType.definition.word(8));
1054 	switch(format)
1055 	{
1056 		case spv::ImageFormatRgba32f:
1057 		case spv::ImageFormatRgba32i:
1058 		case spv::ImageFormatRgba32ui:
1059 			texelSize = 16;
1060 			packed[0] = texel.Int(0);
1061 			packed[1] = texel.Int(1);
1062 			packed[2] = texel.Int(2);
1063 			packed[3] = texel.Int(3);
1064 			break;
1065 		case spv::ImageFormatR32f:
1066 		case spv::ImageFormatR32i:
1067 		case spv::ImageFormatR32ui:
1068 			texelSize = 4;
1069 			packed[0] = texel.Int(0);
1070 			break;
1071 		case spv::ImageFormatRgba8:
1072 			texelSize = 4;
1073 			packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
1074 			            ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
1075 			            ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
1076 			            ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
1077 			break;
1078 		case spv::ImageFormatRgba8Snorm:
1079 			texelSize = 4;
1080 			packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1081 			             SIMD::Int(0xFF)) |
1082 			            ((SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1083 			              SIMD::Int(0xFF))
1084 			             << 8) |
1085 			            ((SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1086 			              SIMD::Int(0xFF))
1087 			             << 16) |
1088 			            ((SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
1089 			              SIMD::Int(0xFF))
1090 			             << 24);
1091 			break;
1092 		case spv::ImageFormatRgba8i:
1093 		case spv::ImageFormatRgba8ui:
1094 			texelSize = 4;
1095 			packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xff))) |
1096 			            (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xff)) << 8) |
1097 			            (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xff)) << 16) |
1098 			            (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xff)) << 24);
1099 			break;
1100 		case spv::ImageFormatRgba16f:
1101 			texelSize = 8;
1102 			packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
1103 			packed[1] = floatToHalfBits(texel.UInt(2), false) | floatToHalfBits(texel.UInt(3), true);
1104 			break;
1105 		case spv::ImageFormatRgba16i:
1106 		case spv::ImageFormatRgba16ui:
1107 			texelSize = 8;
1108 			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xFFFF)) << 16);
1109 			packed[1] = SIMD::UInt(texel.UInt(2) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0xFFFF)) << 16);
1110 			break;
1111 		case spv::ImageFormatRg32f:
1112 		case spv::ImageFormatRg32i:
1113 		case spv::ImageFormatRg32ui:
1114 			texelSize = 8;
1115 			packed[0] = texel.Int(0);
1116 			packed[1] = texel.Int(1);
1117 			break;
1118 		case spv::ImageFormatRg16f:
1119 			texelSize = 4;
1120 			packed[0] = floatToHalfBits(texel.UInt(0), false) | floatToHalfBits(texel.UInt(1), true);
1121 			break;
1122 		case spv::ImageFormatRg16i:
1123 		case spv::ImageFormatRg16ui:
1124 			texelSize = 4;
1125 			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFFFF)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xFFFF)) << 16);
1126 			break;
1127 		case spv::ImageFormatR11fG11fB10f:
1128 			texelSize = 4;
1129 			// Truncates instead of rounding. See b/147900455
1130 			packed[0] = ((floatToHalfBits(As<SIMD::UInt>(Max(texel.Float(0), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) >> 4) |
1131 			            ((floatToHalfBits(As<SIMD::UInt>(Max(texel.Float(1), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FF0)) << 7) |
1132 			            ((floatToHalfBits(As<SIMD::UInt>(Max(texel.Float(2), SIMD::Float(0.0f))), false) & SIMD::UInt(0x7FE0)) << 17);
1133 			break;
1134 		case spv::ImageFormatR16f:
1135 			texelSize = 2;
1136 			packed[0] = floatToHalfBits(texel.UInt(0), false);
1137 			break;
1138 		case spv::ImageFormatRgba16:
1139 			texelSize = 8;
1140 			packed[0] = SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1141 			            (SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1142 			packed[1] = SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1143 			            (SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1144 			break;
1145 		case spv::ImageFormatRgb10A2:
1146 			texelSize = 4;
1147 			packed[0] = (SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) |
1148 			            ((SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 10) |
1149 			            ((SIMD::UInt(Round(Min(Max(texel.Float(2), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3FF)))) << 20) |
1150 			            ((SIMD::UInt(Round(Min(Max(texel.Float(3), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x3)))) << 30);
1151 			break;
1152 		case spv::ImageFormatRg16:
1153 			texelSize = 4;
1154 			packed[0] = SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) |
1155 			            (SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF))) << 16);
1156 			break;
1157 		case spv::ImageFormatRg8:
1158 			texelSize = 2;
1159 			packed[0] = SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) |
1160 			            (SIMD::UInt(Round(Min(Max(texel.Float(1), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF))) << 8);
1161 			break;
1162 		case spv::ImageFormatR16:
1163 			texelSize = 2;
1164 			packed[0] = SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFFFF)));
1165 			break;
1166 		case spv::ImageFormatR8:
1167 			texelSize = 1;
1168 			packed[0] = SIMD::UInt(Round(Min(Max(texel.Float(0), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(0xFF)));
1169 			break;
1170 		case spv::ImageFormatRgba16Snorm:
1171 			texelSize = 8;
1172 			packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1173 			            (SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1174 			packed[1] = (SIMD::Int(Round(Min(Max(texel.Float(2), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1175 			            (SIMD::Int(Round(Min(Max(texel.Float(3), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1176 			break;
1177 		case spv::ImageFormatRg16Snorm:
1178 			texelSize = 4;
1179 			packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) & SIMD::Int(0xFFFF)) |
1180 			            (SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF))) << 16);
1181 			break;
1182 		case spv::ImageFormatRg8Snorm:
1183 			texelSize = 2;
1184 			packed[0] = (SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) & SIMD::Int(0xFF)) |
1185 			            (SIMD::Int(Round(Min(Max(texel.Float(1), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F))) << 8);
1186 			break;
1187 		case spv::ImageFormatR16Snorm:
1188 			texelSize = 2;
1189 			packed[0] = SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7FFF)));
1190 			break;
1191 		case spv::ImageFormatR8Snorm:
1192 			texelSize = 1;
1193 			packed[0] = SIMD::Int(Round(Min(Max(texel.Float(0), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(0x7F)));
1194 			break;
1195 		case spv::ImageFormatRg8i:
1196 		case spv::ImageFormatRg8ui:
1197 			texelSize = 2;
1198 			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFF)) | (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0xFF)) << 8);
1199 			break;
1200 		case spv::ImageFormatR16i:
1201 		case spv::ImageFormatR16ui:
1202 			texelSize = 2;
1203 			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFFFF));
1204 			break;
1205 		case spv::ImageFormatR8i:
1206 		case spv::ImageFormatR8ui:
1207 			texelSize = 1;
1208 			packed[0] = SIMD::UInt(texel.UInt(0) & SIMD::UInt(0xFF));
1209 			break;
1210 		case spv::ImageFormatRgb10a2ui:
1211 			texelSize = 4;
1212 			packed[0] = (SIMD::UInt(texel.UInt(0) & SIMD::UInt(0x3FF))) |
1213 			            (SIMD::UInt(texel.UInt(1) & SIMD::UInt(0x3FF)) << 10) |
1214 			            (SIMD::UInt(texel.UInt(2) & SIMD::UInt(0x3FF)) << 20) |
1215 			            (SIMD::UInt(texel.UInt(3) & SIMD::UInt(0x3)) << 30);
1216 			break;
1217 		default:
1218 			UNSUPPORTED("spv::ImageFormat %d", int(format));
1219 			break;
1220 	}
1221 
1222 	// "The integer texel coordinates are validated according to the same rules as for texel input coordinate
1223 	//  validation. If the texel fails integer texel coordinate validation, then the write has no effect."
1224 	// - https://www.khronos.org/registry/vulkan/specs/1.2/html/chap16.html#textures-output-coordinate-validation
1225 	auto robustness = OutOfBoundsBehavior::Nullify;
1226 
1227 	auto texelPtr = GetTexelAddress(state, imageBase, imageSizeInBytes, coordinate, imageType, binding, texelSize, sampleId, false, robustness);
1228 
1229 	// Scatter packed texel data.
1230 	// TODO(b/160531165): Provide scatter abstractions for various element sizes.
1231 	if(texelSize == 4 || texelSize == 8 || texelSize == 16)
1232 	{
1233 		for(auto i = 0; i < texelSize / 4; i++)
1234 		{
1235 			texelPtr.Store(packed[i], robustness, state->activeLaneMask());
1236 			texelPtr += sizeof(float);
1237 		}
1238 	}
1239 	else if(texelSize == 2)
1240 	{
1241 		SIMD::Int offsets = texelPtr.offsets();
1242 		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(2, robustness);
1243 
1244 		for(int i = 0; i < SIMD::Width; i++)
1245 		{
1246 			If(Extract(mask, i) != 0)
1247 			{
1248 				*Pointer<Short>(texelPtr.base + Extract(offsets, i)) = Short(Extract(packed[0], i));
1249 			}
1250 		}
1251 	}
1252 	else if(texelSize == 1)
1253 	{
1254 		SIMD::Int offsets = texelPtr.offsets();
1255 		SIMD::Int mask = state->activeLaneMask() & texelPtr.isInBounds(1, robustness);
1256 
1257 		for(int i = 0; i < SIMD::Width; i++)
1258 		{
1259 			If(Extract(mask, i) != 0)
1260 			{
1261 				*Pointer<Byte>(texelPtr.base + Extract(offsets, i)) = Byte(Extract(packed[0], i));
1262 			}
1263 		}
1264 	}
1265 	else
1266 		UNREACHABLE("texelSize: %d", int(texelSize));
1267 
1268 	return EmitResult::Continue;
1269 }
1270 
EmitImageTexelPointer(InsnIterator insn,EmitState * state) const1271 SpirvShader::EmitResult SpirvShader::EmitImageTexelPointer(InsnIterator insn, EmitState *state) const
1272 {
1273 	auto &resultType = getType(Type::ID(insn.word(1)));
1274 	auto imageId = Object::ID(insn.word(3));
1275 	auto &image = getObject(imageId);
1276 	// Note: OpImageTexelPointer is unusual in that the image is passed by pointer.
1277 	// Look through to get the actual image type.
1278 	auto &imageType = getType(getType(image).element);
1279 	Object::ID resultId = insn.word(2);
1280 
1281 	ASSERT(imageType.opcode() == spv::OpTypeImage);
1282 	ASSERT(resultType.storageClass == spv::StorageClassImage);
1283 	ASSERT(getType(resultType.element).opcode() == spv::OpTypeInt);
1284 
1285 	auto coordinate = Operand(this, state, insn.word(4));
1286 	Object::ID sampleId = insn.word(5);
1287 
1288 	Pointer<Byte> binding = state->getPointer(imageId).base;
1289 	Pointer<Byte> imageBase = *Pointer<Pointer<Byte>>(binding + OFFSET(vk::StorageImageDescriptor, ptr));
1290 	auto imageSizeInBytes = *Pointer<Int>(binding + OFFSET(vk::StorageImageDescriptor, sizeInBytes));
1291 
1292 	// VK_EXT_image_robustness requires checking for out-of-bounds accesses.
1293 	// TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
1294 	auto robustness = OutOfBoundsBehavior::Nullify;
1295 
1296 	auto ptr = GetTexelAddress(state, imageBase, imageSizeInBytes, coordinate, imageType, binding, sizeof(uint32_t), sampleId, false, robustness);
1297 
1298 	state->createPointer(resultId, ptr);
1299 
1300 	return EmitResult::Continue;
1301 }
1302 
EmitSampledImageCombineOrSplit(InsnIterator insn,EmitState * state) const1303 SpirvShader::EmitResult SpirvShader::EmitSampledImageCombineOrSplit(InsnIterator insn, EmitState *state) const
1304 {
1305 	// Propagate the image pointer in both cases.
1306 	// Consumers of OpSampledImage will look through to find the sampler pointer.
1307 
1308 	Object::ID resultId = insn.word(2);
1309 	Object::ID imageId = insn.word(3);
1310 
1311 	state->createPointer(resultId, state->getPointer(imageId));
1312 
1313 	return EmitResult::Continue;
1314 }
1315 
1316 }  // namespace sw