1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Reactor.hpp"
16 
17 #include "x86.hpp"
18 #include "CPUID.hpp"
19 #include "Thread.hpp"
20 #include "ExecutableMemory.hpp"
21 #include "MutexLock.hpp"
22 
23 #undef min
24 #undef max
25 
26 #if REACTOR_LLVM_VERSION < 7
27 	#include "llvm/Analysis/LoopPass.h"
28 	#include "llvm/Constants.h"
29 	#include "llvm/Function.h"
30 	#include "llvm/GlobalVariable.h"
31 	#include "llvm/Intrinsics.h"
32 	#include "llvm/LLVMContext.h"
33 	#include "llvm/Module.h"
34 	#include "llvm/PassManager.h"
35 	#include "llvm/Support/IRBuilder.h"
36 	#include "llvm/Support/TargetSelect.h"
37 	#include "llvm/Target/TargetData.h"
38 	#include "llvm/Target/TargetOptions.h"
39 	#include "llvm/Transforms/Scalar.h"
40 	#include "../lib/ExecutionEngine/JIT/JIT.h"
41 
42 	#include "LLVMRoutine.hpp"
43 	#include "LLVMRoutineManager.hpp"
44 
45 	#define ARGS(...) __VA_ARGS__
46 #else
47 	#include "llvm/Analysis/LoopPass.h"
48 	#include "llvm/ExecutionEngine/ExecutionEngine.h"
49 	#include "llvm/ExecutionEngine/JITSymbol.h"
50 	#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
51 	#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
52 	#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
53 	#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
54 	#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
55 	#include "llvm/ExecutionEngine/SectionMemoryManager.h"
56 	#include "llvm/IR/Constants.h"
57 	#include "llvm/IR/DataLayout.h"
58 	#include "llvm/IR/Function.h"
59 	#include "llvm/IR/GlobalVariable.h"
60 	#include "llvm/IR/IRBuilder.h"
61 	#include "llvm/IR/Intrinsics.h"
62 	#include "llvm/IR/LLVMContext.h"
63 	#include "llvm/IR/LegacyPassManager.h"
64 	#include "llvm/IR/Mangler.h"
65 	#include "llvm/IR/Module.h"
66 	#include "llvm/Support/Error.h"
67 	#include "llvm/Support/TargetSelect.h"
68 	#include "llvm/Target/TargetOptions.h"
69 	#include "llvm/Transforms/InstCombine/InstCombine.h"
70 	#include "llvm/Transforms/Scalar.h"
71 	#include "llvm/Transforms/Scalar/GVN.h"
72 
73 	#include "LLVMRoutine.hpp"
74 
75 	#define ARGS(...) {__VA_ARGS__}
76 	#define CreateCall2 CreateCall
77 	#define CreateCall3 CreateCall
78 
79 	#include <unordered_map>
80 #endif
81 
82 #include <numeric>
83 #include <fstream>
84 
85 #if defined(__i386__) || defined(__x86_64__)
86 #include <xmmintrin.h>
87 #endif
88 
89 #include <math.h>
90 
91 #if defined(__x86_64__) && defined(_WIN32)
// Unimplemented stub for the X86CompilationCallback symbol. It is only compiled
// when both __x86_64__ and _WIN32 are defined, and any call trips the assertion
// below (in builds where assert is active).
extern "C" void X86CompilationCallback()
{
	assert(false);   // UNIMPLEMENTED
}
96 #endif
97 
98 #if REACTOR_LLVM_VERSION < 7
namespace llvm
{
	// Declared here so that Nucleus's constructor can clear it before the JIT
	// is created. The definition is presumably provided by the pre-7 LLVM JIT
	// library - TODO confirm.
	extern bool JITEmitDebugInfo;
}
103 #endif
104 
namespace rr
{
	// Forward declaration: the file-local reactorJIT pointer is declared before
	// the class itself is defined.
	class LLVMReactorJIT;
}
109 
110 namespace
111 {
	// File-local code generation state shared by the functions in this file.
	// Everything starts out null. Nucleus's constructor lazily creates the JIT,
	// the context and the builder, and the JIT's startSession() allocates a new
	// module; endSession() resets `module` and `function` to null.
	rr::LLVMReactorJIT *reactorJIT = nullptr;
	llvm::IRBuilder<> *builder = nullptr;
	llvm::LLVMContext *context = nullptr;
	llvm::Module *module = nullptr;
	llvm::Function *function = nullptr;

	// Locked in Nucleus's constructor and unlocked in its destructor, because
	// Reactor and LLVM are not thread safe (see the constructor).
	rr::MutexLock codegenMutex;
119 
120 #if REACTOR_LLVM_VERSION >= 7
lowerPAVG(llvm::Value * x,llvm::Value * y)121 	llvm::Value *lowerPAVG(llvm::Value *x, llvm::Value *y)
122 	{
123 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
124 
125 		llvm::VectorType *extTy =
126 			llvm::VectorType::getExtendedElementVectorType(ty);
127 		x = ::builder->CreateZExt(x, extTy);
128 		y = ::builder->CreateZExt(y, extTy);
129 
130 		// (x + y + 1) >> 1
131 		llvm::Constant *one = llvm::ConstantInt::get(extTy, 1);
132 		llvm::Value *res = ::builder->CreateAdd(x, y);
133 		res = ::builder->CreateAdd(res, one);
134 		res = ::builder->CreateLShr(res, one);
135 		return ::builder->CreateTrunc(res, ty);
136 	}
137 
lowerPMINMAX(llvm::Value * x,llvm::Value * y,llvm::ICmpInst::Predicate pred)138 	llvm::Value *lowerPMINMAX(llvm::Value *x, llvm::Value *y,
139 	                          llvm::ICmpInst::Predicate pred)
140 	{
141 		return ::builder->CreateSelect(::builder->CreateICmp(pred, x, y), x, y);
142 	}
143 
lowerPCMP(llvm::ICmpInst::Predicate pred,llvm::Value * x,llvm::Value * y,llvm::Type * dstTy)144 	llvm::Value *lowerPCMP(llvm::ICmpInst::Predicate pred, llvm::Value *x,
145 	                       llvm::Value *y, llvm::Type *dstTy)
146 	{
147 		return ::builder->CreateSExt(::builder->CreateICmp(pred, x, y), dstTy, "");
148 	}
149 
150 #if defined(__i386__) || defined(__x86_64__)
lowerPMOV(llvm::Value * op,llvm::Type * dstType,bool sext)151 	llvm::Value *lowerPMOV(llvm::Value *op, llvm::Type *dstType, bool sext)
152 	{
153 		llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(op->getType());
154 		llvm::VectorType *dstTy = llvm::cast<llvm::VectorType>(dstType);
155 
156 		llvm::Value *undef = llvm::UndefValue::get(srcTy);
157 		llvm::SmallVector<uint32_t, 16> mask(dstTy->getNumElements());
158 		std::iota(mask.begin(), mask.end(), 0);
159 		llvm::Value *v = ::builder->CreateShuffleVector(op, undef, mask);
160 
161 		return sext ? ::builder->CreateSExt(v, dstTy)
162 		            : ::builder->CreateZExt(v, dstTy);
163 	}
164 
lowerPABS(llvm::Value * v)165 	llvm::Value *lowerPABS(llvm::Value *v)
166 	{
167 		llvm::Value *zero = llvm::Constant::getNullValue(v->getType());
168 		llvm::Value *cmp = ::builder->CreateICmp(llvm::ICmpInst::ICMP_SGT, v, zero);
169 		llvm::Value *neg = ::builder->CreateNeg(v);
170 		return ::builder->CreateSelect(cmp, v, neg);
171 	}
172 #endif  // defined(__i386__) || defined(__x86_64__)
173 
174 #if !defined(__i386__) && !defined(__x86_64__)
lowerPFMINMAX(llvm::Value * x,llvm::Value * y,llvm::FCmpInst::Predicate pred)175 	llvm::Value *lowerPFMINMAX(llvm::Value *x, llvm::Value *y,
176 	                           llvm::FCmpInst::Predicate pred)
177 	{
178 		return ::builder->CreateSelect(::builder->CreateFCmp(pred, x, y), x, y);
179 	}
180 
lowerRound(llvm::Value * x)181 	llvm::Value *lowerRound(llvm::Value *x)
182 	{
183 		llvm::Function *nearbyint = llvm::Intrinsic::getDeclaration(
184 			::module, llvm::Intrinsic::nearbyint, {x->getType()});
185 		return ::builder->CreateCall(nearbyint, ARGS(x));
186 	}
187 
lowerRoundInt(llvm::Value * x,llvm::Type * ty)188 	llvm::Value *lowerRoundInt(llvm::Value *x, llvm::Type *ty)
189 	{
190 		return ::builder->CreateFPToSI(lowerRound(x), ty);
191 	}
192 
lowerFloor(llvm::Value * x)193 	llvm::Value *lowerFloor(llvm::Value *x)
194 	{
195 		llvm::Function *floor = llvm::Intrinsic::getDeclaration(
196 			::module, llvm::Intrinsic::floor, {x->getType()});
197 		return ::builder->CreateCall(floor, ARGS(x));
198 	}
199 
lowerTrunc(llvm::Value * x)200 	llvm::Value *lowerTrunc(llvm::Value *x)
201 	{
202 		llvm::Function *trunc = llvm::Intrinsic::getDeclaration(
203 			::module, llvm::Intrinsic::trunc, {x->getType()});
204 		return ::builder->CreateCall(trunc, ARGS(x));
205 	}
206 
207 	// Packed add/sub saturatation
lowerPSAT(llvm::Value * x,llvm::Value * y,bool isAdd,bool isSigned)208 	llvm::Value *lowerPSAT(llvm::Value *x, llvm::Value *y, bool isAdd, bool isSigned)
209 	{
210 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
211 		llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
212 
213 		unsigned numBits = ty->getScalarSizeInBits();
214 
215 		llvm::Value *max, *min, *extX, *extY;
216 		if (isSigned)
217 		{
218 			max = llvm::ConstantInt::get(extTy, (1LL << (numBits - 1)) - 1, true);
219 			min = llvm::ConstantInt::get(extTy, (-1LL << (numBits - 1)), true);
220 			extX = ::builder->CreateSExt(x, extTy);
221 			extY = ::builder->CreateSExt(y, extTy);
222 		}
223 		else
224 		{
225 			assert(numBits <= 64);
226 			uint64_t maxVal = (numBits == 64) ? ~0ULL : (1ULL << numBits) - 1;
227 			max = llvm::ConstantInt::get(extTy, maxVal, false);
228 			min = llvm::ConstantInt::get(extTy, 0, false);
229 			extX = ::builder->CreateZExt(x, extTy);
230 			extY = ::builder->CreateZExt(y, extTy);
231 		}
232 
233 		llvm::Value *res = isAdd ? ::builder->CreateAdd(extX, extY)
234 		                         : ::builder->CreateSub(extX, extY);
235 
236 		res = lowerPMINMAX(res, min, llvm::ICmpInst::ICMP_SGT);
237 		res = lowerPMINMAX(res, max, llvm::ICmpInst::ICMP_SLT);
238 
239 		return ::builder->CreateTrunc(res, ty);
240 	}
241 
lowerPUADDSAT(llvm::Value * x,llvm::Value * y)242 	llvm::Value *lowerPUADDSAT(llvm::Value *x, llvm::Value *y)
243 	{
244 		return lowerPSAT(x, y, true, false);
245 	}
246 
lowerPSADDSAT(llvm::Value * x,llvm::Value * y)247 	llvm::Value *lowerPSADDSAT(llvm::Value *x, llvm::Value *y)
248 	{
249 		return lowerPSAT(x, y, true, true);
250 	}
251 
lowerPUSUBSAT(llvm::Value * x,llvm::Value * y)252 	llvm::Value *lowerPUSUBSAT(llvm::Value *x, llvm::Value *y)
253 	{
254 		return lowerPSAT(x, y, false, false);
255 	}
256 
lowerPSSUBSAT(llvm::Value * x,llvm::Value * y)257 	llvm::Value *lowerPSSUBSAT(llvm::Value *x, llvm::Value *y)
258 	{
259 		return lowerPSAT(x, y, false, true);
260 	}
261 
lowerSQRT(llvm::Value * x)262 	llvm::Value *lowerSQRT(llvm::Value *x)
263 	{
264 		llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(
265 			::module, llvm::Intrinsic::sqrt, {x->getType()});
266 		return ::builder->CreateCall(sqrt, ARGS(x));
267 	}
268 
lowerRCP(llvm::Value * x)269 	llvm::Value *lowerRCP(llvm::Value *x)
270 	{
271 		llvm::Type *ty = x->getType();
272 		llvm::Constant *one;
273 		if (llvm::VectorType *vectorTy = llvm::dyn_cast<llvm::VectorType>(ty))
274 		{
275 			one = llvm::ConstantVector::getSplat(
276 				vectorTy->getNumElements(),
277 				llvm::ConstantFP::get(vectorTy->getElementType(), 1));
278 		}
279 		else
280 		{
281 			one = llvm::ConstantFP::get(ty, 1);
282 		}
283 		return ::builder->CreateFDiv(one, x);
284 	}
285 
lowerRSQRT(llvm::Value * x)286 	llvm::Value *lowerRSQRT(llvm::Value *x)
287 	{
288 		return lowerRCP(lowerSQRT(x));
289 	}
290 
lowerVectorShl(llvm::Value * x,uint64_t scalarY)291 	llvm::Value *lowerVectorShl(llvm::Value *x, uint64_t scalarY)
292 	{
293 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
294 		llvm::Value *y = llvm::ConstantVector::getSplat(
295 			ty->getNumElements(),
296 			llvm::ConstantInt::get(ty->getElementType(), scalarY));
297 		return ::builder->CreateShl(x, y);
298 	}
299 
lowerVectorAShr(llvm::Value * x,uint64_t scalarY)300 	llvm::Value *lowerVectorAShr(llvm::Value *x, uint64_t scalarY)
301 	{
302 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
303 		llvm::Value *y = llvm::ConstantVector::getSplat(
304 			ty->getNumElements(),
305 			llvm::ConstantInt::get(ty->getElementType(), scalarY));
306 		return ::builder->CreateAShr(x, y);
307 	}
308 
lowerVectorLShr(llvm::Value * x,uint64_t scalarY)309 	llvm::Value *lowerVectorLShr(llvm::Value *x, uint64_t scalarY)
310 	{
311 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
312 		llvm::Value *y = llvm::ConstantVector::getSplat(
313 			ty->getNumElements(),
314 			llvm::ConstantInt::get(ty->getElementType(), scalarY));
315 		return ::builder->CreateLShr(x, y);
316 	}
317 
lowerMulAdd(llvm::Value * x,llvm::Value * y)318 	llvm::Value *lowerMulAdd(llvm::Value *x, llvm::Value *y)
319 	{
320 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
321 		llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
322 
323 		llvm::Value *extX = ::builder->CreateSExt(x, extTy);
324 		llvm::Value *extY = ::builder->CreateSExt(y, extTy);
325 		llvm::Value *mult = ::builder->CreateMul(extX, extY);
326 
327 		llvm::Value *undef = llvm::UndefValue::get(extTy);
328 
329 		llvm::SmallVector<uint32_t, 16> evenIdx;
330 		llvm::SmallVector<uint32_t, 16> oddIdx;
331 		for (uint64_t i = 0, n = ty->getNumElements(); i < n; i += 2)
332 		{
333 			evenIdx.push_back(i);
334 			oddIdx.push_back(i + 1);
335 		}
336 
337 		llvm::Value *lhs = ::builder->CreateShuffleVector(mult, undef, evenIdx);
338 		llvm::Value *rhs = ::builder->CreateShuffleVector(mult, undef, oddIdx);
339 		return ::builder->CreateAdd(lhs, rhs);
340 	}
341 
lowerMulHigh(llvm::Value * x,llvm::Value * y,bool sext)342 	llvm::Value *lowerMulHigh(llvm::Value *x, llvm::Value *y, bool sext)
343 	{
344 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
345 		llvm::VectorType *extTy = llvm::VectorType::getExtendedElementVectorType(ty);
346 
347 		llvm::Value *extX, *extY;
348 		if (sext)
349 		{
350 			extX = ::builder->CreateSExt(x, extTy);
351 			extY = ::builder->CreateSExt(y, extTy);
352 		}
353 		else
354 		{
355 			extX = ::builder->CreateZExt(x, extTy);
356 			extY = ::builder->CreateZExt(y, extTy);
357 		}
358 
359 		llvm::Value *mult = ::builder->CreateMul(extX, extY);
360 
361 		llvm::IntegerType *intTy = llvm::cast<llvm::IntegerType>(ty->getElementType());
362 		llvm::Value *mulh = ::builder->CreateAShr(mult, intTy->getIntegerBitWidth());
363 		return ::builder->CreateTrunc(mulh, ty);
364 	}
365 
lowerPack(llvm::Value * x,llvm::Value * y,bool isSigned)366 	llvm::Value *lowerPack(llvm::Value *x, llvm::Value *y, bool isSigned)
367 	{
368 		llvm::VectorType *srcTy = llvm::cast<llvm::VectorType>(x->getType());
369 		llvm::VectorType *dstTy = llvm::VectorType::getTruncatedElementVectorType(srcTy);
370 
371 		llvm::IntegerType *dstElemTy =
372 			llvm::cast<llvm::IntegerType>(dstTy->getElementType());
373 
374 		uint64_t truncNumBits = dstElemTy->getIntegerBitWidth();
375 		assert(truncNumBits < 64 && "shift 64 must be handled separately");
376 		llvm::Constant *max, *min;
377 		if (isSigned)
378 		{
379 			max = llvm::ConstantInt::get(srcTy, (1LL << (truncNumBits - 1)) - 1, true);
380 			min = llvm::ConstantInt::get(srcTy, (-1LL << (truncNumBits - 1)), true);
381 		}
382 		else
383 		{
384 			max = llvm::ConstantInt::get(srcTy, (1ULL << truncNumBits) - 1, false);
385 			min = llvm::ConstantInt::get(srcTy, 0, false);
386 		}
387 
388 		x = lowerPMINMAX(x, min, llvm::ICmpInst::ICMP_SGT);
389 		x = lowerPMINMAX(x, max, llvm::ICmpInst::ICMP_SLT);
390 		y = lowerPMINMAX(y, min, llvm::ICmpInst::ICMP_SGT);
391 		y = lowerPMINMAX(y, max, llvm::ICmpInst::ICMP_SLT);
392 
393 		x = ::builder->CreateTrunc(x, dstTy);
394 		y = ::builder->CreateTrunc(y, dstTy);
395 
396 		llvm::SmallVector<uint32_t, 16> index(srcTy->getNumElements() * 2);
397 		std::iota(index.begin(), index.end(), 0);
398 
399 		return ::builder->CreateShuffleVector(x, y, index);
400 	}
401 
lowerSignMask(llvm::Value * x,llvm::Type * retTy)402 	llvm::Value *lowerSignMask(llvm::Value *x, llvm::Type *retTy)
403 	{
404 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
405 		llvm::Constant *zero = llvm::ConstantInt::get(ty, 0);
406 		llvm::Value *cmp = ::builder->CreateICmpSLT(x, zero);
407 
408 		llvm::Value *ret = ::builder->CreateZExt(
409 			::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
410 		for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
411 		{
412 			llvm::Value *elem = ::builder->CreateZExt(
413 				::builder->CreateExtractElement(cmp, i), retTy);
414 			ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
415 		}
416 		return ret;
417 	}
418 
lowerFPSignMask(llvm::Value * x,llvm::Type * retTy)419 	llvm::Value *lowerFPSignMask(llvm::Value *x, llvm::Type *retTy)
420 	{
421 		llvm::VectorType *ty = llvm::cast<llvm::VectorType>(x->getType());
422 		llvm::Constant *zero = llvm::ConstantFP::get(ty, 0);
423 		llvm::Value *cmp = ::builder->CreateFCmpULT(x, zero);
424 
425 		llvm::Value *ret = ::builder->CreateZExt(
426 			::builder->CreateExtractElement(cmp, static_cast<uint64_t>(0)), retTy);
427 		for (uint64_t i = 1, n = ty->getNumElements(); i < n; ++i)
428 		{
429 			llvm::Value *elem = ::builder->CreateZExt(
430 				::builder->CreateExtractElement(cmp, i), retTy);
431 			ret = ::builder->CreateOr(ret, ::builder->CreateShl(elem, i));
432 		}
433 		return ret;
434 	}
435 #endif  // !defined(__i386__) && !defined(__x86_64__)
436 #endif  // REACTOR_LLVM_VERSION >= 7
437 }
438 
439 namespace rr
440 {
441 #if REACTOR_LLVM_VERSION < 7
442 	class LLVMReactorJIT
443 	{
444 	private:
445 		std::string arch;
446 		llvm::SmallVector<std::string, 16> mattrs;
447 		llvm::ExecutionEngine *executionEngine;
448 		LLVMRoutineManager *routineManager;
449 
450 	public:
LLVMReactorJIT(const std::string & arch_,const llvm::SmallVectorImpl<std::string> & mattrs_)451 		LLVMReactorJIT(const std::string &arch_,
452 		               const llvm::SmallVectorImpl<std::string> &mattrs_) :
453 			arch(arch_),
454 			mattrs(mattrs_.begin(), mattrs_.end()),
455 			executionEngine(nullptr),
456 			routineManager(nullptr)
457 		{
458 		}
459 
startSession()460 		void startSession()
461 		{
462 			std::string error;
463 
464 			::module = new llvm::Module("", *::context);
465 
466 			routineManager = new LLVMRoutineManager();
467 
468 			llvm::TargetMachine *targetMachine =
469 				llvm::EngineBuilder::selectTarget(
470 					::module, arch, "", mattrs, llvm::Reloc::Default,
471 					llvm::CodeModel::JITDefault, &error);
472 
473 			executionEngine = llvm::JIT::createJIT(
474 				::module, &error, routineManager, llvm::CodeGenOpt::Aggressive,
475 				true, targetMachine);
476 		}
477 
endSession()478 		void endSession()
479 		{
480 			delete executionEngine;
481 			executionEngine = nullptr;
482 			routineManager = nullptr;
483 
484 			::function = nullptr;
485 			::module = nullptr;
486 		}
487 
acquireRoutine(llvm::Function * func)488 		LLVMRoutine *acquireRoutine(llvm::Function *func)
489 		{
490 			void *entry = executionEngine->getPointerToFunction(::function);
491 			return routineManager->acquireRoutine(entry);
492 		}
493 
optimize(llvm::Module * module)494 		void optimize(llvm::Module *module)
495 		{
496 			static llvm::PassManager *passManager = nullptr;
497 
498 			if(!passManager)
499 			{
500 				passManager = new llvm::PassManager();
501 
502 				passManager->add(new llvm::TargetData(*executionEngine->getTargetData()));
503 				passManager->add(llvm::createScalarReplAggregatesPass());
504 
505 				for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
506 				{
507 					switch(optimization[pass])
508 					{
509 					case Disabled:                                                                       break;
510 					case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
511 					case LICM:                 passManager->add(llvm::createLICMPass());                 break;
512 					case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
513 					case GVN:                  passManager->add(llvm::createGVNPass());                  break;
514 					case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
515 					case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
516 					case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
517 					case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
518 					case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
519 					default:
520 						assert(false);
521 					}
522 				}
523 			}
524 
525 			passManager->run(*::module);
526 		}
527 	};
528 #else
529 	class ExternalFunctionSymbolResolver
530 	{
531 	private:
532 		using FunctionMap = std::unordered_map<std::string, void *>;
533 		FunctionMap func_;
534 
535 	public:
536 		ExternalFunctionSymbolResolver()
537 		{
538 			func_.emplace("floorf", reinterpret_cast<void*>(floorf));
539 			func_.emplace("nearbyintf", reinterpret_cast<void*>(nearbyintf));
540 			func_.emplace("truncf", reinterpret_cast<void*>(truncf));
541 		}
542 
543 		void *findSymbol(const std::string &name) const
544 		{
545 			FunctionMap::const_iterator it = func_.find(name);
546 			return (it != func_.end()) ? it->second : nullptr;
547 		}
548 	};
549 
550 	class LLVMReactorJIT
551 	{
552 	private:
553 		using ObjLayer = llvm::orc::RTDyldObjectLinkingLayer;
554 		using CompileLayer = llvm::orc::IRCompileLayer<ObjLayer, llvm::orc::SimpleCompiler>;
555 
556 		llvm::orc::ExecutionSession session;
557 		ExternalFunctionSymbolResolver externalSymbolResolver;
558 		std::shared_ptr<llvm::orc::SymbolResolver> resolver;
559 		std::unique_ptr<llvm::TargetMachine> targetMachine;
560 		const llvm::DataLayout dataLayout;
561 		ObjLayer objLayer;
562 		CompileLayer compileLayer;
563 		size_t emittedFunctionsNum;
564 
565 	public:
566 		LLVMReactorJIT(const char *arch, const llvm::SmallVectorImpl<std::string>& mattrs,
567 					   const llvm::TargetOptions &targetOpts):
568 			resolver(createLegacyLookupResolver(
569 				session,
570 				[this](const std::string &name) {
571 					void *func = externalSymbolResolver.findSymbol(name);
572 					if (func != nullptr)
573 					{
574 						return llvm::JITSymbol(
575 							reinterpret_cast<uintptr_t>(func), llvm::JITSymbolFlags::Absolute);
576 					}
577 
578 					return objLayer.findSymbol(name, true);
579 				},
580 				[](llvm::Error err) {
581 					if (err)
582 					{
583 						// TODO: Log the symbol resolution errors.
584 						return;
585 					}
586 				})),
587 			targetMachine(llvm::EngineBuilder()
588 				.setMArch(arch)
589 				.setMAttrs(mattrs)
590 				.setTargetOptions(targetOpts)
591 				.selectTarget()),
592 			dataLayout(targetMachine->createDataLayout()),
593 			objLayer(
594 				session,
595 				[this](llvm::orc::VModuleKey) {
596 					return ObjLayer::Resources{
597 						std::make_shared<llvm::SectionMemoryManager>(),
598 						resolver};
599 				}),
600 			compileLayer(objLayer, llvm::orc::SimpleCompiler(*targetMachine)),
601 			emittedFunctionsNum(0)
602 		{
603 		}
604 
605 		void startSession()
606 		{
607 			::module = new llvm::Module("", *::context);
608 		}
609 
610 		void endSession()
611 		{
612 			::function = nullptr;
613 			::module = nullptr;
614 		}
615 
616 		LLVMRoutine *acquireRoutine(llvm::Function *func)
617 		{
618 			std::string name = "f" + llvm::Twine(emittedFunctionsNum++).str();
619 			func->setName(name);
620 			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
621 			func->setDoesNotThrow();
622 
623 			std::unique_ptr<llvm::Module> mod(::module);
624 			::module = nullptr;
625 			mod->setDataLayout(dataLayout);
626 
627 			auto moduleKey = session.allocateVModule();
628 			llvm::cantFail(compileLayer.addModule(moduleKey, std::move(mod)));
629 
630 			std::string mangledName;
631 			{
632 				llvm::raw_string_ostream mangledNameStream(mangledName);
633 				llvm::Mangler::getNameWithPrefix(mangledNameStream, name, dataLayout);
634 			}
635 
636 			llvm::JITSymbol symbol = compileLayer.findSymbolIn(moduleKey, mangledName, false);
637 
638 			llvm::Expected<llvm::JITTargetAddress> expectAddr = symbol.getAddress();
639 			if(!expectAddr)
640 			{
641 				return nullptr;
642 			}
643 
644 			void *addr = reinterpret_cast<void *>(static_cast<intptr_t>(expectAddr.get()));
645 			return new LLVMRoutine(addr, releaseRoutineCallback, this, moduleKey);
646 		}
647 
648 		void optimize(llvm::Module *module)
649 		{
650 			std::unique_ptr<llvm::legacy::PassManager> passManager(
651 				new llvm::legacy::PassManager());
652 
653 			passManager->add(llvm::createSROAPass());
654 
655 			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
656 			{
657 				switch(optimization[pass])
658 				{
659 				case Disabled:                                                                       break;
660 				case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
661 				case LICM:                 passManager->add(llvm::createLICMPass());                 break;
662 				case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
663 				case GVN:                  passManager->add(llvm::createGVNPass());                  break;
664 				case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
665 				case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
666 				case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
667 				case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
668 				case ScalarReplAggregates: passManager->add(llvm::createSROAPass());                 break;
669 				default:
670 				                           assert(false);
671 				}
672 			}
673 
674 			passManager->run(*::module);
675 		}
676 
677 	private:
678 		void releaseRoutineModule(llvm::orc::VModuleKey moduleKey)
679 		{
680 			llvm::cantFail(compileLayer.removeModule(moduleKey));
681 		}
682 
683 		static void releaseRoutineCallback(LLVMReactorJIT *jit, uint64_t moduleKey)
684 		{
685 			jit->releaseRoutineModule(moduleKey);
686 		}
687 	};
688 #endif
689 
	// Ordered list of optimization passes applied by LLVMReactorJIT::optimize().
	// The list is read up to the first Disabled entry or ten entries, whichever
	// comes first. Entries not listed here are zero-initialized; presumably
	// Disabled is the zero enumerator - TODO confirm.
	Optimization optimization[10] = {InstructionCombining, Disabled};
691 
	// Tags for vector types shorter than 128 bits. A tag is stored directly in
	// a Type* (see T(EmulatedType)) and recognized by being numerically smaller
	// than EmulatedTypeCount. Lane counts and byte sizes are those reported by
	// elementCount() and typeSize(); the backing types are those chosen in
	// T(Type*).
	enum EmulatedType
	{
		Type_v2i32,          // 2 lanes, 8 bytes; backed by Int4
		Type_v4i16,          // 4 lanes, 8 bytes; backed by Short8
		Type_v2i16,          // 2 lanes, 4 bytes; backed by Short8
		Type_v8i8,           // 8 lanes, 8 bytes; backed by Byte16
		Type_v4i8,           // 4 lanes, 4 bytes; backed by Byte16
		Type_v2f32,          // 2 lanes, 8 bytes; backed by Float4
		EmulatedTypeCount    // Number of tags; not a type itself
	};
702 
T(Type * t)703 	llvm::Type *T(Type *t)
704 	{
705 		uintptr_t type = reinterpret_cast<uintptr_t>(t);
706 		if(type < EmulatedTypeCount)
707 		{
708 			// Use 128-bit vectors to implement logically shorter ones.
709 			switch(type)
710 			{
711 			case Type_v2i32: return T(Int4::getType());
712 			case Type_v4i16: return T(Short8::getType());
713 			case Type_v2i16: return T(Short8::getType());
714 			case Type_v8i8:  return T(Byte16::getType());
715 			case Type_v4i8:  return T(Byte16::getType());
716 			case Type_v2f32: return T(Float4::getType());
717 			default: assert(false);
718 			}
719 		}
720 
721 		return reinterpret_cast<llvm::Type*>(t);
722 	}
723 
T(llvm::Type * t)724 	inline Type *T(llvm::Type *t)
725 	{
726 		return reinterpret_cast<Type*>(t);
727 	}
728 
T(EmulatedType t)729 	Type *T(EmulatedType t)
730 	{
731 		return reinterpret_cast<Type*>(t);
732 	}
733 
V(Value * t)734 	inline llvm::Value *V(Value *t)
735 	{
736 		return reinterpret_cast<llvm::Value*>(t);
737 	}
738 
V(llvm::Value * t)739 	inline Value *V(llvm::Value *t)
740 	{
741 		return reinterpret_cast<Value*>(t);
742 	}
743 
T(std::vector<Type * > & t)744 	inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
745 	{
746 		return reinterpret_cast<std::vector<llvm::Type*>&>(t);
747 	}
748 
B(BasicBlock * t)749 	inline llvm::BasicBlock *B(BasicBlock *t)
750 	{
751 		return reinterpret_cast<llvm::BasicBlock*>(t);
752 	}
753 
B(llvm::BasicBlock * t)754 	inline BasicBlock *B(llvm::BasicBlock *t)
755 	{
756 		return reinterpret_cast<BasicBlock*>(t);
757 	}
758 
typeSize(Type * type)759 	static size_t typeSize(Type *type)
760 	{
761 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
762 		if(t < EmulatedTypeCount)
763 		{
764 			switch(t)
765 			{
766 			case Type_v2i32: return 8;
767 			case Type_v4i16: return 8;
768 			case Type_v2i16: return 4;
769 			case Type_v8i8:  return 8;
770 			case Type_v4i8:  return 4;
771 			case Type_v2f32: return 8;
772 			default: assert(false);
773 			}
774 		}
775 
776 		return T(type)->getPrimitiveSizeInBits() / 8;
777 	}
778 
elementCount(Type * type)779 	static unsigned int elementCount(Type *type)
780 	{
781 		uintptr_t t = reinterpret_cast<uintptr_t>(type);
782 		if(t < EmulatedTypeCount)
783 		{
784 			switch(t)
785 			{
786 			case Type_v2i32: return 2;
787 			case Type_v4i16: return 4;
788 			case Type_v2i16: return 2;
789 			case Type_v8i8:  return 8;
790 			case Type_v4i8:  return 4;
791 			case Type_v2f32: return 2;
792 			default: assert(false);
793 			}
794 		}
795 
796 		return llvm::cast<llvm::VectorType>(T(type))->getNumElements();
797 	}
798 
	// Begins a code generation session. Takes the global codegen lock (released
	// in the destructor), initializes the native LLVM target, lazily creates
	// the shared context, JIT and IR builder, and starts a new JIT session.
	Nucleus::Nucleus()
	{
		::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe

		llvm::InitializeNativeTarget();

#if REACTOR_LLVM_VERSION >= 7
		llvm::InitializeNativeTargetAsmPrinter();
		llvm::InitializeNativeTargetAsmParser();
#endif

		// The context is created once and reused by later Nucleus instances.
		if(!::context)
		{
			::context = new llvm::LLVMContext();
		}

		// Pick the LLVM architecture name matching the host this file was
		// compiled for.
		#if defined(__x86_64__)
			static const char arch[] = "x86-64";
		#elif defined(__i386__)
			static const char arch[] = "x86";
		#elif defined(__aarch64__)
			static const char arch[] = "arm64";
		#elif defined(__arm__)
			static const char arch[] = "arm";
		#elif defined(__mips__)
			#if defined(__mips64)
			    static const char arch[] = "mips64el";
			#else
			    static const char arch[] = "mipsel";
			#endif
		#else
		#error "unknown architecture"
		#endif

		// Target feature strings: on x86 each feature is explicitly enabled or
		// disabled according to what CPUID reports at run time.
		llvm::SmallVector<std::string, 1> mattrs;
#if defined(__i386__) || defined(__x86_64__)
		mattrs.push_back(CPUID::supportsMMX()    ? "+mmx"    : "-mmx");
		mattrs.push_back(CPUID::supportsCMOV()   ? "+cmov"   : "-cmov");
		mattrs.push_back(CPUID::supportsSSE()    ? "+sse"    : "-sse");
		mattrs.push_back(CPUID::supportsSSE2()   ? "+sse2"   : "-sse2");
		mattrs.push_back(CPUID::supportsSSE3()   ? "+sse3"   : "-sse3");
		mattrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3"  : "-ssse3");
		// The SSE4.1 feature string is spelled differently for the two
		// supported LLVM version ranges.
#if REACTOR_LLVM_VERSION < 7
		mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse41"  : "-sse41");
#else
		mattrs.push_back(CPUID::supportsSSE4_1() ? "+sse4.1" : "-sse4.1");
#endif
#elif defined(__arm__)
#if __ARM_ARCH >= 8
		mattrs.push_back("+armv8-a");
#else
		// armv7-a requires compiler-rt routines; otherwise, compiled kernel
		// might fail to link.
#endif
#endif

		// Floating-point and debug-info settings: globals for LLVM < 7, a
		// TargetOptions object handed to the JIT otherwise. Note that the two
		// branches set UnsafeFPMath to different values.
#if REACTOR_LLVM_VERSION < 7
		llvm::JITEmitDebugInfo = false;
		llvm::UnsafeFPMath = true;
		// llvm::NoInfsFPMath = true;
		// llvm::NoNaNsFPMath = true;
#else
		llvm::TargetOptions targetOpts;
		targetOpts.UnsafeFPMath = false;
		// targetOpts.NoInfsFPMath = true;
		// targetOpts.NoNaNsFPMath = true;
#endif

		// The JIT is created once and reused by later Nucleus instances.
		if(!::reactorJIT)
		{
#if REACTOR_LLVM_VERSION < 7
			::reactorJIT = new LLVMReactorJIT(arch, mattrs);
#else
			::reactorJIT = new LLVMReactorJIT(arch, mattrs, targetOpts);
#endif
		}

		::reactorJIT->startSession();

		// The IR builder is likewise created once and reused.
		if(!::builder)
		{
			::builder = new llvm::IRBuilder<>(*::context);
		}
	}
883 
	// Ends the code generation session started by the constructor and then
	// releases the global codegen lock that the constructor acquired.
	Nucleus::~Nucleus()
	{
		::reactorJIT->endSession();

		// Unlock last, so the session teardown above still runs under the lock.
		::codegenMutex.unlock();
	}
890 
acquireRoutine(const char * name,bool runOptimizations)891 	Routine *Nucleus::acquireRoutine(const char *name, bool runOptimizations)
892 	{
893 		if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
894 		{
895 			llvm::Type *type = ::function->getReturnType();
896 
897 			if(type->isVoidTy())
898 			{
899 				createRetVoid();
900 			}
901 			else
902 			{
903 				createRet(V(llvm::UndefValue::get(type)));
904 			}
905 		}
906 
907 		if(false)
908 		{
909 			#if REACTOR_LLVM_VERSION < 7
910 				std::string error;
911 				llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-unopt.txt").c_str(), error);
912 			#else
913 				std::error_code error;
914 				llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-unopt.txt", error);
915 			#endif
916 
917 			::module->print(file, 0);
918 		}
919 
920 		if(runOptimizations)
921 		{
922 			optimize();
923 		}
924 
925 		if(false)
926 		{
927 			#if REACTOR_LLVM_VERSION < 7
928 				std::string error;
929 				llvm::raw_fd_ostream file((std::string(name) + "-llvm-dump-opt.txt").c_str(), error);
930 			#else
931 				std::error_code error;
932 				llvm::raw_fd_ostream file(std::string(name) + "-llvm-dump-opt.txt", error);
933 			#endif
934 
935 			::module->print(file, 0);
936 		}
937 
938 		LLVMRoutine *routine = ::reactorJIT->acquireRoutine(::function);
939 
940 		return routine;
941 	}
942 
optimize()943 	void Nucleus::optimize()
944 	{
945 		::reactorJIT->optimize(::module);
946 	}
947 
allocateStackVariable(Type * type,int arraySize)948 	Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
949 	{
950 		// Need to allocate it in the entry block for mem2reg to work
951 		llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
952 
953 		llvm::Instruction *declaration;
954 
955 		if(arraySize)
956 		{
957 #if REACTOR_LLVM_VERSION < 7
958 			declaration = new llvm::AllocaInst(T(type), V(Nucleus::createConstantInt(arraySize)));
959 #else
960 			declaration = new llvm::AllocaInst(T(type), 0, V(Nucleus::createConstantInt(arraySize)));
961 #endif
962 		}
963 		else
964 		{
965 #if REACTOR_LLVM_VERSION < 7
966 			declaration = new llvm::AllocaInst(T(type), (llvm::Value*)nullptr);
967 #else
968 			declaration = new llvm::AllocaInst(T(type), 0, (llvm::Value*)nullptr);
969 #endif
970 		}
971 
972 		entryBlock.getInstList().push_front(declaration);
973 
974 		return V(declaration);
975 	}
976 
createBasicBlock()977 	BasicBlock *Nucleus::createBasicBlock()
978 	{
979 		return B(llvm::BasicBlock::Create(*::context, "", ::function));
980 	}
981 
getInsertBlock()982 	BasicBlock *Nucleus::getInsertBlock()
983 	{
984 		return B(::builder->GetInsertBlock());
985 	}
986 
setInsertBlock(BasicBlock * basicBlock)987 	void Nucleus::setInsertBlock(BasicBlock *basicBlock)
988 	{
989 	//	assert(::builder->GetInsertBlock()->back().isTerminator());
990 		::builder->SetInsertPoint(B(basicBlock));
991 	}
992 
createFunction(Type * ReturnType,std::vector<Type * > & Params)993 	void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
994 	{
995 		llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
996 		::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
997 		::function->setCallingConv(llvm::CallingConv::C);
998 
999 		#if defined(_WIN32) && REACTOR_LLVM_VERSION >= 7
1000 			// FIXME(capn):
1001 			// On Windows, stack memory is committed in increments of 4 kB pages, with the last page
1002 			// having a trap which allows the OS to grow the stack. For functions with a stack frame
1003 			// larger than 4 kB this can cause an issue when a variable is accessed beyond the guard
1004 			// page. Therefore the compiler emits a call to __chkstk in the function prolog to probe
1005 			// the stack and ensure all pages have been committed. This is currently broken in LLVM
1006 			// JIT, but we can prevent emitting the stack probe call:
1007 			::function->addFnAttr("stack-probe-size", "1048576");
1008 		#endif
1009 
1010 		::builder->SetInsertPoint(llvm::BasicBlock::Create(*::context, "", ::function));
1011 	}
1012 
getArgument(unsigned int index)1013 	Value *Nucleus::getArgument(unsigned int index)
1014 	{
1015 		llvm::Function::arg_iterator args = ::function->arg_begin();
1016 
1017 		while(index)
1018 		{
1019 			args++;
1020 			index--;
1021 		}
1022 
1023 		return V(&*args);
1024 	}
1025 
createRetVoid()1026 	void Nucleus::createRetVoid()
1027 	{
1028 		::builder->CreateRetVoid();
1029 	}
1030 
createRet(Value * v)1031 	void Nucleus::createRet(Value *v)
1032 	{
1033 		::builder->CreateRet(V(v));
1034 	}
1035 
createBr(BasicBlock * dest)1036 	void Nucleus::createBr(BasicBlock *dest)
1037 	{
1038 		::builder->CreateBr(B(dest));
1039 	}
1040 
createCondBr(Value * cond,BasicBlock * ifTrue,BasicBlock * ifFalse)1041 	void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
1042 	{
1043 		::builder->CreateCondBr(V(cond), B(ifTrue), B(ifFalse));
1044 	}
1045 
createAdd(Value * lhs,Value * rhs)1046 	Value *Nucleus::createAdd(Value *lhs, Value *rhs)
1047 	{
1048 		return V(::builder->CreateAdd(V(lhs), V(rhs)));
1049 	}
1050 
createSub(Value * lhs,Value * rhs)1051 	Value *Nucleus::createSub(Value *lhs, Value *rhs)
1052 	{
1053 		return V(::builder->CreateSub(V(lhs), V(rhs)));
1054 	}
1055 
createMul(Value * lhs,Value * rhs)1056 	Value *Nucleus::createMul(Value *lhs, Value *rhs)
1057 	{
1058 		return V(::builder->CreateMul(V(lhs), V(rhs)));
1059 	}
1060 
createUDiv(Value * lhs,Value * rhs)1061 	Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
1062 	{
1063 		return V(::builder->CreateUDiv(V(lhs), V(rhs)));
1064 	}
1065 
createSDiv(Value * lhs,Value * rhs)1066 	Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
1067 	{
1068 		return V(::builder->CreateSDiv(V(lhs), V(rhs)));
1069 	}
1070 
createFAdd(Value * lhs,Value * rhs)1071 	Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
1072 	{
1073 		return V(::builder->CreateFAdd(V(lhs), V(rhs)));
1074 	}
1075 
createFSub(Value * lhs,Value * rhs)1076 	Value *Nucleus::createFSub(Value *lhs, Value *rhs)
1077 	{
1078 		return V(::builder->CreateFSub(V(lhs), V(rhs)));
1079 	}
1080 
createFMul(Value * lhs,Value * rhs)1081 	Value *Nucleus::createFMul(Value *lhs, Value *rhs)
1082 	{
1083 		return V(::builder->CreateFMul(V(lhs), V(rhs)));
1084 	}
1085 
createFDiv(Value * lhs,Value * rhs)1086 	Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
1087 	{
1088 		return V(::builder->CreateFDiv(V(lhs), V(rhs)));
1089 	}
1090 
createURem(Value * lhs,Value * rhs)1091 	Value *Nucleus::createURem(Value *lhs, Value *rhs)
1092 	{
1093 		return V(::builder->CreateURem(V(lhs), V(rhs)));
1094 	}
1095 
createSRem(Value * lhs,Value * rhs)1096 	Value *Nucleus::createSRem(Value *lhs, Value *rhs)
1097 	{
1098 		return V(::builder->CreateSRem(V(lhs), V(rhs)));
1099 	}
1100 
createFRem(Value * lhs,Value * rhs)1101 	Value *Nucleus::createFRem(Value *lhs, Value *rhs)
1102 	{
1103 		return V(::builder->CreateFRem(V(lhs), V(rhs)));
1104 	}
1105 
createShl(Value * lhs,Value * rhs)1106 	Value *Nucleus::createShl(Value *lhs, Value *rhs)
1107 	{
1108 		return V(::builder->CreateShl(V(lhs), V(rhs)));
1109 	}
1110 
createLShr(Value * lhs,Value * rhs)1111 	Value *Nucleus::createLShr(Value *lhs, Value *rhs)
1112 	{
1113 		return V(::builder->CreateLShr(V(lhs), V(rhs)));
1114 	}
1115 
createAShr(Value * lhs,Value * rhs)1116 	Value *Nucleus::createAShr(Value *lhs, Value *rhs)
1117 	{
1118 		return V(::builder->CreateAShr(V(lhs), V(rhs)));
1119 	}
1120 
createAnd(Value * lhs,Value * rhs)1121 	Value *Nucleus::createAnd(Value *lhs, Value *rhs)
1122 	{
1123 		return V(::builder->CreateAnd(V(lhs), V(rhs)));
1124 	}
1125 
createOr(Value * lhs,Value * rhs)1126 	Value *Nucleus::createOr(Value *lhs, Value *rhs)
1127 	{
1128 		return V(::builder->CreateOr(V(lhs), V(rhs)));
1129 	}
1130 
createXor(Value * lhs,Value * rhs)1131 	Value *Nucleus::createXor(Value *lhs, Value *rhs)
1132 	{
1133 		return V(::builder->CreateXor(V(lhs), V(rhs)));
1134 	}
1135 
createNeg(Value * v)1136 	Value *Nucleus::createNeg(Value *v)
1137 	{
1138 		return V(::builder->CreateNeg(V(v)));
1139 	}
1140 
createFNeg(Value * v)1141 	Value *Nucleus::createFNeg(Value *v)
1142 	{
1143 		return V(::builder->CreateFNeg(V(v)));
1144 	}
1145 
createNot(Value * v)1146 	Value *Nucleus::createNot(Value *v)
1147 	{
1148 		return V(::builder->CreateNot(V(v)));
1149 	}
1150 
	// Loads a value of 'type' from 'ptr' with the given volatility and alignment.
	//
	// Type handles whose integer value is below EmulatedTypeCount denote emulated
	// narrow vector types. Those are loaded as a scalar that is inserted into
	// element 0 of an otherwise undefined two-element vector of Long, which is then
	// bitcast to the requested type. Every other type is loaded directly.
	Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
	{
		// The Type pointer doubles as an enumerator for emulated types.
		uintptr_t t = reinterpret_cast<uintptr_t>(type);
		if(t < EmulatedTypeCount)
		{
			switch(t)
			{
			case Type_v2i32:
			case Type_v4i16:
			case Type_v8i8:
			case Type_v2f32:
				// Read the data as a single Long and place it in element 0.
				return createBitCast(
					createInsertElement(
						V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))),
						createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment),
						0),
					type);
			case Type_v2i16:
			case Type_v4i8:
				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
				{
					// Read the data as a single Int, zero-extend it to Long and
					// place it in element 0.
					Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
					Value *i = createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
					i = createZExt(i, Long::getType());
					Value *v = createInsertElement(u, i, 0);
					return createBitCast(v, type);
				}
				// alignment == 0: fall out of the switch to the direct load below.
				break;
			default:
				assert(false);
			}
		}

		// Direct load; the pointer's element type must match the requested type.
		assert(V(ptr)->getType()->getContainedType(0) == T(type));
		return V(::builder->Insert(new llvm::LoadInst(V(ptr), "", isVolatile, alignment)));
	}
1187 
	// Stores 'value' of 'type' to 'ptr' with the given volatility and alignment,
	// and returns 'value' unchanged.
	//
	// Type handles whose integer value is below EmulatedTypeCount denote emulated
	// narrow vector types. For those only element 0 of the value, reinterpreted as
	// a vector of Long or Int, is written. Every other type is stored directly.
	Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
	{
		// The Type pointer doubles as an enumerator for emulated types.
		uintptr_t t = reinterpret_cast<uintptr_t>(type);
		if(t < EmulatedTypeCount)
		{
			switch(t)
			{
			case Type_v2i32:
			case Type_v4i16:
			case Type_v8i8:
			case Type_v2f32:
				// Write element 0 of the value viewed as a two-element vector of Long.
				createStore(
					createExtractElement(
						createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0),
					createBitCast(ptr, Pointer<Long>::getType()),
					Long::getType(), isVolatile, alignment);
				return value;
			case Type_v2i16:
			case Type_v4i8:
				if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
				{
					// Write element 0 of the value viewed as an Int4.
					createStore(
						createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0),
						createBitCast(ptr, Pointer<Int>::getType()),
						Int::getType(), isVolatile, alignment);
					return value;
				}
				// alignment == 0: fall out of the switch to the direct store below.
				break;
			default:
				assert(false);
			}
		}

		// Direct store; the pointer's element type must match the requested type.
		assert(V(ptr)->getType()->getContainedType(0) == T(type));
		::builder->Insert(new llvm::StoreInst(V(value), V(ptr), isVolatile, alignment));
		return value;
	}
1225 
createGEP(Value * ptr,Type * type,Value * index,bool unsignedIndex)1226 	Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
1227 	{
1228 		if(sizeof(void*) == 8)
1229 		{
1230 			if(unsignedIndex)
1231 			{
1232 				index = createZExt(index, Long::getType());
1233 			}
1234 			else
1235 			{
1236 				index = createSExt(index, Long::getType());
1237 			}
1238 
1239 			index = createMul(index, createConstantLong((int64_t)typeSize(type)));
1240 		}
1241 		else
1242 		{
1243 			index = createMul(index, createConstantInt((int)typeSize(type)));
1244 		}
1245 
1246 		assert(V(ptr)->getType()->getContainedType(0) == T(type));
1247 		return createBitCast(
1248 			V(::builder->CreateGEP(V(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0)))), V(index))),
1249 			T(llvm::PointerType::get(T(type), 0)));
1250 	}
1251 
createAtomicAdd(Value * ptr,Value * value)1252 	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
1253 	{
1254 		return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, V(ptr), V(value), llvm::AtomicOrdering::SequentiallyConsistent));
1255 	}
1256 
createTrunc(Value * v,Type * destType)1257 	Value *Nucleus::createTrunc(Value *v, Type *destType)
1258 	{
1259 		return V(::builder->CreateTrunc(V(v), T(destType)));
1260 	}
1261 
createZExt(Value * v,Type * destType)1262 	Value *Nucleus::createZExt(Value *v, Type *destType)
1263 	{
1264 		return V(::builder->CreateZExt(V(v), T(destType)));
1265 	}
1266 
createSExt(Value * v,Type * destType)1267 	Value *Nucleus::createSExt(Value *v, Type *destType)
1268 	{
1269 		return V(::builder->CreateSExt(V(v), T(destType)));
1270 	}
1271 
createFPToSI(Value * v,Type * destType)1272 	Value *Nucleus::createFPToSI(Value *v, Type *destType)
1273 	{
1274 		return V(::builder->CreateFPToSI(V(v), T(destType)));
1275 	}
1276 
createSIToFP(Value * v,Type * destType)1277 	Value *Nucleus::createSIToFP(Value *v, Type *destType)
1278 	{
1279 		return V(::builder->CreateSIToFP(V(v), T(destType)));
1280 	}
1281 
createFPTrunc(Value * v,Type * destType)1282 	Value *Nucleus::createFPTrunc(Value *v, Type *destType)
1283 	{
1284 		return V(::builder->CreateFPTrunc(V(v), T(destType)));
1285 	}
1286 
createFPExt(Value * v,Type * destType)1287 	Value *Nucleus::createFPExt(Value *v, Type *destType)
1288 	{
1289 		return V(::builder->CreateFPExt(V(v), T(destType)));
1290 	}
1291 
createBitCast(Value * v,Type * destType)1292 	Value *Nucleus::createBitCast(Value *v, Type *destType)
1293 	{
1294 		// Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
1295 		// support for casting between scalars and wide vectors. Emulate them by writing to the stack and
1296 		// reading back as the destination type.
1297 		if(!V(v)->getType()->isVectorTy() && T(destType)->isVectorTy())
1298 		{
1299 			Value *readAddress = allocateStackVariable(destType);
1300 			Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(V(v)->getType(), 0)));
1301 			createStore(v, writeAddress, T(V(v)->getType()));
1302 			return createLoad(readAddress, destType);
1303 		}
1304 		else if(V(v)->getType()->isVectorTy() && !T(destType)->isVectorTy())
1305 		{
1306 			Value *writeAddress = allocateStackVariable(T(V(v)->getType()));
1307 			createStore(v, writeAddress, T(V(v)->getType()));
1308 			Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
1309 			return createLoad(readAddress, destType);
1310 		}
1311 
1312 		return V(::builder->CreateBitCast(V(v), T(destType)));
1313 	}
1314 
createICmpEQ(Value * lhs,Value * rhs)1315 	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
1316 	{
1317 		return V(::builder->CreateICmpEQ(V(lhs), V(rhs)));
1318 	}
1319 
createICmpNE(Value * lhs,Value * rhs)1320 	Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
1321 	{
1322 		return V(::builder->CreateICmpNE(V(lhs), V(rhs)));
1323 	}
1324 
createICmpUGT(Value * lhs,Value * rhs)1325 	Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
1326 	{
1327 		return V(::builder->CreateICmpUGT(V(lhs), V(rhs)));
1328 	}
1329 
createICmpUGE(Value * lhs,Value * rhs)1330 	Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
1331 	{
1332 		return V(::builder->CreateICmpUGE(V(lhs), V(rhs)));
1333 	}
1334 
createICmpULT(Value * lhs,Value * rhs)1335 	Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
1336 	{
1337 		return V(::builder->CreateICmpULT(V(lhs), V(rhs)));
1338 	}
1339 
createICmpULE(Value * lhs,Value * rhs)1340 	Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
1341 	{
1342 		return V(::builder->CreateICmpULE(V(lhs), V(rhs)));
1343 	}
1344 
createICmpSGT(Value * lhs,Value * rhs)1345 	Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
1346 	{
1347 		return V(::builder->CreateICmpSGT(V(lhs), V(rhs)));
1348 	}
1349 
createICmpSGE(Value * lhs,Value * rhs)1350 	Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
1351 	{
1352 		return V(::builder->CreateICmpSGE(V(lhs), V(rhs)));
1353 	}
1354 
createICmpSLT(Value * lhs,Value * rhs)1355 	Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
1356 	{
1357 		return V(::builder->CreateICmpSLT(V(lhs), V(rhs)));
1358 	}
1359 
createICmpSLE(Value * lhs,Value * rhs)1360 	Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
1361 	{
1362 		return V(::builder->CreateICmpSLE(V(lhs), V(rhs)));
1363 	}
1364 
createFCmpOEQ(Value * lhs,Value * rhs)1365 	Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
1366 	{
1367 		return V(::builder->CreateFCmpOEQ(V(lhs), V(rhs)));
1368 	}
1369 
createFCmpOGT(Value * lhs,Value * rhs)1370 	Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
1371 	{
1372 		return V(::builder->CreateFCmpOGT(V(lhs), V(rhs)));
1373 	}
1374 
createFCmpOGE(Value * lhs,Value * rhs)1375 	Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
1376 	{
1377 		return V(::builder->CreateFCmpOGE(V(lhs), V(rhs)));
1378 	}
1379 
createFCmpOLT(Value * lhs,Value * rhs)1380 	Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
1381 	{
1382 		return V(::builder->CreateFCmpOLT(V(lhs), V(rhs)));
1383 	}
1384 
createFCmpOLE(Value * lhs,Value * rhs)1385 	Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
1386 	{
1387 		return V(::builder->CreateFCmpOLE(V(lhs), V(rhs)));
1388 	}
1389 
createFCmpONE(Value * lhs,Value * rhs)1390 	Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
1391 	{
1392 		return V(::builder->CreateFCmpONE(V(lhs), V(rhs)));
1393 	}
1394 
createFCmpORD(Value * lhs,Value * rhs)1395 	Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
1396 	{
1397 		return V(::builder->CreateFCmpORD(V(lhs), V(rhs)));
1398 	}
1399 
createFCmpUNO(Value * lhs,Value * rhs)1400 	Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
1401 	{
1402 		return V(::builder->CreateFCmpUNO(V(lhs), V(rhs)));
1403 	}
1404 
createFCmpUEQ(Value * lhs,Value * rhs)1405 	Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
1406 	{
1407 		return V(::builder->CreateFCmpUEQ(V(lhs), V(rhs)));
1408 	}
1409 
createFCmpUGT(Value * lhs,Value * rhs)1410 	Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
1411 	{
1412 		return V(::builder->CreateFCmpUGT(V(lhs), V(rhs)));
1413 	}
1414 
createFCmpUGE(Value * lhs,Value * rhs)1415 	Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
1416 	{
1417 		return V(::builder->CreateFCmpUGE(V(lhs), V(rhs)));
1418 	}
1419 
createFCmpULT(Value * lhs,Value * rhs)1420 	Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
1421 	{
1422 		return V(::builder->CreateFCmpULT(V(lhs), V(rhs)));
1423 	}
1424 
createFCmpULE(Value * lhs,Value * rhs)1425 	Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
1426 	{
1427 		return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
1428 	}
1429 
createFCmpUNE(Value * lhs,Value * rhs)1430 	Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
1431 	{
1432 		return V(::builder->CreateFCmpULE(V(lhs), V(rhs)));
1433 	}
1434 
createExtractElement(Value * vector,Type * type,int index)1435 	Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
1436 	{
1437 		assert(V(vector)->getType()->getContainedType(0) == T(type));
1438 		return V(::builder->CreateExtractElement(V(vector), V(createConstantInt(index))));
1439 	}
1440 
createInsertElement(Value * vector,Value * element,int index)1441 	Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
1442 	{
1443 		return V(::builder->CreateInsertElement(V(vector), V(element), V(createConstantInt(index))));
1444 	}
1445 
createShuffleVector(Value * v1,Value * v2,const int * select)1446 	Value *Nucleus::createShuffleVector(Value *v1, Value *v2, const int *select)
1447 	{
1448 		int size = llvm::cast<llvm::VectorType>(V(v1)->getType())->getNumElements();
1449 		const int maxSize = 16;
1450 		llvm::Constant *swizzle[maxSize];
1451 		assert(size <= maxSize);
1452 
1453 		for(int i = 0; i < size; i++)
1454 		{
1455 			swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
1456 		}
1457 
1458 		llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
1459 
1460 		return V(::builder->CreateShuffleVector(V(v1), V(v2), shuffle));
1461 	}
1462 
createSelect(Value * c,Value * ifTrue,Value * ifFalse)1463 	Value *Nucleus::createSelect(Value *c, Value *ifTrue, Value *ifFalse)
1464 	{
1465 		return V(::builder->CreateSelect(V(c), V(ifTrue), V(ifFalse)));
1466 	}
1467 
createSwitch(Value * control,BasicBlock * defaultBranch,unsigned numCases)1468 	SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
1469 	{
1470 		return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(V(control), B(defaultBranch), numCases));
1471 	}
1472 
addSwitchCase(SwitchCases * switchCases,int label,BasicBlock * branch)1473 	void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
1474 	{
1475 		llvm::SwitchInst *sw = reinterpret_cast<llvm::SwitchInst *>(switchCases);
1476 		sw->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), B(branch));
1477 	}
1478 
createUnreachable()1479 	void Nucleus::createUnreachable()
1480 	{
1481 		::builder->CreateUnreachable();
1482 	}
1483 
createSwizzle4(Value * val,unsigned char select)1484 	static Value *createSwizzle4(Value *val, unsigned char select)
1485 	{
1486 		int swizzle[4] =
1487 		{
1488 			(select >> 0) & 0x03,
1489 			(select >> 2) & 0x03,
1490 			(select >> 4) & 0x03,
1491 			(select >> 6) & 0x03,
1492 		};
1493 
1494 		return Nucleus::createShuffleVector(val, val, swizzle);
1495 	}
1496 
createMask4(Value * lhs,Value * rhs,unsigned char select)1497 	static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
1498 	{
1499 		bool mask[4] = {false, false, false, false};
1500 
1501 		mask[(select >> 0) & 0x03] = true;
1502 		mask[(select >> 2) & 0x03] = true;
1503 		mask[(select >> 4) & 0x03] = true;
1504 		mask[(select >> 6) & 0x03] = true;
1505 
1506 		int swizzle[4] =
1507 		{
1508 			mask[0] ? 4 : 0,
1509 			mask[1] ? 5 : 1,
1510 			mask[2] ? 6 : 2,
1511 			mask[3] ? 7 : 3,
1512 		};
1513 
1514 		return Nucleus::createShuffleVector(lhs, rhs, swizzle);
1515 	}
1516 
getPointerType(Type * ElementType)1517 	Type *Nucleus::getPointerType(Type *ElementType)
1518 	{
1519 		return T(llvm::PointerType::get(T(ElementType), 0));
1520 	}
1521 
createNullValue(Type * Ty)1522 	Value *Nucleus::createNullValue(Type *Ty)
1523 	{
1524 		return V(llvm::Constant::getNullValue(T(Ty)));
1525 	}
1526 
createConstantLong(int64_t i)1527 	Value *Nucleus::createConstantLong(int64_t i)
1528 	{
1529 		return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
1530 	}
1531 
createConstantInt(int i)1532 	Value *Nucleus::createConstantInt(int i)
1533 	{
1534 		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
1535 	}
1536 
createConstantInt(unsigned int i)1537 	Value *Nucleus::createConstantInt(unsigned int i)
1538 	{
1539 		return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
1540 	}
1541 
createConstantBool(bool b)1542 	Value *Nucleus::createConstantBool(bool b)
1543 	{
1544 		return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
1545 	}
1546 
createConstantByte(signed char i)1547 	Value *Nucleus::createConstantByte(signed char i)
1548 	{
1549 		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
1550 	}
1551 
createConstantByte(unsigned char i)1552 	Value *Nucleus::createConstantByte(unsigned char i)
1553 	{
1554 		return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
1555 	}
1556 
createConstantShort(short i)1557 	Value *Nucleus::createConstantShort(short i)
1558 	{
1559 		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
1560 	}
1561 
createConstantShort(unsigned short i)1562 	Value *Nucleus::createConstantShort(unsigned short i)
1563 	{
1564 		return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
1565 	}
1566 
createConstantFloat(float x)1567 	Value *Nucleus::createConstantFloat(float x)
1568 	{
1569 		return V(llvm::ConstantFP::get(T(Float::getType()), x));
1570 	}
1571 
createNullPointer(Type * Ty)1572 	Value *Nucleus::createNullPointer(Type *Ty)
1573 	{
1574 		return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
1575 	}
1576 
createConstantVector(const int64_t * constants,Type * type)1577 	Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
1578 	{
1579 		assert(llvm::isa<llvm::VectorType>(T(type)));
1580 		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
1581 		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
1582 		assert(numElements <= 16 && numConstants <= numElements);
1583 		llvm::Constant *constantVector[16];
1584 
1585 		for(int i = 0; i < numElements; i++)
1586 		{
1587 			constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i % numConstants]);
1588 		}
1589 
1590 		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
1591 	}
1592 
createConstantVector(const double * constants,Type * type)1593 	Value *Nucleus::createConstantVector(const double *constants, Type *type)
1594 	{
1595 		assert(llvm::isa<llvm::VectorType>(T(type)));
1596 		const int numConstants = elementCount(type);                                       // Number of provided constants for the (emulated) type.
1597 		const int numElements = llvm::cast<llvm::VectorType>(T(type))->getNumElements();   // Number of elements of the underlying vector type.
1598 		assert(numElements <= 8 && numConstants <= numElements);
1599 		llvm::Constant *constantVector[8];
1600 
1601 		for(int i = 0; i < numElements; i++)
1602 		{
1603 			constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i % numConstants]);
1604 		}
1605 
1606 		return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numElements)));
1607 	}
1608 
getType()1609 	Type *Void::getType()
1610 	{
1611 		return T(llvm::Type::getVoidTy(*::context));
1612 	}
1613 
Bool(Argument<Bool> argument)1614 	Bool::Bool(Argument<Bool> argument)
1615 	{
1616 		storeValue(argument.value);
1617 	}
1618 
Bool(bool x)1619 	Bool::Bool(bool x)
1620 	{
1621 		storeValue(Nucleus::createConstantBool(x));
1622 	}
1623 
Bool(RValue<Bool> rhs)1624 	Bool::Bool(RValue<Bool> rhs)
1625 	{
1626 		storeValue(rhs.value);
1627 	}
1628 
Bool(const Bool & rhs)1629 	Bool::Bool(const Bool &rhs)
1630 	{
1631 		Value *value = rhs.loadValue();
1632 		storeValue(value);
1633 	}
1634 
Bool(const Reference<Bool> & rhs)1635 	Bool::Bool(const Reference<Bool> &rhs)
1636 	{
1637 		Value *value = rhs.loadValue();
1638 		storeValue(value);
1639 	}
1640 
operator =(RValue<Bool> rhs)1641 	RValue<Bool> Bool::operator=(RValue<Bool> rhs)
1642 	{
1643 		storeValue(rhs.value);
1644 
1645 		return rhs;
1646 	}
1647 
operator =(const Bool & rhs)1648 	RValue<Bool> Bool::operator=(const Bool &rhs)
1649 	{
1650 		Value *value = rhs.loadValue();
1651 		storeValue(value);
1652 
1653 		return RValue<Bool>(value);
1654 	}
1655 
operator =(const Reference<Bool> & rhs)1656 	RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
1657 	{
1658 		Value *value = rhs.loadValue();
1659 		storeValue(value);
1660 
1661 		return RValue<Bool>(value);
1662 	}
1663 
operator !(RValue<Bool> val)1664 	RValue<Bool> operator!(RValue<Bool> val)
1665 	{
1666 		return RValue<Bool>(Nucleus::createNot(val.value));
1667 	}
1668 
operator &&(RValue<Bool> lhs,RValue<Bool> rhs)1669 	RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
1670 	{
1671 		return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
1672 	}
1673 
operator ||(RValue<Bool> lhs,RValue<Bool> rhs)1674 	RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1675 	{
1676 		return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1677 	}
1678 
getType()1679 	Type *Bool::getType()
1680 	{
1681 		return T(llvm::Type::getInt1Ty(*::context));
1682 	}
1683 
Byte(Argument<Byte> argument)1684 	Byte::Byte(Argument<Byte> argument)
1685 	{
1686 		storeValue(argument.value);
1687 	}
1688 
Byte(RValue<Int> cast)1689 	Byte::Byte(RValue<Int> cast)
1690 	{
1691 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1692 
1693 		storeValue(integer);
1694 	}
1695 
Byte(RValue<UInt> cast)1696 	Byte::Byte(RValue<UInt> cast)
1697 	{
1698 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1699 
1700 		storeValue(integer);
1701 	}
1702 
Byte(RValue<UShort> cast)1703 	Byte::Byte(RValue<UShort> cast)
1704 	{
1705 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1706 
1707 		storeValue(integer);
1708 	}
1709 
Byte(int x)1710 	Byte::Byte(int x)
1711 	{
1712 		storeValue(Nucleus::createConstantByte((unsigned char)x));
1713 	}
1714 
Byte(unsigned char x)1715 	Byte::Byte(unsigned char x)
1716 	{
1717 		storeValue(Nucleus::createConstantByte(x));
1718 	}
1719 
Byte(RValue<Byte> rhs)1720 	Byte::Byte(RValue<Byte> rhs)
1721 	{
1722 		storeValue(rhs.value);
1723 	}
1724 
Byte(const Byte & rhs)1725 	Byte::Byte(const Byte &rhs)
1726 	{
1727 		Value *value = rhs.loadValue();
1728 		storeValue(value);
1729 	}
1730 
Byte(const Reference<Byte> & rhs)1731 	Byte::Byte(const Reference<Byte> &rhs)
1732 	{
1733 		Value *value = rhs.loadValue();
1734 		storeValue(value);
1735 	}
1736 
operator =(RValue<Byte> rhs)1737 	RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1738 	{
1739 		storeValue(rhs.value);
1740 
1741 		return rhs;
1742 	}
1743 
operator =(const Byte & rhs)1744 	RValue<Byte> Byte::operator=(const Byte &rhs)
1745 	{
1746 		Value *value = rhs.loadValue();
1747 		storeValue(value);
1748 
1749 		return RValue<Byte>(value);
1750 	}
1751 
operator =(const Reference<Byte> & rhs)1752 	RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1753 	{
1754 		Value *value = rhs.loadValue();
1755 		storeValue(value);
1756 
1757 		return RValue<Byte>(value);
1758 	}
1759 
operator +(RValue<Byte> lhs,RValue<Byte> rhs)1760 	RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1761 	{
1762 		return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1763 	}
1764 
operator -(RValue<Byte> lhs,RValue<Byte> rhs)1765 	RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1766 	{
1767 		return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1768 	}
1769 
operator *(RValue<Byte> lhs,RValue<Byte> rhs)1770 	RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1771 	{
1772 		return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1773 	}
1774 
operator /(RValue<Byte> lhs,RValue<Byte> rhs)1775 	RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1776 	{
1777 		return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1778 	}
1779 
operator %(RValue<Byte> lhs,RValue<Byte> rhs)1780 	RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1781 	{
1782 		return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1783 	}
1784 
operator &(RValue<Byte> lhs,RValue<Byte> rhs)1785 	RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1786 	{
1787 		return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1788 	}
1789 
operator |(RValue<Byte> lhs,RValue<Byte> rhs)1790 	RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1791 	{
1792 		return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1793 	}
1794 
operator ^(RValue<Byte> lhs,RValue<Byte> rhs)1795 	RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1796 	{
1797 		return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1798 	}
1799 
operator <<(RValue<Byte> lhs,RValue<Byte> rhs)1800 	RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1801 	{
1802 		return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1803 	}
1804 
operator >>(RValue<Byte> lhs,RValue<Byte> rhs)1805 	RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1806 	{
1807 		return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1808 	}
1809 
operator +=(Byte & lhs,RValue<Byte> rhs)1810 	RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1811 	{
1812 		return lhs = lhs + rhs;
1813 	}
1814 
operator -=(Byte & lhs,RValue<Byte> rhs)1815 	RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1816 	{
1817 		return lhs = lhs - rhs;
1818 	}
1819 
operator *=(Byte & lhs,RValue<Byte> rhs)1820 	RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1821 	{
1822 		return lhs = lhs * rhs;
1823 	}
1824 
operator /=(Byte & lhs,RValue<Byte> rhs)1825 	RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1826 	{
1827 		return lhs = lhs / rhs;
1828 	}
1829 
operator %=(Byte & lhs,RValue<Byte> rhs)1830 	RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1831 	{
1832 		return lhs = lhs % rhs;
1833 	}
1834 
operator &=(Byte & lhs,RValue<Byte> rhs)1835 	RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1836 	{
1837 		return lhs = lhs & rhs;
1838 	}
1839 
operator |=(Byte & lhs,RValue<Byte> rhs)1840 	RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1841 	{
1842 		return lhs = lhs | rhs;
1843 	}
1844 
operator ^=(Byte & lhs,RValue<Byte> rhs)1845 	RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1846 	{
1847 		return lhs = lhs ^ rhs;
1848 	}
1849 
operator <<=(Byte & lhs,RValue<Byte> rhs)1850 	RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1851 	{
1852 		return lhs = lhs << rhs;
1853 	}
1854 
operator >>=(Byte & lhs,RValue<Byte> rhs)1855 	RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1856 	{
1857 		return lhs = lhs >> rhs;
1858 	}
1859 
operator +(RValue<Byte> val)1860 	RValue<Byte> operator+(RValue<Byte> val)
1861 	{
1862 		return val;
1863 	}
1864 
operator -(RValue<Byte> val)1865 	RValue<Byte> operator-(RValue<Byte> val)
1866 	{
1867 		return RValue<Byte>(Nucleus::createNeg(val.value));
1868 	}
1869 
operator ~(RValue<Byte> val)1870 	RValue<Byte> operator~(RValue<Byte> val)
1871 	{
1872 		return RValue<Byte>(Nucleus::createNot(val.value));
1873 	}
1874 
operator ++(Byte & val,int)1875 	RValue<Byte> operator++(Byte &val, int)   // Post-increment
1876 	{
1877 		RValue<Byte> res = val;
1878 
1879 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1));
1880 		val.storeValue(inc);
1881 
1882 		return res;
1883 	}
1884 
operator ++(Byte & val)1885 	const Byte &operator++(Byte &val)   // Pre-increment
1886 	{
1887 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
1888 		val.storeValue(inc);
1889 
1890 		return val;
1891 	}
1892 
operator --(Byte & val,int)1893 	RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1894 	{
1895 		RValue<Byte> res = val;
1896 
1897 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1));
1898 		val.storeValue(inc);
1899 
1900 		return res;
1901 	}
1902 
operator --(Byte & val)1903 	const Byte &operator--(Byte &val)   // Pre-decrement
1904 	{
1905 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
1906 		val.storeValue(inc);
1907 
1908 		return val;
1909 	}
1910 
operator <(RValue<Byte> lhs,RValue<Byte> rhs)1911 	RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1912 	{
1913 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1914 	}
1915 
operator <=(RValue<Byte> lhs,RValue<Byte> rhs)1916 	RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1917 	{
1918 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1919 	}
1920 
operator >(RValue<Byte> lhs,RValue<Byte> rhs)1921 	RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1922 	{
1923 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1924 	}
1925 
operator >=(RValue<Byte> lhs,RValue<Byte> rhs)1926 	RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1927 	{
1928 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1929 	}
1930 
operator !=(RValue<Byte> lhs,RValue<Byte> rhs)1931 	RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1932 	{
1933 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1934 	}
1935 
operator ==(RValue<Byte> lhs,RValue<Byte> rhs)1936 	RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1937 	{
1938 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1939 	}
1940 
getType()1941 	Type *Byte::getType()
1942 	{
1943 		return T(llvm::Type::getInt8Ty(*::context));
1944 	}
1945 
SByte(Argument<SByte> argument)1946 	SByte::SByte(Argument<SByte> argument)
1947 	{
1948 		storeValue(argument.value);
1949 	}
1950 
SByte(RValue<Int> cast)1951 	SByte::SByte(RValue<Int> cast)
1952 	{
1953 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1954 
1955 		storeValue(integer);
1956 	}
1957 
SByte(RValue<Short> cast)1958 	SByte::SByte(RValue<Short> cast)
1959 	{
1960 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1961 
1962 		storeValue(integer);
1963 	}
1964 
SByte(signed char x)1965 	SByte::SByte(signed char x)
1966 	{
1967 		storeValue(Nucleus::createConstantByte(x));
1968 	}
1969 
SByte(RValue<SByte> rhs)1970 	SByte::SByte(RValue<SByte> rhs)
1971 	{
1972 		storeValue(rhs.value);
1973 	}
1974 
SByte(const SByte & rhs)1975 	SByte::SByte(const SByte &rhs)
1976 	{
1977 		Value *value = rhs.loadValue();
1978 		storeValue(value);
1979 	}
1980 
SByte(const Reference<SByte> & rhs)1981 	SByte::SByte(const Reference<SByte> &rhs)
1982 	{
1983 		Value *value = rhs.loadValue();
1984 		storeValue(value);
1985 	}
1986 
operator =(RValue<SByte> rhs)1987 	RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1988 	{
1989 		storeValue(rhs.value);
1990 
1991 		return rhs;
1992 	}
1993 
operator =(const SByte & rhs)1994 	RValue<SByte> SByte::operator=(const SByte &rhs)
1995 	{
1996 		Value *value = rhs.loadValue();
1997 		storeValue(value);
1998 
1999 		return RValue<SByte>(value);
2000 	}
2001 
operator =(const Reference<SByte> & rhs)2002 	RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
2003 	{
2004 		Value *value = rhs.loadValue();
2005 		storeValue(value);
2006 
2007 		return RValue<SByte>(value);
2008 	}
2009 
operator +(RValue<SByte> lhs,RValue<SByte> rhs)2010 	RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
2011 	{
2012 		return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
2013 	}
2014 
operator -(RValue<SByte> lhs,RValue<SByte> rhs)2015 	RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
2016 	{
2017 		return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
2018 	}
2019 
operator *(RValue<SByte> lhs,RValue<SByte> rhs)2020 	RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
2021 	{
2022 		return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
2023 	}
2024 
operator /(RValue<SByte> lhs,RValue<SByte> rhs)2025 	RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
2026 	{
2027 		return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
2028 	}
2029 
operator %(RValue<SByte> lhs,RValue<SByte> rhs)2030 	RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
2031 	{
2032 		return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
2033 	}
2034 
operator &(RValue<SByte> lhs,RValue<SByte> rhs)2035 	RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
2036 	{
2037 		return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
2038 	}
2039 
operator |(RValue<SByte> lhs,RValue<SByte> rhs)2040 	RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
2041 	{
2042 		return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
2043 	}
2044 
operator ^(RValue<SByte> lhs,RValue<SByte> rhs)2045 	RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
2046 	{
2047 		return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
2048 	}
2049 
operator <<(RValue<SByte> lhs,RValue<SByte> rhs)2050 	RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
2051 	{
2052 		return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
2053 	}
2054 
operator >>(RValue<SByte> lhs,RValue<SByte> rhs)2055 	RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
2056 	{
2057 		return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
2058 	}
2059 
operator +=(SByte & lhs,RValue<SByte> rhs)2060 	RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
2061 	{
2062 		return lhs = lhs + rhs;
2063 	}
2064 
operator -=(SByte & lhs,RValue<SByte> rhs)2065 	RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
2066 	{
2067 		return lhs = lhs - rhs;
2068 	}
2069 
operator *=(SByte & lhs,RValue<SByte> rhs)2070 	RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
2071 	{
2072 		return lhs = lhs * rhs;
2073 	}
2074 
operator /=(SByte & lhs,RValue<SByte> rhs)2075 	RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
2076 	{
2077 		return lhs = lhs / rhs;
2078 	}
2079 
operator %=(SByte & lhs,RValue<SByte> rhs)2080 	RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
2081 	{
2082 		return lhs = lhs % rhs;
2083 	}
2084 
operator &=(SByte & lhs,RValue<SByte> rhs)2085 	RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
2086 	{
2087 		return lhs = lhs & rhs;
2088 	}
2089 
operator |=(SByte & lhs,RValue<SByte> rhs)2090 	RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
2091 	{
2092 		return lhs = lhs | rhs;
2093 	}
2094 
operator ^=(SByte & lhs,RValue<SByte> rhs)2095 	RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
2096 	{
2097 		return lhs = lhs ^ rhs;
2098 	}
2099 
operator <<=(SByte & lhs,RValue<SByte> rhs)2100 	RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
2101 	{
2102 		return lhs = lhs << rhs;
2103 	}
2104 
operator >>=(SByte & lhs,RValue<SByte> rhs)2105 	RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
2106 	{
2107 		return lhs = lhs >> rhs;
2108 	}
2109 
operator +(RValue<SByte> val)2110 	RValue<SByte> operator+(RValue<SByte> val)
2111 	{
2112 		return val;
2113 	}
2114 
operator -(RValue<SByte> val)2115 	RValue<SByte> operator-(RValue<SByte> val)
2116 	{
2117 		return RValue<SByte>(Nucleus::createNeg(val.value));
2118 	}
2119 
operator ~(RValue<SByte> val)2120 	RValue<SByte> operator~(RValue<SByte> val)
2121 	{
2122 		return RValue<SByte>(Nucleus::createNot(val.value));
2123 	}
2124 
operator ++(SByte & val,int)2125 	RValue<SByte> operator++(SByte &val, int)   // Post-increment
2126 	{
2127 		RValue<SByte> res = val;
2128 
2129 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1));
2130 		val.storeValue(inc);
2131 
2132 		return res;
2133 	}
2134 
operator ++(SByte & val)2135 	const SByte &operator++(SByte &val)   // Pre-increment
2136 	{
2137 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((signed char)1));
2138 		val.storeValue(inc);
2139 
2140 		return val;
2141 	}
2142 
operator --(SByte & val,int)2143 	RValue<SByte> operator--(SByte &val, int)   // Post-decrement
2144 	{
2145 		RValue<SByte> res = val;
2146 
2147 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1));
2148 		val.storeValue(inc);
2149 
2150 		return res;
2151 	}
2152 
operator --(SByte & val)2153 	const SByte &operator--(SByte &val)   // Pre-decrement
2154 	{
2155 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((signed char)1));
2156 		val.storeValue(inc);
2157 
2158 		return val;
2159 	}
2160 
operator <(RValue<SByte> lhs,RValue<SByte> rhs)2161 	RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
2162 	{
2163 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2164 	}
2165 
operator <=(RValue<SByte> lhs,RValue<SByte> rhs)2166 	RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
2167 	{
2168 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2169 	}
2170 
operator >(RValue<SByte> lhs,RValue<SByte> rhs)2171 	RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
2172 	{
2173 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2174 	}
2175 
operator >=(RValue<SByte> lhs,RValue<SByte> rhs)2176 	RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
2177 	{
2178 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2179 	}
2180 
operator !=(RValue<SByte> lhs,RValue<SByte> rhs)2181 	RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
2182 	{
2183 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2184 	}
2185 
operator ==(RValue<SByte> lhs,RValue<SByte> rhs)2186 	RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
2187 	{
2188 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2189 	}
2190 
getType()2191 	Type *SByte::getType()
2192 	{
2193 		return T(llvm::Type::getInt8Ty(*::context));
2194 	}
2195 
Short(Argument<Short> argument)2196 	Short::Short(Argument<Short> argument)
2197 	{
2198 		storeValue(argument.value);
2199 	}
2200 
Short(RValue<Int> cast)2201 	Short::Short(RValue<Int> cast)
2202 	{
2203 		Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
2204 
2205 		storeValue(integer);
2206 	}
2207 
Short(short x)2208 	Short::Short(short x)
2209 	{
2210 		storeValue(Nucleus::createConstantShort(x));
2211 	}
2212 
Short(RValue<Short> rhs)2213 	Short::Short(RValue<Short> rhs)
2214 	{
2215 		storeValue(rhs.value);
2216 	}
2217 
Short(const Short & rhs)2218 	Short::Short(const Short &rhs)
2219 	{
2220 		Value *value = rhs.loadValue();
2221 		storeValue(value);
2222 	}
2223 
Short(const Reference<Short> & rhs)2224 	Short::Short(const Reference<Short> &rhs)
2225 	{
2226 		Value *value = rhs.loadValue();
2227 		storeValue(value);
2228 	}
2229 
operator =(RValue<Short> rhs)2230 	RValue<Short> Short::operator=(RValue<Short> rhs)
2231 	{
2232 		storeValue(rhs.value);
2233 
2234 		return rhs;
2235 	}
2236 
operator =(const Short & rhs)2237 	RValue<Short> Short::operator=(const Short &rhs)
2238 	{
2239 		Value *value = rhs.loadValue();
2240 		storeValue(value);
2241 
2242 		return RValue<Short>(value);
2243 	}
2244 
operator =(const Reference<Short> & rhs)2245 	RValue<Short> Short::operator=(const Reference<Short> &rhs)
2246 	{
2247 		Value *value = rhs.loadValue();
2248 		storeValue(value);
2249 
2250 		return RValue<Short>(value);
2251 	}
2252 
operator +(RValue<Short> lhs,RValue<Short> rhs)2253 	RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
2254 	{
2255 		return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
2256 	}
2257 
operator -(RValue<Short> lhs,RValue<Short> rhs)2258 	RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
2259 	{
2260 		return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
2261 	}
2262 
operator *(RValue<Short> lhs,RValue<Short> rhs)2263 	RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
2264 	{
2265 		return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
2266 	}
2267 
operator /(RValue<Short> lhs,RValue<Short> rhs)2268 	RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
2269 	{
2270 		return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
2271 	}
2272 
operator %(RValue<Short> lhs,RValue<Short> rhs)2273 	RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
2274 	{
2275 		return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
2276 	}
2277 
operator &(RValue<Short> lhs,RValue<Short> rhs)2278 	RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
2279 	{
2280 		return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
2281 	}
2282 
operator |(RValue<Short> lhs,RValue<Short> rhs)2283 	RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
2284 	{
2285 		return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
2286 	}
2287 
operator ^(RValue<Short> lhs,RValue<Short> rhs)2288 	RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
2289 	{
2290 		return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
2291 	}
2292 
operator <<(RValue<Short> lhs,RValue<Short> rhs)2293 	RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
2294 	{
2295 		return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
2296 	}
2297 
operator >>(RValue<Short> lhs,RValue<Short> rhs)2298 	RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
2299 	{
2300 		return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
2301 	}
2302 
operator +=(Short & lhs,RValue<Short> rhs)2303 	RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
2304 	{
2305 		return lhs = lhs + rhs;
2306 	}
2307 
operator -=(Short & lhs,RValue<Short> rhs)2308 	RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
2309 	{
2310 		return lhs = lhs - rhs;
2311 	}
2312 
operator *=(Short & lhs,RValue<Short> rhs)2313 	RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
2314 	{
2315 		return lhs = lhs * rhs;
2316 	}
2317 
operator /=(Short & lhs,RValue<Short> rhs)2318 	RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
2319 	{
2320 		return lhs = lhs / rhs;
2321 	}
2322 
operator %=(Short & lhs,RValue<Short> rhs)2323 	RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
2324 	{
2325 		return lhs = lhs % rhs;
2326 	}
2327 
operator &=(Short & lhs,RValue<Short> rhs)2328 	RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
2329 	{
2330 		return lhs = lhs & rhs;
2331 	}
2332 
operator |=(Short & lhs,RValue<Short> rhs)2333 	RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
2334 	{
2335 		return lhs = lhs | rhs;
2336 	}
2337 
operator ^=(Short & lhs,RValue<Short> rhs)2338 	RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
2339 	{
2340 		return lhs = lhs ^ rhs;
2341 	}
2342 
operator <<=(Short & lhs,RValue<Short> rhs)2343 	RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
2344 	{
2345 		return lhs = lhs << rhs;
2346 	}
2347 
operator >>=(Short & lhs,RValue<Short> rhs)2348 	RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
2349 	{
2350 		return lhs = lhs >> rhs;
2351 	}
2352 
operator +(RValue<Short> val)2353 	RValue<Short> operator+(RValue<Short> val)
2354 	{
2355 		return val;
2356 	}
2357 
operator -(RValue<Short> val)2358 	RValue<Short> operator-(RValue<Short> val)
2359 	{
2360 		return RValue<Short>(Nucleus::createNeg(val.value));
2361 	}
2362 
operator ~(RValue<Short> val)2363 	RValue<Short> operator~(RValue<Short> val)
2364 	{
2365 		return RValue<Short>(Nucleus::createNot(val.value));
2366 	}
2367 
operator ++(Short & val,int)2368 	RValue<Short> operator++(Short &val, int)   // Post-increment
2369 	{
2370 		RValue<Short> res = val;
2371 
2372 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1));
2373 		val.storeValue(inc);
2374 
2375 		return res;
2376 	}
2377 
operator ++(Short & val)2378 	const Short &operator++(Short &val)   // Pre-increment
2379 	{
2380 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((short)1));
2381 		val.storeValue(inc);
2382 
2383 		return val;
2384 	}
2385 
operator --(Short & val,int)2386 	RValue<Short> operator--(Short &val, int)   // Post-decrement
2387 	{
2388 		RValue<Short> res = val;
2389 
2390 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1));
2391 		val.storeValue(inc);
2392 
2393 		return res;
2394 	}
2395 
operator --(Short & val)2396 	const Short &operator--(Short &val)   // Pre-decrement
2397 	{
2398 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((short)1));
2399 		val.storeValue(inc);
2400 
2401 		return val;
2402 	}
2403 
operator <(RValue<Short> lhs,RValue<Short> rhs)2404 	RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
2405 	{
2406 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
2407 	}
2408 
operator <=(RValue<Short> lhs,RValue<Short> rhs)2409 	RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
2410 	{
2411 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
2412 	}
2413 
operator >(RValue<Short> lhs,RValue<Short> rhs)2414 	RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
2415 	{
2416 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
2417 	}
2418 
operator >=(RValue<Short> lhs,RValue<Short> rhs)2419 	RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
2420 	{
2421 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
2422 	}
2423 
operator !=(RValue<Short> lhs,RValue<Short> rhs)2424 	RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
2425 	{
2426 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2427 	}
2428 
operator ==(RValue<Short> lhs,RValue<Short> rhs)2429 	RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
2430 	{
2431 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2432 	}
2433 
getType()2434 	Type *Short::getType()
2435 	{
2436 		return T(llvm::Type::getInt16Ty(*::context));
2437 	}
2438 
UShort(Argument<UShort> argument)2439 	UShort::UShort(Argument<UShort> argument)
2440 	{
2441 		storeValue(argument.value);
2442 	}
2443 
UShort(RValue<UInt> cast)2444 	UShort::UShort(RValue<UInt> cast)
2445 	{
2446 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2447 
2448 		storeValue(integer);
2449 	}
2450 
UShort(RValue<Int> cast)2451 	UShort::UShort(RValue<Int> cast)
2452 	{
2453 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
2454 
2455 		storeValue(integer);
2456 	}
2457 
UShort(unsigned short x)2458 	UShort::UShort(unsigned short x)
2459 	{
2460 		storeValue(Nucleus::createConstantShort(x));
2461 	}
2462 
UShort(RValue<UShort> rhs)2463 	UShort::UShort(RValue<UShort> rhs)
2464 	{
2465 		storeValue(rhs.value);
2466 	}
2467 
UShort(const UShort & rhs)2468 	UShort::UShort(const UShort &rhs)
2469 	{
2470 		Value *value = rhs.loadValue();
2471 		storeValue(value);
2472 	}
2473 
UShort(const Reference<UShort> & rhs)2474 	UShort::UShort(const Reference<UShort> &rhs)
2475 	{
2476 		Value *value = rhs.loadValue();
2477 		storeValue(value);
2478 	}
2479 
operator =(RValue<UShort> rhs)2480 	RValue<UShort> UShort::operator=(RValue<UShort> rhs)
2481 	{
2482 		storeValue(rhs.value);
2483 
2484 		return rhs;
2485 	}
2486 
operator =(const UShort & rhs)2487 	RValue<UShort> UShort::operator=(const UShort &rhs)
2488 	{
2489 		Value *value = rhs.loadValue();
2490 		storeValue(value);
2491 
2492 		return RValue<UShort>(value);
2493 	}
2494 
operator =(const Reference<UShort> & rhs)2495 	RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
2496 	{
2497 		Value *value = rhs.loadValue();
2498 		storeValue(value);
2499 
2500 		return RValue<UShort>(value);
2501 	}
2502 
operator +(RValue<UShort> lhs,RValue<UShort> rhs)2503 	RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
2504 	{
2505 		return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
2506 	}
2507 
operator -(RValue<UShort> lhs,RValue<UShort> rhs)2508 	RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
2509 	{
2510 		return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
2511 	}
2512 
operator *(RValue<UShort> lhs,RValue<UShort> rhs)2513 	RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
2514 	{
2515 		return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
2516 	}
2517 
operator /(RValue<UShort> lhs,RValue<UShort> rhs)2518 	RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
2519 	{
2520 		return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
2521 	}
2522 
operator %(RValue<UShort> lhs,RValue<UShort> rhs)2523 	RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
2524 	{
2525 		return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
2526 	}
2527 
operator &(RValue<UShort> lhs,RValue<UShort> rhs)2528 	RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
2529 	{
2530 		return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
2531 	}
2532 
operator |(RValue<UShort> lhs,RValue<UShort> rhs)2533 	RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
2534 	{
2535 		return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
2536 	}
2537 
operator ^(RValue<UShort> lhs,RValue<UShort> rhs)2538 	RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
2539 	{
2540 		return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
2541 	}
2542 
operator <<(RValue<UShort> lhs,RValue<UShort> rhs)2543 	RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
2544 	{
2545 		return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
2546 	}
2547 
operator >>(RValue<UShort> lhs,RValue<UShort> rhs)2548 	RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
2549 	{
2550 		return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
2551 	}
2552 
operator +=(UShort & lhs,RValue<UShort> rhs)2553 	RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
2554 	{
2555 		return lhs = lhs + rhs;
2556 	}
2557 
operator -=(UShort & lhs,RValue<UShort> rhs)2558 	RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
2559 	{
2560 		return lhs = lhs - rhs;
2561 	}
2562 
operator *=(UShort & lhs,RValue<UShort> rhs)2563 	RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
2564 	{
2565 		return lhs = lhs * rhs;
2566 	}
2567 
operator /=(UShort & lhs,RValue<UShort> rhs)2568 	RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
2569 	{
2570 		return lhs = lhs / rhs;
2571 	}
2572 
operator %=(UShort & lhs,RValue<UShort> rhs)2573 	RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
2574 	{
2575 		return lhs = lhs % rhs;
2576 	}
2577 
operator &=(UShort & lhs,RValue<UShort> rhs)2578 	RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
2579 	{
2580 		return lhs = lhs & rhs;
2581 	}
2582 
operator |=(UShort & lhs,RValue<UShort> rhs)2583 	RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
2584 	{
2585 		return lhs = lhs | rhs;
2586 	}
2587 
operator ^=(UShort & lhs,RValue<UShort> rhs)2588 	RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
2589 	{
2590 		return lhs = lhs ^ rhs;
2591 	}
2592 
operator <<=(UShort & lhs,RValue<UShort> rhs)2593 	RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
2594 	{
2595 		return lhs = lhs << rhs;
2596 	}
2597 
operator >>=(UShort & lhs,RValue<UShort> rhs)2598 	RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
2599 	{
2600 		return lhs = lhs >> rhs;
2601 	}
2602 
operator +(RValue<UShort> val)2603 	RValue<UShort> operator+(RValue<UShort> val)
2604 	{
2605 		return val;
2606 	}
2607 
operator -(RValue<UShort> val)2608 	RValue<UShort> operator-(RValue<UShort> val)
2609 	{
2610 		return RValue<UShort>(Nucleus::createNeg(val.value));
2611 	}
2612 
operator ~(RValue<UShort> val)2613 	RValue<UShort> operator~(RValue<UShort> val)
2614 	{
2615 		return RValue<UShort>(Nucleus::createNot(val.value));
2616 	}
2617 
operator ++(UShort & val,int)2618 	RValue<UShort> operator++(UShort &val, int)   // Post-increment
2619 	{
2620 		RValue<UShort> res = val;
2621 
2622 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1));
2623 		val.storeValue(inc);
2624 
2625 		return res;
2626 	}
2627 
operator ++(UShort & val)2628 	const UShort &operator++(UShort &val)   // Pre-increment
2629 	{
2630 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
2631 		val.storeValue(inc);
2632 
2633 		return val;
2634 	}
2635 
operator --(UShort & val,int)2636 	RValue<UShort> operator--(UShort &val, int)   // Post-decrement
2637 	{
2638 		RValue<UShort> res = val;
2639 
2640 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1));
2641 		val.storeValue(inc);
2642 
2643 		return res;
2644 	}
2645 
operator --(UShort & val)2646 	const UShort &operator--(UShort &val)   // Pre-decrement
2647 	{
2648 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
2649 		val.storeValue(inc);
2650 
2651 		return val;
2652 	}
2653 
operator <(RValue<UShort> lhs,RValue<UShort> rhs)2654 	RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
2655 	{
2656 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
2657 	}
2658 
operator <=(RValue<UShort> lhs,RValue<UShort> rhs)2659 	RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
2660 	{
2661 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
2662 	}
2663 
operator >(RValue<UShort> lhs,RValue<UShort> rhs)2664 	RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
2665 	{
2666 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
2667 	}
2668 
operator >=(RValue<UShort> lhs,RValue<UShort> rhs)2669 	RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
2670 	{
2671 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
2672 	}
2673 
operator !=(RValue<UShort> lhs,RValue<UShort> rhs)2674 	RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2675 	{
2676 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2677 	}
2678 
operator ==(RValue<UShort> lhs,RValue<UShort> rhs)2679 	RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2680 	{
2681 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2682 	}
2683 
getType()2684 	Type *UShort::getType()
2685 	{
2686 		return T(llvm::Type::getInt16Ty(*::context));
2687 	}
2688 
Byte4(RValue<Byte8> cast)2689 	Byte4::Byte4(RValue<Byte8> cast)
2690 	{
2691 		storeValue(Nucleus::createBitCast(cast.value, getType()));
2692 	}
2693 
Byte4(const Reference<Byte4> & rhs)2694 	Byte4::Byte4(const Reference<Byte4> &rhs)
2695 	{
2696 		Value *value = rhs.loadValue();
2697 		storeValue(value);
2698 	}
2699 
getType()2700 	Type *Byte4::getType()
2701 	{
2702 		return T(Type_v4i8);
2703 	}
2704 
getType()2705 	Type *SByte4::getType()
2706 	{
2707 		return T(Type_v4i8);
2708 	}
2709 
Byte8(uint8_t x0,uint8_t x1,uint8_t x2,uint8_t x3,uint8_t x4,uint8_t x5,uint8_t x6,uint8_t x7)2710 	Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2711 	{
2712 		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2713 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
2714 	}
2715 
Byte8(RValue<Byte8> rhs)2716 	Byte8::Byte8(RValue<Byte8> rhs)
2717 	{
2718 		storeValue(rhs.value);
2719 	}
2720 
Byte8(const Byte8 & rhs)2721 	Byte8::Byte8(const Byte8 &rhs)
2722 	{
2723 		Value *value = rhs.loadValue();
2724 		storeValue(value);
2725 	}
2726 
Byte8(const Reference<Byte8> & rhs)2727 	Byte8::Byte8(const Reference<Byte8> &rhs)
2728 	{
2729 		Value *value = rhs.loadValue();
2730 		storeValue(value);
2731 	}
2732 
operator =(RValue<Byte8> rhs)2733 	RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2734 	{
2735 		storeValue(rhs.value);
2736 
2737 		return rhs;
2738 	}
2739 
operator =(const Byte8 & rhs)2740 	RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2741 	{
2742 		Value *value = rhs.loadValue();
2743 		storeValue(value);
2744 
2745 		return RValue<Byte8>(value);
2746 	}
2747 
operator =(const Reference<Byte8> & rhs)2748 	RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2749 	{
2750 		Value *value = rhs.loadValue();
2751 		storeValue(value);
2752 
2753 		return RValue<Byte8>(value);
2754 	}
2755 
operator +(RValue<Byte8> lhs,RValue<Byte8> rhs)2756 	RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2757 	{
2758 		return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2759 	}
2760 
operator -(RValue<Byte8> lhs,RValue<Byte8> rhs)2761 	RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2762 	{
2763 		return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2764 	}
2765 
2766 //	RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2767 //	{
2768 //		return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2769 //	}
2770 
2771 //	RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2772 //	{
2773 //		return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2774 //	}
2775 
2776 //	RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2777 //	{
2778 //		return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2779 //	}
2780 
operator &(RValue<Byte8> lhs,RValue<Byte8> rhs)2781 	RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2782 	{
2783 		return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2784 	}
2785 
operator |(RValue<Byte8> lhs,RValue<Byte8> rhs)2786 	RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2787 	{
2788 		return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2789 	}
2790 
operator ^(RValue<Byte8> lhs,RValue<Byte8> rhs)2791 	RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2792 	{
2793 		return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2794 	}
2795 
2796 //	RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2797 //	{
2798 //		return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2799 //	}
2800 
2801 //	RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2802 //	{
2803 //		return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2804 //	}
2805 
operator +=(Byte8 & lhs,RValue<Byte8> rhs)2806 	RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2807 	{
2808 		return lhs = lhs + rhs;
2809 	}
2810 
operator -=(Byte8 & lhs,RValue<Byte8> rhs)2811 	RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2812 	{
2813 		return lhs = lhs - rhs;
2814 	}
2815 
2816 //	RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2817 //	{
2818 //		return lhs = lhs * rhs;
2819 //	}
2820 
2821 //	RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2822 //	{
2823 //		return lhs = lhs / rhs;
2824 //	}
2825 
2826 //	RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2827 //	{
2828 //		return lhs = lhs % rhs;
2829 //	}
2830 
operator &=(Byte8 & lhs,RValue<Byte8> rhs)2831 	RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2832 	{
2833 		return lhs = lhs & rhs;
2834 	}
2835 
operator |=(Byte8 & lhs,RValue<Byte8> rhs)2836 	RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2837 	{
2838 		return lhs = lhs | rhs;
2839 	}
2840 
operator ^=(Byte8 & lhs,RValue<Byte8> rhs)2841 	RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2842 	{
2843 		return lhs = lhs ^ rhs;
2844 	}
2845 
2846 //	RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2847 //	{
2848 //		return lhs = lhs << rhs;
2849 //	}
2850 
2851 //	RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2852 //	{
2853 //		return lhs = lhs >> rhs;
2854 //	}
2855 
2856 //	RValue<Byte8> operator+(RValue<Byte8> val)
2857 //	{
2858 //		return val;
2859 //	}
2860 
2861 //	RValue<Byte8> operator-(RValue<Byte8> val)
2862 //	{
2863 //		return RValue<Byte8>(Nucleus::createNeg(val.value));
2864 //	}
2865 
operator ~(RValue<Byte8> val)2866 	RValue<Byte8> operator~(RValue<Byte8> val)
2867 	{
2868 		return RValue<Byte8>(Nucleus::createNot(val.value));
2869 	}
2870 
AddSat(RValue<Byte8> x,RValue<Byte8> y)2871 	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2872 	{
2873 #if defined(__i386__) || defined(__x86_64__)
2874 		return x86::paddusb(x, y);
2875 #else
2876 		return As<Byte8>(V(lowerPUADDSAT(V(x.value), V(y.value))));
2877 #endif
2878 	}
2879 
SubSat(RValue<Byte8> x,RValue<Byte8> y)2880 	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2881 	{
2882 #if defined(__i386__) || defined(__x86_64__)
2883 		return x86::psubusb(x, y);
2884 #else
2885 		return As<Byte8>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
2886 #endif
2887 	}
2888 
Unpack(RValue<Byte4> x)2889 	RValue<Short4> Unpack(RValue<Byte4> x)
2890 	{
2891 		int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2892 		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2893 	}
2894 
Unpack(RValue<Byte4> x,RValue<Byte4> y)2895 	RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2896 	{
2897 		return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2898 	}
2899 
UnpackLow(RValue<Byte8> x,RValue<Byte8> y)2900 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2901 	{
2902 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2903 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2904 	}
2905 
UnpackHigh(RValue<Byte8> x,RValue<Byte8> y)2906 	RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2907 	{
2908 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2909 		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2910 		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2911 	}
2912 
SignMask(RValue<Byte8> x)2913 	RValue<Int> SignMask(RValue<Byte8> x)
2914 	{
2915 #if defined(__i386__) || defined(__x86_64__)
2916 		return x86::pmovmskb(x);
2917 #else
2918 		return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
2919 #endif
2920 	}
2921 
2922 //	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2923 //	{
2924 //#if defined(__i386__) || defined(__x86_64__)
2925 //		return x86::pcmpgtb(x, y);   // FIXME: Signedness
2926 //#else
2927 //		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
2928 //#endif
2929 //	}
2930 
CmpEQ(RValue<Byte8> x,RValue<Byte8> y)2931 	RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2932 	{
2933 #if defined(__i386__) || defined(__x86_64__)
2934 		return x86::pcmpeqb(x, y);
2935 #else
2936 		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
2937 #endif
2938 	}
2939 
getType()2940 	Type *Byte8::getType()
2941 	{
2942 		return T(Type_v8i8);
2943 	}
2944 
SByte8(uint8_t x0,uint8_t x1,uint8_t x2,uint8_t x3,uint8_t x4,uint8_t x5,uint8_t x6,uint8_t x7)2945 	SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2946 	{
2947 		int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2948 		Value *vector = Nucleus::createConstantVector(constantVector, getType());
2949 
2950 		storeValue(Nucleus::createBitCast(vector, getType()));
2951 	}
2952 
SByte8(RValue<SByte8> rhs)2953 	SByte8::SByte8(RValue<SByte8> rhs)
2954 	{
2955 		storeValue(rhs.value);
2956 	}
2957 
// Copy constructor: loads the other variable's value and stores it here.
SByte8::SByte8(const SByte8 &rhs)
{
	storeValue(rhs.loadValue());
}
2963 
SByte8(const Reference<SByte8> & rhs)2964 	SByte8::SByte8(const Reference<SByte8> &rhs)
2965 	{
2966 		Value *value = rhs.loadValue();
2967 		storeValue(value);
2968 	}
2969 
operator =(RValue<SByte8> rhs)2970 	RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2971 	{
2972 		storeValue(rhs.value);
2973 
2974 		return rhs;
2975 	}
2976 
operator =(const SByte8 & rhs)2977 	RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2978 	{
2979 		Value *value = rhs.loadValue();
2980 		storeValue(value);
2981 
2982 		return RValue<SByte8>(value);
2983 	}
2984 
operator =(const Reference<SByte8> & rhs)2985 	RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2986 	{
2987 		Value *value = rhs.loadValue();
2988 		storeValue(value);
2989 
2990 		return RValue<SByte8>(value);
2991 	}
2992 
operator +(RValue<SByte8> lhs,RValue<SByte8> rhs)2993 	RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2994 	{
2995 		return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2996 	}
2997 
operator -(RValue<SByte8> lhs,RValue<SByte8> rhs)2998 	RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2999 	{
3000 		return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
3001 	}
3002 
3003 //	RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
3004 //	{
3005 //		return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
3006 //	}
3007 
3008 //	RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
3009 //	{
3010 //		return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
3011 //	}
3012 
3013 //	RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
3014 //	{
3015 //		return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
3016 //	}
3017 
operator &(RValue<SByte8> lhs,RValue<SByte8> rhs)3018 	RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
3019 	{
3020 		return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
3021 	}
3022 
operator |(RValue<SByte8> lhs,RValue<SByte8> rhs)3023 	RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
3024 	{
3025 		return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
3026 	}
3027 
operator ^(RValue<SByte8> lhs,RValue<SByte8> rhs)3028 	RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
3029 	{
3030 		return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
3031 	}
3032 
3033 //	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
3034 //	{
3035 //		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
3036 //	}
3037 
3038 //	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
3039 //	{
3040 //		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
3041 //	}
3042 
operator +=(SByte8 & lhs,RValue<SByte8> rhs)3043 	RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
3044 	{
3045 		return lhs = lhs + rhs;
3046 	}
3047 
operator -=(SByte8 & lhs,RValue<SByte8> rhs)3048 	RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
3049 	{
3050 		return lhs = lhs - rhs;
3051 	}
3052 
3053 //	RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
3054 //	{
3055 //		return lhs = lhs * rhs;
3056 //	}
3057 
3058 //	RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
3059 //	{
3060 //		return lhs = lhs / rhs;
3061 //	}
3062 
3063 //	RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
3064 //	{
3065 //		return lhs = lhs % rhs;
3066 //	}
3067 
operator &=(SByte8 & lhs,RValue<SByte8> rhs)3068 	RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
3069 	{
3070 		return lhs = lhs & rhs;
3071 	}
3072 
operator |=(SByte8 & lhs,RValue<SByte8> rhs)3073 	RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
3074 	{
3075 		return lhs = lhs | rhs;
3076 	}
3077 
operator ^=(SByte8 & lhs,RValue<SByte8> rhs)3078 	RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
3079 	{
3080 		return lhs = lhs ^ rhs;
3081 	}
3082 
3083 //	RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
3084 //	{
3085 //		return lhs = lhs << rhs;
3086 //	}
3087 
3088 //	RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
3089 //	{
3090 //		return lhs = lhs >> rhs;
3091 //	}
3092 
3093 //	RValue<SByte8> operator+(RValue<SByte8> val)
3094 //	{
3095 //		return val;
3096 //	}
3097 
3098 //	RValue<SByte8> operator-(RValue<SByte8> val)
3099 //	{
3100 //		return RValue<SByte8>(Nucleus::createNeg(val.value));
3101 //	}
3102 
operator ~(RValue<SByte8> val)3103 	RValue<SByte8> operator~(RValue<SByte8> val)
3104 	{
3105 		return RValue<SByte8>(Nucleus::createNot(val.value));
3106 	}
3107 
AddSat(RValue<SByte8> x,RValue<SByte8> y)3108 	RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
3109 	{
3110 #if defined(__i386__) || defined(__x86_64__)
3111 		return x86::paddsb(x, y);
3112 #else
3113 		return As<SByte8>(V(lowerPSADDSAT(V(x.value), V(y.value))));
3114 #endif
3115 	}
3116 
SubSat(RValue<SByte8> x,RValue<SByte8> y)3117 	RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
3118 	{
3119 #if defined(__i386__) || defined(__x86_64__)
3120 		return x86::psubsb(x, y);
3121 #else
3122 		return As<SByte8>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
3123 #endif
3124 	}
3125 
UnpackLow(RValue<SByte8> x,RValue<SByte8> y)3126 	RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
3127 	{
3128 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
3129 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3130 	}
3131 
UnpackHigh(RValue<SByte8> x,RValue<SByte8> y)3132 	RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
3133 	{
3134 		int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
3135 		auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3136 		return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
3137 	}
3138 
SignMask(RValue<SByte8> x)3139 	RValue<Int> SignMask(RValue<SByte8> x)
3140 	{
3141 #if defined(__i386__) || defined(__x86_64__)
3142 		return x86::pmovmskb(As<Byte8>(x));
3143 #else
3144 		return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
3145 #endif
3146 	}
3147 
CmpGT(RValue<SByte8> x,RValue<SByte8> y)3148 	RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
3149 	{
3150 #if defined(__i386__) || defined(__x86_64__)
3151 		return x86::pcmpgtb(x, y);
3152 #else
3153 		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
3154 #endif
3155 	}
3156 
CmpEQ(RValue<SByte8> x,RValue<SByte8> y)3157 	RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
3158 	{
3159 #if defined(__i386__) || defined(__x86_64__)
3160 		return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
3161 #else
3162 		return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
3163 #endif
3164 	}
3165 
getType()3166 	Type *SByte8::getType()
3167 	{
3168 		return T(Type_v8i8);
3169 	}
3170 
Byte16(RValue<Byte16> rhs)3171 	Byte16::Byte16(RValue<Byte16> rhs)
3172 	{
3173 		storeValue(rhs.value);
3174 	}
3175 
// Copy constructor: loads the other variable's value and stores it here.
Byte16::Byte16(const Byte16 &rhs)
{
	storeValue(rhs.loadValue());
}
3181 
Byte16(const Reference<Byte16> & rhs)3182 	Byte16::Byte16(const Reference<Byte16> &rhs)
3183 	{
3184 		Value *value = rhs.loadValue();
3185 		storeValue(value);
3186 	}
3187 
operator =(RValue<Byte16> rhs)3188 	RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
3189 	{
3190 		storeValue(rhs.value);
3191 
3192 		return rhs;
3193 	}
3194 
operator =(const Byte16 & rhs)3195 	RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
3196 	{
3197 		Value *value = rhs.loadValue();
3198 		storeValue(value);
3199 
3200 		return RValue<Byte16>(value);
3201 	}
3202 
operator =(const Reference<Byte16> & rhs)3203 	RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
3204 	{
3205 		Value *value = rhs.loadValue();
3206 		storeValue(value);
3207 
3208 		return RValue<Byte16>(value);
3209 	}
3210 
getType()3211 	Type *Byte16::getType()
3212 	{
3213 		return T(llvm::VectorType::get(T(Byte::getType()), 16));
3214 	}
3215 
getType()3216 	Type *SByte16::getType()
3217 	{
3218 		return T(llvm::VectorType::get(T(SByte::getType()), 16));
3219 	}
3220 
Short2(RValue<Short4> cast)3221 	Short2::Short2(RValue<Short4> cast)
3222 	{
3223 		storeValue(Nucleus::createBitCast(cast.value, getType()));
3224 	}
3225 
getType()3226 	Type *Short2::getType()
3227 	{
3228 		return T(Type_v2i16);
3229 	}
3230 
UShort2(RValue<UShort4> cast)3231 	UShort2::UShort2(RValue<UShort4> cast)
3232 	{
3233 		storeValue(Nucleus::createBitCast(cast.value, getType()));
3234 	}
3235 
getType()3236 	Type *UShort2::getType()
3237 	{
3238 		return T(Type_v2i16);
3239 	}
3240 
Short4(RValue<Int> cast)3241 	Short4::Short4(RValue<Int> cast)
3242 	{
3243 		Value *vector = loadValue();
3244 		Value *element = Nucleus::createTrunc(cast.value, Short::getType());
3245 		Value *insert = Nucleus::createInsertElement(vector, element, 0);
3246 		Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
3247 
3248 		storeValue(swizzle);
3249 	}
3250 
Short4(RValue<Int4> cast)3251 	Short4::Short4(RValue<Int4> cast)
3252 	{
3253 		int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
3254 		Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
3255 
3256 		Value *packed = Nucleus::createShuffleVector(short8, short8, select);
3257 		Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
3258 
3259 		storeValue(short4);
3260 	}
3261 
3262 //	Short4::Short4(RValue<Float> cast)
3263 //	{
3264 //	}
3265 
Short4(RValue<Float4> cast)3266 	Short4::Short4(RValue<Float4> cast)
3267 	{
3268 		Int4 v4i32 = Int4(cast);
3269 #if defined(__i386__) || defined(__x86_64__)
3270 		v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
3271 #else
3272 		Value *v = v4i32.loadValue();
3273 		v4i32 = As<Int4>(V(lowerPack(V(v), V(v), true)));
3274 #endif
3275 
3276 		storeValue(As<Short4>(Int2(v4i32)).value);
3277 	}
3278 
Short4(short xyzw)3279 	Short4::Short4(short xyzw)
3280 	{
3281 		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3282 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3283 	}
3284 
Short4(short x,short y,short z,short w)3285 	Short4::Short4(short x, short y, short z, short w)
3286 	{
3287 		int64_t constantVector[4] = {x, y, z, w};
3288 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3289 	}
3290 
Short4(RValue<Short4> rhs)3291 	Short4::Short4(RValue<Short4> rhs)
3292 	{
3293 		storeValue(rhs.value);
3294 	}
3295 
// Copy constructor: loads the other variable's value and stores it here.
Short4::Short4(const Short4 &rhs)
{
	storeValue(rhs.loadValue());
}
3301 
Short4(const Reference<Short4> & rhs)3302 	Short4::Short4(const Reference<Short4> &rhs)
3303 	{
3304 		Value *value = rhs.loadValue();
3305 		storeValue(value);
3306 	}
3307 
Short4(RValue<UShort4> rhs)3308 	Short4::Short4(RValue<UShort4> rhs)
3309 	{
3310 		storeValue(rhs.value);
3311 	}
3312 
Short4(const UShort4 & rhs)3313 	Short4::Short4(const UShort4 &rhs)
3314 	{
3315 		storeValue(rhs.loadValue());
3316 	}
3317 
Short4(const Reference<UShort4> & rhs)3318 	Short4::Short4(const Reference<UShort4> &rhs)
3319 	{
3320 		storeValue(rhs.loadValue());
3321 	}
3322 
operator =(RValue<Short4> rhs)3323 	RValue<Short4> Short4::operator=(RValue<Short4> rhs)
3324 	{
3325 		storeValue(rhs.value);
3326 
3327 		return rhs;
3328 	}
3329 
operator =(const Short4 & rhs)3330 	RValue<Short4> Short4::operator=(const Short4 &rhs)
3331 	{
3332 		Value *value = rhs.loadValue();
3333 		storeValue(value);
3334 
3335 		return RValue<Short4>(value);
3336 	}
3337 
operator =(const Reference<Short4> & rhs)3338 	RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
3339 	{
3340 		Value *value = rhs.loadValue();
3341 		storeValue(value);
3342 
3343 		return RValue<Short4>(value);
3344 	}
3345 
operator =(RValue<UShort4> rhs)3346 	RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
3347 	{
3348 		storeValue(rhs.value);
3349 
3350 		return RValue<Short4>(rhs);
3351 	}
3352 
operator =(const UShort4 & rhs)3353 	RValue<Short4> Short4::operator=(const UShort4 &rhs)
3354 	{
3355 		Value *value = rhs.loadValue();
3356 		storeValue(value);
3357 
3358 		return RValue<Short4>(value);
3359 	}
3360 
operator =(const Reference<UShort4> & rhs)3361 	RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
3362 	{
3363 		Value *value = rhs.loadValue();
3364 		storeValue(value);
3365 
3366 		return RValue<Short4>(value);
3367 	}
3368 
operator +(RValue<Short4> lhs,RValue<Short4> rhs)3369 	RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
3370 	{
3371 		return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
3372 	}
3373 
operator -(RValue<Short4> lhs,RValue<Short4> rhs)3374 	RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
3375 	{
3376 		return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
3377 	}
3378 
operator *(RValue<Short4> lhs,RValue<Short4> rhs)3379 	RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
3380 	{
3381 		return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
3382 	}
3383 
3384 //	RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
3385 //	{
3386 //		return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
3387 //	}
3388 
3389 //	RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
3390 //	{
3391 //		return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
3392 //	}
3393 
operator &(RValue<Short4> lhs,RValue<Short4> rhs)3394 	RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
3395 	{
3396 		return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
3397 	}
3398 
operator |(RValue<Short4> lhs,RValue<Short4> rhs)3399 	RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
3400 	{
3401 		return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
3402 	}
3403 
operator ^(RValue<Short4> lhs,RValue<Short4> rhs)3404 	RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
3405 	{
3406 		return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
3407 	}
3408 
operator <<(RValue<Short4> lhs,unsigned char rhs)3409 	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
3410 	{
3411 #if defined(__i386__) || defined(__x86_64__)
3412 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3413 
3414 		return x86::psllw(lhs, rhs);
3415 #else
3416 		return As<Short4>(V(lowerVectorShl(V(lhs.value), rhs)));
3417 #endif
3418 	}
3419 
operator >>(RValue<Short4> lhs,unsigned char rhs)3420 	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
3421 	{
3422 #if defined(__i386__) || defined(__x86_64__)
3423 		return x86::psraw(lhs, rhs);
3424 #else
3425 		return As<Short4>(V(lowerVectorAShr(V(lhs.value), rhs)));
3426 #endif
3427 	}
3428 
operator +=(Short4 & lhs,RValue<Short4> rhs)3429 	RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
3430 	{
3431 		return lhs = lhs + rhs;
3432 	}
3433 
operator -=(Short4 & lhs,RValue<Short4> rhs)3434 	RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
3435 	{
3436 		return lhs = lhs - rhs;
3437 	}
3438 
operator *=(Short4 & lhs,RValue<Short4> rhs)3439 	RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
3440 	{
3441 		return lhs = lhs * rhs;
3442 	}
3443 
3444 //	RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
3445 //	{
3446 //		return lhs = lhs / rhs;
3447 //	}
3448 
3449 //	RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
3450 //	{
3451 //		return lhs = lhs % rhs;
3452 //	}
3453 
operator &=(Short4 & lhs,RValue<Short4> rhs)3454 	RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
3455 	{
3456 		return lhs = lhs & rhs;
3457 	}
3458 
operator |=(Short4 & lhs,RValue<Short4> rhs)3459 	RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
3460 	{
3461 		return lhs = lhs | rhs;
3462 	}
3463 
operator ^=(Short4 & lhs,RValue<Short4> rhs)3464 	RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
3465 	{
3466 		return lhs = lhs ^ rhs;
3467 	}
3468 
operator <<=(Short4 & lhs,unsigned char rhs)3469 	RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
3470 	{
3471 		return lhs = lhs << rhs;
3472 	}
3473 
operator >>=(Short4 & lhs,unsigned char rhs)3474 	RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
3475 	{
3476 		return lhs = lhs >> rhs;
3477 	}
3478 
3479 //	RValue<Short4> operator+(RValue<Short4> val)
3480 //	{
3481 //		return val;
3482 //	}
3483 
operator -(RValue<Short4> val)3484 	RValue<Short4> operator-(RValue<Short4> val)
3485 	{
3486 		return RValue<Short4>(Nucleus::createNeg(val.value));
3487 	}
3488 
operator ~(RValue<Short4> val)3489 	RValue<Short4> operator~(RValue<Short4> val)
3490 	{
3491 		return RValue<Short4>(Nucleus::createNot(val.value));
3492 	}
3493 
RoundShort4(RValue<Float4> cast)3494 	RValue<Short4> RoundShort4(RValue<Float4> cast)
3495 	{
3496 		RValue<Int4> int4 = RoundInt(cast);
3497 		return As<Short4>(PackSigned(int4, int4));
3498 	}
3499 
Max(RValue<Short4> x,RValue<Short4> y)3500 	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3501 	{
3502 #if defined(__i386__) || defined(__x86_64__)
3503 		return x86::pmaxsw(x, y);
3504 #else
3505 		return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
3506 #endif
3507 	}
3508 
Min(RValue<Short4> x,RValue<Short4> y)3509 	RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3510 	{
3511 #if defined(__i386__) || defined(__x86_64__)
3512 		return x86::pminsw(x, y);
3513 #else
3514 		return RValue<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
3515 #endif
3516 	}
3517 
AddSat(RValue<Short4> x,RValue<Short4> y)3518 	RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3519 	{
3520 #if defined(__i386__) || defined(__x86_64__)
3521 		return x86::paddsw(x, y);
3522 #else
3523 		return As<Short4>(V(lowerPSADDSAT(V(x.value), V(y.value))));
3524 #endif
3525 	}
3526 
SubSat(RValue<Short4> x,RValue<Short4> y)3527 	RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3528 	{
3529 #if defined(__i386__) || defined(__x86_64__)
3530 		return x86::psubsw(x, y);
3531 #else
3532 		return As<Short4>(V(lowerPSSUBSAT(V(x.value), V(y.value))));
3533 #endif
3534 	}
3535 
MulHigh(RValue<Short4> x,RValue<Short4> y)3536 	RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3537 	{
3538 #if defined(__i386__) || defined(__x86_64__)
3539 		return x86::pmulhw(x, y);
3540 #else
3541 		return As<Short4>(V(lowerMulHigh(V(x.value), V(y.value), true)));
3542 #endif
3543 	}
3544 
MulAdd(RValue<Short4> x,RValue<Short4> y)3545 	RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3546 	{
3547 #if defined(__i386__) || defined(__x86_64__)
3548 		return x86::pmaddwd(x, y);
3549 #else
3550 		return As<Int2>(V(lowerMulAdd(V(x.value), V(y.value))));
3551 #endif
3552 	}
3553 
PackSigned(RValue<Short4> x,RValue<Short4> y)3554 	RValue<SByte8> PackSigned(RValue<Short4> x, RValue<Short4> y)
3555 	{
3556 #if defined(__i386__) || defined(__x86_64__)
3557 		auto result = x86::packsswb(x, y);
3558 #else
3559 		auto result = V(lowerPack(V(x.value), V(y.value), true));
3560 #endif
3561 		return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
3562 	}
3563 
PackUnsigned(RValue<Short4> x,RValue<Short4> y)3564 	RValue<Byte8> PackUnsigned(RValue<Short4> x, RValue<Short4> y)
3565 	{
3566 #if defined(__i386__) || defined(__x86_64__)
3567 		auto result = x86::packuswb(x, y);
3568 #else
3569 		auto result = V(lowerPack(V(x.value), V(y.value), false));
3570 #endif
3571 		return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
3572 	}
3573 
UnpackLow(RValue<Short4> x,RValue<Short4> y)3574 	RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3575 	{
3576 		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3577 		return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3578 	}
3579 
UnpackHigh(RValue<Short4> x,RValue<Short4> y)3580 	RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3581 	{
3582 		int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
3583 		auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
3584 		return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
3585 	}
3586 
Swizzle(RValue<Short4> x,unsigned char select)3587 	RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3588 	{
3589 		// Real type is v8i16
3590 		int shuffle[8] =
3591 		{
3592 			(select >> 0) & 0x03,
3593 			(select >> 2) & 0x03,
3594 			(select >> 4) & 0x03,
3595 			(select >> 6) & 0x03,
3596 			(select >> 0) & 0x03,
3597 			(select >> 2) & 0x03,
3598 			(select >> 4) & 0x03,
3599 			(select >> 6) & 0x03,
3600 		};
3601 
3602 		return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
3603 	}
3604 
Insert(RValue<Short4> val,RValue<Short> element,int i)3605 	RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3606 	{
3607 		return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3608 	}
3609 
Extract(RValue<Short4> val,int i)3610 	RValue<Short> Extract(RValue<Short4> val, int i)
3611 	{
3612 		return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3613 	}
3614 
CmpGT(RValue<Short4> x,RValue<Short4> y)3615 	RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3616 	{
3617 #if defined(__i386__) || defined(__x86_64__)
3618 		return x86::pcmpgtw(x, y);
3619 #else
3620 		return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
3621 #endif
3622 	}
3623 
CmpEQ(RValue<Short4> x,RValue<Short4> y)3624 	RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3625 	{
3626 #if defined(__i386__) || defined(__x86_64__)
3627 		return x86::pcmpeqw(x, y);
3628 #else
3629 		return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
3630 #endif
3631 	}
3632 
getType()3633 	Type *Short4::getType()
3634 	{
3635 		return T(Type_v4i16);
3636 	}
3637 
UShort4(RValue<Int4> cast)3638 	UShort4::UShort4(RValue<Int4> cast)
3639 	{
3640 		*this = Short4(cast);
3641 	}
3642 
UShort4(RValue<Float4> cast,bool saturate)3643 	UShort4::UShort4(RValue<Float4> cast, bool saturate)
3644 	{
3645 		if(saturate)
3646 		{
3647 #if defined(__i386__) || defined(__x86_64__)
3648 			if(CPUID::supportsSSE4_1())
3649 			{
3650 				Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
3651 				*this = As<Short4>(PackUnsigned(int4, int4));
3652 			}
3653 			else
3654 #endif
3655 			{
3656 				*this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
3657 			}
3658 		}
3659 		else
3660 		{
3661 			*this = Short4(Int4(cast));
3662 		}
3663 	}
3664 
UShort4(unsigned short xyzw)3665 	UShort4::UShort4(unsigned short xyzw)
3666 	{
3667 		int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3668 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3669 	}
3670 
UShort4(unsigned short x,unsigned short y,unsigned short z,unsigned short w)3671 	UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3672 	{
3673 		int64_t constantVector[4] = {x, y, z, w};
3674 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3675 	}
3676 
UShort4(RValue<UShort4> rhs)3677 	UShort4::UShort4(RValue<UShort4> rhs)
3678 	{
3679 		storeValue(rhs.value);
3680 	}
3681 
// Copy constructor: loads the other variable's value and stores it here.
UShort4::UShort4(const UShort4 &rhs)
{
	storeValue(rhs.loadValue());
}
3687 
UShort4(const Reference<UShort4> & rhs)3688 	UShort4::UShort4(const Reference<UShort4> &rhs)
3689 	{
3690 		Value *value = rhs.loadValue();
3691 		storeValue(value);
3692 	}
3693 
UShort4(RValue<Short4> rhs)3694 	UShort4::UShort4(RValue<Short4> rhs)
3695 	{
3696 		storeValue(rhs.value);
3697 	}
3698 
UShort4(const Short4 & rhs)3699 	UShort4::UShort4(const Short4 &rhs)
3700 	{
3701 		Value *value = rhs.loadValue();
3702 		storeValue(value);
3703 	}
3704 
UShort4(const Reference<Short4> & rhs)3705 	UShort4::UShort4(const Reference<Short4> &rhs)
3706 	{
3707 		Value *value = rhs.loadValue();
3708 		storeValue(value);
3709 	}
3710 
operator =(RValue<UShort4> rhs)3711 	RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3712 	{
3713 		storeValue(rhs.value);
3714 
3715 		return rhs;
3716 	}
3717 
operator =(const UShort4 & rhs)3718 	RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3719 	{
3720 		Value *value = rhs.loadValue();
3721 		storeValue(value);
3722 
3723 		return RValue<UShort4>(value);
3724 	}
3725 
operator =(const Reference<UShort4> & rhs)3726 	RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3727 	{
3728 		Value *value = rhs.loadValue();
3729 		storeValue(value);
3730 
3731 		return RValue<UShort4>(value);
3732 	}
3733 
operator =(RValue<Short4> rhs)3734 	RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3735 	{
3736 		storeValue(rhs.value);
3737 
3738 		return RValue<UShort4>(rhs);
3739 	}
3740 
operator =(const Short4 & rhs)3741 	RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3742 	{
3743 		Value *value = rhs.loadValue();
3744 		storeValue(value);
3745 
3746 		return RValue<UShort4>(value);
3747 	}
3748 
operator =(const Reference<Short4> & rhs)3749 	RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3750 	{
3751 		Value *value = rhs.loadValue();
3752 		storeValue(value);
3753 
3754 		return RValue<UShort4>(value);
3755 	}
3756 
operator +(RValue<UShort4> lhs,RValue<UShort4> rhs)3757 	RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3758 	{
3759 		return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3760 	}
3761 
operator -(RValue<UShort4> lhs,RValue<UShort4> rhs)3762 	RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3763 	{
3764 		return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3765 	}
3766 
operator *(RValue<UShort4> lhs,RValue<UShort4> rhs)3767 	RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3768 	{
3769 		return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3770 	}
3771 
operator &(RValue<UShort4> lhs,RValue<UShort4> rhs)3772 	RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3773 	{
3774 		return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3775 	}
3776 
operator |(RValue<UShort4> lhs,RValue<UShort4> rhs)3777 	RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3778 	{
3779 		return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3780 	}
3781 
operator ^(RValue<UShort4> lhs,RValue<UShort4> rhs)3782 	RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3783 	{
3784 		return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3785 	}
3786 
operator <<(RValue<UShort4> lhs,unsigned char rhs)3787 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3788 	{
3789 #if defined(__i386__) || defined(__x86_64__)
3790 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3791 
3792 		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3793 #else
3794 		return As<UShort4>(V(lowerVectorShl(V(lhs.value), rhs)));
3795 #endif
3796 	}
3797 
operator >>(RValue<UShort4> lhs,unsigned char rhs)3798 	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3799 	{
3800 #if defined(__i386__) || defined(__x86_64__)
3801 	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3802 
3803 		return x86::psrlw(lhs, rhs);
3804 #else
3805 		return As<UShort4>(V(lowerVectorLShr(V(lhs.value), rhs)));
3806 #endif
3807 	}
3808 
operator <<=(UShort4 & lhs,unsigned char rhs)3809 	RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3810 	{
3811 		return lhs = lhs << rhs;
3812 	}
3813 
operator >>=(UShort4 & lhs,unsigned char rhs)3814 	RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3815 	{
3816 		return lhs = lhs >> rhs;
3817 	}
3818 
operator ~(RValue<UShort4> val)3819 	RValue<UShort4> operator~(RValue<UShort4> val)
3820 	{
3821 		return RValue<UShort4>(Nucleus::createNot(val.value));
3822 	}
3823 
Max(RValue<UShort4> x,RValue<UShort4> y)3824 	RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3825 	{
3826 		return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3827 	}
3828 
Min(RValue<UShort4> x,RValue<UShort4> y)3829 	RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3830 	{
3831 		return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3832 	}
3833 
AddSat(RValue<UShort4> x,RValue<UShort4> y)3834 	RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3835 	{
3836 #if defined(__i386__) || defined(__x86_64__)
3837 		return x86::paddusw(x, y);
3838 #else
3839 		return As<UShort4>(V(lowerPUADDSAT(V(x.value), V(y.value))));
3840 #endif
3841 	}
3842 
SubSat(RValue<UShort4> x,RValue<UShort4> y)3843 	RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3844 	{
3845 #if defined(__i386__) || defined(__x86_64__)
3846 		return x86::psubusw(x, y);
3847 #else
3848 		return As<UShort4>(V(lowerPUSUBSAT(V(x.value), V(y.value))));
3849 #endif
3850 	}
3851 
MulHigh(RValue<UShort4> x,RValue<UShort4> y)3852 	RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3853 	{
3854 #if defined(__i386__) || defined(__x86_64__)
3855 		return x86::pmulhuw(x, y);
3856 #else
3857 		return As<UShort4>(V(lowerMulHigh(V(x.value), V(y.value), false)));
3858 #endif
3859 	}
3860 
Average(RValue<UShort4> x,RValue<UShort4> y)3861 	RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3862 	{
3863 #if defined(__i386__) || defined(__x86_64__)
3864 		return x86::pavgw(x, y);
3865 #else
3866 		return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
3867 #endif
3868 	}
3869 
getType()3870 	Type *UShort4::getType()
3871 	{
3872 		return T(Type_v4i16);
3873 	}
3874 
Short8(short c)3875 	Short8::Short8(short c)
3876 	{
3877 		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3878 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3879 	}
3880 
Short8(short c0,short c1,short c2,short c3,short c4,short c5,short c6,short c7)3881 	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3882 	{
3883 		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3884 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3885 	}
3886 
Short8(RValue<Short8> rhs)3887 	Short8::Short8(RValue<Short8> rhs)
3888 	{
3889 		storeValue(rhs.value);
3890 	}
3891 
Short8(const Reference<Short8> & rhs)3892 	Short8::Short8(const Reference<Short8> &rhs)
3893 	{
3894 		Value *value = rhs.loadValue();
3895 		storeValue(value);
3896 	}
3897 
Short8(RValue<Short4> lo,RValue<Short4> hi)3898 	Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3899 	{
3900 		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3901 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3902 
3903 		storeValue(packed);
3904 	}
3905 
operator +(RValue<Short8> lhs,RValue<Short8> rhs)3906 	RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3907 	{
3908 		return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3909 	}
3910 
operator &(RValue<Short8> lhs,RValue<Short8> rhs)3911 	RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3912 	{
3913 		return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3914 	}
3915 
operator <<(RValue<Short8> lhs,unsigned char rhs)3916 	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3917 	{
3918 #if defined(__i386__) || defined(__x86_64__)
3919 		return x86::psllw(lhs, rhs);
3920 #else
3921 		return As<Short8>(V(lowerVectorShl(V(lhs.value), rhs)));
3922 #endif
3923 	}
3924 
operator >>(RValue<Short8> lhs,unsigned char rhs)3925 	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3926 	{
3927 #if defined(__i386__) || defined(__x86_64__)
3928 		return x86::psraw(lhs, rhs);
3929 #else
3930 		return As<Short8>(V(lowerVectorAShr(V(lhs.value), rhs)));
3931 #endif
3932 	}
3933 
MulAdd(RValue<Short8> x,RValue<Short8> y)3934 	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3935 	{
3936 #if defined(__i386__) || defined(__x86_64__)
3937 		return x86::pmaddwd(x, y);
3938 #else
3939 		return As<Int4>(V(lowerMulAdd(V(x.value), V(y.value))));
3940 #endif
3941 	}
3942 
Abs(RValue<Int4> x)3943 	RValue<Int4> Abs(RValue<Int4> x)
3944 	{
3945 		auto negative = x >> 31;
3946 		return (x ^ negative) - negative;
3947 	}
3948 
MulHigh(RValue<Short8> x,RValue<Short8> y)3949 	RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3950 	{
3951 #if defined(__i386__) || defined(__x86_64__)
3952 		return x86::pmulhw(x, y);
3953 #else
3954 		return As<Short8>(V(lowerMulHigh(V(x.value), V(y.value), true)));
3955 #endif
3956 	}
3957 
getType()3958 	Type *Short8::getType()
3959 	{
3960 		return T(llvm::VectorType::get(T(Short::getType()), 8));
3961 	}
3962 
UShort8(unsigned short c)3963 	UShort8::UShort8(unsigned short c)
3964 	{
3965 		int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3966 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3967 	}
3968 
UShort8(unsigned short c0,unsigned short c1,unsigned short c2,unsigned short c3,unsigned short c4,unsigned short c5,unsigned short c6,unsigned short c7)3969 	UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3970 	{
3971 		int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3972 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
3973 	}
3974 
UShort8(RValue<UShort8> rhs)3975 	UShort8::UShort8(RValue<UShort8> rhs)
3976 	{
3977 		storeValue(rhs.value);
3978 	}
3979 
UShort8(const Reference<UShort8> & rhs)3980 	UShort8::UShort8(const Reference<UShort8> &rhs)
3981 	{
3982 		Value *value = rhs.loadValue();
3983 		storeValue(value);
3984 	}
3985 
UShort8(RValue<UShort4> lo,RValue<UShort4> hi)3986 	UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3987 	{
3988 		int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3989 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3990 
3991 		storeValue(packed);
3992 	}
3993 
operator =(RValue<UShort8> rhs)3994 	RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3995 	{
3996 		storeValue(rhs.value);
3997 
3998 		return rhs;
3999 	}
4000 
operator =(const UShort8 & rhs)4001 	RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
4002 	{
4003 		Value *value = rhs.loadValue();
4004 		storeValue(value);
4005 
4006 		return RValue<UShort8>(value);
4007 	}
4008 
operator =(const Reference<UShort8> & rhs)4009 	RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
4010 	{
4011 		Value *value = rhs.loadValue();
4012 		storeValue(value);
4013 
4014 		return RValue<UShort8>(value);
4015 	}
4016 
operator &(RValue<UShort8> lhs,RValue<UShort8> rhs)4017 	RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
4018 	{
4019 		return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
4020 	}
4021 
operator <<(RValue<UShort8> lhs,unsigned char rhs)4022 	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
4023 	{
4024 #if defined(__i386__) || defined(__x86_64__)
4025 		return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));
4026 #else
4027 		return As<UShort8>(V(lowerVectorShl(V(lhs.value), rhs)));
4028 #endif
4029 	}
4030 
operator >>(RValue<UShort8> lhs,unsigned char rhs)4031 	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
4032 	{
4033 #if defined(__i386__) || defined(__x86_64__)
4034 		return x86::psrlw(lhs, rhs);   // FIXME: Fallback required
4035 #else
4036 		return As<UShort8>(V(lowerVectorLShr(V(lhs.value), rhs)));
4037 #endif
4038 	}
4039 
operator +(RValue<UShort8> lhs,RValue<UShort8> rhs)4040 	RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
4041 	{
4042 		return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
4043 	}
4044 
operator *(RValue<UShort8> lhs,RValue<UShort8> rhs)4045 	RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
4046 	{
4047 		return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
4048 	}
4049 
operator +=(UShort8 & lhs,RValue<UShort8> rhs)4050 	RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
4051 	{
4052 		return lhs = lhs + rhs;
4053 	}
4054 
operator ~(RValue<UShort8> val)4055 	RValue<UShort8> operator~(RValue<UShort8> val)
4056 	{
4057 		return RValue<UShort8>(Nucleus::createNot(val.value));
4058 	}
4059 
Swizzle(RValue<UShort8> x,char select0,char select1,char select2,char select3,char select4,char select5,char select6,char select7)4060 	RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
4061 	{
4062 		int pshufb[16] =
4063 		{
4064 			select0 + 0,
4065 			select0 + 1,
4066 			select1 + 0,
4067 			select1 + 1,
4068 			select2 + 0,
4069 			select2 + 1,
4070 			select3 + 0,
4071 			select3 + 1,
4072 			select4 + 0,
4073 			select4 + 1,
4074 			select5 + 0,
4075 			select5 + 1,
4076 			select6 + 0,
4077 			select6 + 1,
4078 			select7 + 0,
4079 			select7 + 1,
4080 		};
4081 
4082 		Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
4083 		Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
4084 		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
4085 
4086 		return RValue<UShort8>(short8);
4087 	}
4088 
MulHigh(RValue<UShort8> x,RValue<UShort8> y)4089 	RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
4090 	{
4091 #if defined(__i386__) || defined(__x86_64__)
4092 		return x86::pmulhuw(x, y);
4093 #else
4094 		return As<UShort8>(V(lowerMulHigh(V(x.value), V(y.value), false)));
4095 #endif
4096 	}
4097 
getType()4098 	Type *UShort8::getType()
4099 	{
4100 		return T(llvm::VectorType::get(T(UShort::getType()), 8));
4101 	}
4102 
Int(Argument<Int> argument)4103 	Int::Int(Argument<Int> argument)
4104 	{
4105 		storeValue(argument.value);
4106 	}
4107 
Int(RValue<Byte> cast)4108 	Int::Int(RValue<Byte> cast)
4109 	{
4110 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4111 
4112 		storeValue(integer);
4113 	}
4114 
Int(RValue<SByte> cast)4115 	Int::Int(RValue<SByte> cast)
4116 	{
4117 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4118 
4119 		storeValue(integer);
4120 	}
4121 
Int(RValue<Short> cast)4122 	Int::Int(RValue<Short> cast)
4123 	{
4124 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
4125 
4126 		storeValue(integer);
4127 	}
4128 
Int(RValue<UShort> cast)4129 	Int::Int(RValue<UShort> cast)
4130 	{
4131 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
4132 
4133 		storeValue(integer);
4134 	}
4135 
Int(RValue<Int2> cast)4136 	Int::Int(RValue<Int2> cast)
4137 	{
4138 		*this = Extract(cast, 0);
4139 	}
4140 
Int(RValue<Long> cast)4141 	Int::Int(RValue<Long> cast)
4142 	{
4143 		Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
4144 
4145 		storeValue(integer);
4146 	}
4147 
Int(RValue<Float> cast)4148 	Int::Int(RValue<Float> cast)
4149 	{
4150 		Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
4151 
4152 		storeValue(integer);
4153 	}
4154 
Int(int x)4155 	Int::Int(int x)
4156 	{
4157 		storeValue(Nucleus::createConstantInt(x));
4158 	}
4159 
Int(RValue<Int> rhs)4160 	Int::Int(RValue<Int> rhs)
4161 	{
4162 		storeValue(rhs.value);
4163 	}
4164 
Int(RValue<UInt> rhs)4165 	Int::Int(RValue<UInt> rhs)
4166 	{
4167 		storeValue(rhs.value);
4168 	}
4169 
Int(const Int & rhs)4170 	Int::Int(const Int &rhs)
4171 	{
4172 		Value *value = rhs.loadValue();
4173 		storeValue(value);
4174 	}
4175 
Int(const Reference<Int> & rhs)4176 	Int::Int(const Reference<Int> &rhs)
4177 	{
4178 		Value *value = rhs.loadValue();
4179 		storeValue(value);
4180 	}
4181 
Int(const UInt & rhs)4182 	Int::Int(const UInt &rhs)
4183 	{
4184 		Value *value = rhs.loadValue();
4185 		storeValue(value);
4186 	}
4187 
Int(const Reference<UInt> & rhs)4188 	Int::Int(const Reference<UInt> &rhs)
4189 	{
4190 		Value *value = rhs.loadValue();
4191 		storeValue(value);
4192 	}
4193 
operator =(int rhs)4194 	RValue<Int> Int::operator=(int rhs)
4195 	{
4196 		return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
4197 	}
4198 
operator =(RValue<Int> rhs)4199 	RValue<Int> Int::operator=(RValue<Int> rhs)
4200 	{
4201 		storeValue(rhs.value);
4202 
4203 		return rhs;
4204 	}
4205 
operator =(RValue<UInt> rhs)4206 	RValue<Int> Int::operator=(RValue<UInt> rhs)
4207 	{
4208 		storeValue(rhs.value);
4209 
4210 		return RValue<Int>(rhs);
4211 	}
4212 
operator =(const Int & rhs)4213 	RValue<Int> Int::operator=(const Int &rhs)
4214 	{
4215 		Value *value = rhs.loadValue();
4216 		storeValue(value);
4217 
4218 		return RValue<Int>(value);
4219 	}
4220 
operator =(const Reference<Int> & rhs)4221 	RValue<Int> Int::operator=(const Reference<Int> &rhs)
4222 	{
4223 		Value *value = rhs.loadValue();
4224 		storeValue(value);
4225 
4226 		return RValue<Int>(value);
4227 	}
4228 
operator =(const UInt & rhs)4229 	RValue<Int> Int::operator=(const UInt &rhs)
4230 	{
4231 		Value *value = rhs.loadValue();
4232 		storeValue(value);
4233 
4234 		return RValue<Int>(value);
4235 	}
4236 
operator =(const Reference<UInt> & rhs)4237 	RValue<Int> Int::operator=(const Reference<UInt> &rhs)
4238 	{
4239 		Value *value = rhs.loadValue();
4240 		storeValue(value);
4241 
4242 		return RValue<Int>(value);
4243 	}
4244 
operator +(RValue<Int> lhs,RValue<Int> rhs)4245 	RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
4246 	{
4247 		return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
4248 	}
4249 
operator -(RValue<Int> lhs,RValue<Int> rhs)4250 	RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
4251 	{
4252 		return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
4253 	}
4254 
operator *(RValue<Int> lhs,RValue<Int> rhs)4255 	RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
4256 	{
4257 		return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
4258 	}
4259 
operator /(RValue<Int> lhs,RValue<Int> rhs)4260 	RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
4261 	{
4262 		return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
4263 	}
4264 
operator %(RValue<Int> lhs,RValue<Int> rhs)4265 	RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
4266 	{
4267 		return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
4268 	}
4269 
operator &(RValue<Int> lhs,RValue<Int> rhs)4270 	RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
4271 	{
4272 		return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
4273 	}
4274 
operator |(RValue<Int> lhs,RValue<Int> rhs)4275 	RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
4276 	{
4277 		return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
4278 	}
4279 
operator ^(RValue<Int> lhs,RValue<Int> rhs)4280 	RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
4281 	{
4282 		return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
4283 	}
4284 
operator <<(RValue<Int> lhs,RValue<Int> rhs)4285 	RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
4286 	{
4287 		return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
4288 	}
4289 
operator >>(RValue<Int> lhs,RValue<Int> rhs)4290 	RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
4291 	{
4292 		return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
4293 	}
4294 
operator +=(Int & lhs,RValue<Int> rhs)4295 	RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
4296 	{
4297 		return lhs = lhs + rhs;
4298 	}
4299 
operator -=(Int & lhs,RValue<Int> rhs)4300 	RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
4301 	{
4302 		return lhs = lhs - rhs;
4303 	}
4304 
operator *=(Int & lhs,RValue<Int> rhs)4305 	RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
4306 	{
4307 		return lhs = lhs * rhs;
4308 	}
4309 
operator /=(Int & lhs,RValue<Int> rhs)4310 	RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
4311 	{
4312 		return lhs = lhs / rhs;
4313 	}
4314 
operator %=(Int & lhs,RValue<Int> rhs)4315 	RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
4316 	{
4317 		return lhs = lhs % rhs;
4318 	}
4319 
operator &=(Int & lhs,RValue<Int> rhs)4320 	RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
4321 	{
4322 		return lhs = lhs & rhs;
4323 	}
4324 
operator |=(Int & lhs,RValue<Int> rhs)4325 	RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
4326 	{
4327 		return lhs = lhs | rhs;
4328 	}
4329 
operator ^=(Int & lhs,RValue<Int> rhs)4330 	RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
4331 	{
4332 		return lhs = lhs ^ rhs;
4333 	}
4334 
operator <<=(Int & lhs,RValue<Int> rhs)4335 	RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
4336 	{
4337 		return lhs = lhs << rhs;
4338 	}
4339 
operator >>=(Int & lhs,RValue<Int> rhs)4340 	RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
4341 	{
4342 		return lhs = lhs >> rhs;
4343 	}
4344 
operator +(RValue<Int> val)4345 	RValue<Int> operator+(RValue<Int> val)
4346 	{
4347 		return val;
4348 	}
4349 
operator -(RValue<Int> val)4350 	RValue<Int> operator-(RValue<Int> val)
4351 	{
4352 		return RValue<Int>(Nucleus::createNeg(val.value));
4353 	}
4354 
operator ~(RValue<Int> val)4355 	RValue<Int> operator~(RValue<Int> val)
4356 	{
4357 		return RValue<Int>(Nucleus::createNot(val.value));
4358 	}
4359 
operator ++(Int & val,int)4360 	RValue<Int> operator++(Int &val, int)   // Post-increment
4361 	{
4362 		RValue<Int> res = val;
4363 
4364 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4365 		val.storeValue(inc);
4366 
4367 		return res;
4368 	}
4369 
operator ++(Int & val)4370 	const Int &operator++(Int &val)   // Pre-increment
4371 	{
4372 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
4373 		val.storeValue(inc);
4374 
4375 		return val;
4376 	}
4377 
operator --(Int & val,int)4378 	RValue<Int> operator--(Int &val, int)   // Post-decrement
4379 	{
4380 		RValue<Int> res = val;
4381 
4382 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4383 		val.storeValue(inc);
4384 
4385 		return res;
4386 	}
4387 
operator --(Int & val)4388 	const Int &operator--(Int &val)   // Pre-decrement
4389 	{
4390 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
4391 		val.storeValue(inc);
4392 
4393 		return val;
4394 	}
4395 
operator <(RValue<Int> lhs,RValue<Int> rhs)4396 	RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4397 	{
4398 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4399 	}
4400 
operator <=(RValue<Int> lhs,RValue<Int> rhs)4401 	RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4402 	{
4403 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4404 	}
4405 
operator >(RValue<Int> lhs,RValue<Int> rhs)4406 	RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4407 	{
4408 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4409 	}
4410 
operator >=(RValue<Int> lhs,RValue<Int> rhs)4411 	RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4412 	{
4413 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4414 	}
4415 
operator !=(RValue<Int> lhs,RValue<Int> rhs)4416 	RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4417 	{
4418 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4419 	}
4420 
operator ==(RValue<Int> lhs,RValue<Int> rhs)4421 	RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4422 	{
4423 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4424 	}
4425 
Max(RValue<Int> x,RValue<Int> y)4426 	RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4427 	{
4428 		return IfThenElse(x > y, x, y);
4429 	}
4430 
Min(RValue<Int> x,RValue<Int> y)4431 	RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4432 	{
4433 		return IfThenElse(x < y, x, y);
4434 	}
4435 
Clamp(RValue<Int> x,RValue<Int> min,RValue<Int> max)4436 	RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4437 	{
4438 		return Min(Max(x, min), max);
4439 	}
4440 
RoundInt(RValue<Float> cast)4441 	RValue<Int> RoundInt(RValue<Float> cast)
4442 	{
4443 #if defined(__i386__) || defined(__x86_64__)
4444 		return x86::cvtss2si(cast);
4445 #else
4446 		return RValue<Int>(V(lowerRoundInt(V(cast.value), T(Int::getType()))));
4447 #endif
4448 	}
4449 
getType()4450 	Type *Int::getType()
4451 	{
4452 		return T(llvm::Type::getInt32Ty(*::context));
4453 	}
4454 
Long(RValue<Int> cast)4455 	Long::Long(RValue<Int> cast)
4456 	{
4457 		Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4458 
4459 		storeValue(integer);
4460 	}
4461 
Long(RValue<UInt> cast)4462 	Long::Long(RValue<UInt> cast)
4463 	{
4464 		Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4465 
4466 		storeValue(integer);
4467 	}
4468 
Long(RValue<Long> rhs)4469 	Long::Long(RValue<Long> rhs)
4470 	{
4471 		storeValue(rhs.value);
4472 	}
4473 
operator =(int64_t rhs)4474 	RValue<Long> Long::operator=(int64_t rhs)
4475 	{
4476 		return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
4477 	}
4478 
operator =(RValue<Long> rhs)4479 	RValue<Long> Long::operator=(RValue<Long> rhs)
4480 	{
4481 		storeValue(rhs.value);
4482 
4483 		return rhs;
4484 	}
4485 
operator =(const Long & rhs)4486 	RValue<Long> Long::operator=(const Long &rhs)
4487 	{
4488 		Value *value = rhs.loadValue();
4489 		storeValue(value);
4490 
4491 		return RValue<Long>(value);
4492 	}
4493 
operator =(const Reference<Long> & rhs)4494 	RValue<Long> Long::operator=(const Reference<Long> &rhs)
4495 	{
4496 		Value *value = rhs.loadValue();
4497 		storeValue(value);
4498 
4499 		return RValue<Long>(value);
4500 	}
4501 
operator +(RValue<Long> lhs,RValue<Long> rhs)4502 	RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4503 	{
4504 		return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4505 	}
4506 
operator -(RValue<Long> lhs,RValue<Long> rhs)4507 	RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4508 	{
4509 		return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4510 	}
4511 
operator +=(Long & lhs,RValue<Long> rhs)4512 	RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
4513 	{
4514 		return lhs = lhs + rhs;
4515 	}
4516 
operator -=(Long & lhs,RValue<Long> rhs)4517 	RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4518 	{
4519 		return lhs = lhs - rhs;
4520 	}
4521 
AddAtomic(RValue<Pointer<Long>> x,RValue<Long> y)4522 	RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4523 	{
4524 		return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4525 	}
4526 
getType()4527 	Type *Long::getType()
4528 	{
4529 		return T(llvm::Type::getInt64Ty(*::context));
4530 	}
4531 
UInt(Argument<UInt> argument)4532 	UInt::UInt(Argument<UInt> argument)
4533 	{
4534 		storeValue(argument.value);
4535 	}
4536 
UInt(RValue<UShort> cast)4537 	UInt::UInt(RValue<UShort> cast)
4538 	{
4539 		Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4540 
4541 		storeValue(integer);
4542 	}
4543 
UInt(RValue<Long> cast)4544 	UInt::UInt(RValue<Long> cast)
4545 	{
4546 		Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4547 
4548 		storeValue(integer);
4549 	}
4550 
	// Converts a Float to UInt using only signed conversions (Int(cast)),
	// because createFPToUI is reported broken (see the note below).
	//
	// The stored value is the bitwise AND of two parts:
	//  - a mask, ~(As<Int>(cast) >> 31): the float's bits are reinterpreted as
	//    an Int and shifted right arithmetically by 31, then inverted, so the
	//    mask is zero when the float's sign bit is set and all ones otherwise;
	//  - the converted magnitude: for inputs >= 2^31 (ustartf) the conversion
	//    is done on (cast - 2^31) and 2^31 is re-added as a UInt; smaller
	//    inputs are converted directly.
	UInt::UInt(RValue<Float> cast)
	{
		// Note: createFPToUI is broken, must perform conversion using createFPtoSI
		// Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());

		// Smallest positive value representable in UInt, but not in Int
		const unsigned int ustart = 0x80000000u;
		const float ustartf = float(ustart);

		// If the value is negative, store 0, otherwise store the result of the conversion
		storeValue((~(As<Int>(cast) >> 31) &
		// Check if the value can be represented as an Int
			IfThenElse(cast >= ustartf,
		// If the value is too large, subtract ustart and re-add it after conversion.
				As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
		// Otherwise, just convert normally
				Int(cast))).value);
	}
4569 
UInt(int x)4570 	UInt::UInt(int x)
4571 	{
4572 		storeValue(Nucleus::createConstantInt(x));
4573 	}
4574 
UInt(unsigned int x)4575 	UInt::UInt(unsigned int x)
4576 	{
4577 		storeValue(Nucleus::createConstantInt(x));
4578 	}
4579 
UInt(RValue<UInt> rhs)4580 	UInt::UInt(RValue<UInt> rhs)
4581 	{
4582 		storeValue(rhs.value);
4583 	}
4584 
UInt(RValue<Int> rhs)4585 	UInt::UInt(RValue<Int> rhs)
4586 	{
4587 		storeValue(rhs.value);
4588 	}
4589 
UInt(const UInt & rhs)4590 	UInt::UInt(const UInt &rhs)
4591 	{
4592 		Value *value = rhs.loadValue();
4593 		storeValue(value);
4594 	}
4595 
UInt(const Reference<UInt> & rhs)4596 	UInt::UInt(const Reference<UInt> &rhs)
4597 	{
4598 		Value *value = rhs.loadValue();
4599 		storeValue(value);
4600 	}
4601 
UInt(const Int & rhs)4602 	UInt::UInt(const Int &rhs)
4603 	{
4604 		Value *value = rhs.loadValue();
4605 		storeValue(value);
4606 	}
4607 
UInt(const Reference<Int> & rhs)4608 	UInt::UInt(const Reference<Int> &rhs)
4609 	{
4610 		Value *value = rhs.loadValue();
4611 		storeValue(value);
4612 	}
4613 
operator =(unsigned int rhs)4614 	RValue<UInt> UInt::operator=(unsigned int rhs)
4615 	{
4616 		return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4617 	}
4618 
operator =(RValue<UInt> rhs)4619 	RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4620 	{
4621 		storeValue(rhs.value);
4622 
4623 		return rhs;
4624 	}
4625 
operator =(RValue<Int> rhs)4626 	RValue<UInt> UInt::operator=(RValue<Int> rhs)
4627 	{
4628 		storeValue(rhs.value);
4629 
4630 		return RValue<UInt>(rhs);
4631 	}
4632 
operator =(const UInt & rhs)4633 	RValue<UInt> UInt::operator=(const UInt &rhs)
4634 	{
4635 		Value *value = rhs.loadValue();
4636 		storeValue(value);
4637 
4638 		return RValue<UInt>(value);
4639 	}
4640 
operator =(const Reference<UInt> & rhs)4641 	RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4642 	{
4643 		Value *value = rhs.loadValue();
4644 		storeValue(value);
4645 
4646 		return RValue<UInt>(value);
4647 	}
4648 
operator =(const Int & rhs)4649 	RValue<UInt> UInt::operator=(const Int &rhs)
4650 	{
4651 		Value *value = rhs.loadValue();
4652 		storeValue(value);
4653 
4654 		return RValue<UInt>(value);
4655 	}
4656 
operator =(const Reference<Int> & rhs)4657 	RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4658 	{
4659 		Value *value = rhs.loadValue();
4660 		storeValue(value);
4661 
4662 		return RValue<UInt>(value);
4663 	}
4664 
operator +(RValue<UInt> lhs,RValue<UInt> rhs)4665 	RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4666 	{
4667 		return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4668 	}
4669 
operator -(RValue<UInt> lhs,RValue<UInt> rhs)4670 	RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4671 	{
4672 		return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4673 	}
4674 
operator *(RValue<UInt> lhs,RValue<UInt> rhs)4675 	RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4676 	{
4677 		return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4678 	}
4679 
operator /(RValue<UInt> lhs,RValue<UInt> rhs)4680 	RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4681 	{
4682 		return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4683 	}
4684 
operator %(RValue<UInt> lhs,RValue<UInt> rhs)4685 	RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4686 	{
4687 		return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4688 	}
4689 
operator &(RValue<UInt> lhs,RValue<UInt> rhs)4690 	RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4691 	{
4692 		return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4693 	}
4694 
operator |(RValue<UInt> lhs,RValue<UInt> rhs)4695 	RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4696 	{
4697 		return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4698 	}
4699 
operator ^(RValue<UInt> lhs,RValue<UInt> rhs)4700 	RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4701 	{
4702 		return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4703 	}
4704 
operator <<(RValue<UInt> lhs,RValue<UInt> rhs)4705 	RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4706 	{
4707 		return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4708 	}
4709 
operator >>(RValue<UInt> lhs,RValue<UInt> rhs)4710 	RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4711 	{
4712 		return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4713 	}
4714 
operator +=(UInt & lhs,RValue<UInt> rhs)4715 	RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4716 	{
4717 		return lhs = lhs + rhs;
4718 	}
4719 
operator -=(UInt & lhs,RValue<UInt> rhs)4720 	RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4721 	{
4722 		return lhs = lhs - rhs;
4723 	}
4724 
operator *=(UInt & lhs,RValue<UInt> rhs)4725 	RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4726 	{
4727 		return lhs = lhs * rhs;
4728 	}
4729 
operator /=(UInt & lhs,RValue<UInt> rhs)4730 	RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4731 	{
4732 		return lhs = lhs / rhs;
4733 	}
4734 
operator %=(UInt & lhs,RValue<UInt> rhs)4735 	RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4736 	{
4737 		return lhs = lhs % rhs;
4738 	}
4739 
operator &=(UInt & lhs,RValue<UInt> rhs)4740 	RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4741 	{
4742 		return lhs = lhs & rhs;
4743 	}
4744 
operator |=(UInt & lhs,RValue<UInt> rhs)4745 	RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4746 	{
4747 		return lhs = lhs | rhs;
4748 	}
4749 
operator ^=(UInt & lhs,RValue<UInt> rhs)4750 	RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4751 	{
4752 		return lhs = lhs ^ rhs;
4753 	}
4754 
operator <<=(UInt & lhs,RValue<UInt> rhs)4755 	RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4756 	{
4757 		return lhs = lhs << rhs;
4758 	}
4759 
operator >>=(UInt & lhs,RValue<UInt> rhs)4760 	RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4761 	{
4762 		return lhs = lhs >> rhs;
4763 	}
4764 
operator +(RValue<UInt> val)4765 	RValue<UInt> operator+(RValue<UInt> val)
4766 	{
4767 		return val;
4768 	}
4769 
operator -(RValue<UInt> val)4770 	RValue<UInt> operator-(RValue<UInt> val)
4771 	{
4772 		return RValue<UInt>(Nucleus::createNeg(val.value));
4773 	}
4774 
operator ~(RValue<UInt> val)4775 	RValue<UInt> operator~(RValue<UInt> val)
4776 	{
4777 		return RValue<UInt>(Nucleus::createNot(val.value));
4778 	}
4779 
operator ++(UInt & val,int)4780 	RValue<UInt> operator++(UInt &val, int)   // Post-increment
4781 	{
4782 		RValue<UInt> res = val;
4783 
4784 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4785 		val.storeValue(inc);
4786 
4787 		return res;
4788 	}
4789 
operator ++(UInt & val)4790 	const UInt &operator++(UInt &val)   // Pre-increment
4791 	{
4792 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
4793 		val.storeValue(inc);
4794 
4795 		return val;
4796 	}
4797 
operator --(UInt & val,int)4798 	RValue<UInt> operator--(UInt &val, int)   // Post-decrement
4799 	{
4800 		RValue<UInt> res = val;
4801 
4802 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4803 		val.storeValue(inc);
4804 
4805 		return res;
4806 	}
4807 
operator --(UInt & val)4808 	const UInt &operator--(UInt &val)   // Pre-decrement
4809 	{
4810 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
4811 		val.storeValue(inc);
4812 
4813 		return val;
4814 	}
4815 
Max(RValue<UInt> x,RValue<UInt> y)4816 	RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4817 	{
4818 		return IfThenElse(x > y, x, y);
4819 	}
4820 
Min(RValue<UInt> x,RValue<UInt> y)4821 	RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4822 	{
4823 		return IfThenElse(x < y, x, y);
4824 	}
4825 
Clamp(RValue<UInt> x,RValue<UInt> min,RValue<UInt> max)4826 	RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4827 	{
4828 		return Min(Max(x, min), max);
4829 	}
4830 
operator <(RValue<UInt> lhs,RValue<UInt> rhs)4831 	RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4832 	{
4833 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4834 	}
4835 
operator <=(RValue<UInt> lhs,RValue<UInt> rhs)4836 	RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4837 	{
4838 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4839 	}
4840 
operator >(RValue<UInt> lhs,RValue<UInt> rhs)4841 	RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4842 	{
4843 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4844 	}
4845 
operator >=(RValue<UInt> lhs,RValue<UInt> rhs)4846 	RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4847 	{
4848 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4849 	}
4850 
operator !=(RValue<UInt> lhs,RValue<UInt> rhs)4851 	RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4852 	{
4853 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4854 	}
4855 
operator ==(RValue<UInt> lhs,RValue<UInt> rhs)4856 	RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4857 	{
4858 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4859 	}
4860 
4861 //	RValue<UInt> RoundUInt(RValue<Float> cast)
4862 //	{
4863 //#if defined(__i386__) || defined(__x86_64__)
4864 //		return x86::cvtss2si(val);   // FIXME: Unsigned
4865 //#else
4866 //		return IfThenElse(cast > 0.0f, Int(cast + 0.5f), Int(cast - 0.5f));
4867 //#endif
4868 //	}
4869 
getType()4870 	Type *UInt::getType()
4871 	{
4872 		return T(llvm::Type::getInt32Ty(*::context));
4873 	}
4874 
4875 //	Int2::Int2(RValue<Int> cast)
4876 //	{
4877 //		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4878 //		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4879 //
4880 //		int shuffle[2] = {0, 0};
4881 //		Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
4882 //
4883 //		storeValue(replicate);
4884 //	}
4885 
Int2(RValue<Int4> cast)4886 	Int2::Int2(RValue<Int4> cast)
4887 	{
4888 		storeValue(Nucleus::createBitCast(cast.value, getType()));
4889 	}
4890 
Int2(int x,int y)4891 	Int2::Int2(int x, int y)
4892 	{
4893 		int64_t constantVector[2] = {x, y};
4894 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
4895 	}
4896 
Int2(RValue<Int2> rhs)4897 	Int2::Int2(RValue<Int2> rhs)
4898 	{
4899 		storeValue(rhs.value);
4900 	}
4901 
Int2(const Int2 & rhs)4902 	Int2::Int2(const Int2 &rhs)
4903 	{
4904 		Value *value = rhs.loadValue();
4905 		storeValue(value);
4906 	}
4907 
Int2(const Reference<Int2> & rhs)4908 	Int2::Int2(const Reference<Int2> &rhs)
4909 	{
4910 		Value *value = rhs.loadValue();
4911 		storeValue(value);
4912 	}
4913 
Int2(RValue<Int> lo,RValue<Int> hi)4914 	Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4915 	{
4916 		int shuffle[4] = {0, 4, 1, 5};
4917 		Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4918 
4919 		storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4920 	}
4921 
operator =(RValue<Int2> rhs)4922 	RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4923 	{
4924 		storeValue(rhs.value);
4925 
4926 		return rhs;
4927 	}
4928 
operator =(const Int2 & rhs)4929 	RValue<Int2> Int2::operator=(const Int2 &rhs)
4930 	{
4931 		Value *value = rhs.loadValue();
4932 		storeValue(value);
4933 
4934 		return RValue<Int2>(value);
4935 	}
4936 
operator =(const Reference<Int2> & rhs)4937 	RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4938 	{
4939 		Value *value = rhs.loadValue();
4940 		storeValue(value);
4941 
4942 		return RValue<Int2>(value);
4943 	}
4944 
operator +(RValue<Int2> lhs,RValue<Int2> rhs)4945 	RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4946 	{
4947 		return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4948 	}
4949 
operator -(RValue<Int2> lhs,RValue<Int2> rhs)4950 	RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4951 	{
4952 		return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4953 	}
4954 
4955 //	RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4956 //	{
4957 //		return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4958 //	}
4959 
4960 //	RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4961 //	{
4962 //		return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4963 //	}
4964 
4965 //	RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4966 //	{
4967 //		return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4968 //	}
4969 
operator &(RValue<Int2> lhs,RValue<Int2> rhs)4970 	RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4971 	{
4972 		return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4973 	}
4974 
operator |(RValue<Int2> lhs,RValue<Int2> rhs)4975 	RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4976 	{
4977 		return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4978 	}
4979 
operator ^(RValue<Int2> lhs,RValue<Int2> rhs)4980 	RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4981 	{
4982 		return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4983 	}
4984 
operator <<(RValue<Int2> lhs,unsigned char rhs)4985 	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4986 	{
4987 #if defined(__i386__) || defined(__x86_64__)
4988 	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4989 
4990 		return x86::pslld(lhs, rhs);
4991 #else
4992 		return As<Int2>(V(lowerVectorShl(V(lhs.value), rhs)));
4993 #endif
4994 	}
4995 
operator >>(RValue<Int2> lhs,unsigned char rhs)4996 	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4997 	{
4998 #if defined(__i386__) || defined(__x86_64__)
4999 	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
5000 
5001 		return x86::psrad(lhs, rhs);
5002 #else
5003 		return As<Int2>(V(lowerVectorAShr(V(lhs.value), rhs)));
5004 #endif
5005 	}
5006 
operator +=(Int2 & lhs,RValue<Int2> rhs)5007 	RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
5008 	{
5009 		return lhs = lhs + rhs;
5010 	}
5011 
operator -=(Int2 & lhs,RValue<Int2> rhs)5012 	RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
5013 	{
5014 		return lhs = lhs - rhs;
5015 	}
5016 
5017 //	RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
5018 //	{
5019 //		return lhs = lhs * rhs;
5020 //	}
5021 
5022 //	RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
5023 //	{
5024 //		return lhs = lhs / rhs;
5025 //	}
5026 
5027 //	RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
5028 //	{
5029 //		return lhs = lhs % rhs;
5030 //	}
5031 
operator &=(Int2 & lhs,RValue<Int2> rhs)5032 	RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
5033 	{
5034 		return lhs = lhs & rhs;
5035 	}
5036 
operator |=(Int2 & lhs,RValue<Int2> rhs)5037 	RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
5038 	{
5039 		return lhs = lhs | rhs;
5040 	}
5041 
operator ^=(Int2 & lhs,RValue<Int2> rhs)5042 	RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
5043 	{
5044 		return lhs = lhs ^ rhs;
5045 	}
5046 
operator <<=(Int2 & lhs,unsigned char rhs)5047 	RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
5048 	{
5049 		return lhs = lhs << rhs;
5050 	}
5051 
operator >>=(Int2 & lhs,unsigned char rhs)5052 	RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
5053 	{
5054 		return lhs = lhs >> rhs;
5055 	}
5056 
5057 //	RValue<Int2> operator+(RValue<Int2> val)
5058 //	{
5059 //		return val;
5060 //	}
5061 
5062 //	RValue<Int2> operator-(RValue<Int2> val)
5063 //	{
5064 //		return RValue<Int2>(Nucleus::createNeg(val.value));
5065 //	}
5066 
operator ~(RValue<Int2> val)5067 	RValue<Int2> operator~(RValue<Int2> val)
5068 	{
5069 		return RValue<Int2>(Nucleus::createNot(val.value));
5070 	}
5071 
UnpackLow(RValue<Int2> x,RValue<Int2> y)5072 	RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
5073 	{
5074 		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5075 		return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5076 	}
5077 
UnpackHigh(RValue<Int2> x,RValue<Int2> y)5078 	RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
5079 	{
5080 		int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
5081 		auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5082 		return As<Short4>(Swizzle(lowHigh, 0xEE));
5083 	}
5084 
Extract(RValue<Int2> val,int i)5085 	RValue<Int> Extract(RValue<Int2> val, int i)
5086 	{
5087 		return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
5088 	}
5089 
Insert(RValue<Int2> val,RValue<Int> element,int i)5090 	RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
5091 	{
5092 		return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
5093 	}
5094 
getType()5095 	Type *Int2::getType()
5096 	{
5097 		return T(Type_v2i32);
5098 	}
5099 
UInt2(unsigned int x,unsigned int y)5100 	UInt2::UInt2(unsigned int x, unsigned int y)
5101 	{
5102 		int64_t constantVector[2] = {x, y};
5103 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
5104 	}
5105 
UInt2(RValue<UInt2> rhs)5106 	UInt2::UInt2(RValue<UInt2> rhs)
5107 	{
5108 		storeValue(rhs.value);
5109 	}
5110 
UInt2(const UInt2 & rhs)5111 	UInt2::UInt2(const UInt2 &rhs)
5112 	{
5113 		Value *value = rhs.loadValue();
5114 		storeValue(value);
5115 	}
5116 
UInt2(const Reference<UInt2> & rhs)5117 	UInt2::UInt2(const Reference<UInt2> &rhs)
5118 	{
5119 		Value *value = rhs.loadValue();
5120 		storeValue(value);
5121 	}
5122 
operator =(RValue<UInt2> rhs)5123 	RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
5124 	{
5125 		storeValue(rhs.value);
5126 
5127 		return rhs;
5128 	}
5129 
operator =(const UInt2 & rhs)5130 	RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
5131 	{
5132 		Value *value = rhs.loadValue();
5133 		storeValue(value);
5134 
5135 		return RValue<UInt2>(value);
5136 	}
5137 
operator =(const Reference<UInt2> & rhs)5138 	RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
5139 	{
5140 		Value *value = rhs.loadValue();
5141 		storeValue(value);
5142 
5143 		return RValue<UInt2>(value);
5144 	}
5145 
operator +(RValue<UInt2> lhs,RValue<UInt2> rhs)5146 	RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5147 	{
5148 		return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5149 	}
5150 
operator -(RValue<UInt2> lhs,RValue<UInt2> rhs)5151 	RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5152 	{
5153 		return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5154 	}
5155 
5156 //	RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5157 //	{
5158 //		return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5159 //	}
5160 
5161 //	RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5162 //	{
5163 //		return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5164 //	}
5165 
5166 //	RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5167 //	{
5168 //		return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5169 //	}
5170 
operator &(RValue<UInt2> lhs,RValue<UInt2> rhs)5171 	RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5172 	{
5173 		return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5174 	}
5175 
operator |(RValue<UInt2> lhs,RValue<UInt2> rhs)5176 	RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5177 	{
5178 		return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5179 	}
5180 
operator ^(RValue<UInt2> lhs,RValue<UInt2> rhs)5181 	RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5182 	{
5183 		return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5184 	}
5185 
operator <<(RValue<UInt2> lhs,unsigned char rhs)5186 	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5187 	{
5188 #if defined(__i386__) || defined(__x86_64__)
5189 	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5190 
5191 		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5192 #else
5193 		return As<UInt2>(V(lowerVectorShl(V(lhs.value), rhs)));
5194 #endif
5195 	}
5196 
operator >>(RValue<UInt2> lhs,unsigned char rhs)5197 	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5198 	{
5199 #if defined(__i386__) || defined(__x86_64__)
5200 	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5201 
5202 		return x86::psrld(lhs, rhs);
5203 #else
5204 		return As<UInt2>(V(lowerVectorLShr(V(lhs.value), rhs)));
5205 #endif
5206 	}
5207 
operator +=(UInt2 & lhs,RValue<UInt2> rhs)5208 	RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
5209 	{
5210 		return lhs = lhs + rhs;
5211 	}
5212 
operator -=(UInt2 & lhs,RValue<UInt2> rhs)5213 	RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
5214 	{
5215 		return lhs = lhs - rhs;
5216 	}
5217 
5218 //	RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
5219 //	{
5220 //		return lhs = lhs * rhs;
5221 //	}
5222 
5223 //	RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
5224 //	{
5225 //		return lhs = lhs / rhs;
5226 //	}
5227 
5228 //	RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
5229 //	{
5230 //		return lhs = lhs % rhs;
5231 //	}
5232 
operator &=(UInt2 & lhs,RValue<UInt2> rhs)5233 	RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
5234 	{
5235 		return lhs = lhs & rhs;
5236 	}
5237 
operator |=(UInt2 & lhs,RValue<UInt2> rhs)5238 	RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
5239 	{
5240 		return lhs = lhs | rhs;
5241 	}
5242 
operator ^=(UInt2 & lhs,RValue<UInt2> rhs)5243 	RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
5244 	{
5245 		return lhs = lhs ^ rhs;
5246 	}
5247 
operator <<=(UInt2 & lhs,unsigned char rhs)5248 	RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
5249 	{
5250 		return lhs = lhs << rhs;
5251 	}
5252 
operator >>=(UInt2 & lhs,unsigned char rhs)5253 	RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
5254 	{
5255 		return lhs = lhs >> rhs;
5256 	}
5257 
5258 //	RValue<UInt2> operator+(RValue<UInt2> val)
5259 //	{
5260 //		return val;
5261 //	}
5262 
5263 //	RValue<UInt2> operator-(RValue<UInt2> val)
5264 //	{
5265 //		return RValue<UInt2>(Nucleus::createNeg(val.value));
5266 //	}
5267 
operator ~(RValue<UInt2> val)5268 	RValue<UInt2> operator~(RValue<UInt2> val)
5269 	{
5270 		return RValue<UInt2>(Nucleus::createNot(val.value));
5271 	}
5272 
getType()5273 	Type *UInt2::getType()
5274 	{
5275 		return T(Type_v2i32);
5276 	}
5277 
Int4()5278 	Int4::Int4() : XYZW(this)
5279 	{
5280 	}
5281 
Int4(RValue<Byte4> cast)5282 	Int4::Int4(RValue<Byte4> cast) : XYZW(this)
5283 	{
5284 #if defined(__i386__) || defined(__x86_64__)
5285 		if(CPUID::supportsSSE4_1())
5286 		{
5287 			*this = x86::pmovzxbd(As<Byte16>(cast));
5288 		}
5289 		else
5290 #endif
5291 		{
5292 			int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
5293 			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
5294 			Value *b = Nucleus::createShuffleVector(a, Nucleus::createNullValue(Byte16::getType()), swizzle);
5295 
5296 			int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5297 			Value *c = Nucleus::createBitCast(b, Short8::getType());
5298 			Value *d = Nucleus::createShuffleVector(c, Nucleus::createNullValue(Short8::getType()), swizzle2);
5299 
5300 			*this = As<Int4>(d);
5301 		}
5302 	}
5303 
Int4(RValue<SByte4> cast)5304 	Int4::Int4(RValue<SByte4> cast) : XYZW(this)
5305 	{
5306 #if defined(__i386__) || defined(__x86_64__)
5307 		if(CPUID::supportsSSE4_1())
5308 		{
5309 			*this = x86::pmovsxbd(As<SByte16>(cast));
5310 		}
5311 		else
5312 #endif
5313 		{
5314 			int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
5315 			Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
5316 			Value *b = Nucleus::createShuffleVector(a, a, swizzle);
5317 
5318 			int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5319 			Value *c = Nucleus::createBitCast(b, Short8::getType());
5320 			Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
5321 
5322 			*this = As<Int4>(d) >> 24;
5323 		}
5324 	}
5325 
Int4(RValue<Float4> cast)5326 	Int4::Int4(RValue<Float4> cast) : XYZW(this)
5327 	{
5328 		Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5329 
5330 		storeValue(xyzw);
5331 	}
5332 
Int4(RValue<Short4> cast)5333 	Int4::Int4(RValue<Short4> cast) : XYZW(this)
5334 	{
5335 #if defined(__i386__) || defined(__x86_64__)
5336 		if(CPUID::supportsSSE4_1())
5337 		{
5338 			*this = x86::pmovsxwd(As<Short8>(cast));
5339 		}
5340 		else
5341 #endif
5342 		{
5343 			int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5344 			Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
5345 			*this = As<Int4>(c) >> 16;
5346 		}
5347 	}
5348 
Int4(RValue<UShort4> cast)5349 	Int4::Int4(RValue<UShort4> cast) : XYZW(this)
5350 	{
5351 #if defined(__i386__) || defined(__x86_64__)
5352 		if(CPUID::supportsSSE4_1())
5353 		{
5354 			*this = x86::pmovzxwd(As<UShort8>(cast));
5355 		}
5356 		else
5357 #endif
5358 		{
5359 			int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5360 			Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
5361 			*this = As<Int4>(c);
5362 		}
5363 	}
5364 
Int4(int xyzw)5365 	Int4::Int4(int xyzw) : XYZW(this)
5366 	{
5367 		constant(xyzw, xyzw, xyzw, xyzw);
5368 	}
5369 
Int4(int x,int yzw)5370 	Int4::Int4(int x, int yzw) : XYZW(this)
5371 	{
5372 		constant(x, yzw, yzw, yzw);
5373 	}
5374 
Int4(int x,int y,int zw)5375 	Int4::Int4(int x, int y, int zw) : XYZW(this)
5376 	{
5377 		constant(x, y, zw, zw);
5378 	}
5379 
Int4(int x,int y,int z,int w)5380 	Int4::Int4(int x, int y, int z, int w) : XYZW(this)
5381 	{
5382 		constant(x, y, z, w);
5383 	}
5384 
constant(int x,int y,int z,int w)5385 	void Int4::constant(int x, int y, int z, int w)
5386 	{
5387 		int64_t constantVector[4] = {x, y, z, w};
5388 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
5389 	}
5390 
Int4(RValue<Int4> rhs)5391 	Int4::Int4(RValue<Int4> rhs) : XYZW(this)
5392 	{
5393 		storeValue(rhs.value);
5394 	}
5395 
Int4(const Int4 & rhs)5396 	Int4::Int4(const Int4 &rhs) : XYZW(this)
5397 	{
5398 		Value *value = rhs.loadValue();
5399 		storeValue(value);
5400 	}
5401 
Int4(const Reference<Int4> & rhs)5402 	Int4::Int4(const Reference<Int4> &rhs) : XYZW(this)
5403 	{
5404 		Value *value = rhs.loadValue();
5405 		storeValue(value);
5406 	}
5407 
Int4(RValue<UInt4> rhs)5408 	Int4::Int4(RValue<UInt4> rhs) : XYZW(this)
5409 	{
5410 		storeValue(rhs.value);
5411 	}
5412 
Int4(const UInt4 & rhs)5413 	Int4::Int4(const UInt4 &rhs) : XYZW(this)
5414 	{
5415 		Value *value = rhs.loadValue();
5416 		storeValue(value);
5417 	}
5418 
Int4(const Reference<UInt4> & rhs)5419 	Int4::Int4(const Reference<UInt4> &rhs) : XYZW(this)
5420 	{
5421 		Value *value = rhs.loadValue();
5422 		storeValue(value);
5423 	}
5424 
Int4(RValue<Int2> lo,RValue<Int2> hi)5425 	Int4::Int4(RValue<Int2> lo, RValue<Int2> hi) : XYZW(this)
5426 	{
5427 		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5428 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5429 
5430 		storeValue(packed);
5431 	}
5432 
Int4(RValue<Int> rhs)5433 	Int4::Int4(RValue<Int> rhs) : XYZW(this)
5434 	{
5435 		Value *vector = loadValue();
5436 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5437 
5438 		int swizzle[4] = {0, 0, 0, 0};
5439 		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5440 
5441 		storeValue(replicate);
5442 	}
5443 
Int4(const Int & rhs)5444 	Int4::Int4(const Int &rhs) : XYZW(this)
5445 	{
5446 		*this = RValue<Int>(rhs.loadValue());
5447 	}
5448 
Int4(const Reference<Int> & rhs)5449 	Int4::Int4(const Reference<Int> &rhs) : XYZW(this)
5450 	{
5451 		*this = RValue<Int>(rhs.loadValue());
5452 	}
5453 
operator =(RValue<Int4> rhs)5454 	RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5455 	{
5456 		storeValue(rhs.value);
5457 
5458 		return rhs;
5459 	}
5460 
operator =(const Int4 & rhs)5461 	RValue<Int4> Int4::operator=(const Int4 &rhs)
5462 	{
5463 		Value *value = rhs.loadValue();
5464 		storeValue(value);
5465 
5466 		return RValue<Int4>(value);
5467 	}
5468 
operator =(const Reference<Int4> & rhs)5469 	RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5470 	{
5471 		Value *value = rhs.loadValue();
5472 		storeValue(value);
5473 
5474 		return RValue<Int4>(value);
5475 	}
5476 
operator +(RValue<Int4> lhs,RValue<Int4> rhs)5477 	RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5478 	{
5479 		return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5480 	}
5481 
operator -(RValue<Int4> lhs,RValue<Int4> rhs)5482 	RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5483 	{
5484 		return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5485 	}
5486 
operator *(RValue<Int4> lhs,RValue<Int4> rhs)5487 	RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5488 	{
5489 		return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5490 	}
5491 
operator /(RValue<Int4> lhs,RValue<Int4> rhs)5492 	RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5493 	{
5494 		return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5495 	}
5496 
operator %(RValue<Int4> lhs,RValue<Int4> rhs)5497 	RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5498 	{
5499 		return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5500 	}
5501 
operator &(RValue<Int4> lhs,RValue<Int4> rhs)5502 	RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5503 	{
5504 		return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5505 	}
5506 
operator |(RValue<Int4> lhs,RValue<Int4> rhs)5507 	RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5508 	{
5509 		return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5510 	}
5511 
operator ^(RValue<Int4> lhs,RValue<Int4> rhs)5512 	RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5513 	{
5514 		return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5515 	}
5516 
operator <<(RValue<Int4> lhs,unsigned char rhs)5517 	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5518 	{
5519 #if defined(__i386__) || defined(__x86_64__)
5520 		return x86::pslld(lhs, rhs);
5521 #else
5522 		return As<Int4>(V(lowerVectorShl(V(lhs.value), rhs)));
5523 #endif
5524 	}
5525 
operator >>(RValue<Int4> lhs,unsigned char rhs)5526 	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5527 	{
5528 #if defined(__i386__) || defined(__x86_64__)
5529 		return x86::psrad(lhs, rhs);
5530 #else
5531 		return As<Int4>(V(lowerVectorAShr(V(lhs.value), rhs)));
5532 #endif
5533 	}
5534 
operator <<(RValue<Int4> lhs,RValue<Int4> rhs)5535 	RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5536 	{
5537 		return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5538 	}
5539 
operator >>(RValue<Int4> lhs,RValue<Int4> rhs)5540 	RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5541 	{
5542 		return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5543 	}
5544 
operator +=(Int4 & lhs,RValue<Int4> rhs)5545 	RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5546 	{
5547 		return lhs = lhs + rhs;
5548 	}
5549 
operator -=(Int4 & lhs,RValue<Int4> rhs)5550 	RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5551 	{
5552 		return lhs = lhs - rhs;
5553 	}
5554 
operator *=(Int4 & lhs,RValue<Int4> rhs)5555 	RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5556 	{
5557 		return lhs = lhs * rhs;
5558 	}
5559 
5560 //	RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5561 //	{
5562 //		return lhs = lhs / rhs;
5563 //	}
5564 
5565 //	RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5566 //	{
5567 //		return lhs = lhs % rhs;
5568 //	}
5569 
operator &=(Int4 & lhs,RValue<Int4> rhs)5570 	RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5571 	{
5572 		return lhs = lhs & rhs;
5573 	}
5574 
operator |=(Int4 & lhs,RValue<Int4> rhs)5575 	RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5576 	{
5577 		return lhs = lhs | rhs;
5578 	}
5579 
operator ^=(Int4 & lhs,RValue<Int4> rhs)5580 	RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5581 	{
5582 		return lhs = lhs ^ rhs;
5583 	}
5584 
operator <<=(Int4 & lhs,unsigned char rhs)5585 	RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5586 	{
5587 		return lhs = lhs << rhs;
5588 	}
5589 
operator >>=(Int4 & lhs,unsigned char rhs)5590 	RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5591 	{
5592 		return lhs = lhs >> rhs;
5593 	}
5594 
operator +(RValue<Int4> val)5595 	RValue<Int4> operator+(RValue<Int4> val)
5596 	{
5597 		return val;
5598 	}
5599 
operator -(RValue<Int4> val)5600 	RValue<Int4> operator-(RValue<Int4> val)
5601 	{
5602 		return RValue<Int4>(Nucleus::createNeg(val.value));
5603 	}
5604 
operator ~(RValue<Int4> val)5605 	RValue<Int4> operator~(RValue<Int4> val)
5606 	{
5607 		return RValue<Int4>(Nucleus::createNot(val.value));
5608 	}
5609 
CmpEQ(RValue<Int4> x,RValue<Int4> y)5610 	RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5611 	{
5612 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5613 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5614 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5615 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5616 	}
5617 
CmpLT(RValue<Int4> x,RValue<Int4> y)5618 	RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5619 	{
5620 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5621 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5622 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
5623 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5624 	}
5625 
CmpLE(RValue<Int4> x,RValue<Int4> y)5626 	RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5627 	{
5628 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5629 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5630 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5631 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5632 	}
5633 
CmpNEQ(RValue<Int4> x,RValue<Int4> y)5634 	RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5635 	{
5636 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5637 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5638 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5639 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5640 	}
5641 
CmpNLT(RValue<Int4> x,RValue<Int4> y)5642 	RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5643 	{
5644 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5645 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5646 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5647 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5648 	}
5649 
CmpNLE(RValue<Int4> x,RValue<Int4> y)5650 	RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5651 	{
5652 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5653 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5654 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
5655 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5656 	}
5657 
Max(RValue<Int4> x,RValue<Int4> y)5658 	RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5659 	{
5660 #if defined(__i386__) || defined(__x86_64__)
5661 		if(CPUID::supportsSSE4_1())
5662 		{
5663 			return x86::pmaxsd(x, y);
5664 		}
5665 		else
5666 #endif
5667 		{
5668 			RValue<Int4> greater = CmpNLE(x, y);
5669 			return (x & greater) | (y & ~greater);
5670 		}
5671 	}
5672 
Min(RValue<Int4> x,RValue<Int4> y)5673 	RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5674 	{
5675 #if defined(__i386__) || defined(__x86_64__)
5676 		if(CPUID::supportsSSE4_1())
5677 		{
5678 			return x86::pminsd(x, y);
5679 		}
5680 		else
5681 #endif
5682 		{
5683 			RValue<Int4> less = CmpLT(x, y);
5684 			return (x & less) | (y & ~less);
5685 		}
5686 	}
5687 
RoundInt(RValue<Float4> cast)5688 	RValue<Int4> RoundInt(RValue<Float4> cast)
5689 	{
5690 #if defined(__i386__) || defined(__x86_64__)
5691 		return x86::cvtps2dq(cast);
5692 #else
5693 		return As<Int4>(V(lowerRoundInt(V(cast.value), T(Int4::getType()))));
5694 #endif
5695 	}
5696 
PackSigned(RValue<Int4> x,RValue<Int4> y)5697 	RValue<Short8> PackSigned(RValue<Int4> x, RValue<Int4> y)
5698 	{
5699 #if defined(__i386__) || defined(__x86_64__)
5700 		return x86::packssdw(x, y);
5701 #else
5702 		return As<Short8>(V(lowerPack(V(x.value), V(y.value), true)));
5703 #endif
5704 	}
5705 
PackUnsigned(RValue<Int4> x,RValue<Int4> y)5706 	RValue<UShort8> PackUnsigned(RValue<Int4> x, RValue<Int4> y)
5707 	{
5708 #if defined(__i386__) || defined(__x86_64__)
5709 		return x86::packusdw(x, y);
5710 #else
5711 		return As<UShort8>(V(lowerPack(V(x.value), V(y.value), false)));
5712 #endif
5713 	}
5714 
Extract(RValue<Int4> x,int i)5715 	RValue<Int> Extract(RValue<Int4> x, int i)
5716 	{
5717 		return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
5718 	}
5719 
Insert(RValue<Int4> x,RValue<Int> element,int i)5720 	RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5721 	{
5722 		return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5723 	}
5724 
SignMask(RValue<Int4> x)5725 	RValue<Int> SignMask(RValue<Int4> x)
5726 	{
5727 #if defined(__i386__) || defined(__x86_64__)
5728 		return x86::movmskps(As<Float4>(x));
5729 #else
5730 		return As<Int>(V(lowerSignMask(V(x.value), T(Int::getType()))));
5731 #endif
5732 	}
5733 
Swizzle(RValue<Int4> x,unsigned char select)5734 	RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5735 	{
5736 		return RValue<Int4>(createSwizzle4(x.value, select));
5737 	}
5738 
getType()5739 	Type *Int4::getType()
5740 	{
5741 		return T(llvm::VectorType::get(T(Int::getType()), 4));
5742 	}
5743 
	// Default constructor: initializes XYZW with this object and stores no value.
	UInt4::UInt4() : XYZW(this)
	{
	}
5747 
	// Converts four floats to four unsigned integers using only signed
	// float-to-int conversions. Lanes at or above 2^31 are converted after
	// subtracting 2^31 and then have 2^31 added back; negative lanes become 0.
	UInt4::UInt4(RValue<Float4> cast) : XYZW(this)
	{
		// Note: createFPToUI is broken, must perform conversion using createFPtoSI
		// Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());

		// Smallest positive value representable in UInt, but not in Int
		const unsigned int ustart = 0x80000000u;
		const float ustartf = float(ustart);

		// Check if the value can be represented as an Int.
		// uiValue initially holds the per-lane mask of (cast >= ustartf), i.e. the lanes that can NOT.
		Int4 uiValue = CmpNLT(cast, Float4(ustartf));
		// If the value is too large, subtract ustart and re-add it after conversion.
		uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
		// Otherwise, just convert normally
		          (~uiValue & Int4(cast));
		// If the value is negative, store 0, otherwise store the result of the conversion.
		// (As<Int4>(cast) >> 31) is used as a per-lane mask derived from the float's sign bit.
		storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
	}
5766 
UInt4(int xyzw)5767 	UInt4::UInt4(int xyzw) : XYZW(this)
5768 	{
5769 		constant(xyzw, xyzw, xyzw, xyzw);
5770 	}
5771 
UInt4(int x,int yzw)5772 	UInt4::UInt4(int x, int yzw) : XYZW(this)
5773 	{
5774 		constant(x, yzw, yzw, yzw);
5775 	}
5776 
UInt4(int x,int y,int zw)5777 	UInt4::UInt4(int x, int y, int zw) : XYZW(this)
5778 	{
5779 		constant(x, y, zw, zw);
5780 	}
5781 
UInt4(int x,int y,int z,int w)5782 	UInt4::UInt4(int x, int y, int z, int w) : XYZW(this)
5783 	{
5784 		constant(x, y, z, w);
5785 	}
5786 
constant(int x,int y,int z,int w)5787 	void UInt4::constant(int x, int y, int z, int w)
5788 	{
5789 		int64_t constantVector[4] = {x, y, z, w};
5790 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
5791 	}
5792 
UInt4(RValue<UInt4> rhs)5793 	UInt4::UInt4(RValue<UInt4> rhs) : XYZW(this)
5794 	{
5795 		storeValue(rhs.value);
5796 	}
5797 
UInt4(const UInt4 & rhs)5798 	UInt4::UInt4(const UInt4 &rhs) : XYZW(this)
5799 	{
5800 		Value *value = rhs.loadValue();
5801 		storeValue(value);
5802 	}
5803 
UInt4(const Reference<UInt4> & rhs)5804 	UInt4::UInt4(const Reference<UInt4> &rhs) : XYZW(this)
5805 	{
5806 		Value *value = rhs.loadValue();
5807 		storeValue(value);
5808 	}
5809 
UInt4(RValue<Int4> rhs)5810 	UInt4::UInt4(RValue<Int4> rhs) : XYZW(this)
5811 	{
5812 		storeValue(rhs.value);
5813 	}
5814 
UInt4(const Int4 & rhs)5815 	UInt4::UInt4(const Int4 &rhs) : XYZW(this)
5816 	{
5817 		Value *value = rhs.loadValue();
5818 		storeValue(value);
5819 	}
5820 
UInt4(const Reference<Int4> & rhs)5821 	UInt4::UInt4(const Reference<Int4> &rhs) : XYZW(this)
5822 	{
5823 		Value *value = rhs.loadValue();
5824 		storeValue(value);
5825 	}
5826 
UInt4(RValue<UInt2> lo,RValue<UInt2> hi)5827 	UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi) : XYZW(this)
5828 	{
5829 		int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
5830 		Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
5831 
5832 		storeValue(packed);
5833 	}
5834 
operator =(RValue<UInt4> rhs)5835 	RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5836 	{
5837 		storeValue(rhs.value);
5838 
5839 		return rhs;
5840 	}
5841 
operator =(const UInt4 & rhs)5842 	RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5843 	{
5844 		Value *value = rhs.loadValue();
5845 		storeValue(value);
5846 
5847 		return RValue<UInt4>(value);
5848 	}
5849 
operator =(const Reference<UInt4> & rhs)5850 	RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5851 	{
5852 		Value *value = rhs.loadValue();
5853 		storeValue(value);
5854 
5855 		return RValue<UInt4>(value);
5856 	}
5857 
operator +(RValue<UInt4> lhs,RValue<UInt4> rhs)5858 	RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5859 	{
5860 		return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5861 	}
5862 
operator -(RValue<UInt4> lhs,RValue<UInt4> rhs)5863 	RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5864 	{
5865 		return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5866 	}
5867 
operator *(RValue<UInt4> lhs,RValue<UInt4> rhs)5868 	RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5869 	{
5870 		return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5871 	}
5872 
operator /(RValue<UInt4> lhs,RValue<UInt4> rhs)5873 	RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5874 	{
5875 		return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5876 	}
5877 
operator %(RValue<UInt4> lhs,RValue<UInt4> rhs)5878 	RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5879 	{
5880 		return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5881 	}
5882 
operator &(RValue<UInt4> lhs,RValue<UInt4> rhs)5883 	RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5884 	{
5885 		return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5886 	}
5887 
operator |(RValue<UInt4> lhs,RValue<UInt4> rhs)5888 	RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5889 	{
5890 		return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5891 	}
5892 
operator ^(RValue<UInt4> lhs,RValue<UInt4> rhs)5893 	RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5894 	{
5895 		return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5896 	}
5897 
operator <<(RValue<UInt4> lhs,unsigned char rhs)5898 	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5899 	{
5900 #if defined(__i386__) || defined(__x86_64__)
5901 		return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
5902 #else
5903 		return As<UInt4>(V(lowerVectorShl(V(lhs.value), rhs)));
5904 #endif
5905 	}
5906 
operator >>(RValue<UInt4> lhs,unsigned char rhs)5907 	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5908 	{
5909 #if defined(__i386__) || defined(__x86_64__)
5910 		return x86::psrld(lhs, rhs);
5911 #else
5912 		return As<UInt4>(V(lowerVectorLShr(V(lhs.value), rhs)));
5913 #endif
5914 	}
5915 
operator <<(RValue<UInt4> lhs,RValue<UInt4> rhs)5916 	RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5917 	{
5918 		return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5919 	}
5920 
operator >>(RValue<UInt4> lhs,RValue<UInt4> rhs)5921 	RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5922 	{
5923 		return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5924 	}
5925 
operator +=(UInt4 & lhs,RValue<UInt4> rhs)5926 	RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5927 	{
5928 		return lhs = lhs + rhs;
5929 	}
5930 
operator -=(UInt4 & lhs,RValue<UInt4> rhs)5931 	RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5932 	{
5933 		return lhs = lhs - rhs;
5934 	}
5935 
operator *=(UInt4 & lhs,RValue<UInt4> rhs)5936 	RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5937 	{
5938 		return lhs = lhs * rhs;
5939 	}
5940 
5941 //	RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5942 //	{
5943 //		return lhs = lhs / rhs;
5944 //	}
5945 
5946 //	RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5947 //	{
5948 //		return lhs = lhs % rhs;
5949 //	}
5950 
operator &=(UInt4 & lhs,RValue<UInt4> rhs)5951 	RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5952 	{
5953 		return lhs = lhs & rhs;
5954 	}
5955 
operator |=(UInt4 & lhs,RValue<UInt4> rhs)5956 	RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5957 	{
5958 		return lhs = lhs | rhs;
5959 	}
5960 
operator ^=(UInt4 & lhs,RValue<UInt4> rhs)5961 	RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5962 	{
5963 		return lhs = lhs ^ rhs;
5964 	}
5965 
operator <<=(UInt4 & lhs,unsigned char rhs)5966 	RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5967 	{
5968 		return lhs = lhs << rhs;
5969 	}
5970 
operator >>=(UInt4 & lhs,unsigned char rhs)5971 	RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5972 	{
5973 		return lhs = lhs >> rhs;
5974 	}
5975 
	// Unary plus: returns the operand unchanged; no instruction is created here.
	RValue<UInt4> operator+(RValue<UInt4> val)
	{
		return val;
	}
5980 
operator -(RValue<UInt4> val)5981 	RValue<UInt4> operator-(RValue<UInt4> val)
5982 	{
5983 		return RValue<UInt4>(Nucleus::createNeg(val.value));
5984 	}
5985 
operator ~(RValue<UInt4> val)5986 	RValue<UInt4> operator~(RValue<UInt4> val)
5987 	{
5988 		return RValue<UInt4>(Nucleus::createNot(val.value));
5989 	}
5990 
CmpEQ(RValue<UInt4> x,RValue<UInt4> y)5991 	RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5992 	{
5993 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5994 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5995 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5996 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5997 	}
5998 
CmpLT(RValue<UInt4> x,RValue<UInt4> y)5999 	RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
6000 	{
6001 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
6002 	}
6003 
CmpLE(RValue<UInt4> x,RValue<UInt4> y)6004 	RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
6005 	{
6006 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
6007 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
6008 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
6009 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
6010 	}
6011 
CmpNEQ(RValue<UInt4> x,RValue<UInt4> y)6012 	RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
6013 	{
6014 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
6015 	}
6016 
CmpNLT(RValue<UInt4> x,RValue<UInt4> y)6017 	RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
6018 	{
6019 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
6020 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
6021 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
6022 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
6023 	}
6024 
CmpNLE(RValue<UInt4> x,RValue<UInt4> y)6025 	RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
6026 	{
6027 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
6028 	}
6029 
Max(RValue<UInt4> x,RValue<UInt4> y)6030 	RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
6031 	{
6032 #if defined(__i386__) || defined(__x86_64__)
6033 		if(CPUID::supportsSSE4_1())
6034 		{
6035 			return x86::pmaxud(x, y);
6036 		}
6037 		else
6038 #endif
6039 		{
6040 			RValue<UInt4> greater = CmpNLE(x, y);
6041 			return (x & greater) | (y & ~greater);
6042 		}
6043 	}
6044 
Min(RValue<UInt4> x,RValue<UInt4> y)6045 	RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
6046 	{
6047 #if defined(__i386__) || defined(__x86_64__)
6048 		if(CPUID::supportsSSE4_1())
6049 		{
6050 			return x86::pminud(x, y);
6051 		}
6052 		else
6053 #endif
6054 		{
6055 			RValue<UInt4> less = CmpLT(x, y);
6056 			return (x & less) | (y & ~less);
6057 		}
6058 	}
6059 
getType()6060 	Type *UInt4::getType()
6061 	{
6062 		return T(llvm::VectorType::get(T(UInt::getType()), 4));
6063 	}
6064 
	// Builds the 16-bit half-float bit pattern for a 32-bit float using integer
	// operations on the float's bits, and stores that 16-bit pattern.
	Half::Half(RValue<Float> cast)
	{
		UInt fp32i = As<UInt>(cast);
		UInt abs = fp32i & 0x7FFFFFFF;   // magnitude bits (sign cleared)
		UShort fp16i((fp32i & 0x80000000) >> 16); // sign

		If(abs > 0x47FFEFFF) // Infinity
		{
			// NOTE(review): 0x7FFF sets all exponent AND mantissa bits, which is a NaN
			// encoding in IEEE binary16 (infinity would be 0x7C00) - confirm this is intended.
			fp16i |= UShort(0x7FFF);
		}
		Else
		{
			// 0x38800000 is 2^-14 as a float, the smallest normal half value.
			If(abs < 0x38800000) // Denormal
			{
				// Restore the implicit leading one, then shift the mantissa right by the
				// exponent deficit; shifts of 24 or more yield zero.
				Int mantissa = (abs & 0x007FFFFF) | 0x00800000;
				Int e = 113 - (abs >> 23);
				abs = IfThenElse(e < 24, mantissa >> e, Int(0));
				// Adding 0xFFF plus the lowest kept bit before dropping 13 bits rounds to nearest, ties to even.
				fp16i |= UShort((abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
			}
			Else
			{
				// Adding 0xC8000000 equals subtracting 0x38000000 modulo 2^32, i.e. rebiasing the
				// exponent by (127 - 15) << 23; the rest is the same round-to-nearest-even step.
				fp16i |= UShort((abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
			}
		}

		storeValue(fp16i.loadValue());
	}
6092 
getType()6093 	Type *Half::getType()
6094 	{
6095 		return T(llvm::Type::getInt16Ty(*::context));
6096 	}
6097 
Float(RValue<Int> cast)6098 	Float::Float(RValue<Int> cast)
6099 	{
6100 		Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
6101 
6102 		storeValue(integer);
6103 	}
6104 
Float(RValue<UInt> cast)6105 	Float::Float(RValue<UInt> cast)
6106 	{
6107 		RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
6108 		                       As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
6109 
6110 		storeValue(result.value);
6111 	}
6112 
	// Expands a 16-bit half-float bit pattern into a 32-bit float using integer
	// operations: the sign, exponent and mantissa fields are split out, rebiased
	// and reassembled in the float layout.
	Float::Float(RValue<Half> cast)
	{
		Int fp16i(As<UShort>(cast));

		Int s = (fp16i >> 15) & 0x00000001;   // sign bit
		Int e = (fp16i >> 10) & 0x0000001F;   // 5-bit exponent field
		Int m = fp16i & 0x000003FF;           // 10-bit mantissa field

		UInt fp32i(s << 31);
		If(e == 0)
		{
			// Zero exponent: either zero (m == 0, only the sign is kept) or a denormal.
			If(m != 0)
			{
				// Normalize: shift the mantissa up until bit 10 (the implicit one) is set,
				// lowering the exponent by one for each shift.
				While((m & 0x00000400) == 0)
				{
					m <<= 1;
					e -= 1;
				}

				// Rebias by (127 - 15) + 1 and drop the now-explicit leading one.
				fp32i |= As<UInt>(((e + (127 - 15) + 1) << 23) | ((m & ~0x00000400) << 13));
			}
		}
		Else
		{
			// Normal value: rebias the exponent and widen the mantissa from 10 to 23 bits.
			// NOTE(review): e == 0x1F (half infinity/NaN) also takes this path and yields a
			// finite float exponent (31 + 112 = 143) - confirm that is acceptable.
			fp32i |= As<UInt>(((e + (127 - 15)) << 23) | (m << 13));
		}

		storeValue(As<Float>(fp32i).value);
	}
6142 
Float(float x)6143 	Float::Float(float x)
6144 	{
6145 		storeValue(Nucleus::createConstantFloat(x));
6146 	}
6147 
Float(RValue<Float> rhs)6148 	Float::Float(RValue<Float> rhs)
6149 	{
6150 		storeValue(rhs.value);
6151 	}
6152 
Float(const Float & rhs)6153 	Float::Float(const Float &rhs)
6154 	{
6155 		Value *value = rhs.loadValue();
6156 		storeValue(value);
6157 	}
6158 
Float(const Reference<Float> & rhs)6159 	Float::Float(const Reference<Float> &rhs)
6160 	{
6161 		Value *value = rhs.loadValue();
6162 		storeValue(value);
6163 	}
6164 
operator =(RValue<Float> rhs)6165 	RValue<Float> Float::operator=(RValue<Float> rhs)
6166 	{
6167 		storeValue(rhs.value);
6168 
6169 		return rhs;
6170 	}
6171 
operator =(const Float & rhs)6172 	RValue<Float> Float::operator=(const Float &rhs)
6173 	{
6174 		Value *value = rhs.loadValue();
6175 		storeValue(value);
6176 
6177 		return RValue<Float>(value);
6178 	}
6179 
operator =(const Reference<Float> & rhs)6180 	RValue<Float> Float::operator=(const Reference<Float> &rhs)
6181 	{
6182 		Value *value = rhs.loadValue();
6183 		storeValue(value);
6184 
6185 		return RValue<Float>(value);
6186 	}
6187 
operator +(RValue<Float> lhs,RValue<Float> rhs)6188 	RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6189 	{
6190 		return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6191 	}
6192 
operator -(RValue<Float> lhs,RValue<Float> rhs)6193 	RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6194 	{
6195 		return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6196 	}
6197 
operator *(RValue<Float> lhs,RValue<Float> rhs)6198 	RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6199 	{
6200 		return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6201 	}
6202 
operator /(RValue<Float> lhs,RValue<Float> rhs)6203 	RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6204 	{
6205 		return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
6206 	}
6207 
operator +=(Float & lhs,RValue<Float> rhs)6208 	RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
6209 	{
6210 		return lhs = lhs + rhs;
6211 	}
6212 
operator -=(Float & lhs,RValue<Float> rhs)6213 	RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
6214 	{
6215 		return lhs = lhs - rhs;
6216 	}
6217 
operator *=(Float & lhs,RValue<Float> rhs)6218 	RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
6219 	{
6220 		return lhs = lhs * rhs;
6221 	}
6222 
operator /=(Float & lhs,RValue<Float> rhs)6223 	RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
6224 	{
6225 		return lhs = lhs / rhs;
6226 	}
6227 
	// Unary plus: returns the operand unchanged; no instruction is created here.
	RValue<Float> operator+(RValue<Float> val)
	{
		return val;
	}
6232 
operator -(RValue<Float> val)6233 	RValue<Float> operator-(RValue<Float> val)
6234 	{
6235 		return RValue<Float>(Nucleus::createFNeg(val.value));
6236 	}
6237 
operator <(RValue<Float> lhs,RValue<Float> rhs)6238 	RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6239 	{
6240 		return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6241 	}
6242 
operator <=(RValue<Float> lhs,RValue<Float> rhs)6243 	RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6244 	{
6245 		return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6246 	}
6247 
operator >(RValue<Float> lhs,RValue<Float> rhs)6248 	RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6249 	{
6250 		return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6251 	}
6252 
operator >=(RValue<Float> lhs,RValue<Float> rhs)6253 	RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6254 	{
6255 		return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6256 	}
6257 
operator !=(RValue<Float> lhs,RValue<Float> rhs)6258 	RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6259 	{
6260 		return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6261 	}
6262 
operator ==(RValue<Float> lhs,RValue<Float> rhs)6263 	RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6264 	{
6265 		return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6266 	}
6267 
Abs(RValue<Float> x)6268 	RValue<Float> Abs(RValue<Float> x)
6269 	{
6270 		return IfThenElse(x > 0.0f, x, -x);
6271 	}
6272 
Max(RValue<Float> x,RValue<Float> y)6273 	RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6274 	{
6275 		return IfThenElse(x > y, x, y);
6276 	}
6277 
Min(RValue<Float> x,RValue<Float> y)6278 	RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6279 	{
6280 		return IfThenElse(x < y, x, y);
6281 	}
6282 
Rcp_pp(RValue<Float> x,bool exactAtPow2)6283 	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6284 	{
6285 #if defined(__i386__) || defined(__x86_64__)
6286 		if(exactAtPow2)
6287 		{
6288 			// rcpss uses a piecewise-linear approximation which minimizes the relative error
6289 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6290 			return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6291 		}
6292 		return x86::rcpss(x);
6293 #else
6294 		return As<Float>(V(lowerRCP(V(x.value))));
6295 #endif
6296 	}
6297 
RcpSqrt_pp(RValue<Float> x)6298 	RValue<Float> RcpSqrt_pp(RValue<Float> x)
6299 	{
6300 #if defined(__i386__) || defined(__x86_64__)
6301 		return x86::rsqrtss(x);
6302 #else
6303 		return As<Float>(V(lowerRSQRT(V(x.value))));
6304 #endif
6305 	}
6306 
Sqrt(RValue<Float> x)6307 	RValue<Float> Sqrt(RValue<Float> x)
6308 	{
6309 #if defined(__i386__) || defined(__x86_64__)
6310 		return x86::sqrtss(x);
6311 #else
6312 		return As<Float>(V(lowerSQRT(V(x.value))));
6313 #endif
6314 	}
6315 
Round(RValue<Float> x)6316 	RValue<Float> Round(RValue<Float> x)
6317 	{
6318 #if defined(__i386__) || defined(__x86_64__)
6319 		if(CPUID::supportsSSE4_1())
6320 		{
6321 			return x86::roundss(x, 0);
6322 		}
6323 		else
6324 		{
6325 			return Float4(Round(Float4(x))).x;
6326 		}
6327 #else
6328 		return RValue<Float>(V(lowerRound(V(x.value))));
6329 #endif
6330 	}
6331 
Trunc(RValue<Float> x)6332 	RValue<Float> Trunc(RValue<Float> x)
6333 	{
6334 #if defined(__i386__) || defined(__x86_64__)
6335 		if(CPUID::supportsSSE4_1())
6336 		{
6337 			return x86::roundss(x, 3);
6338 		}
6339 		else
6340 		{
6341 			return Float(Int(x));   // Rounded toward zero
6342 		}
6343 #else
6344 		return RValue<Float>(V(lowerTrunc(V(x.value))));
6345 #endif
6346 	}
6347 
Frac(RValue<Float> x)6348 	RValue<Float> Frac(RValue<Float> x)
6349 	{
6350 #if defined(__i386__) || defined(__x86_64__)
6351 		if(CPUID::supportsSSE4_1())
6352 		{
6353 			return x - x86::floorss(x);
6354 		}
6355 		else
6356 		{
6357 			return Float4(Frac(Float4(x))).x;
6358 		}
6359 #else
6360 		// x - floor(x) can be 1.0 for very small negative x.
6361 		// Clamp against the value just below 1.0.
6362 		return Min(x - Floor(x), As<Float>(Int(0x3F7FFFFF)));
6363 #endif
6364 	}
6365 
Floor(RValue<Float> x)6366 	RValue<Float> Floor(RValue<Float> x)
6367 	{
6368 #if defined(__i386__) || defined(__x86_64__)
6369 		if(CPUID::supportsSSE4_1())
6370 		{
6371 			return x86::floorss(x);
6372 		}
6373 		else
6374 		{
6375 			return Float4(Floor(Float4(x))).x;
6376 		}
6377 #else
6378 		return RValue<Float>(V(lowerFloor(V(x.value))));
6379 #endif
6380 	}
6381 
Ceil(RValue<Float> x)6382 	RValue<Float> Ceil(RValue<Float> x)
6383 	{
6384 #if defined(__i386__) || defined(__x86_64__)
6385 		if(CPUID::supportsSSE4_1())
6386 		{
6387 			return x86::ceilss(x);
6388 		}
6389 		else
6390 #endif
6391 		{
6392 			return Float4(Ceil(Float4(x))).x;
6393 		}
6394 	}
6395 
getType()6396 	Type *Float::getType()
6397 	{
6398 		return T(llvm::Type::getFloatTy(*::context));
6399 	}
6400 
Float2(RValue<Float4> cast)6401 	Float2::Float2(RValue<Float4> cast)
6402 	{
6403 		storeValue(Nucleus::createBitCast(cast.value, getType()));
6404 	}
6405 
getType()6406 	Type *Float2::getType()
6407 	{
6408 		return T(Type_v2f32);
6409 	}
6410 
Float4(RValue<Byte4> cast)6411 	Float4::Float4(RValue<Byte4> cast) : XYZW(this)
6412 	{
6413 		Value *a = Int4(cast).loadValue();
6414 		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6415 
6416 		storeValue(xyzw);
6417 	}
6418 
Float4(RValue<SByte4> cast)6419 	Float4::Float4(RValue<SByte4> cast) : XYZW(this)
6420 	{
6421 		Value *a = Int4(cast).loadValue();
6422 		Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
6423 
6424 		storeValue(xyzw);
6425 	}
6426 
Float4(RValue<Short4> cast)6427 	Float4::Float4(RValue<Short4> cast) : XYZW(this)
6428 	{
6429 		Int4 c(cast);
6430 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6431 	}
6432 
Float4(RValue<UShort4> cast)6433 	Float4::Float4(RValue<UShort4> cast) : XYZW(this)
6434 	{
6435 		Int4 c(cast);
6436 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6437 	}
6438 
Float4(RValue<Int4> cast)6439 	Float4::Float4(RValue<Int4> cast) : XYZW(this)
6440 	{
6441 		Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
6442 
6443 		storeValue(xyzw);
6444 	}
6445 
Float4(RValue<UInt4> cast)6446 	Float4::Float4(RValue<UInt4> cast) : XYZW(this)
6447 	{
6448 		RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
6449 		                        As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
6450 
6451 		storeValue(result.value);
6452 	}
6453 
	// Default constructor: initializes XYZW with this object and stores no value.
	Float4::Float4() : XYZW(this)
	{
	}
6457 
Float4(float xyzw)6458 	Float4::Float4(float xyzw) : XYZW(this)
6459 	{
6460 		constant(xyzw, xyzw, xyzw, xyzw);
6461 	}
6462 
Float4(float x,float yzw)6463 	Float4::Float4(float x, float yzw) : XYZW(this)
6464 	{
6465 		constant(x, yzw, yzw, yzw);
6466 	}
6467 
Float4(float x,float y,float zw)6468 	Float4::Float4(float x, float y, float zw) : XYZW(this)
6469 	{
6470 		constant(x, y, zw, zw);
6471 	}
6472 
Float4(float x,float y,float z,float w)6473 	Float4::Float4(float x, float y, float z, float w) : XYZW(this)
6474 	{
6475 		constant(x, y, z, w);
6476 	}
6477 
constant(float x,float y,float z,float w)6478 	void Float4::constant(float x, float y, float z, float w)
6479 	{
6480 		double constantVector[4] = {x, y, z, w};
6481 		storeValue(Nucleus::createConstantVector(constantVector, getType()));
6482 	}
6483 
Float4(RValue<Float4> rhs)6484 	Float4::Float4(RValue<Float4> rhs) : XYZW(this)
6485 	{
6486 		storeValue(rhs.value);
6487 	}
6488 
Float4(const Float4 & rhs)6489 	Float4::Float4(const Float4 &rhs) : XYZW(this)
6490 	{
6491 		Value *value = rhs.loadValue();
6492 		storeValue(value);
6493 	}
6494 
Float4(const Reference<Float4> & rhs)6495 	Float4::Float4(const Reference<Float4> &rhs) : XYZW(this)
6496 	{
6497 		Value *value = rhs.loadValue();
6498 		storeValue(value);
6499 	}
6500 
Float4(RValue<Float> rhs)6501 	Float4::Float4(RValue<Float> rhs) : XYZW(this)
6502 	{
6503 		Value *vector = loadValue();
6504 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
6505 
6506 		int swizzle[4] = {0, 0, 0, 0};
6507 		Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
6508 
6509 		storeValue(replicate);
6510 	}
6511 
Float4(const Float & rhs)6512 	Float4::Float4(const Float &rhs) : XYZW(this)
6513 	{
6514 		*this = RValue<Float>(rhs.loadValue());
6515 	}
6516 
Float4(const Reference<Float> & rhs)6517 	Float4::Float4(const Reference<Float> &rhs) : XYZW(this)
6518 	{
6519 		*this = RValue<Float>(rhs.loadValue());
6520 	}
6521 
operator =(float x)6522 	RValue<Float4> Float4::operator=(float x)
6523 	{
6524 		return *this = Float4(x, x, x, x);
6525 	}
6526 
operator =(RValue<Float4> rhs)6527 	RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6528 	{
6529 		storeValue(rhs.value);
6530 
6531 		return rhs;
6532 	}
6533 
operator =(const Float4 & rhs)6534 	RValue<Float4> Float4::operator=(const Float4 &rhs)
6535 	{
6536 		Value *value = rhs.loadValue();
6537 		storeValue(value);
6538 
6539 		return RValue<Float4>(value);
6540 	}
6541 
operator =(const Reference<Float4> & rhs)6542 	RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6543 	{
6544 		Value *value = rhs.loadValue();
6545 		storeValue(value);
6546 
6547 		return RValue<Float4>(value);
6548 	}
6549 
operator =(RValue<Float> rhs)6550 	RValue<Float4> Float4::operator=(RValue<Float> rhs)
6551 	{
6552 		return *this = Float4(rhs);
6553 	}
6554 
operator =(const Float & rhs)6555 	RValue<Float4> Float4::operator=(const Float &rhs)
6556 	{
6557 		return *this = Float4(rhs);
6558 	}
6559 
operator =(const Reference<Float> & rhs)6560 	RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6561 	{
6562 		return *this = Float4(rhs);
6563 	}
6564 
operator +(RValue<Float4> lhs,RValue<Float4> rhs)6565 	RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6566 	{
6567 		return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6568 	}
6569 
operator -(RValue<Float4> lhs,RValue<Float4> rhs)6570 	RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6571 	{
6572 		return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6573 	}
6574 
operator *(RValue<Float4> lhs,RValue<Float4> rhs)6575 	RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6576 	{
6577 		return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6578 	}
6579 
operator /(RValue<Float4> lhs,RValue<Float4> rhs)6580 	RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6581 	{
6582 		return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6583 	}
6584 
operator %(RValue<Float4> lhs,RValue<Float4> rhs)6585 	RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6586 	{
6587 		return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6588 	}
6589 
operator +=(Float4 & lhs,RValue<Float4> rhs)6590 	RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6591 	{
6592 		return lhs = lhs + rhs;
6593 	}
6594 
operator -=(Float4 & lhs,RValue<Float4> rhs)6595 	RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6596 	{
6597 		return lhs = lhs - rhs;
6598 	}
6599 
operator *=(Float4 & lhs,RValue<Float4> rhs)6600 	RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6601 	{
6602 		return lhs = lhs * rhs;
6603 	}
6604 
operator /=(Float4 & lhs,RValue<Float4> rhs)6605 	RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6606 	{
6607 		return lhs = lhs / rhs;
6608 	}
6609 
operator %=(Float4 & lhs,RValue<Float4> rhs)6610 	RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6611 	{
6612 		return lhs = lhs % rhs;
6613 	}
6614 
	// Unary plus: returns the operand unchanged; no instruction is created here.
	RValue<Float4> operator+(RValue<Float4> val)
	{
		return val;
	}
6619 
operator -(RValue<Float4> val)6620 	RValue<Float4> operator-(RValue<Float4> val)
6621 	{
6622 		return RValue<Float4>(Nucleus::createFNeg(val.value));
6623 	}
6624 
Abs(RValue<Float4> x)6625 	RValue<Float4> Abs(RValue<Float4> x)
6626 	{
6627 		Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6628 		int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6629 		Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, Int4::getType()));
6630 
6631 		return As<Float4>(result);
6632 	}
6633 
Max(RValue<Float4> x,RValue<Float4> y)6634 	RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6635 	{
6636 #if defined(__i386__) || defined(__x86_64__)
6637 		return x86::maxps(x, y);
6638 #else
6639 		return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OGT)));
6640 #endif
6641 	}
6642 
Min(RValue<Float4> x,RValue<Float4> y)6643 	RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6644 	{
6645 #if defined(__i386__) || defined(__x86_64__)
6646 		return x86::minps(x, y);
6647 #else
6648 		return As<Float4>(V(lowerPFMINMAX(V(x.value), V(y.value), llvm::FCmpInst::FCMP_OLT)));
6649 #endif
6650 	}
6651 
Rcp_pp(RValue<Float4> x,bool exactAtPow2)6652 	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6653 	{
6654 #if defined(__i386__) || defined(__x86_64__)
6655 		if(exactAtPow2)
6656 		{
6657 			// rcpps uses a piecewise-linear approximation which minimizes the relative error
6658 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6659 			return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6660 		}
6661 		return x86::rcpps(x);
6662 #else
6663 		return As<Float4>(V(lowerRCP(V(x.value))));
6664 #endif
6665 	}
6666 
RcpSqrt_pp(RValue<Float4> x)6667 	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6668 	{
6669 #if defined(__i386__) || defined(__x86_64__)
6670 		return x86::rsqrtps(x);
6671 #else
6672 		return As<Float4>(V(lowerRSQRT(V(x.value))));
6673 #endif
6674 	}
6675 
Sqrt(RValue<Float4> x)6676 	RValue<Float4> Sqrt(RValue<Float4> x)
6677 	{
6678 #if defined(__i386__) || defined(__x86_64__)
6679 		return x86::sqrtps(x);
6680 #else
6681 		return As<Float4>(V(lowerSQRT(V(x.value))));
6682 #endif
6683 	}
6684 
Insert(RValue<Float4> x,RValue<Float> element,int i)6685 	RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
6686 	{
6687 		return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
6688 	}
6689 
Extract(RValue<Float4> x,int i)6690 	RValue<Float> Extract(RValue<Float4> x, int i)
6691 	{
6692 		return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6693 	}
6694 
Swizzle(RValue<Float4> x,unsigned char select)6695 	RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6696 	{
6697 		return RValue<Float4>(createSwizzle4(x.value, select));
6698 	}
6699 
ShuffleLowHigh(RValue<Float4> x,RValue<Float4> y,unsigned char imm)6700 	RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6701 	{
6702 		int shuffle[4] =
6703 		{
6704 			((imm >> 0) & 0x03) + 0,
6705 			((imm >> 2) & 0x03) + 0,
6706 			((imm >> 4) & 0x03) + 4,
6707 			((imm >> 6) & 0x03) + 4,
6708 		};
6709 
6710 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6711 	}
6712 
UnpackLow(RValue<Float4> x,RValue<Float4> y)6713 	RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6714 	{
6715 		int shuffle[4] = {0, 4, 1, 5};
6716 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6717 	}
6718 
UnpackHigh(RValue<Float4> x,RValue<Float4> y)6719 	RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6720 	{
6721 		int shuffle[4] = {2, 6, 3, 7};
6722 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6723 	}
6724 
Mask(Float4 & lhs,RValue<Float4> rhs,unsigned char select)6725 	RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6726 	{
6727 		Value *vector = lhs.loadValue();
6728 		Value *result = createMask4(vector, rhs.value, select);
6729 		lhs.storeValue(result);
6730 
6731 		return RValue<Float4>(result);
6732 	}
6733 
SignMask(RValue<Float4> x)6734 	RValue<Int> SignMask(RValue<Float4> x)
6735 	{
6736 #if defined(__i386__) || defined(__x86_64__)
6737 		return x86::movmskps(x);
6738 #else
6739 		return As<Int>(V(lowerFPSignMask(V(x.value), T(Int::getType()))));
6740 #endif
6741 	}
6742 
CmpEQ(RValue<Float4> x,RValue<Float4> y)6743 	RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6744 	{
6745 	//	return As<Int4>(x86::cmpeqps(x, y));
6746 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
6747 	}
6748 
CmpLT(RValue<Float4> x,RValue<Float4> y)6749 	RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6750 	{
6751 	//	return As<Int4>(x86::cmpltps(x, y));
6752 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
6753 	}
6754 
CmpLE(RValue<Float4> x,RValue<Float4> y)6755 	RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6756 	{
6757 	//	return As<Int4>(x86::cmpleps(x, y));
6758 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
6759 	}
6760 
CmpNEQ(RValue<Float4> x,RValue<Float4> y)6761 	RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6762 	{
6763 	//	return As<Int4>(x86::cmpneqps(x, y));
6764 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
6765 	}
6766 
CmpNLT(RValue<Float4> x,RValue<Float4> y)6767 	RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6768 	{
6769 	//	return As<Int4>(x86::cmpnltps(x, y));
6770 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
6771 	}
6772 
CmpNLE(RValue<Float4> x,RValue<Float4> y)6773 	RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6774 	{
6775 	//	return As<Int4>(x86::cmpnleps(x, y));
6776 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
6777 	}
6778 
IsInf(RValue<Float4> x)6779 	RValue<Int4> IsInf(RValue<Float4> x)
6780 	{
6781 		return CmpEQ(As<Int4>(x) & Int4(0x7FFFFFFF), Int4(0x7F800000));
6782 	}
6783 
IsNan(RValue<Float4> x)6784 	RValue<Int4> IsNan(RValue<Float4> x)
6785 	{
6786 		return ~CmpEQ(x, x);
6787 	}
6788 
Round(RValue<Float4> x)6789 	RValue<Float4> Round(RValue<Float4> x)
6790 	{
6791 #if defined(__i386__) || defined(__x86_64__)
6792 		if(CPUID::supportsSSE4_1())
6793 		{
6794 			return x86::roundps(x, 0);
6795 		}
6796 		else
6797 		{
6798 			return Float4(RoundInt(x));
6799 		}
6800 #else
6801 		return RValue<Float4>(V(lowerRound(V(x.value))));
6802 #endif
6803 	}
6804 
Trunc(RValue<Float4> x)6805 	RValue<Float4> Trunc(RValue<Float4> x)
6806 	{
6807 #if defined(__i386__) || defined(__x86_64__)
6808 		if(CPUID::supportsSSE4_1())
6809 		{
6810 			return x86::roundps(x, 3);
6811 		}
6812 		else
6813 		{
6814 			return Float4(Int4(x));
6815 		}
6816 #else
6817 		return RValue<Float4>(V(lowerTrunc(V(x.value))));
6818 #endif
6819 	}
6820 
Frac(RValue<Float4> x)6821 	RValue<Float4> Frac(RValue<Float4> x)
6822 	{
6823 		Float4 frc;
6824 
6825 #if defined(__i386__) || defined(__x86_64__)
6826 		if(CPUID::supportsSSE4_1())
6827 		{
6828 			frc = x - Floor(x);
6829 		}
6830 		else
6831 		{
6832 			frc = x - Float4(Int4(x));   // Signed fractional part.
6833 
6834 			frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f)));   // Add 1.0 if negative.
6835 		}
6836 #else
6837 		frc = x - Floor(x);
6838 #endif
6839 
6840 		// x - floor(x) can be 1.0 for very small negative x.
6841 		// Clamp against the value just below 1.0.
6842 		return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
6843 	}
6844 
Floor(RValue<Float4> x)6845 	RValue<Float4> Floor(RValue<Float4> x)
6846 	{
6847 #if defined(__i386__) || defined(__x86_64__)
6848 		if(CPUID::supportsSSE4_1())
6849 		{
6850 			return x86::floorps(x);
6851 		}
6852 		else
6853 		{
6854 			return x - Frac(x);
6855 		}
6856 #else
6857 		return RValue<Float4>(V(lowerFloor(V(x.value))));
6858 #endif
6859 	}
6860 
Ceil(RValue<Float4> x)6861 	RValue<Float4> Ceil(RValue<Float4> x)
6862 	{
6863 #if defined(__i386__) || defined(__x86_64__)
6864 		if(CPUID::supportsSSE4_1())
6865 		{
6866 			return x86::ceilps(x);
6867 		}
6868 		else
6869 #endif
6870 		{
6871 			return -Floor(-x);
6872 		}
6873 	}
6874 
getType()6875 	Type *Float4::getType()
6876 	{
6877 		return T(llvm::VectorType::get(T(Float::getType()), 4));
6878 	}
6879 
operator +(RValue<Pointer<Byte>> lhs,int offset)6880 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6881 	{
6882 		return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
6883 	}
6884 
operator +(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6885 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6886 	{
6887 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
6888 	}
6889 
operator +(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6890 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6891 	{
6892 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
6893 	}
6894 
operator +=(Pointer<Byte> & lhs,int offset)6895 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6896 	{
6897 		return lhs = lhs + offset;
6898 	}
6899 
operator +=(Pointer<Byte> & lhs,RValue<Int> offset)6900 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6901 	{
6902 		return lhs = lhs + offset;
6903 	}
6904 
operator +=(Pointer<Byte> & lhs,RValue<UInt> offset)6905 	RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6906 	{
6907 		return lhs = lhs + offset;
6908 	}
6909 
operator -(RValue<Pointer<Byte>> lhs,int offset)6910 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6911 	{
6912 		return lhs + -offset;
6913 	}
6914 
operator -(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6915 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6916 	{
6917 		return lhs + -offset;
6918 	}
6919 
operator -(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6920 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6921 	{
6922 		return lhs + -offset;
6923 	}
6924 
operator -=(Pointer<Byte> & lhs,int offset)6925 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6926 	{
6927 		return lhs = lhs - offset;
6928 	}
6929 
operator -=(Pointer<Byte> & lhs,RValue<Int> offset)6930 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6931 	{
6932 		return lhs = lhs - offset;
6933 	}
6934 
operator -=(Pointer<Byte> & lhs,RValue<UInt> offset)6935 	RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6936 	{
6937 		return lhs = lhs - offset;
6938 	}
6939 
Return()6940 	void Return()
6941 	{
6942 		Nucleus::createRetVoid();
6943 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6944 		Nucleus::createUnreachable();
6945 	}
6946 
Return(RValue<Int> ret)6947 	void Return(RValue<Int> ret)
6948 	{
6949 		Nucleus::createRet(ret.value);
6950 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6951 		Nucleus::createUnreachable();
6952 	}
6953 
branch(RValue<Bool> cmp,BasicBlock * bodyBB,BasicBlock * endBB)6954 	void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6955 	{
6956 		Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6957 		Nucleus::setInsertBlock(bodyBB);
6958 	}
6959 
Ticks()6960 	RValue<Long> Ticks()
6961 	{
6962 		llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
6963 
6964 		return RValue<Long>(V(::builder->CreateCall(rdtsc)));
6965 	}
6966 }
6967 
6968 namespace rr
6969 {
6970 #if defined(__i386__) || defined(__x86_64__)
6971 	namespace x86
6972 	{
cvtss2si(RValue<Float> val)6973 		RValue<Int> cvtss2si(RValue<Float> val)
6974 		{
6975 			llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
6976 
6977 			Float4 vector;
6978 			vector.x = val;
6979 
6980 			return RValue<Int>(V(::builder->CreateCall(cvtss2si, ARGS(V(RValue<Float4>(vector).value)))));
6981 		}
6982 
cvtps2dq(RValue<Float4> val)6983 		RValue<Int4> cvtps2dq(RValue<Float4> val)
6984 		{
6985 			llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
6986 
6987 			return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, ARGS(V(val.value)))));
6988 		}
6989 
rcpss(RValue<Float> val)6990 		RValue<Float> rcpss(RValue<Float> val)
6991 		{
6992 			llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
6993 
6994 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
6995 
6996 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, ARGS(V(vector)))), Float::getType(), 0));
6997 		}
6998 
sqrtss(RValue<Float> val)6999 		RValue<Float> sqrtss(RValue<Float> val)
7000 		{
7001 #if REACTOR_LLVM_VERSION < 7
7002 			llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
7003 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
7004 
7005 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, ARGS(V(vector)))), Float::getType(), 0));
7006 #else
7007 			llvm::Function *sqrt = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
7008 			return RValue<Float>(V(::builder->CreateCall(sqrt, ARGS(V(val.value)))));
7009 #endif
7010 		}
7011 
rsqrtss(RValue<Float> val)7012 		RValue<Float> rsqrtss(RValue<Float> val)
7013 		{
7014 			llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
7015 
7016 			Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
7017 
7018 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, ARGS(V(vector)))), Float::getType(), 0));
7019 		}
7020 
rcpps(RValue<Float4> val)7021 		RValue<Float4> rcpps(RValue<Float4> val)
7022 		{
7023 			llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
7024 
7025 			return RValue<Float4>(V(::builder->CreateCall(rcpps, ARGS(V(val.value)))));
7026 		}
7027 
sqrtps(RValue<Float4> val)7028 		RValue<Float4> sqrtps(RValue<Float4> val)
7029 		{
7030 #if REACTOR_LLVM_VERSION < 7
7031 			llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
7032 #else
7033 			llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::sqrt, {V(val.value)->getType()});
7034 #endif
7035 
7036 			return RValue<Float4>(V(::builder->CreateCall(sqrtps, ARGS(V(val.value)))));
7037 		}
7038 
rsqrtps(RValue<Float4> val)7039 		RValue<Float4> rsqrtps(RValue<Float4> val)
7040 		{
7041 			llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
7042 
7043 			return RValue<Float4>(V(::builder->CreateCall(rsqrtps, ARGS(V(val.value)))));
7044 		}
7045 
maxps(RValue<Float4> x,RValue<Float4> y)7046 		RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
7047 		{
7048 			llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
7049 
7050 			return RValue<Float4>(V(::builder->CreateCall2(maxps, ARGS(V(x.value), V(y.value)))));
7051 		}
7052 
minps(RValue<Float4> x,RValue<Float4> y)7053 		RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
7054 		{
7055 			llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
7056 
7057 			return RValue<Float4>(V(::builder->CreateCall2(minps, ARGS(V(x.value), V(y.value)))));
7058 		}
7059 
roundss(RValue<Float> val,unsigned char imm)7060 		RValue<Float> roundss(RValue<Float> val, unsigned char imm)
7061 		{
7062 			llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
7063 
7064 			Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
7065 			Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
7066 
7067 			return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, ARGS(V(undef), V(vector), V(Nucleus::createConstantInt(imm))))), Float::getType(), 0));
7068 		}
7069 
floorss(RValue<Float> val)7070 		RValue<Float> floorss(RValue<Float> val)
7071 		{
7072 			return roundss(val, 1);
7073 		}
7074 
ceilss(RValue<Float> val)7075 		RValue<Float> ceilss(RValue<Float> val)
7076 		{
7077 			return roundss(val, 2);
7078 		}
7079 
roundps(RValue<Float4> val,unsigned char imm)7080 		RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
7081 		{
7082 			llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
7083 
7084 			return RValue<Float4>(V(::builder->CreateCall2(roundps, ARGS(V(val.value), V(Nucleus::createConstantInt(imm))))));
7085 		}
7086 
floorps(RValue<Float4> val)7087 		RValue<Float4> floorps(RValue<Float4> val)
7088 		{
7089 			return roundps(val, 1);
7090 		}
7091 
ceilps(RValue<Float4> val)7092 		RValue<Float4> ceilps(RValue<Float4> val)
7093 		{
7094 			return roundps(val, 2);
7095 		}
7096 
pabsd(RValue<Int4> x)7097 		RValue<Int4> pabsd(RValue<Int4> x)
7098 		{
7099 #if REACTOR_LLVM_VERSION < 7
7100 			llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
7101 
7102 			return RValue<Int4>(V(::builder->CreateCall(pabsd, ARGS(V(x.value)))));
7103 #else
7104 			return RValue<Int4>(V(lowerPABS(V(x.value))));
7105 #endif
7106 		}
7107 
paddsw(RValue<Short4> x,RValue<Short4> y)7108 		RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
7109 		{
7110 			llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
7111 
7112 			return As<Short4>(V(::builder->CreateCall2(paddsw, ARGS(V(x.value), V(y.value)))));
7113 		}
7114 
psubsw(RValue<Short4> x,RValue<Short4> y)7115 		RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
7116 		{
7117 			llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
7118 
7119 			return As<Short4>(V(::builder->CreateCall2(psubsw, ARGS(V(x.value), V(y.value)))));
7120 		}
7121 
paddusw(RValue<UShort4> x,RValue<UShort4> y)7122 		RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
7123 		{
7124 			llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
7125 
7126 			return As<UShort4>(V(::builder->CreateCall2(paddusw, ARGS(V(x.value), V(y.value)))));
7127 		}
7128 
psubusw(RValue<UShort4> x,RValue<UShort4> y)7129 		RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
7130 		{
7131 			llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
7132 
7133 			return As<UShort4>(V(::builder->CreateCall2(psubusw, ARGS(V(x.value), V(y.value)))));
7134 		}
7135 
paddsb(RValue<SByte8> x,RValue<SByte8> y)7136 		RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
7137 		{
7138 			llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
7139 
7140 			return As<SByte8>(V(::builder->CreateCall2(paddsb, ARGS(V(x.value), V(y.value)))));
7141 		}
7142 
psubsb(RValue<SByte8> x,RValue<SByte8> y)7143 		RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
7144 		{
7145 			llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
7146 
7147 			return As<SByte8>(V(::builder->CreateCall2(psubsb, ARGS(V(x.value), V(y.value)))));
7148 		}
7149 
paddusb(RValue<Byte8> x,RValue<Byte8> y)7150 		RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
7151 		{
7152 			llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
7153 
7154 			return As<Byte8>(V(::builder->CreateCall2(paddusb, ARGS(V(x.value), V(y.value)))));
7155 		}
7156 
psubusb(RValue<Byte8> x,RValue<Byte8> y)7157 		RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
7158 		{
7159 			llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
7160 
7161 			return As<Byte8>(V(::builder->CreateCall2(psubusb, ARGS(V(x.value), V(y.value)))));
7162 		}
7163 
pavgw(RValue<UShort4> x,RValue<UShort4> y)7164 		RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
7165 		{
7166 #if REACTOR_LLVM_VERSION < 7
7167 			llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
7168 
7169 			return As<UShort4>(V(::builder->CreateCall2(pavgw, ARGS(V(x.value), V(y.value)))));
7170 #else
7171 			return As<UShort4>(V(lowerPAVG(V(x.value), V(y.value))));
7172 #endif
7173 		}
7174 
pmaxsw(RValue<Short4> x,RValue<Short4> y)7175 		RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
7176 		{
7177 #if REACTOR_LLVM_VERSION < 7
7178 			llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
7179 
7180 			return As<Short4>(V(::builder->CreateCall2(pmaxsw, ARGS(V(x.value), V(y.value)))));
7181 #else
7182 			return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
7183 #endif
7184 		}
7185 
pminsw(RValue<Short4> x,RValue<Short4> y)7186 		RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
7187 		{
7188 #if REACTOR_LLVM_VERSION < 7
7189 			llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
7190 
7191 			return As<Short4>(V(::builder->CreateCall2(pminsw, ARGS(V(x.value), V(y.value)))));
7192 #else
7193 			return As<Short4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
7194 #endif
7195 		}
7196 
pcmpgtw(RValue<Short4> x,RValue<Short4> y)7197 		RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
7198 		{
7199 #if REACTOR_LLVM_VERSION < 7
7200 			llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
7201 
7202 			return As<Short4>(V(::builder->CreateCall2(pcmpgtw, ARGS(V(x.value), V(y.value)))));
7203 #else
7204 			return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Short4::getType()))));
7205 #endif
7206 		}
7207 
pcmpeqw(RValue<Short4> x,RValue<Short4> y)7208 		RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
7209 		{
7210 #if REACTOR_LLVM_VERSION < 7
7211 			llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
7212 
7213 			return As<Short4>(V(::builder->CreateCall2(pcmpeqw, ARGS(V(x.value), V(y.value)))));
7214 #else
7215 			return As<Short4>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Short4::getType()))));
7216 #endif
7217 		}
7218 
pcmpgtb(RValue<SByte8> x,RValue<SByte8> y)7219 		RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
7220 		{
7221 #if REACTOR_LLVM_VERSION < 7
7222 			llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
7223 
7224 			return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, ARGS(V(x.value), V(y.value)))));
7225 #else
7226 			return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_SGT, V(x.value), V(y.value), T(Byte8::getType()))));
7227 #endif
7228 		}
7229 
pcmpeqb(RValue<Byte8> x,RValue<Byte8> y)7230 		RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
7231 		{
7232 #if REACTOR_LLVM_VERSION < 7
7233 			llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
7234 
7235 			return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, ARGS(V(x.value), V(y.value)))));
7236 #else
7237 			return As<Byte8>(V(lowerPCMP(llvm::ICmpInst::ICMP_EQ, V(x.value), V(y.value), T(Byte8::getType()))));
7238 #endif
7239 		}
7240 
packssdw(RValue<Int2> x,RValue<Int2> y)7241 		RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
7242 		{
7243 			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
7244 
7245 			return As<Short4>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
7246 		}
7247 
packssdw(RValue<Int4> x,RValue<Int4> y)7248 		RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
7249 		{
7250 			llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
7251 
7252 			return RValue<Short8>(V(::builder->CreateCall2(packssdw, ARGS(V(x.value), V(y.value)))));
7253 		}
7254 
packsswb(RValue<Short4> x,RValue<Short4> y)7255 		RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
7256 		{
7257 			llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
7258 
7259 			return As<SByte8>(V(::builder->CreateCall2(packsswb, ARGS(V(x.value), V(y.value)))));
7260 		}
7261 
packuswb(RValue<Short4> x,RValue<Short4> y)7262 		RValue<Byte8> packuswb(RValue<Short4> x, RValue<Short4> y)
7263 		{
7264 			llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
7265 
7266 			return As<Byte8>(V(::builder->CreateCall2(packuswb, ARGS(V(x.value), V(y.value)))));
7267 		}
7268 
packusdw(RValue<Int4> x,RValue<Int4> y)7269 		RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
7270 		{
7271 			if(CPUID::supportsSSE4_1())
7272 			{
7273 				llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
7274 
7275 				return RValue<UShort8>(V(::builder->CreateCall2(packusdw, ARGS(V(x.value), V(y.value)))));
7276 			}
7277 			else
7278 			{
7279 				RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
7280 				RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
7281 
7282 				return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
7283 			}
7284 		}
7285 
psrlw(RValue<UShort4> x,unsigned char y)7286 		RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
7287 		{
7288 			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
7289 
7290 			return As<UShort4>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7291 		}
7292 
psrlw(RValue<UShort8> x,unsigned char y)7293 		RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
7294 		{
7295 			llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
7296 
7297 			return RValue<UShort8>(V(::builder->CreateCall2(psrlw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7298 		}
7299 
psraw(RValue<Short4> x,unsigned char y)7300 		RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
7301 		{
7302 			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
7303 
7304 			return As<Short4>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7305 		}
7306 
psraw(RValue<Short8> x,unsigned char y)7307 		RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
7308 		{
7309 			llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
7310 
7311 			return RValue<Short8>(V(::builder->CreateCall2(psraw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7312 		}
7313 
psllw(RValue<Short4> x,unsigned char y)7314 		RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
7315 		{
7316 			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
7317 
7318 			return As<Short4>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7319 		}
7320 
psllw(RValue<Short8> x,unsigned char y)7321 		RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
7322 		{
7323 			llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
7324 
7325 			return RValue<Short8>(V(::builder->CreateCall2(psllw, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7326 		}
7327 
pslld(RValue<Int2> x,unsigned char y)7328 		RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
7329 		{
7330 			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
7331 
7332 			return As<Int2>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7333 		}
7334 
pslld(RValue<Int4> x,unsigned char y)7335 		RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
7336 		{
7337 			llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
7338 
7339 			return RValue<Int4>(V(::builder->CreateCall2(pslld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7340 		}
7341 
psrad(RValue<Int2> x,unsigned char y)7342 		RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
7343 		{
7344 			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
7345 
7346 			return As<Int2>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7347 		}
7348 
psrad(RValue<Int4> x,unsigned char y)7349 		RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
7350 		{
7351 			llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
7352 
7353 			return RValue<Int4>(V(::builder->CreateCall2(psrad, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7354 		}
7355 
psrld(RValue<UInt2> x,unsigned char y)7356 		RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7357 		{
7358 			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
7359 
7360 			return As<UInt2>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7361 		}
7362 
psrld(RValue<UInt4> x,unsigned char y)7363 		RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7364 		{
7365 			llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
7366 
7367 			return RValue<UInt4>(V(::builder->CreateCall2(psrld, ARGS(V(x.value), V(Nucleus::createConstantInt(y))))));
7368 		}
7369 
pmaxsd(RValue<Int4> x,RValue<Int4> y)7370 		RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7371 		{
7372 #if REACTOR_LLVM_VERSION < 7
7373 			llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
7374 
7375 			return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, ARGS(V(x.value), V(y.value)))));
7376 #else
7377 			return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SGT)));
7378 #endif
7379 		}
7380 
pminsd(RValue<Int4> x,RValue<Int4> y)7381 		RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7382 		{
7383 #if REACTOR_LLVM_VERSION < 7
7384 			llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
7385 
7386 			return RValue<Int4>(V(::builder->CreateCall2(pminsd, ARGS(V(x.value), V(y.value)))));
7387 #else
7388 			return RValue<Int4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_SLT)));
7389 #endif
7390 		}
7391 
pmaxud(RValue<UInt4> x,RValue<UInt4> y)7392 		RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7393 		{
7394 #if REACTOR_LLVM_VERSION < 7
7395 			llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
7396 
7397 			return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, ARGS(V(x.value), V(y.value)))));
7398 #else
7399 			return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_UGT)));
7400 #endif
7401 		}
7402 
pminud(RValue<UInt4> x,RValue<UInt4> y)7403 		RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7404 		{
7405 #if REACTOR_LLVM_VERSION < 7
7406 			llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
7407 
7408 			return RValue<UInt4>(V(::builder->CreateCall2(pminud, ARGS(V(x.value), V(y.value)))));
7409 #else
7410 			return RValue<UInt4>(V(lowerPMINMAX(V(x.value), V(y.value), llvm::ICmpInst::ICMP_ULT)));
7411 #endif
7412 		}
7413 
pmulhw(RValue<Short4> x,RValue<Short4> y)7414 		RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7415 		{
7416 			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
7417 
7418 			return As<Short4>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
7419 		}
7420 
pmulhuw(RValue<UShort4> x,RValue<UShort4> y)7421 		RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7422 		{
7423 			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
7424 
7425 			return As<UShort4>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
7426 		}
7427 
pmaddwd(RValue<Short4> x,RValue<Short4> y)7428 		RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7429 		{
7430 			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
7431 
7432 			return As<Int2>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
7433 		}
7434 
pmulhw(RValue<Short8> x,RValue<Short8> y)7435 		RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7436 		{
7437 			llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
7438 
7439 			return RValue<Short8>(V(::builder->CreateCall2(pmulhw, ARGS(V(x.value), V(y.value)))));
7440 		}
7441 
pmulhuw(RValue<UShort8> x,RValue<UShort8> y)7442 		RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7443 		{
7444 			llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
7445 
7446 			return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, ARGS(V(x.value), V(y.value)))));
7447 		}
7448 
pmaddwd(RValue<Short8> x,RValue<Short8> y)7449 		RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7450 		{
7451 			llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
7452 
7453 			return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, ARGS(V(x.value), V(y.value)))));
7454 		}
7455 
movmskps(RValue<Float4> x)7456 		RValue<Int> movmskps(RValue<Float4> x)
7457 		{
7458 			llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
7459 
7460 			return RValue<Int>(V(::builder->CreateCall(movmskps, ARGS(V(x.value)))));
7461 		}
7462 
pmovmskb(RValue<Byte8> x)7463 		RValue<Int> pmovmskb(RValue<Byte8> x)
7464 		{
7465 			llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
7466 
7467 			return RValue<Int>(V(::builder->CreateCall(pmovmskb, ARGS(V(x.value))))) & 0xFF;
7468 		}
7469 
pmovzxbd(RValue<Byte16> x)7470 		RValue<Int4> pmovzxbd(RValue<Byte16> x)
7471 		{
7472 #if REACTOR_LLVM_VERSION < 7
7473 			llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
7474 
7475 			return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, ARGS(V(x.value)))));
7476 #else
7477 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
7478 #endif
7479 		}
7480 
pmovsxbd(RValue<SByte16> x)7481 		RValue<Int4> pmovsxbd(RValue<SByte16> x)
7482 		{
7483 #if REACTOR_LLVM_VERSION < 7
7484 			llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
7485 
7486 			return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, ARGS(V(x.value)))));
7487 #else
7488 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
7489 #endif
7490 		}
7491 
pmovzxwd(RValue<UShort8> x)7492 		RValue<Int4> pmovzxwd(RValue<UShort8> x)
7493 		{
7494 #if REACTOR_LLVM_VERSION < 7
7495 			llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
7496 
7497 			return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, ARGS(V(x.value)))));
7498 #else
7499 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), false)));
7500 #endif
7501 		}
7502 
pmovsxwd(RValue<Short8> x)7503 		RValue<Int4> pmovsxwd(RValue<Short8> x)
7504 		{
7505 #if REACTOR_LLVM_VERSION < 7
7506 			llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
7507 
7508 			return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, ARGS(V(x.value)))));
7509 #else
7510 			return RValue<Int4>(V(lowerPMOV(V(x.value), T(Int4::getType()), true)));
7511 #endif
7512 		}
7513 	}
7514 #endif  // defined(__i386__) || defined(__x86_64__)
7515 }
7516