1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Nucleus.hpp"
16 
17 #include "llvm/Support/IRBuilder.h"
18 #include "llvm/Function.h"
19 #include "llvm/GlobalVariable.h"
20 #include "llvm/Module.h"
21 #include "llvm/LLVMContext.h"
22 #include "llvm/Constants.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/PassManager.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "../lib/ExecutionEngine/JIT/JIT.h"
31 
32 #include "Routine.hpp"
33 #include "RoutineManager.hpp"
34 #include "x86.hpp"
35 #include "CPUID.hpp"
36 #include "Thread.hpp"
37 #include "Memory.hpp"
38 
39 #include <xmmintrin.h>
40 #include <fstream>
41 
42 #if defined(__x86_64__) && defined(_WIN32)
// Stub for the JIT compilation callback on 64-bit Windows builds.
// Not implemented: reaching it trips the assertion below.
extern "C" void X86CompilationCallback()
{
	// UNIMPLEMENTED
	assert(false);
}
47 #endif
48 
// Entry points of the optional CodeAnalyst JIT notification library.
// All start out null; they are only assigned if the library gets loaded.
extern "C"
{
	// Initializes the JIT profiling session.
	bool (*CodeAnalystInitialize)() = 0;

	// Finalizes the JIT log.
	void (*CodeAnalystCompleteJITLog)() = 0;

	// Reports one block of JIT-generated code under the given name.
	bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr,
	                              unsigned int jitCodeSize,
	                              const wchar_t *functionName) = 0;
}
55 
// Flag defined elsewhere in namespace llvm; declared here so the Nucleus
// constructor can clear it.
namespace llvm
{
	extern bool JITEmitDebugInfo;
}
60 
61 namespace sw
62 {
63 	Optimization optimization[10] = {InstructionCombining, Disabled};
64 
65 	using namespace llvm;
66 
67 	RoutineManager *Nucleus::routineManager = 0;
68 	ExecutionEngine *Nucleus::executionEngine = 0;
69 	Builder *Nucleus::builder = 0;
70 	LLVMContext *Nucleus::context = 0;
71 	Module *Nucleus::module = 0;
72 	llvm::Function *Nucleus::function = 0;
73 	BackoffLock Nucleus::codegenMutex;
74 
75 	class Builder : public IRBuilder<>
76 	{
77 	};
78 
Nucleus()79 	Nucleus::Nucleus()
80 	{
81 		codegenMutex.lock();   // Reactor and LLVM are currently not thread safe
82 
83 		InitializeNativeTarget();
84 		JITEmitDebugInfo = false;
85 
86 		if(!context)
87 		{
88 			context = new LLVMContext();
89 		}
90 
91 		module = new Module("", *context);
92 		routineManager = new RoutineManager();
93 
94 		#if defined(__x86_64__)
95 			const char *architecture = "x86-64";
96 		#else
97 			const char *architecture = "x86";
98 		#endif
99 
100 		SmallVector<std::string, 1> MAttrs;
101 		MAttrs.push_back(CPUID::supportsMMX()    ? "+mmx"   : "-mmx");
102 		MAttrs.push_back(CPUID::supportsCMOV()   ? "+cmov"  : "-cmov");
103 		MAttrs.push_back(CPUID::supportsSSE()    ? "+sse"   : "-sse");
104 		MAttrs.push_back(CPUID::supportsSSE2()   ? "+sse2"  : "-sse2");
105 		MAttrs.push_back(CPUID::supportsSSE3()   ? "+sse3"  : "-sse3");
106 		MAttrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3" : "-ssse3");
107 		MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
108 
109 		std::string error;
110 		TargetMachine *targetMachine = EngineBuilder::selectTarget(module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
111 		executionEngine = JIT::createJIT(module, 0, routineManager, CodeGenOpt::Aggressive, true, targetMachine);
112 
113 		if(!builder)
114 		{
115 			builder = static_cast<Builder*>(new IRBuilder<>(*context));
116 
117 			#if defined(_WIN32)
118 				HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
119 				if(CodeAnalyst)
120 				{
121 					CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
122 					CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
123 					CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
124 
125 					CodeAnalystInitialize();
126 				}
127 			#endif
128 		}
129 	}
130 
~Nucleus()131 	Nucleus::~Nucleus()
132 	{
133 		delete executionEngine;
134 		executionEngine = 0;
135 
136 		routineManager = 0;
137 		function = 0;
138 		module = 0;
139 
140 		codegenMutex.unlock();
141 	}
142 
acquireRoutine(const wchar_t * name,bool runOptimizations)143 	Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
144 	{
145 		if(builder->GetInsertBlock()->empty() || !builder->GetInsertBlock()->back().isTerminator())
146 		{
147 			Type *type = function->getReturnType();
148 
149 			if(type->isVoidTy())
150 			{
151 				createRetVoid();
152 			}
153 			else
154 			{
155 				createRet(UndefValue::get(type));
156 			}
157 		}
158 
159 		if(false)
160 		{
161 			std::string error;
162 			raw_fd_ostream file("llvm-dump-unopt.txt", error);
163 			module->print(file, 0);
164 		}
165 
166 		if(runOptimizations)
167 		{
168 			optimize();
169 		}
170 
171 		if(false)
172 		{
173 			std::string error;
174 			raw_fd_ostream file("llvm-dump-opt.txt", error);
175 			module->print(file, 0);
176 		}
177 
178 		void *entry = executionEngine->getPointerToFunction(function);
179 		Routine *routine = routineManager->acquireRoutine(entry);
180 
181 		if(CodeAnalystLogJITCode)
182 		{
183 			CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
184 		}
185 
186 		return routine;
187 	}
188 
optimize()189 	void Nucleus::optimize()
190 	{
191 		static PassManager *passManager = 0;
192 
193 		if(!passManager)
194 		{
195 			passManager = new PassManager();
196 
197 			UnsafeFPMath = true;
198 		//	NoInfsFPMath = true;
199 		//	NoNaNsFPMath = true;
200 
201 			passManager->add(new TargetData(*executionEngine->getTargetData()));
202 			passManager->add(createScalarReplAggregatesPass());
203 
204 			for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
205 			{
206 				switch(optimization[pass])
207 				{
208 				case Disabled:                                                                 break;
209 				case CFGSimplification:    passManager->add(createCFGSimplificationPass());    break;
210 				case LICM:                 passManager->add(createLICMPass());                 break;
211 				case AggressiveDCE:        passManager->add(createAggressiveDCEPass());        break;
212 				case GVN:                  passManager->add(createGVNPass());                  break;
213 				case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
214 				case Reassociate:          passManager->add(createReassociatePass());          break;
215 				case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
216 				case SCCP:                 passManager->add(createSCCPPass());                 break;
217 				case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
218 				default:
219 					assert(false);
220 				}
221 			}
222 		}
223 
224 		passManager->run(*module);
225 	}
226 
setFunction(llvm::Function * function)227 	void Nucleus::setFunction(llvm::Function *function)
228 	{
229 		Nucleus::function = function;
230 
231 		builder->SetInsertPoint(BasicBlock::Create(*context, "", function));
232 	}
233 
getModule()234 	Module *Nucleus::getModule()
235 	{
236 		return module;
237 	}
238 
getFunction()239 	llvm::Function *Nucleus::getFunction()
240 	{
241 		return function;
242 	}
243 
getContext()244 	llvm::LLVMContext *Nucleus::getContext()
245 	{
246 		return context;
247 	}
248 
allocateStackVariable(Type * type,int arraySize)249 	Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
250 	{
251 		// Need to allocate it in the entry block for mem2reg to work
252 		llvm::Function *function = getFunction();
253 		BasicBlock &entryBlock = function->getEntryBlock();
254 
255 		Instruction *declaration;
256 
257 		if(arraySize)
258 		{
259 			declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
260 		}
261 		else
262 		{
263 			declaration = new AllocaInst(type, (Value*)0);
264 		}
265 
266 		entryBlock.getInstList().push_front(declaration);
267 
268 		return declaration;
269 	}
270 
createBasicBlock()271 	BasicBlock *Nucleus::createBasicBlock()
272 	{
273 		return BasicBlock::Create(*context, "", Nucleus::getFunction());
274 	}
275 
getInsertBlock()276 	BasicBlock *Nucleus::getInsertBlock()
277 	{
278 		return builder->GetInsertBlock();
279 	}
280 
setInsertBlock(BasicBlock * basicBlock)281 	void Nucleus::setInsertBlock(BasicBlock *basicBlock)
282 	{
283 	//	assert(builder->GetInsertBlock()->back().isTerminator());
284 		return builder->SetInsertPoint(basicBlock);
285 	}
286 
getPredecessor(BasicBlock * basicBlock)287 	BasicBlock *Nucleus::getPredecessor(BasicBlock *basicBlock)
288 	{
289 		return *pred_begin(basicBlock);
290 	}
291 
createFunction(llvm::Type * ReturnType,std::vector<llvm::Type * > & Params)292 	llvm::Function *Nucleus::createFunction(llvm::Type *ReturnType, std::vector<llvm::Type*> &Params)
293 	{
294 		llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, Params, false);
295 		llvm::Function *function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", Nucleus::getModule());
296 		function->setCallingConv(llvm::CallingConv::C);
297 
298 		return function;
299 	}
300 
getArgument(llvm::Function * function,unsigned int index)301 	llvm::Value *Nucleus::getArgument(llvm::Function *function, unsigned int index)
302 	{
303 		llvm::Function::arg_iterator args = function->arg_begin();
304 
305 		while(index)
306 		{
307 			args++;
308 			index--;
309 		}
310 
311 		return &*args;
312 	}
313 
createRetVoid()314 	Value *Nucleus::createRetVoid()
315 	{
316 		x86::emms();
317 
318 		return builder->CreateRetVoid();
319 	}
320 
createRet(Value * V)321 	Value *Nucleus::createRet(Value *V)
322 	{
323 		x86::emms();
324 
325 		return builder->CreateRet(V);
326 	}
327 
createBr(BasicBlock * dest)328 	Value *Nucleus::createBr(BasicBlock *dest)
329 	{
330 		return builder->CreateBr(dest);
331 	}
332 
createCondBr(Value * cond,BasicBlock * ifTrue,BasicBlock * ifFalse)333 	Value *Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
334 	{
335 		return builder->CreateCondBr(cond, ifTrue, ifFalse);
336 	}
337 
createAdd(Value * lhs,Value * rhs)338 	Value *Nucleus::createAdd(Value *lhs, Value *rhs)
339 	{
340 		return builder->CreateAdd(lhs, rhs);
341 	}
342 
createSub(Value * lhs,Value * rhs)343 	Value *Nucleus::createSub(Value *lhs, Value *rhs)
344 	{
345 		return builder->CreateSub(lhs, rhs);
346 	}
347 
createMul(Value * lhs,Value * rhs)348 	Value *Nucleus::createMul(Value *lhs, Value *rhs)
349 	{
350 		return builder->CreateMul(lhs, rhs);
351 	}
352 
createUDiv(Value * lhs,Value * rhs)353 	Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
354 	{
355 		return builder->CreateUDiv(lhs, rhs);
356 	}
357 
createSDiv(Value * lhs,Value * rhs)358 	Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
359 	{
360 		return builder->CreateSDiv(lhs, rhs);
361 	}
362 
createFAdd(Value * lhs,Value * rhs)363 	Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
364 	{
365 		return builder->CreateFAdd(lhs, rhs);
366 	}
367 
createFSub(Value * lhs,Value * rhs)368 	Value *Nucleus::createFSub(Value *lhs, Value *rhs)
369 	{
370 		return builder->CreateFSub(lhs, rhs);
371 	}
372 
createFMul(Value * lhs,Value * rhs)373 	Value *Nucleus::createFMul(Value *lhs, Value *rhs)
374 	{
375 		return builder->CreateFMul(lhs, rhs);
376 	}
377 
createFDiv(Value * lhs,Value * rhs)378 	Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
379 	{
380 		return builder->CreateFDiv(lhs, rhs);
381 	}
382 
createURem(Value * lhs,Value * rhs)383 	Value *Nucleus::createURem(Value *lhs, Value *rhs)
384 	{
385 		return builder->CreateURem(lhs, rhs);
386 	}
387 
createSRem(Value * lhs,Value * rhs)388 	Value *Nucleus::createSRem(Value *lhs, Value *rhs)
389 	{
390 		return builder->CreateSRem(lhs, rhs);
391 	}
392 
createFRem(Value * lhs,Value * rhs)393 	Value *Nucleus::createFRem(Value *lhs, Value *rhs)
394 	{
395 		return builder->CreateFRem(lhs, rhs);
396 	}
397 
createShl(Value * lhs,Value * rhs)398 	Value *Nucleus::createShl(Value *lhs, Value *rhs)
399 	{
400 		return builder->CreateShl(lhs, rhs);
401 	}
402 
createLShr(Value * lhs,Value * rhs)403 	Value *Nucleus::createLShr(Value *lhs, Value *rhs)
404 	{
405 		return builder->CreateLShr(lhs, rhs);
406 	}
407 
createAShr(Value * lhs,Value * rhs)408 	Value *Nucleus::createAShr(Value *lhs, Value *rhs)
409 	{
410 		return builder->CreateAShr(lhs, rhs);
411 	}
412 
createAnd(Value * lhs,Value * rhs)413 	Value *Nucleus::createAnd(Value *lhs, Value *rhs)
414 	{
415 		return builder->CreateAnd(lhs, rhs);
416 	}
417 
createOr(Value * lhs,Value * rhs)418 	Value *Nucleus::createOr(Value *lhs, Value *rhs)
419 	{
420 		return builder->CreateOr(lhs, rhs);
421 	}
422 
createXor(Value * lhs,Value * rhs)423 	Value *Nucleus::createXor(Value *lhs, Value *rhs)
424 	{
425 		return builder->CreateXor(lhs, rhs);
426 	}
427 
createNeg(Value * V)428 	Value *Nucleus::createNeg(Value *V)
429 	{
430 		return builder->CreateNeg(V);
431 	}
432 
createFNeg(Value * V)433 	Value *Nucleus::createFNeg(Value *V)
434 	{
435 		return builder->CreateFNeg(V);
436 	}
437 
createNot(Value * V)438 	Value *Nucleus::createNot(Value *V)
439 	{
440 		return builder->CreateNot(V);
441 	}
442 
createLoad(Value * ptr,bool isVolatile,unsigned int align)443 	Value *Nucleus::createLoad(Value *ptr, bool isVolatile, unsigned int align)
444 	{
445 		return builder->Insert(new LoadInst(ptr, "", isVolatile, align));
446 	}
447 
createStore(Value * value,Value * ptr,bool isVolatile,unsigned int align)448 	Value *Nucleus::createStore(Value *value, Value *ptr, bool isVolatile, unsigned int align)
449 	{
450 		return builder->Insert(new StoreInst(value, ptr, isVolatile, align));
451 	}
452 
createGEP(Value * ptr,Value * index)453 	Value *Nucleus::createGEP(Value *ptr, Value *index)
454 	{
455 		return builder->CreateGEP(ptr, index);
456 	}
457 
createAtomicAdd(Value * ptr,Value * value)458 	Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
459 	{
460 		return builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent);
461 	}
462 
createTrunc(Value * V,Type * destType)463 	Value *Nucleus::createTrunc(Value *V, Type *destType)
464 	{
465 		return builder->CreateTrunc(V, destType);
466 	}
467 
createZExt(Value * V,Type * destType)468 	Value *Nucleus::createZExt(Value *V, Type *destType)
469 	{
470 		return builder->CreateZExt(V, destType);
471 	}
472 
createSExt(Value * V,Type * destType)473 	Value *Nucleus::createSExt(Value *V, Type *destType)
474 	{
475 		return builder->CreateSExt(V, destType);
476 	}
477 
createFPToUI(Value * V,Type * destType)478 	Value *Nucleus::createFPToUI(Value *V, Type *destType)
479 	{
480 		return builder->CreateFPToUI(V, destType);
481 	}
482 
createFPToSI(Value * V,Type * destType)483 	Value *Nucleus::createFPToSI(Value *V, Type *destType)
484 	{
485 		return builder->CreateFPToSI(V, destType);
486 	}
487 
createUIToFP(Value * V,Type * destType)488 	Value *Nucleus::createUIToFP(Value *V, Type *destType)
489 	{
490 		return builder->CreateUIToFP(V, destType);
491 	}
492 
createSIToFP(Value * V,Type * destType)493 	Value *Nucleus::createSIToFP(Value *V, Type *destType)
494 	{
495 		return builder->CreateSIToFP(V, destType);
496 	}
497 
createFPTrunc(Value * V,Type * destType)498 	Value *Nucleus::createFPTrunc(Value *V, Type *destType)
499 	{
500 		return builder->CreateFPTrunc(V, destType);
501 	}
502 
createFPExt(Value * V,Type * destType)503 	Value *Nucleus::createFPExt(Value *V, Type *destType)
504 	{
505 		return builder->CreateFPExt(V, destType);
506 	}
507 
createPtrToInt(Value * V,Type * destType)508 	Value *Nucleus::createPtrToInt(Value *V, Type *destType)
509 	{
510 		return builder->CreatePtrToInt(V, destType);
511 	}
512 
createIntToPtr(Value * V,Type * destType)513 	Value *Nucleus::createIntToPtr(Value *V, Type *destType)
514 	{
515 		return builder->CreateIntToPtr(V, destType);
516 	}
517 
createBitCast(Value * V,Type * destType)518 	Value *Nucleus::createBitCast(Value *V, Type *destType)
519 	{
520 		return builder->CreateBitCast(V, destType);
521 	}
522 
createIntCast(Value * V,Type * destType,bool isSigned)523 	Value *Nucleus::createIntCast(Value *V, Type *destType, bool isSigned)
524 	{
525 		return builder->CreateIntCast(V, destType, isSigned);
526 	}
527 
createICmpEQ(Value * lhs,Value * rhs)528 	Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
529 	{
530 		return builder->CreateICmpEQ(lhs, rhs);
531 	}
532 
createICmpNE(Value * lhs,Value * rhs)533 	Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
534 	{
535 		return builder->CreateICmpNE(lhs, rhs);
536 	}
537 
createICmpUGT(Value * lhs,Value * rhs)538 	Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
539 	{
540 		return builder->CreateICmpUGT(lhs, rhs);
541 	}
542 
createICmpUGE(Value * lhs,Value * rhs)543 	Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
544 	{
545 		return builder->CreateICmpUGE(lhs, rhs);
546 	}
547 
createICmpULT(Value * lhs,Value * rhs)548 	Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
549 	{
550 		return builder->CreateICmpULT(lhs, rhs);
551 	}
552 
createICmpULE(Value * lhs,Value * rhs)553 	Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
554 	{
555 		return builder->CreateICmpULE(lhs, rhs);
556 	}
557 
createICmpSGT(Value * lhs,Value * rhs)558 	Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
559 	{
560 		return builder->CreateICmpSGT(lhs, rhs);
561 	}
562 
createICmpSGE(Value * lhs,Value * rhs)563 	Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
564 	{
565 		return builder->CreateICmpSGE(lhs, rhs);
566 	}
567 
createICmpSLT(Value * lhs,Value * rhs)568 	Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
569 	{
570 		return builder->CreateICmpSLT(lhs, rhs);
571 	}
572 
createICmpSLE(Value * lhs,Value * rhs)573 	Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
574 	{
575 		return builder->CreateICmpSLE(lhs, rhs);
576 	}
577 
createFCmpOEQ(Value * lhs,Value * rhs)578 	Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
579 	{
580 		return builder->CreateFCmpOEQ(lhs, rhs);
581 	}
582 
createFCmpOGT(Value * lhs,Value * rhs)583 	Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
584 	{
585 		return builder->CreateFCmpOGT(lhs, rhs);
586 	}
587 
createFCmpOGE(Value * lhs,Value * rhs)588 	Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
589 	{
590 		return builder->CreateFCmpOGE(lhs, rhs);
591 	}
592 
createFCmpOLT(Value * lhs,Value * rhs)593 	Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
594 	{
595 		return builder->CreateFCmpOLT(lhs, rhs);
596 	}
597 
createFCmpOLE(Value * lhs,Value * rhs)598 	Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
599 	{
600 		return builder->CreateFCmpOLE(lhs, rhs);
601 	}
602 
createFCmpONE(Value * lhs,Value * rhs)603 	Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
604 	{
605 		return builder->CreateFCmpONE(lhs, rhs);
606 	}
607 
createFCmpORD(Value * lhs,Value * rhs)608 	Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
609 	{
610 		return builder->CreateFCmpORD(lhs, rhs);
611 	}
612 
createFCmpUNO(Value * lhs,Value * rhs)613 	Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
614 	{
615 		return builder->CreateFCmpUNO(lhs, rhs);
616 	}
617 
createFCmpUEQ(Value * lhs,Value * rhs)618 	Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
619 	{
620 		return builder->CreateFCmpUEQ(lhs, rhs);
621 	}
622 
createFCmpUGT(Value * lhs,Value * rhs)623 	Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
624 	{
625 		return builder->CreateFCmpUGT(lhs, rhs);
626 	}
627 
createFCmpUGE(Value * lhs,Value * rhs)628 	Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
629 	{
630 		return builder->CreateFCmpUGE(lhs, rhs);
631 	}
632 
createFCmpULT(Value * lhs,Value * rhs)633 	Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
634 	{
635 		return builder->CreateFCmpULT(lhs, rhs);
636 	}
637 
createFCmpULE(Value * lhs,Value * rhs)638 	Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
639 	{
640 		return builder->CreateFCmpULE(lhs, rhs);
641 	}
642 
createFCmpUNE(Value * lhs,Value * rhs)643 	Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
644 	{
645 		return builder->CreateFCmpULE(lhs, rhs);
646 	}
647 
createCall(Value * callee)648 	Value *Nucleus::createCall(Value *callee)
649 	{
650 		return builder->CreateCall(callee);
651 	}
652 
createCall(Value * callee,Value * arg)653 	Value *Nucleus::createCall(Value *callee, Value *arg)
654 	{
655 		return builder->CreateCall(callee, arg);
656 	}
657 
createCall(Value * callee,Value * arg1,Value * arg2)658 	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2)
659 	{
660 		return builder->CreateCall2(callee, arg1, arg2);
661 	}
662 
createCall(Value * callee,Value * arg1,Value * arg2,Value * arg3)663 	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3)
664 	{
665 		return builder->CreateCall3(callee, arg1, arg2, arg3);
666 	}
667 
createCall(Value * callee,Value * arg1,Value * arg2,Value * arg3,Value * arg4)668 	Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3, Value *arg4)
669 	{
670 		return builder->CreateCall4(callee, arg1, arg2, arg3, arg4);
671 	}
672 
createExtractElement(Value * vector,int index)673 	Value *Nucleus::createExtractElement(Value *vector, int index)
674 	{
675 		return builder->CreateExtractElement(vector, createConstantInt(index));
676 	}
677 
createInsertElement(Value * vector,Value * element,int index)678 	Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
679 	{
680 		return builder->CreateInsertElement(vector, element, createConstantInt(index));
681 	}
682 
createShuffleVector(Value * V1,Value * V2,Value * mask)683 	Value *Nucleus::createShuffleVector(Value *V1, Value *V2, Value *mask)
684 	{
685 		return builder->CreateShuffleVector(V1, V2, mask);
686 	}
687 
createSelect(Value * C,Value * ifTrue,Value * ifFalse)688 	Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
689 	{
690 		return builder->CreateSelect(C, ifTrue, ifFalse);
691 	}
692 
createSwitch(llvm::Value * V,llvm::BasicBlock * Dest,unsigned NumCases)693 	Value *Nucleus::createSwitch(llvm::Value *V, llvm::BasicBlock *Dest, unsigned NumCases)
694 	{
695 		return builder->CreateSwitch(V, Dest, NumCases);
696 	}
697 
addSwitchCase(llvm::Value * Switch,int Case,llvm::BasicBlock * Branch)698 	void Nucleus::addSwitchCase(llvm::Value *Switch, int Case, llvm::BasicBlock *Branch)
699 	{
700 		static_cast<SwitchInst*>(Switch)->addCase(Nucleus::createConstantInt(Case), Branch);
701 	}
702 
createUnreachable()703 	Value *Nucleus::createUnreachable()
704 	{
705 		return builder->CreateUnreachable();
706 	}
707 
createSwizzle(Value * val,unsigned char select)708 	Value *Nucleus::createSwizzle(Value *val, unsigned char select)
709 	{
710 		Constant *swizzle[4];
711 		swizzle[0] = Nucleus::createConstantInt((select >> 0) & 0x03);
712 		swizzle[1] = Nucleus::createConstantInt((select >> 2) & 0x03);
713 		swizzle[2] = Nucleus::createConstantInt((select >> 4) & 0x03);
714 		swizzle[3] = Nucleus::createConstantInt((select >> 6) & 0x03);
715 
716 		Value *shuffle = Nucleus::createShuffleVector(val, UndefValue::get(val->getType()), Nucleus::createConstantVector(swizzle, 4));
717 
718 		return shuffle;
719 	}
720 
createMask(Value * lhs,Value * rhs,unsigned char select)721 	Value *Nucleus::createMask(Value *lhs, Value *rhs, unsigned char select)
722 	{
723 		bool mask[4] = {false, false, false, false};
724 
725 		mask[(select >> 0) & 0x03] = true;
726 		mask[(select >> 2) & 0x03] = true;
727 		mask[(select >> 4) & 0x03] = true;
728 		mask[(select >> 6) & 0x03] = true;
729 
730 		Constant *swizzle[4];
731 		swizzle[0] = Nucleus::createConstantInt(mask[0] ? 4 : 0);
732 		swizzle[1] = Nucleus::createConstantInt(mask[1] ? 5 : 1);
733 		swizzle[2] = Nucleus::createConstantInt(mask[2] ? 6 : 2);
734 		swizzle[3] = Nucleus::createConstantInt(mask[3] ? 7 : 3);
735 
736 		Value *shuffle = Nucleus::createShuffleVector(lhs, rhs, Nucleus::createConstantVector(swizzle, 4));
737 
738 		return shuffle;
739 	}
740 
getGlobalValueAtAddress(void * Addr)741 	const llvm::GlobalValue *Nucleus::getGlobalValueAtAddress(void *Addr)
742 	{
743 		return executionEngine->getGlobalValueAtAddress(Addr);
744 	}
745 
addGlobalMapping(const llvm::GlobalValue * GV,void * Addr)746 	void Nucleus::addGlobalMapping(const llvm::GlobalValue *GV, void *Addr)
747 	{
748 		executionEngine->addGlobalMapping(GV, Addr);
749 	}
750 
createGlobalValue(llvm::Type * Ty,bool isConstant,unsigned int Align)751 	llvm::GlobalValue *Nucleus::createGlobalValue(llvm::Type *Ty, bool isConstant, unsigned int Align)
752 	{
753 		llvm::GlobalValue *global = new llvm::GlobalVariable(*Nucleus::getModule(), Ty, isConstant, llvm::GlobalValue::ExternalLinkage, 0, "");
754 		global->setAlignment(Align);
755 
756 		return global;
757 	}
758 
getPointerType(llvm::Type * ElementType)759 	llvm::Type *Nucleus::getPointerType(llvm::Type *ElementType)
760 	{
761 		return llvm::PointerType::get(ElementType, 0);
762 	}
763 
createNullValue(llvm::Type * Ty)764 	llvm::Constant *Nucleus::createNullValue(llvm::Type *Ty)
765 	{
766 		return llvm::Constant::getNullValue(Ty);
767 	}
768 
createConstantInt(int64_t i)769 	llvm::ConstantInt *Nucleus::createConstantInt(int64_t i)
770 	{
771 		return llvm::ConstantInt::get(Type::getInt64Ty(*context), i, true);
772 	}
773 
createConstantInt(int i)774 	llvm::ConstantInt *Nucleus::createConstantInt(int i)
775 	{
776 		return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, true);
777 	}
778 
createConstantInt(unsigned int i)779 	llvm::ConstantInt *Nucleus::createConstantInt(unsigned int i)
780 	{
781 		return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, false);
782 	}
783 
createConstantBool(bool b)784 	llvm::ConstantInt *Nucleus::createConstantBool(bool b)
785 	{
786 		return llvm::ConstantInt::get(Type::getInt1Ty(*context), b);
787 	}
788 
createConstantByte(signed char i)789 	llvm::ConstantInt *Nucleus::createConstantByte(signed char i)
790 	{
791 		return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, true);
792 	}
793 
createConstantByte(unsigned char i)794 	llvm::ConstantInt *Nucleus::createConstantByte(unsigned char i)
795 	{
796 		return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, false);
797 	}
798 
createConstantShort(short i)799 	llvm::ConstantInt *Nucleus::createConstantShort(short i)
800 	{
801 		return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, true);
802 	}
803 
createConstantShort(unsigned short i)804 	llvm::ConstantInt *Nucleus::createConstantShort(unsigned short i)
805 	{
806 		return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, false);
807 	}
808 
createConstantFloat(float x)809 	llvm::Constant *Nucleus::createConstantFloat(float x)
810 	{
811 		return ConstantFP::get(Float::getType(), x);
812 	}
813 
createNullPointer(llvm::Type * Ty)814 	llvm::Value *Nucleus::createNullPointer(llvm::Type *Ty)
815 	{
816 		return llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0));
817 	}
818 
createConstantVector(llvm::Constant * const * Vals,unsigned NumVals)819 	llvm::Value *Nucleus::createConstantVector(llvm::Constant *const *Vals, unsigned NumVals)
820 	{
821 		return llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(Vals, NumVals));
822 	}
823 
getType()824 	Type *Void::getType()
825 	{
826 		return Type::getVoidTy(*Nucleus::getContext());
827 	}
828 
LValue(llvm::Type * type,int arraySize)829 	LValue::LValue(llvm::Type *type, int arraySize)
830 	{
831 		address = Nucleus::allocateStackVariable(type, arraySize);
832 	}
833 
loadValue(unsigned int alignment) const834 	llvm::Value *LValue::loadValue(unsigned int alignment) const
835 	{
836 		return Nucleus::createLoad(address, false, alignment);
837 	}
838 
storeValue(llvm::Value * value,unsigned int alignment) const839 	llvm::Value *LValue::storeValue(llvm::Value *value, unsigned int alignment) const
840 	{
841 		return Nucleus::createStore(value, address, false, alignment);
842 	}
843 
getAddress(llvm::Value * index) const844 	llvm::Value *LValue::getAddress(llvm::Value *index) const
845 	{
846 		return Nucleus::createGEP(address, index);
847 	}
848 
getType()849 	Type *MMX::getType()
850 	{
851 		return Type::getX86_MMXTy(*Nucleus::getContext());
852 	}
853 
Bool(Argument<Bool> argument)854 	Bool::Bool(Argument<Bool> argument)
855 	{
856 		storeValue(argument.value);
857 	}
858 
Bool()859 	Bool::Bool()
860 	{
861 	}
862 
Bool(bool x)863 	Bool::Bool(bool x)
864 	{
865 		storeValue(Nucleus::createConstantBool(x));
866 	}
867 
Bool(RValue<Bool> rhs)868 	Bool::Bool(RValue<Bool> rhs)
869 	{
870 		storeValue(rhs.value);
871 	}
872 
Bool(const Bool & rhs)873 	Bool::Bool(const Bool &rhs)
874 	{
875 		Value *value = rhs.loadValue();
876 		storeValue(value);
877 	}
878 
Bool(const Reference<Bool> & rhs)879 	Bool::Bool(const Reference<Bool> &rhs)
880 	{
881 		Value *value = rhs.loadValue();
882 		storeValue(value);
883 	}
884 
operator =(RValue<Bool> rhs) const885 	RValue<Bool> Bool::operator=(RValue<Bool> rhs) const
886 	{
887 		storeValue(rhs.value);
888 
889 		return rhs;
890 	}
891 
operator =(const Bool & rhs) const892 	RValue<Bool> Bool::operator=(const Bool &rhs) const
893 	{
894 		Value *value = rhs.loadValue();
895 		storeValue(value);
896 
897 		return RValue<Bool>(value);
898 	}
899 
operator =(const Reference<Bool> & rhs) const900 	RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) const
901 	{
902 		Value *value = rhs.loadValue();
903 		storeValue(value);
904 
905 		return RValue<Bool>(value);
906 	}
907 
operator !(RValue<Bool> val)908 	RValue<Bool> operator!(RValue<Bool> val)
909 	{
910 		return RValue<Bool>(Nucleus::createNot(val.value));
911 	}
912 
operator &&(RValue<Bool> lhs,RValue<Bool> rhs)913 	RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
914 	{
915 		return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
916 	}
917 
operator ||(RValue<Bool> lhs,RValue<Bool> rhs)918 	RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
919 	{
920 		return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
921 	}
922 
getType()923 	Type *Bool::getType()
924 	{
925 		return Type::getInt1Ty(*Nucleus::getContext());
926 	}
927 
Byte(Argument<Byte> argument)928 	Byte::Byte(Argument<Byte> argument)
929 	{
930 		storeValue(argument.value);
931 	}
932 
Byte(RValue<Int> cast)933 	Byte::Byte(RValue<Int> cast)
934 	{
935 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
936 
937 		storeValue(integer);
938 	}
939 
Byte(RValue<UInt> cast)940 	Byte::Byte(RValue<UInt> cast)
941 	{
942 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
943 
944 		storeValue(integer);
945 	}
946 
Byte(RValue<UShort> cast)947 	Byte::Byte(RValue<UShort> cast)
948 	{
949 		Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
950 
951 		storeValue(integer);
952 	}
953 
Byte()954 	Byte::Byte()
955 	{
956 	}
957 
Byte(int x)958 	Byte::Byte(int x)
959 	{
960 		storeValue(Nucleus::createConstantByte((unsigned char)x));
961 	}
962 
Byte(unsigned char x)963 	Byte::Byte(unsigned char x)
964 	{
965 		storeValue(Nucleus::createConstantByte(x));
966 	}
967 
Byte(RValue<Byte> rhs)968 	Byte::Byte(RValue<Byte> rhs)
969 	{
970 		storeValue(rhs.value);
971 	}
972 
Byte(const Byte & rhs)973 	Byte::Byte(const Byte &rhs)
974 	{
975 		Value *value = rhs.loadValue();
976 		storeValue(value);
977 	}
978 
Byte(const Reference<Byte> & rhs)979 	Byte::Byte(const Reference<Byte> &rhs)
980 	{
981 		Value *value = rhs.loadValue();
982 		storeValue(value);
983 	}
984 
operator =(RValue<Byte> rhs) const985 	RValue<Byte> Byte::operator=(RValue<Byte> rhs) const
986 	{
987 		storeValue(rhs.value);
988 
989 		return rhs;
990 	}
991 
operator =(const Byte & rhs) const992 	RValue<Byte> Byte::operator=(const Byte &rhs) const
993 	{
994 		Value *value = rhs.loadValue();
995 		storeValue(value);
996 
997 		return RValue<Byte>(value);
998 	}
999 
operator =(const Reference<Byte> & rhs) const1000 	RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) const
1001 	{
1002 		Value *value = rhs.loadValue();
1003 		storeValue(value);
1004 
1005 		return RValue<Byte>(value);
1006 	}
1007 
operator +(RValue<Byte> lhs,RValue<Byte> rhs)1008 	RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1009 	{
1010 		return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1011 	}
1012 
operator -(RValue<Byte> lhs,RValue<Byte> rhs)1013 	RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1014 	{
1015 		return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1016 	}
1017 
operator *(RValue<Byte> lhs,RValue<Byte> rhs)1018 	RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1019 	{
1020 		return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1021 	}
1022 
operator /(RValue<Byte> lhs,RValue<Byte> rhs)1023 	RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1024 	{
1025 		return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1026 	}
1027 
operator %(RValue<Byte> lhs,RValue<Byte> rhs)1028 	RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1029 	{
1030 		return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1031 	}
1032 
operator &(RValue<Byte> lhs,RValue<Byte> rhs)1033 	RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1034 	{
1035 		return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1036 	}
1037 
operator |(RValue<Byte> lhs,RValue<Byte> rhs)1038 	RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1039 	{
1040 		return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1041 	}
1042 
operator ^(RValue<Byte> lhs,RValue<Byte> rhs)1043 	RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1044 	{
1045 		return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1046 	}
1047 
operator <<(RValue<Byte> lhs,RValue<Byte> rhs)1048 	RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1049 	{
1050 		return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1051 	}
1052 
operator >>(RValue<Byte> lhs,RValue<Byte> rhs)1053 	RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1054 	{
1055 		return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1056 	}
1057 
operator +=(const Byte & lhs,RValue<Byte> rhs)1058 	RValue<Byte> operator+=(const Byte &lhs, RValue<Byte> rhs)
1059 	{
1060 		return lhs = lhs + rhs;
1061 	}
1062 
operator -=(const Byte & lhs,RValue<Byte> rhs)1063 	RValue<Byte> operator-=(const Byte &lhs, RValue<Byte> rhs)
1064 	{
1065 		return lhs = lhs - rhs;
1066 	}
1067 
operator *=(const Byte & lhs,RValue<Byte> rhs)1068 	RValue<Byte> operator*=(const Byte &lhs, RValue<Byte> rhs)
1069 	{
1070 		return lhs = lhs * rhs;
1071 	}
1072 
operator /=(const Byte & lhs,RValue<Byte> rhs)1073 	RValue<Byte> operator/=(const Byte &lhs, RValue<Byte> rhs)
1074 	{
1075 		return lhs = lhs / rhs;
1076 	}
1077 
operator %=(const Byte & lhs,RValue<Byte> rhs)1078 	RValue<Byte> operator%=(const Byte &lhs, RValue<Byte> rhs)
1079 	{
1080 		return lhs = lhs % rhs;
1081 	}
1082 
operator &=(const Byte & lhs,RValue<Byte> rhs)1083 	RValue<Byte> operator&=(const Byte &lhs, RValue<Byte> rhs)
1084 	{
1085 		return lhs = lhs & rhs;
1086 	}
1087 
operator |=(const Byte & lhs,RValue<Byte> rhs)1088 	RValue<Byte> operator|=(const Byte &lhs, RValue<Byte> rhs)
1089 	{
1090 		return lhs = lhs | rhs;
1091 	}
1092 
operator ^=(const Byte & lhs,RValue<Byte> rhs)1093 	RValue<Byte> operator^=(const Byte &lhs, RValue<Byte> rhs)
1094 	{
1095 		return lhs = lhs ^ rhs;
1096 	}
1097 
operator <<=(const Byte & lhs,RValue<Byte> rhs)1098 	RValue<Byte> operator<<=(const Byte &lhs, RValue<Byte> rhs)
1099 	{
1100 		return lhs = lhs << rhs;
1101 	}
1102 
operator >>=(const Byte & lhs,RValue<Byte> rhs)1103 	RValue<Byte> operator>>=(const Byte &lhs, RValue<Byte> rhs)
1104 	{
1105 		return lhs = lhs >> rhs;
1106 	}
1107 
operator +(RValue<Byte> val)1108 	RValue<Byte> operator+(RValue<Byte> val)
1109 	{
1110 		return val;
1111 	}
1112 
operator -(RValue<Byte> val)1113 	RValue<Byte> operator-(RValue<Byte> val)
1114 	{
1115 		return RValue<Byte>(Nucleus::createNeg(val.value));
1116 	}
1117 
operator ~(RValue<Byte> val)1118 	RValue<Byte> operator~(RValue<Byte> val)
1119 	{
1120 		return RValue<Byte>(Nucleus::createNot(val.value));
1121 	}
1122 
operator ++(const Byte & val,int)1123 	RValue<Byte> operator++(const Byte &val, int)   // Post-increment
1124 	{
1125 		RValue<Byte> res = val;
1126 
1127 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1));
1128 		val.storeValue(inc);
1129 
1130 		return res;
1131 	}
1132 
operator ++(const Byte & val)1133 	const Byte &operator++(const Byte &val)   // Pre-increment
1134 	{
1135 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
1136 		val.storeValue(inc);
1137 
1138 		return val;
1139 	}
1140 
operator --(const Byte & val,int)1141 	RValue<Byte> operator--(const Byte &val, int)   // Post-decrement
1142 	{
1143 		RValue<Byte> res = val;
1144 
1145 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1));
1146 		val.storeValue(inc);
1147 
1148 		return res;
1149 	}
1150 
operator --(const Byte & val)1151 	const Byte &operator--(const Byte &val)   // Pre-decrement
1152 	{
1153 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((unsigned char)1));
1154 		val.storeValue(inc);
1155 
1156 		return val;
1157 	}
1158 
operator <(RValue<Byte> lhs,RValue<Byte> rhs)1159 	RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1160 	{
1161 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1162 	}
1163 
operator <=(RValue<Byte> lhs,RValue<Byte> rhs)1164 	RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1165 	{
1166 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1167 	}
1168 
operator >(RValue<Byte> lhs,RValue<Byte> rhs)1169 	RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1170 	{
1171 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1172 	}
1173 
operator >=(RValue<Byte> lhs,RValue<Byte> rhs)1174 	RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1175 	{
1176 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1177 	}
1178 
operator !=(RValue<Byte> lhs,RValue<Byte> rhs)1179 	RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1180 	{
1181 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1182 	}
1183 
operator ==(RValue<Byte> lhs,RValue<Byte> rhs)1184 	RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1185 	{
1186 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1187 	}
1188 
getType()1189 	Type *Byte::getType()
1190 	{
1191 		return Type::getInt8Ty(*Nucleus::getContext());
1192 	}
1193 
SByte(Argument<SByte> argument)1194 	SByte::SByte(Argument<SByte> argument)
1195 	{
1196 		storeValue(argument.value);
1197 	}
1198 
SByte(RValue<Int> cast)1199 	SByte::SByte(RValue<Int> cast)
1200 	{
1201 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1202 
1203 		storeValue(integer);
1204 	}
1205 
SByte(RValue<Short> cast)1206 	SByte::SByte(RValue<Short> cast)
1207 	{
1208 		Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1209 
1210 		storeValue(integer);
1211 	}
1212 
	// Default constructor: the body is empty, so no initial value is stored here.
	SByte::SByte()
	{
	}
1216 
SByte(signed char x)1217 	SByte::SByte(signed char x)
1218 	{
1219 		storeValue(Nucleus::createConstantByte(x));
1220 	}
1221 
SByte(RValue<SByte> rhs)1222 	SByte::SByte(RValue<SByte> rhs)
1223 	{
1224 		storeValue(rhs.value);
1225 	}
1226 
SByte(const SByte & rhs)1227 	SByte::SByte(const SByte &rhs)
1228 	{
1229 		Value *value = rhs.loadValue();
1230 		storeValue(value);
1231 	}
1232 
SByte(const Reference<SByte> & rhs)1233 	SByte::SByte(const Reference<SByte> &rhs)
1234 	{
1235 		Value *value = rhs.loadValue();
1236 		storeValue(value);
1237 	}
1238 
operator =(RValue<SByte> rhs) const1239 	RValue<SByte> SByte::operator=(RValue<SByte> rhs) const
1240 	{
1241 		storeValue(rhs.value);
1242 
1243 		return rhs;
1244 	}
1245 
operator =(const SByte & rhs) const1246 	RValue<SByte> SByte::operator=(const SByte &rhs) const
1247 	{
1248 		Value *value = rhs.loadValue();
1249 		storeValue(value);
1250 
1251 		return RValue<SByte>(value);
1252 	}
1253 
operator =(const Reference<SByte> & rhs) const1254 	RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) const
1255 	{
1256 		Value *value = rhs.loadValue();
1257 		storeValue(value);
1258 
1259 		return RValue<SByte>(value);
1260 	}
1261 
operator +(RValue<SByte> lhs,RValue<SByte> rhs)1262 	RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1263 	{
1264 		return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1265 	}
1266 
operator -(RValue<SByte> lhs,RValue<SByte> rhs)1267 	RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1268 	{
1269 		return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1270 	}
1271 
operator *(RValue<SByte> lhs,RValue<SByte> rhs)1272 	RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1273 	{
1274 		return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1275 	}
1276 
operator /(RValue<SByte> lhs,RValue<SByte> rhs)1277 	RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1278 	{
1279 		return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1280 	}
1281 
operator %(RValue<SByte> lhs,RValue<SByte> rhs)1282 	RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1283 	{
1284 		return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1285 	}
1286 
operator &(RValue<SByte> lhs,RValue<SByte> rhs)1287 	RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1288 	{
1289 		return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1290 	}
1291 
operator |(RValue<SByte> lhs,RValue<SByte> rhs)1292 	RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1293 	{
1294 		return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1295 	}
1296 
operator ^(RValue<SByte> lhs,RValue<SByte> rhs)1297 	RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1298 	{
1299 		return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1300 	}
1301 
operator <<(RValue<SByte> lhs,RValue<SByte> rhs)1302 	RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1303 	{
1304 		return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1305 	}
1306 
operator >>(RValue<SByte> lhs,RValue<SByte> rhs)1307 	RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1308 	{
1309 		return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1310 	}
1311 
operator +=(const SByte & lhs,RValue<SByte> rhs)1312 	RValue<SByte> operator+=(const SByte &lhs, RValue<SByte> rhs)
1313 	{
1314 		return lhs = lhs + rhs;
1315 	}
1316 
operator -=(const SByte & lhs,RValue<SByte> rhs)1317 	RValue<SByte> operator-=(const SByte &lhs, RValue<SByte> rhs)
1318 	{
1319 		return lhs = lhs - rhs;
1320 	}
1321 
operator *=(const SByte & lhs,RValue<SByte> rhs)1322 	RValue<SByte> operator*=(const SByte &lhs, RValue<SByte> rhs)
1323 	{
1324 		return lhs = lhs * rhs;
1325 	}
1326 
operator /=(const SByte & lhs,RValue<SByte> rhs)1327 	RValue<SByte> operator/=(const SByte &lhs, RValue<SByte> rhs)
1328 	{
1329 		return lhs = lhs / rhs;
1330 	}
1331 
operator %=(const SByte & lhs,RValue<SByte> rhs)1332 	RValue<SByte> operator%=(const SByte &lhs, RValue<SByte> rhs)
1333 	{
1334 		return lhs = lhs % rhs;
1335 	}
1336 
operator &=(const SByte & lhs,RValue<SByte> rhs)1337 	RValue<SByte> operator&=(const SByte &lhs, RValue<SByte> rhs)
1338 	{
1339 		return lhs = lhs & rhs;
1340 	}
1341 
operator |=(const SByte & lhs,RValue<SByte> rhs)1342 	RValue<SByte> operator|=(const SByte &lhs, RValue<SByte> rhs)
1343 	{
1344 		return lhs = lhs | rhs;
1345 	}
1346 
operator ^=(const SByte & lhs,RValue<SByte> rhs)1347 	RValue<SByte> operator^=(const SByte &lhs, RValue<SByte> rhs)
1348 	{
1349 		return lhs = lhs ^ rhs;
1350 	}
1351 
operator <<=(const SByte & lhs,RValue<SByte> rhs)1352 	RValue<SByte> operator<<=(const SByte &lhs, RValue<SByte> rhs)
1353 	{
1354 		return lhs = lhs << rhs;
1355 	}
1356 
operator >>=(const SByte & lhs,RValue<SByte> rhs)1357 	RValue<SByte> operator>>=(const SByte &lhs, RValue<SByte> rhs)
1358 	{
1359 		return lhs = lhs >> rhs;
1360 	}
1361 
operator +(RValue<SByte> val)1362 	RValue<SByte> operator+(RValue<SByte> val)
1363 	{
1364 		return val;
1365 	}
1366 
operator -(RValue<SByte> val)1367 	RValue<SByte> operator-(RValue<SByte> val)
1368 	{
1369 		return RValue<SByte>(Nucleus::createNeg(val.value));
1370 	}
1371 
operator ~(RValue<SByte> val)1372 	RValue<SByte> operator~(RValue<SByte> val)
1373 	{
1374 		return RValue<SByte>(Nucleus::createNot(val.value));
1375 	}
1376 
operator ++(const SByte & val,int)1377 	RValue<SByte> operator++(const SByte &val, int)   // Post-increment
1378 	{
1379 		RValue<SByte> res = val;
1380 
1381 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1));
1382 		val.storeValue(inc);
1383 
1384 		return res;
1385 	}
1386 
operator ++(const SByte & val)1387 	const SByte &operator++(const SByte &val)   // Pre-increment
1388 	{
1389 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantByte((signed char)1));
1390 		val.storeValue(inc);
1391 
1392 		return val;
1393 	}
1394 
operator --(const SByte & val,int)1395 	RValue<SByte> operator--(const SByte &val, int)   // Post-decrement
1396 	{
1397 		RValue<SByte> res = val;
1398 
1399 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1));
1400 		val.storeValue(inc);
1401 
1402 		return res;
1403 	}
1404 
operator --(const SByte & val)1405 	const SByte &operator--(const SByte &val)   // Pre-decrement
1406 	{
1407 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantByte((signed char)1));
1408 		val.storeValue(inc);
1409 
1410 		return val;
1411 	}
1412 
operator <(RValue<SByte> lhs,RValue<SByte> rhs)1413 	RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1414 	{
1415 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1416 	}
1417 
operator <=(RValue<SByte> lhs,RValue<SByte> rhs)1418 	RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1419 	{
1420 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1421 	}
1422 
operator >(RValue<SByte> lhs,RValue<SByte> rhs)1423 	RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1424 	{
1425 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1426 	}
1427 
operator >=(RValue<SByte> lhs,RValue<SByte> rhs)1428 	RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1429 	{
1430 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1431 	}
1432 
operator !=(RValue<SByte> lhs,RValue<SByte> rhs)1433 	RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1434 	{
1435 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1436 	}
1437 
operator ==(RValue<SByte> lhs,RValue<SByte> rhs)1438 	RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1439 	{
1440 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1441 	}
1442 
getType()1443 	Type *SByte::getType()
1444 	{
1445 		return Type::getInt8Ty(*Nucleus::getContext());
1446 	}
1447 
Short(Argument<Short> argument)1448 	Short::Short(Argument<Short> argument)
1449 	{
1450 		storeValue(argument.value);
1451 	}
1452 
Short(RValue<Int> cast)1453 	Short::Short(RValue<Int> cast)
1454 	{
1455 		Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1456 
1457 		storeValue(integer);
1458 	}
1459 
	// Default constructor: the body is empty, so no initial value is stored here.
	Short::Short()
	{
	}
1463 
Short(short x)1464 	Short::Short(short x)
1465 	{
1466 		storeValue(Nucleus::createConstantShort(x));
1467 	}
1468 
Short(RValue<Short> rhs)1469 	Short::Short(RValue<Short> rhs)
1470 	{
1471 		storeValue(rhs.value);
1472 	}
1473 
Short(const Short & rhs)1474 	Short::Short(const Short &rhs)
1475 	{
1476 		Value *value = rhs.loadValue();
1477 		storeValue(value);
1478 	}
1479 
Short(const Reference<Short> & rhs)1480 	Short::Short(const Reference<Short> &rhs)
1481 	{
1482 		Value *value = rhs.loadValue();
1483 		storeValue(value);
1484 	}
1485 
operator =(RValue<Short> rhs) const1486 	RValue<Short> Short::operator=(RValue<Short> rhs) const
1487 	{
1488 		storeValue(rhs.value);
1489 
1490 		return rhs;
1491 	}
1492 
operator =(const Short & rhs) const1493 	RValue<Short> Short::operator=(const Short &rhs) const
1494 	{
1495 		Value *value = rhs.loadValue();
1496 		storeValue(value);
1497 
1498 		return RValue<Short>(value);
1499 	}
1500 
operator =(const Reference<Short> & rhs) const1501 	RValue<Short> Short::operator=(const Reference<Short> &rhs) const
1502 	{
1503 		Value *value = rhs.loadValue();
1504 		storeValue(value);
1505 
1506 		return RValue<Short>(value);
1507 	}
1508 
operator +(RValue<Short> lhs,RValue<Short> rhs)1509 	RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1510 	{
1511 		return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1512 	}
1513 
operator -(RValue<Short> lhs,RValue<Short> rhs)1514 	RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1515 	{
1516 		return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1517 	}
1518 
operator *(RValue<Short> lhs,RValue<Short> rhs)1519 	RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1520 	{
1521 		return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1522 	}
1523 
operator /(RValue<Short> lhs,RValue<Short> rhs)1524 	RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1525 	{
1526 		return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1527 	}
1528 
operator %(RValue<Short> lhs,RValue<Short> rhs)1529 	RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1530 	{
1531 		return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1532 	}
1533 
operator &(RValue<Short> lhs,RValue<Short> rhs)1534 	RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1535 	{
1536 		return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1537 	}
1538 
operator |(RValue<Short> lhs,RValue<Short> rhs)1539 	RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1540 	{
1541 		return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1542 	}
1543 
operator ^(RValue<Short> lhs,RValue<Short> rhs)1544 	RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1545 	{
1546 		return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1547 	}
1548 
operator <<(RValue<Short> lhs,RValue<Short> rhs)1549 	RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1550 	{
1551 		return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1552 	}
1553 
operator >>(RValue<Short> lhs,RValue<Short> rhs)1554 	RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1555 	{
1556 		return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1557 	}
1558 
operator +=(const Short & lhs,RValue<Short> rhs)1559 	RValue<Short> operator+=(const Short &lhs, RValue<Short> rhs)
1560 	{
1561 		return lhs = lhs + rhs;
1562 	}
1563 
operator -=(const Short & lhs,RValue<Short> rhs)1564 	RValue<Short> operator-=(const Short &lhs, RValue<Short> rhs)
1565 	{
1566 		return lhs = lhs - rhs;
1567 	}
1568 
operator *=(const Short & lhs,RValue<Short> rhs)1569 	RValue<Short> operator*=(const Short &lhs, RValue<Short> rhs)
1570 	{
1571 		return lhs = lhs * rhs;
1572 	}
1573 
operator /=(const Short & lhs,RValue<Short> rhs)1574 	RValue<Short> operator/=(const Short &lhs, RValue<Short> rhs)
1575 	{
1576 		return lhs = lhs / rhs;
1577 	}
1578 
operator %=(const Short & lhs,RValue<Short> rhs)1579 	RValue<Short> operator%=(const Short &lhs, RValue<Short> rhs)
1580 	{
1581 		return lhs = lhs % rhs;
1582 	}
1583 
operator &=(const Short & lhs,RValue<Short> rhs)1584 	RValue<Short> operator&=(const Short &lhs, RValue<Short> rhs)
1585 	{
1586 		return lhs = lhs & rhs;
1587 	}
1588 
operator |=(const Short & lhs,RValue<Short> rhs)1589 	RValue<Short> operator|=(const Short &lhs, RValue<Short> rhs)
1590 	{
1591 		return lhs = lhs | rhs;
1592 	}
1593 
operator ^=(const Short & lhs,RValue<Short> rhs)1594 	RValue<Short> operator^=(const Short &lhs, RValue<Short> rhs)
1595 	{
1596 		return lhs = lhs ^ rhs;
1597 	}
1598 
operator <<=(const Short & lhs,RValue<Short> rhs)1599 	RValue<Short> operator<<=(const Short &lhs, RValue<Short> rhs)
1600 	{
1601 		return lhs = lhs << rhs;
1602 	}
1603 
operator >>=(const Short & lhs,RValue<Short> rhs)1604 	RValue<Short> operator>>=(const Short &lhs, RValue<Short> rhs)
1605 	{
1606 		return lhs = lhs >> rhs;
1607 	}
1608 
operator +(RValue<Short> val)1609 	RValue<Short> operator+(RValue<Short> val)
1610 	{
1611 		return val;
1612 	}
1613 
operator -(RValue<Short> val)1614 	RValue<Short> operator-(RValue<Short> val)
1615 	{
1616 		return RValue<Short>(Nucleus::createNeg(val.value));
1617 	}
1618 
operator ~(RValue<Short> val)1619 	RValue<Short> operator~(RValue<Short> val)
1620 	{
1621 		return RValue<Short>(Nucleus::createNot(val.value));
1622 	}
1623 
operator ++(const Short & val,int)1624 	RValue<Short> operator++(const Short &val, int)   // Post-increment
1625 	{
1626 		RValue<Short> res = val;
1627 
1628 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1));
1629 		val.storeValue(inc);
1630 
1631 		return res;
1632 	}
1633 
operator ++(const Short & val)1634 	const Short &operator++(const Short &val)   // Pre-increment
1635 	{
1636 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((short)1));
1637 		val.storeValue(inc);
1638 
1639 		return val;
1640 	}
1641 
operator --(const Short & val,int)1642 	RValue<Short> operator--(const Short &val, int)   // Post-decrement
1643 	{
1644 		RValue<Short> res = val;
1645 
1646 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1));
1647 		val.storeValue(inc);
1648 
1649 		return res;
1650 	}
1651 
operator --(const Short & val)1652 	const Short &operator--(const Short &val)   // Pre-decrement
1653 	{
1654 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((short)1));
1655 		val.storeValue(inc);
1656 
1657 		return val;
1658 	}
1659 
operator <(RValue<Short> lhs,RValue<Short> rhs)1660 	RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1661 	{
1662 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1663 	}
1664 
operator <=(RValue<Short> lhs,RValue<Short> rhs)1665 	RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1666 	{
1667 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1668 	}
1669 
operator >(RValue<Short> lhs,RValue<Short> rhs)1670 	RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1671 	{
1672 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1673 	}
1674 
operator >=(RValue<Short> lhs,RValue<Short> rhs)1675 	RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1676 	{
1677 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1678 	}
1679 
operator !=(RValue<Short> lhs,RValue<Short> rhs)1680 	RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1681 	{
1682 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1683 	}
1684 
operator ==(RValue<Short> lhs,RValue<Short> rhs)1685 	RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1686 	{
1687 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1688 	}
1689 
getType()1690 	Type *Short::getType()
1691 	{
1692 		return Type::getInt16Ty(*Nucleus::getContext());
1693 	}
1694 
UShort(Argument<UShort> argument)1695 	UShort::UShort(Argument<UShort> argument)
1696 	{
1697 		storeValue(argument.value);
1698 	}
1699 
UShort(RValue<UInt> cast)1700 	UShort::UShort(RValue<UInt> cast)
1701 	{
1702 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1703 
1704 		storeValue(integer);
1705 	}
1706 
UShort(RValue<Int> cast)1707 	UShort::UShort(RValue<Int> cast)
1708 	{
1709 		Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1710 
1711 		storeValue(integer);
1712 	}
1713 
	// Default constructor: the body is empty, so no initial value is stored here.
	UShort::UShort()
	{
	}
1717 
UShort(unsigned short x)1718 	UShort::UShort(unsigned short x)
1719 	{
1720 		storeValue(Nucleus::createConstantShort(x));
1721 	}
1722 
UShort(RValue<UShort> rhs)1723 	UShort::UShort(RValue<UShort> rhs)
1724 	{
1725 		storeValue(rhs.value);
1726 	}
1727 
UShort(const UShort & rhs)1728 	UShort::UShort(const UShort &rhs)
1729 	{
1730 		Value *value = rhs.loadValue();
1731 		storeValue(value);
1732 	}
1733 
UShort(const Reference<UShort> & rhs)1734 	UShort::UShort(const Reference<UShort> &rhs)
1735 	{
1736 		Value *value = rhs.loadValue();
1737 		storeValue(value);
1738 	}
1739 
operator =(RValue<UShort> rhs) const1740 	RValue<UShort> UShort::operator=(RValue<UShort> rhs) const
1741 	{
1742 		storeValue(rhs.value);
1743 
1744 		return rhs;
1745 	}
1746 
operator =(const UShort & rhs) const1747 	RValue<UShort> UShort::operator=(const UShort &rhs) const
1748 	{
1749 		Value *value = rhs.loadValue();
1750 		storeValue(value);
1751 
1752 		return RValue<UShort>(value);
1753 	}
1754 
operator =(const Reference<UShort> & rhs) const1755 	RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) const
1756 	{
1757 		Value *value = rhs.loadValue();
1758 		storeValue(value);
1759 
1760 		return RValue<UShort>(value);
1761 	}
1762 
operator +(RValue<UShort> lhs,RValue<UShort> rhs)1763 	RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1764 	{
1765 		return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1766 	}
1767 
operator -(RValue<UShort> lhs,RValue<UShort> rhs)1768 	RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1769 	{
1770 		return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1771 	}
1772 
operator *(RValue<UShort> lhs,RValue<UShort> rhs)1773 	RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1774 	{
1775 		return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
1776 	}
1777 
operator /(RValue<UShort> lhs,RValue<UShort> rhs)1778 	RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1779 	{
1780 		return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1781 	}
1782 
operator %(RValue<UShort> lhs,RValue<UShort> rhs)1783 	RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1784 	{
1785 		return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1786 	}
1787 
operator &(RValue<UShort> lhs,RValue<UShort> rhs)1788 	RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1789 	{
1790 		return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1791 	}
1792 
operator |(RValue<UShort> lhs,RValue<UShort> rhs)1793 	RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1794 	{
1795 		return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1796 	}
1797 
operator ^(RValue<UShort> lhs,RValue<UShort> rhs)1798 	RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1799 	{
1800 		return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1801 	}
1802 
operator <<(RValue<UShort> lhs,RValue<UShort> rhs)1803 	RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1804 	{
1805 		return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
1806 	}
1807 
operator >>(RValue<UShort> lhs,RValue<UShort> rhs)1808 	RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1809 	{
1810 		return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
1811 	}
1812 
operator +=(const UShort & lhs,RValue<UShort> rhs)1813 	RValue<UShort> operator+=(const UShort &lhs, RValue<UShort> rhs)
1814 	{
1815 		return lhs = lhs + rhs;
1816 	}
1817 
operator -=(const UShort & lhs,RValue<UShort> rhs)1818 	RValue<UShort> operator-=(const UShort &lhs, RValue<UShort> rhs)
1819 	{
1820 		return lhs = lhs - rhs;
1821 	}
1822 
operator *=(const UShort & lhs,RValue<UShort> rhs)1823 	RValue<UShort> operator*=(const UShort &lhs, RValue<UShort> rhs)
1824 	{
1825 		return lhs = lhs * rhs;
1826 	}
1827 
operator /=(const UShort & lhs,RValue<UShort> rhs)1828 	RValue<UShort> operator/=(const UShort &lhs, RValue<UShort> rhs)
1829 	{
1830 		return lhs = lhs / rhs;
1831 	}
1832 
operator %=(const UShort & lhs,RValue<UShort> rhs)1833 	RValue<UShort> operator%=(const UShort &lhs, RValue<UShort> rhs)
1834 	{
1835 		return lhs = lhs % rhs;
1836 	}
1837 
operator &=(const UShort & lhs,RValue<UShort> rhs)1838 	RValue<UShort> operator&=(const UShort &lhs, RValue<UShort> rhs)
1839 	{
1840 		return lhs = lhs & rhs;
1841 	}
1842 
operator |=(const UShort & lhs,RValue<UShort> rhs)1843 	RValue<UShort> operator|=(const UShort &lhs, RValue<UShort> rhs)
1844 	{
1845 		return lhs = lhs | rhs;
1846 	}
1847 
operator ^=(const UShort & lhs,RValue<UShort> rhs)1848 	RValue<UShort> operator^=(const UShort &lhs, RValue<UShort> rhs)
1849 	{
1850 		return lhs = lhs ^ rhs;
1851 	}
1852 
operator <<=(const UShort & lhs,RValue<UShort> rhs)1853 	RValue<UShort> operator<<=(const UShort &lhs, RValue<UShort> rhs)
1854 	{
1855 		return lhs = lhs << rhs;
1856 	}
1857 
operator >>=(const UShort & lhs,RValue<UShort> rhs)1858 	RValue<UShort> operator>>=(const UShort &lhs, RValue<UShort> rhs)
1859 	{
1860 		return lhs = lhs >> rhs;
1861 	}
1862 
operator +(RValue<UShort> val)1863 	RValue<UShort> operator+(RValue<UShort> val)
1864 	{
1865 		return val;
1866 	}
1867 
operator -(RValue<UShort> val)1868 	RValue<UShort> operator-(RValue<UShort> val)
1869 	{
1870 		return RValue<UShort>(Nucleus::createNeg(val.value));
1871 	}
1872 
operator ~(RValue<UShort> val)1873 	RValue<UShort> operator~(RValue<UShort> val)
1874 	{
1875 		return RValue<UShort>(Nucleus::createNot(val.value));
1876 	}
1877 
operator ++(const UShort & val,int)1878 	RValue<UShort> operator++(const UShort &val, int)   // Post-increment
1879 	{
1880 		RValue<UShort> res = val;
1881 
1882 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1));
1883 		val.storeValue(inc);
1884 
1885 		return res;
1886 	}
1887 
operator ++(const UShort & val)1888 	const UShort &operator++(const UShort &val)   // Pre-increment
1889 	{
1890 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
1891 		val.storeValue(inc);
1892 
1893 		return val;
1894 	}
1895 
operator --(const UShort & val,int)1896 	RValue<UShort> operator--(const UShort &val, int)   // Post-decrement
1897 	{
1898 		RValue<UShort> res = val;
1899 
1900 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1));
1901 		val.storeValue(inc);
1902 
1903 		return res;
1904 	}
1905 
operator --(const UShort & val)1906 	const UShort &operator--(const UShort &val)   // Pre-decrement
1907 	{
1908 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantShort((unsigned short)1));
1909 		val.storeValue(inc);
1910 
1911 		return val;
1912 	}
1913 
operator <(RValue<UShort> lhs,RValue<UShort> rhs)1914 	RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
1915 	{
1916 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1917 	}
1918 
operator <=(RValue<UShort> lhs,RValue<UShort> rhs)1919 	RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
1920 	{
1921 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1922 	}
1923 
operator >(RValue<UShort> lhs,RValue<UShort> rhs)1924 	RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
1925 	{
1926 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1927 	}
1928 
operator >=(RValue<UShort> lhs,RValue<UShort> rhs)1929 	RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
1930 	{
1931 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1932 	}
1933 
operator !=(RValue<UShort> lhs,RValue<UShort> rhs)1934 	RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
1935 	{
1936 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1937 	}
1938 
operator ==(RValue<UShort> lhs,RValue<UShort> rhs)1939 	RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
1940 	{
1941 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1942 	}
1943 
getType()1944 	Type *UShort::getType()
1945 	{
1946 		return Type::getInt16Ty(*Nucleus::getContext());
1947 	}
1948 
getType()1949 	Type *Byte4::getType()
1950 	{
1951 		#if 0
1952 			return VectorType::get(Byte::getType(), 4);
1953 		#else
1954 			return UInt::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
1955 		#endif
1956 	}
1957 
getType()1958 	Type *SByte4::getType()
1959 	{
1960 		#if 0
1961 			return VectorType::get(SByte::getType(), 4);
1962 		#else
1963 			return Int::getType();   // FIXME: LLVM doesn't manipulate it as one 32-bit block
1964 		#endif
1965 	}
1966 
Byte8()1967 	Byte8::Byte8()
1968 	{
1969 	//	xyzw.parent = this;
1970 	}
1971 
Byte8(byte x0,byte x1,byte x2,byte x3,byte x4,byte x5,byte x6,byte x7)1972 	Byte8::Byte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
1973 	{
1974 	//	xyzw.parent = this;
1975 
1976 		Constant *constantVector[8];
1977 		constantVector[0] = Nucleus::createConstantByte(x0);
1978 		constantVector[1] = Nucleus::createConstantByte(x1);
1979 		constantVector[2] = Nucleus::createConstantByte(x2);
1980 		constantVector[3] = Nucleus::createConstantByte(x3);
1981 		constantVector[4] = Nucleus::createConstantByte(x4);
1982 		constantVector[5] = Nucleus::createConstantByte(x5);
1983 		constantVector[6] = Nucleus::createConstantByte(x6);
1984 		constantVector[7] = Nucleus::createConstantByte(x7);
1985 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
1986 
1987 		storeValue(Nucleus::createBitCast(vector, getType()));
1988 	}
1989 
Byte8(int64_t x)1990 	Byte8::Byte8(int64_t x)
1991 	{
1992 	//	xyzw.parent = this;
1993 
1994 		Constant *constantVector[8];
1995 		constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >>  0));
1996 		constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >>  8));
1997 		constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
1998 		constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
1999 		constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
2000 		constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
2001 		constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
2002 		constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
2003 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
2004 
2005 		storeValue(Nucleus::createBitCast(vector, getType()));
2006 	}
2007 
Byte8(RValue<Byte8> rhs)2008 	Byte8::Byte8(RValue<Byte8> rhs)
2009 	{
2010 	//	xyzw.parent = this;
2011 
2012 		storeValue(rhs.value);
2013 	}
2014 
Byte8(const Byte8 & rhs)2015 	Byte8::Byte8(const Byte8 &rhs)
2016 	{
2017 	//	xyzw.parent = this;
2018 
2019 		Value *value = rhs.loadValue();
2020 		storeValue(value);
2021 	}
2022 
Byte8(const Reference<Byte8> & rhs)2023 	Byte8::Byte8(const Reference<Byte8> &rhs)
2024 	{
2025 	//	xyzw.parent = this;
2026 
2027 		Value *value = rhs.loadValue();
2028 		storeValue(value);
2029 	}
2030 
operator =(RValue<Byte8> rhs) const2031 	RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) const
2032 	{
2033 		storeValue(rhs.value);
2034 
2035 		return rhs;
2036 	}
2037 
operator =(const Byte8 & rhs) const2038 	RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const
2039 	{
2040 		Value *value = rhs.loadValue();
2041 		storeValue(value);
2042 
2043 		return RValue<Byte8>(value);
2044 	}
2045 
operator =(const Reference<Byte8> & rhs) const2046 	RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) const
2047 	{
2048 		Value *value = rhs.loadValue();
2049 		storeValue(value);
2050 
2051 		return RValue<Byte8>(value);
2052 	}
2053 
operator +(RValue<Byte8> lhs,RValue<Byte8> rhs)2054 	RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2055 	{
2056 		if(CPUID::supportsMMX2())
2057 		{
2058 			return x86::paddb(lhs, rhs);
2059 		}
2060 		else
2061 		{
2062 			return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2063 		}
2064 	}
2065 
operator -(RValue<Byte8> lhs,RValue<Byte8> rhs)2066 	RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2067 	{
2068 		if(CPUID::supportsMMX2())
2069 		{
2070 			return x86::psubb(lhs, rhs);
2071 		}
2072 		else
2073 		{
2074 			return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2075 		}
2076 	}
2077 
2078 //	RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2079 //	{
2080 //		return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2081 //	}
2082 
2083 //	RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2084 //	{
2085 //		return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2086 //	}
2087 
2088 //	RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2089 //	{
2090 //		return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2091 //	}
2092 
operator &(RValue<Byte8> lhs,RValue<Byte8> rhs)2093 	RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2094 	{
2095 		if(CPUID::supportsMMX2())
2096 		{
2097 			return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
2098 		}
2099 		else
2100 		{
2101 			return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2102 		}
2103 	}
2104 
operator |(RValue<Byte8> lhs,RValue<Byte8> rhs)2105 	RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2106 	{
2107 		if(CPUID::supportsMMX2())
2108 		{
2109 			return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
2110 		}
2111 		else
2112 		{
2113 			return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2114 		}
2115 	}
2116 
operator ^(RValue<Byte8> lhs,RValue<Byte8> rhs)2117 	RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2118 	{
2119 		if(CPUID::supportsMMX2())
2120 		{
2121 			return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
2122 		}
2123 		else
2124 		{
2125 			return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2126 		}
2127 	}
2128 
2129 //	RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2130 //	{
2131 //		return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2132 //	}
2133 
2134 //	RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2135 //	{
2136 //		return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2137 //	}
2138 
operator +=(const Byte8 & lhs,RValue<Byte8> rhs)2139 	RValue<Byte8> operator+=(const Byte8 &lhs, RValue<Byte8> rhs)
2140 	{
2141 		return lhs = lhs + rhs;
2142 	}
2143 
operator -=(const Byte8 & lhs,RValue<Byte8> rhs)2144 	RValue<Byte8> operator-=(const Byte8 &lhs, RValue<Byte8> rhs)
2145 	{
2146 		return lhs = lhs - rhs;
2147 	}
2148 
2149 //	RValue<Byte8> operator*=(const Byte8 &lhs, RValue<Byte8> rhs)
2150 //	{
2151 //		return lhs = lhs * rhs;
2152 //	}
2153 
2154 //	RValue<Byte8> operator/=(const Byte8 &lhs, RValue<Byte8> rhs)
2155 //	{
2156 //		return lhs = lhs / rhs;
2157 //	}
2158 
2159 //	RValue<Byte8> operator%=(const Byte8 &lhs, RValue<Byte8> rhs)
2160 //	{
2161 //		return lhs = lhs % rhs;
2162 //	}
2163 
operator &=(const Byte8 & lhs,RValue<Byte8> rhs)2164 	RValue<Byte8> operator&=(const Byte8 &lhs, RValue<Byte8> rhs)
2165 	{
2166 		return lhs = lhs & rhs;
2167 	}
2168 
operator |=(const Byte8 & lhs,RValue<Byte8> rhs)2169 	RValue<Byte8> operator|=(const Byte8 &lhs, RValue<Byte8> rhs)
2170 	{
2171 		return lhs = lhs | rhs;
2172 	}
2173 
operator ^=(const Byte8 & lhs,RValue<Byte8> rhs)2174 	RValue<Byte8> operator^=(const Byte8 &lhs, RValue<Byte8> rhs)
2175 	{
2176 		return lhs = lhs ^ rhs;
2177 	}
2178 
2179 //	RValue<Byte8> operator<<=(const Byte8 &lhs, RValue<Byte8> rhs)
2180 //	{
2181 //		return lhs = lhs << rhs;
2182 //	}
2183 
2184 //	RValue<Byte8> operator>>=(const Byte8 &lhs, RValue<Byte8> rhs)
2185 //	{
2186 //		return lhs = lhs >> rhs;
2187 //	}
2188 
2189 //	RValue<Byte8> operator+(RValue<Byte8> val)
2190 //	{
2191 //		return val;
2192 //	}
2193 
2194 //	RValue<Byte8> operator-(RValue<Byte8> val)
2195 //	{
2196 //		return RValue<Byte8>(Nucleus::createNeg(val.value));
2197 //	}
2198 
operator ~(RValue<Byte8> val)2199 	RValue<Byte8> operator~(RValue<Byte8> val)
2200 	{
2201 		if(CPUID::supportsMMX2())
2202 		{
2203 			return val ^ Byte8(0xFFFFFFFFFFFFFFFF);
2204 		}
2205 		else
2206 		{
2207 			return RValue<Byte8>(Nucleus::createNot(val.value));
2208 		}
2209 	}
2210 
AddSat(RValue<Byte8> x,RValue<Byte8> y)2211 	RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2212 	{
2213 		return x86::paddusb(x, y);
2214 	}
2215 
SubSat(RValue<Byte8> x,RValue<Byte8> y)2216 	RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2217 	{
2218 		return x86::psubusb(x, y);
2219 	}
2220 
Unpack(RValue<Byte4> x)2221 	RValue<Short4> Unpack(RValue<Byte4> x)
2222 	{
2223 		Value *int2 = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), x.value, 0);
2224 		Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
2225 
2226 		return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
2227 	}
2228 
UnpackLow(RValue<Byte8> x,RValue<Byte8> y)2229 	RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2230 	{
2231 		if(CPUID::supportsMMX2())
2232 		{
2233 			return x86::punpcklbw(x, y);
2234 		}
2235 		else
2236 		{
2237 			Constant *shuffle[8];
2238 			shuffle[0] = Nucleus::createConstantInt(0);
2239 			shuffle[1] = Nucleus::createConstantInt(8);
2240 			shuffle[2] = Nucleus::createConstantInt(1);
2241 			shuffle[3] = Nucleus::createConstantInt(9);
2242 			shuffle[4] = Nucleus::createConstantInt(2);
2243 			shuffle[5] = Nucleus::createConstantInt(10);
2244 			shuffle[6] = Nucleus::createConstantInt(3);
2245 			shuffle[7] = Nucleus::createConstantInt(11);
2246 
2247 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2248 
2249 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2250 		}
2251 	}
2252 
UnpackHigh(RValue<Byte8> x,RValue<Byte8> y)2253 	RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2254 	{
2255 		if(CPUID::supportsMMX2())
2256 		{
2257 			return x86::punpckhbw(x, y);
2258 		}
2259 		else
2260 		{
2261 			Constant *shuffle[8];
2262 			shuffle[0] = Nucleus::createConstantInt(4);
2263 			shuffle[1] = Nucleus::createConstantInt(12);
2264 			shuffle[2] = Nucleus::createConstantInt(5);
2265 			shuffle[3] = Nucleus::createConstantInt(13);
2266 			shuffle[4] = Nucleus::createConstantInt(6);
2267 			shuffle[5] = Nucleus::createConstantInt(14);
2268 			shuffle[6] = Nucleus::createConstantInt(7);
2269 			shuffle[7] = Nucleus::createConstantInt(15);
2270 
2271 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2272 
2273 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2274 		}
2275 	}
2276 
SignMask(RValue<Byte8> x)2277 	RValue<Int> SignMask(RValue<Byte8> x)
2278 	{
2279 		return x86::pmovmskb(x);
2280 	}
2281 
2282 //	RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2283 //	{
2284 //		return x86::pcmpgtb(x, y);   // FIXME: Signedness
2285 //	}
2286 
CmpEQ(RValue<Byte8> x,RValue<Byte8> y)2287 	RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2288 	{
2289 		return x86::pcmpeqb(x, y);
2290 	}
2291 
getType()2292 	Type *Byte8::getType()
2293 	{
2294 		if(CPUID::supportsMMX2())
2295 		{
2296 			return MMX::getType();
2297 		}
2298 		else
2299 		{
2300 			return VectorType::get(Byte::getType(), 8);
2301 		}
2302 	}
2303 
SByte8()2304 	SByte8::SByte8()
2305 	{
2306 	//	xyzw.parent = this;
2307 	}
2308 
SByte8(byte x0,byte x1,byte x2,byte x3,byte x4,byte x5,byte x6,byte x7)2309 	SByte8::SByte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
2310 	{
2311 	//	xyzw.parent = this;
2312 
2313 		Constant *constantVector[8];
2314 		constantVector[0] = Nucleus::createConstantByte(x0);
2315 		constantVector[1] = Nucleus::createConstantByte(x1);
2316 		constantVector[2] = Nucleus::createConstantByte(x2);
2317 		constantVector[3] = Nucleus::createConstantByte(x3);
2318 		constantVector[4] = Nucleus::createConstantByte(x4);
2319 		constantVector[5] = Nucleus::createConstantByte(x5);
2320 		constantVector[6] = Nucleus::createConstantByte(x6);
2321 		constantVector[7] = Nucleus::createConstantByte(x7);
2322 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
2323 
2324 		storeValue(Nucleus::createBitCast(vector, getType()));
2325 	}
2326 
SByte8(int64_t x)2327 	SByte8::SByte8(int64_t x)
2328 	{
2329 	//	xyzw.parent = this;
2330 
2331 		Constant *constantVector[8];
2332 		constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >>  0));
2333 		constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >>  8));
2334 		constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
2335 		constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
2336 		constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
2337 		constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
2338 		constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
2339 		constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
2340 		Value *vector = Nucleus::createConstantVector(constantVector, 8);
2341 
2342 		storeValue(Nucleus::createBitCast(vector, getType()));
2343 	}
2344 
SByte8(RValue<SByte8> rhs)2345 	SByte8::SByte8(RValue<SByte8> rhs)
2346 	{
2347 	//	xyzw.parent = this;
2348 
2349 		storeValue(rhs.value);
2350 	}
2351 
SByte8(const SByte8 & rhs)2352 	SByte8::SByte8(const SByte8 &rhs)
2353 	{
2354 	//	xyzw.parent = this;
2355 
2356 		Value *value = rhs.loadValue();
2357 		storeValue(value);
2358 	}
2359 
SByte8(const Reference<SByte8> & rhs)2360 	SByte8::SByte8(const Reference<SByte8> &rhs)
2361 	{
2362 	//	xyzw.parent = this;
2363 
2364 		Value *value = rhs.loadValue();
2365 		storeValue(value);
2366 	}
2367 
operator =(RValue<SByte8> rhs) const2368 	RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) const
2369 	{
2370 		storeValue(rhs.value);
2371 
2372 		return rhs;
2373 	}
2374 
operator =(const SByte8 & rhs) const2375 	RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const
2376 	{
2377 		Value *value = rhs.loadValue();
2378 		storeValue(value);
2379 
2380 		return RValue<SByte8>(value);
2381 	}
2382 
operator =(const Reference<SByte8> & rhs) const2383 	RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) const
2384 	{
2385 		Value *value = rhs.loadValue();
2386 		storeValue(value);
2387 
2388 		return RValue<SByte8>(value);
2389 	}
2390 
operator +(RValue<SByte8> lhs,RValue<SByte8> rhs)2391 	RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2392 	{
2393 		if(CPUID::supportsMMX2())
2394 		{
2395 			return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
2396 		}
2397 		else
2398 		{
2399 			return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2400 		}
2401 	}
2402 
operator -(RValue<SByte8> lhs,RValue<SByte8> rhs)2403 	RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2404 	{
2405 		if(CPUID::supportsMMX2())
2406 		{
2407 			return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
2408 		}
2409 		else
2410 		{
2411 			return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2412 		}
2413 	}
2414 
2415 //	RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2416 //	{
2417 //		return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2418 //	}
2419 
2420 //	RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2421 //	{
2422 //		return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2423 //	}
2424 
2425 //	RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2426 //	{
2427 //		return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2428 //	}
2429 
operator &(RValue<SByte8> lhs,RValue<SByte8> rhs)2430 	RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2431 	{
2432 		return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2433 	}
2434 
operator |(RValue<SByte8> lhs,RValue<SByte8> rhs)2435 	RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2436 	{
2437 		return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2438 	}
2439 
operator ^(RValue<SByte8> lhs,RValue<SByte8> rhs)2440 	RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2441 	{
2442 		return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2443 	}
2444 
2445 //	RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2446 //	{
2447 //		return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2448 //	}
2449 
2450 //	RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2451 //	{
2452 //		return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
2453 //	}
2454 
operator +=(const SByte8 & lhs,RValue<SByte8> rhs)2455 	RValue<SByte8> operator+=(const SByte8 &lhs, RValue<SByte8> rhs)
2456 	{
2457 		return lhs = lhs + rhs;
2458 	}
2459 
operator -=(const SByte8 & lhs,RValue<SByte8> rhs)2460 	RValue<SByte8> operator-=(const SByte8 &lhs, RValue<SByte8> rhs)
2461 	{
2462 		return lhs = lhs - rhs;
2463 	}
2464 
2465 //	RValue<SByte8> operator*=(const SByte8 &lhs, RValue<SByte8> rhs)
2466 //	{
2467 //		return lhs = lhs * rhs;
2468 //	}
2469 
2470 //	RValue<SByte8> operator/=(const SByte8 &lhs, RValue<SByte8> rhs)
2471 //	{
2472 //		return lhs = lhs / rhs;
2473 //	}
2474 
2475 //	RValue<SByte8> operator%=(const SByte8 &lhs, RValue<SByte8> rhs)
2476 //	{
2477 //		return lhs = lhs % rhs;
2478 //	}
2479 
operator &=(const SByte8 & lhs,RValue<SByte8> rhs)2480 	RValue<SByte8> operator&=(const SByte8 &lhs, RValue<SByte8> rhs)
2481 	{
2482 		return lhs = lhs & rhs;
2483 	}
2484 
operator |=(const SByte8 & lhs,RValue<SByte8> rhs)2485 	RValue<SByte8> operator|=(const SByte8 &lhs, RValue<SByte8> rhs)
2486 	{
2487 		return lhs = lhs | rhs;
2488 	}
2489 
operator ^=(const SByte8 & lhs,RValue<SByte8> rhs)2490 	RValue<SByte8> operator^=(const SByte8 &lhs, RValue<SByte8> rhs)
2491 	{
2492 		return lhs = lhs ^ rhs;
2493 	}
2494 
2495 //	RValue<SByte8> operator<<=(const SByte8 &lhs, RValue<SByte8> rhs)
2496 //	{
2497 //		return lhs = lhs << rhs;
2498 //	}
2499 
2500 //	RValue<SByte8> operator>>=(const SByte8 &lhs, RValue<SByte8> rhs)
2501 //	{
2502 //		return lhs = lhs >> rhs;
2503 //	}
2504 
2505 //	RValue<SByte8> operator+(RValue<SByte8> val)
2506 //	{
2507 //		return val;
2508 //	}
2509 
2510 //	RValue<SByte8> operator-(RValue<SByte8> val)
2511 //	{
2512 //		return RValue<SByte8>(Nucleus::createNeg(val.value));
2513 //	}
2514 
operator ~(RValue<SByte8> val)2515 	RValue<SByte8> operator~(RValue<SByte8> val)
2516 	{
2517 		if(CPUID::supportsMMX2())
2518 		{
2519 			return val ^ SByte8(0xFFFFFFFFFFFFFFFF);
2520 		}
2521 		else
2522 		{
2523 			return RValue<SByte8>(Nucleus::createNot(val.value));
2524 		}
2525 	}
2526 
AddSat(RValue<SByte8> x,RValue<SByte8> y)2527 	RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2528 	{
2529 		return x86::paddsb(x, y);
2530 	}
2531 
SubSat(RValue<SByte8> x,RValue<SByte8> y)2532 	RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2533 	{
2534 		return x86::psubsb(x, y);
2535 	}
2536 
UnpackLow(RValue<SByte8> x,RValue<SByte8> y)2537 	RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2538 	{
2539 		if(CPUID::supportsMMX2())
2540 		{
2541 			return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
2542 		}
2543 		else
2544 		{
2545 			Constant *shuffle[8];
2546 			shuffle[0] = Nucleus::createConstantInt(0);
2547 			shuffle[1] = Nucleus::createConstantInt(8);
2548 			shuffle[2] = Nucleus::createConstantInt(1);
2549 			shuffle[3] = Nucleus::createConstantInt(9);
2550 			shuffle[4] = Nucleus::createConstantInt(2);
2551 			shuffle[5] = Nucleus::createConstantInt(10);
2552 			shuffle[6] = Nucleus::createConstantInt(3);
2553 			shuffle[7] = Nucleus::createConstantInt(11);
2554 
2555 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2556 
2557 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2558 		}
2559 	}
2560 
UnpackHigh(RValue<SByte8> x,RValue<SByte8> y)2561 	RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2562 	{
2563 		if(CPUID::supportsMMX2())
2564 		{
2565 			return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
2566 		}
2567 		else
2568 		{
2569 			Constant *shuffle[8];
2570 			shuffle[0] = Nucleus::createConstantInt(4);
2571 			shuffle[1] = Nucleus::createConstantInt(12);
2572 			shuffle[2] = Nucleus::createConstantInt(5);
2573 			shuffle[3] = Nucleus::createConstantInt(13);
2574 			shuffle[4] = Nucleus::createConstantInt(6);
2575 			shuffle[5] = Nucleus::createConstantInt(14);
2576 			shuffle[6] = Nucleus::createConstantInt(7);
2577 			shuffle[7] = Nucleus::createConstantInt(15);
2578 
2579 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2580 
2581 			return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2582 		}
2583 	}
2584 
SignMask(RValue<SByte8> x)2585 	RValue<Int> SignMask(RValue<SByte8> x)
2586 	{
2587 		return x86::pmovmskb(As<Byte8>(x));
2588 	}
2589 
CmpGT(RValue<SByte8> x,RValue<SByte8> y)2590 	RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2591 	{
2592 		return x86::pcmpgtb(x, y);
2593 	}
2594 
CmpEQ(RValue<SByte8> x,RValue<SByte8> y)2595 	RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2596 	{
2597 		return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
2598 	}
2599 
getType()2600 	Type *SByte8::getType()
2601 	{
2602 		if(CPUID::supportsMMX2())
2603 		{
2604 			return MMX::getType();
2605 		}
2606 		else
2607 		{
2608 			return VectorType::get(SByte::getType(), 8);
2609 		}
2610 	}
2611 
Byte16(RValue<Byte16> rhs)2612 	Byte16::Byte16(RValue<Byte16> rhs)
2613 	{
2614 	//	xyzw.parent = this;
2615 
2616 		storeValue(rhs.value);
2617 	}
2618 
Byte16(const Byte16 & rhs)2619 	Byte16::Byte16(const Byte16 &rhs)
2620 	{
2621 	//	xyzw.parent = this;
2622 
2623 		Value *value = rhs.loadValue();
2624 		storeValue(value);
2625 	}
2626 
Byte16(const Reference<Byte16> & rhs)2627 	Byte16::Byte16(const Reference<Byte16> &rhs)
2628 	{
2629 	//	xyzw.parent = this;
2630 
2631 		Value *value = rhs.loadValue();
2632 		storeValue(value);
2633 	}
2634 
operator =(RValue<Byte16> rhs) const2635 	RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) const
2636 	{
2637 		storeValue(rhs.value);
2638 
2639 		return rhs;
2640 	}
2641 
operator =(const Byte16 & rhs) const2642 	RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const
2643 	{
2644 		Value *value = rhs.loadValue();
2645 		storeValue(value);
2646 
2647 		return RValue<Byte16>(value);
2648 	}
2649 
operator =(const Reference<Byte16> & rhs) const2650 	RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) const
2651 	{
2652 		Value *value = rhs.loadValue();
2653 		storeValue(value);
2654 
2655 		return RValue<Byte16>(value);
2656 	}
2657 
getType()2658 	Type *Byte16::getType()
2659 	{
2660 		return VectorType::get(Byte::getType(), 16);
2661 	}
2662 
getType()2663 	Type *SByte16::getType()
2664 	{
2665 		return VectorType::get(SByte::getType(), 16);
2666 	}
2667 
Short4(RValue<Int> cast)2668 	Short4::Short4(RValue<Int> cast)
2669 	{
2670 		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2671 		Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
2672 
2673 		storeValue(swizzle);
2674 	}
2675 
Short4(RValue<Int4> cast)2676 	Short4::Short4(RValue<Int4> cast)
2677 	{
2678 		Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2679 
2680 		#if 0   // FIXME: Check codegen (pshuflw phshufhw pshufd)
2681 			Constant *pack[8];
2682 			pack[0] = Nucleus::createConstantInt(0);
2683 			pack[1] = Nucleus::createConstantInt(2);
2684 			pack[2] = Nucleus::createConstantInt(4);
2685 			pack[3] = Nucleus::createConstantInt(6);
2686 
2687 			Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
2688 		#else
2689 			Value *packed;
2690 
2691 			// FIXME: Use Swizzle<Short8>
2692 			if(!CPUID::supportsSSSE3())
2693 			{
2694 				Constant *pshuflw[8];
2695 				pshuflw[0] = Nucleus::createConstantInt(0);
2696 				pshuflw[1] = Nucleus::createConstantInt(2);
2697 				pshuflw[2] = Nucleus::createConstantInt(0);
2698 				pshuflw[3] = Nucleus::createConstantInt(2);
2699 				pshuflw[4] = Nucleus::createConstantInt(4);
2700 				pshuflw[5] = Nucleus::createConstantInt(5);
2701 				pshuflw[6] = Nucleus::createConstantInt(6);
2702 				pshuflw[7] = Nucleus::createConstantInt(7);
2703 
2704 				Constant *pshufhw[8];
2705 				pshufhw[0] = Nucleus::createConstantInt(0);
2706 				pshufhw[1] = Nucleus::createConstantInt(1);
2707 				pshufhw[2] = Nucleus::createConstantInt(2);
2708 				pshufhw[3] = Nucleus::createConstantInt(3);
2709 				pshufhw[4] = Nucleus::createConstantInt(4);
2710 				pshufhw[5] = Nucleus::createConstantInt(6);
2711 				pshufhw[6] = Nucleus::createConstantInt(4);
2712 				pshufhw[7] = Nucleus::createConstantInt(6);
2713 
2714 				Value *shuffle1 = Nucleus::createShuffleVector(short8, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshuflw, 8));
2715 				Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshufhw, 8));
2716 				Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
2717 				packed = Nucleus::createSwizzle(int4, 0x88);
2718 			}
2719 			else
2720 			{
2721 				Constant *pshufb[16];
2722 				pshufb[0] = Nucleus::createConstantInt(0);
2723 				pshufb[1] = Nucleus::createConstantInt(1);
2724 				pshufb[2] = Nucleus::createConstantInt(4);
2725 				pshufb[3] = Nucleus::createConstantInt(5);
2726 				pshufb[4] = Nucleus::createConstantInt(8);
2727 				pshufb[5] = Nucleus::createConstantInt(9);
2728 				pshufb[6] = Nucleus::createConstantInt(12);
2729 				pshufb[7] = Nucleus::createConstantInt(13);
2730 				pshufb[8] = Nucleus::createConstantInt(0);
2731 				pshufb[9] = Nucleus::createConstantInt(1);
2732 				pshufb[10] = Nucleus::createConstantInt(4);
2733 				pshufb[11] = Nucleus::createConstantInt(5);
2734 				pshufb[12] = Nucleus::createConstantInt(8);
2735 				pshufb[13] = Nucleus::createConstantInt(9);
2736 				pshufb[14] = Nucleus::createConstantInt(12);
2737 				pshufb[15] = Nucleus::createConstantInt(13);
2738 
2739 				Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
2740 				packed = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
2741 			}
2742 
2743 			#if 0   // FIXME: No optimal instruction selection
2744 				Value *qword2 = Nucleus::createBitCast(packed, Long2::getType());
2745 				Value *element = Nucleus::createExtractElement(qword2, 0);
2746 				Value *short4 = Nucleus::createBitCast(element, Short4::getType());
2747 			#else   // FIXME: Requires SSE
2748 				Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
2749 				Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2750 			#endif
2751 		#endif
2752 
2753 		storeValue(short4);
2754 	}
2755 
2756 //	Short4::Short4(RValue<Float> cast)
2757 //	{
2758 //	}
2759 
Short4(RValue<Float4> cast)2760 	Short4::Short4(RValue<Float4> cast)
2761 	{
2762 		Int4 v4i32 = Int4(cast);
2763 		v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2764 
2765 		storeValue(As<Short4>(Int2(v4i32)).value);
2766 	}
2767 
Short4()2768 	Short4::Short4()
2769 	{
2770 	//	xyzw.parent = this;
2771 	}
2772 
Short4(short xyzw)2773 	Short4::Short4(short xyzw)
2774 	{
2775 		//	xyzw.parent = this;
2776 
2777 		Constant *constantVector[4];
2778 		constantVector[0] = Nucleus::createConstantShort(xyzw);
2779 		constantVector[1] = Nucleus::createConstantShort(xyzw);
2780 		constantVector[2] = Nucleus::createConstantShort(xyzw);
2781 		constantVector[3] = Nucleus::createConstantShort(xyzw);
2782 		Value *vector = Nucleus::createConstantVector(constantVector, 4);
2783 
2784 		storeValue(Nucleus::createBitCast(vector, getType()));
2785 	}
2786 
Short4(short x,short y,short z,short w)2787 	Short4::Short4(short x, short y, short z, short w)
2788 	{
2789 	//	xyzw.parent = this;
2790 
2791 		Constant *constantVector[4];
2792 		constantVector[0] = Nucleus::createConstantShort(x);
2793 		constantVector[1] = Nucleus::createConstantShort(y);
2794 		constantVector[2] = Nucleus::createConstantShort(z);
2795 		constantVector[3] = Nucleus::createConstantShort(w);
2796 		Value *vector = Nucleus::createConstantVector(constantVector, 4);
2797 
2798 		storeValue(Nucleus::createBitCast(vector, getType()));
2799 	}
2800 
Short4(RValue<Short4> rhs)2801 	Short4::Short4(RValue<Short4> rhs)
2802 	{
2803 	//	xyzw.parent = this;
2804 
2805 		storeValue(rhs.value);
2806 	}
2807 
Short4(const Short4 & rhs)2808 	Short4::Short4(const Short4 &rhs)
2809 	{
2810 	//	xyzw.parent = this;
2811 
2812 		Value *value = rhs.loadValue();
2813 		storeValue(value);
2814 	}
2815 
Short4(const Reference<Short4> & rhs)2816 	Short4::Short4(const Reference<Short4> &rhs)
2817 	{
2818 	//	xyzw.parent = this;
2819 
2820 		Value *value = rhs.loadValue();
2821 		storeValue(value);
2822 	}
2823 
Short4(RValue<UShort4> rhs)2824 	Short4::Short4(RValue<UShort4> rhs)
2825 	{
2826 	//	xyzw.parent = this;
2827 
2828 		storeValue(rhs.value);
2829 	}
2830 
Short4(const UShort4 & rhs)2831 	Short4::Short4(const UShort4 &rhs)
2832 	{
2833 	//	xyzw.parent = this;
2834 
2835 		storeValue(rhs.loadValue());
2836 	}
2837 
Short4(const Reference<UShort4> & rhs)2838 	Short4::Short4(const Reference<UShort4> &rhs)
2839 	{
2840 	//	xyzw.parent = this;
2841 
2842 		storeValue(rhs.loadValue());
2843 	}
2844 
operator =(RValue<Short4> rhs) const2845 	RValue<Short4> Short4::operator=(RValue<Short4> rhs) const
2846 	{
2847 		storeValue(rhs.value);
2848 
2849 		return rhs;
2850 	}
2851 
operator =(const Short4 & rhs) const2852 	RValue<Short4> Short4::operator=(const Short4 &rhs) const
2853 	{
2854 		Value *value = rhs.loadValue();
2855 		storeValue(value);
2856 
2857 		return RValue<Short4>(value);
2858 	}
2859 
operator =(const Reference<Short4> & rhs) const2860 	RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) const
2861 	{
2862 		Value *value = rhs.loadValue();
2863 		storeValue(value);
2864 
2865 		return RValue<Short4>(value);
2866 	}
2867 
operator =(RValue<UShort4> rhs) const2868 	RValue<Short4> Short4::operator=(RValue<UShort4> rhs) const
2869 	{
2870 		storeValue(rhs.value);
2871 
2872 		return RValue<Short4>(rhs);
2873 	}
2874 
operator =(const UShort4 & rhs) const2875 	RValue<Short4> Short4::operator=(const UShort4 &rhs) const
2876 	{
2877 		Value *value = rhs.loadValue();
2878 		storeValue(value);
2879 
2880 		return RValue<Short4>(value);
2881 	}
2882 
operator =(const Reference<UShort4> & rhs) const2883 	RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) const
2884 	{
2885 		Value *value = rhs.loadValue();
2886 		storeValue(value);
2887 
2888 		return RValue<Short4>(value);
2889 	}
2890 
operator +(RValue<Short4> lhs,RValue<Short4> rhs)2891 	RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2892 	{
2893 		if(CPUID::supportsMMX2())
2894 		{
2895 			return x86::paddw(lhs, rhs);
2896 		}
2897 		else
2898 		{
2899 			return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
2900 		}
2901 	}
2902 
operator -(RValue<Short4> lhs,RValue<Short4> rhs)2903 	RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2904 	{
2905 		if(CPUID::supportsMMX2())
2906 		{
2907 			return x86::psubw(lhs, rhs);
2908 		}
2909 		else
2910 		{
2911 			return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
2912 		}
2913 	}
2914 
operator *(RValue<Short4> lhs,RValue<Short4> rhs)2915 	RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2916 	{
2917 		if(CPUID::supportsMMX2())
2918 		{
2919 			return x86::pmullw(lhs, rhs);
2920 		}
2921 		else
2922 		{
2923 			return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2924 		}
2925 	}
2926 
2927 //	RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2928 //	{
2929 //		return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2930 //	}
2931 
2932 //	RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2933 //	{
2934 //		return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
2935 //	}
2936 
operator &(RValue<Short4> lhs,RValue<Short4> rhs)2937 	RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2938 	{
2939 		if(CPUID::supportsMMX2())
2940 		{
2941 			return x86::pand(lhs, rhs);
2942 		}
2943 		else
2944 		{
2945 			return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
2946 		}
2947 	}
2948 
operator |(RValue<Short4> lhs,RValue<Short4> rhs)2949 	RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2950 	{
2951 		if(CPUID::supportsMMX2())
2952 		{
2953 			return x86::por(lhs, rhs);
2954 		}
2955 		else
2956 		{
2957 			return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
2958 		}
2959 	}
2960 
operator ^(RValue<Short4> lhs,RValue<Short4> rhs)2961 	RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2962 	{
2963 		if(CPUID::supportsMMX2())
2964 		{
2965 			return x86::pxor(lhs, rhs);
2966 		}
2967 		else
2968 		{
2969 			return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
2970 		}
2971 	}
2972 
operator <<(RValue<Short4> lhs,unsigned char rhs)2973 	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2974 	{
2975 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2976 
2977 		return x86::psllw(lhs, rhs);
2978 	}
2979 
operator >>(RValue<Short4> lhs,unsigned char rhs)2980 	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2981 	{
2982 	//	return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2983 
2984 		return x86::psraw(lhs, rhs);
2985 	}
2986 
operator <<(RValue<Short4> lhs,RValue<Long1> rhs)2987 	RValue<Short4> operator<<(RValue<Short4> lhs, RValue<Long1> rhs)
2988 	{
2989 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2990 
2991 		return x86::psllw(lhs, rhs);
2992 	}
2993 
operator >>(RValue<Short4> lhs,RValue<Long1> rhs)2994 	RValue<Short4> operator>>(RValue<Short4> lhs, RValue<Long1> rhs)
2995 	{
2996 	//	return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2997 
2998 		return x86::psraw(lhs, rhs);
2999 	}
3000 
operator +=(const Short4 & lhs,RValue<Short4> rhs)3001 	RValue<Short4> operator+=(const Short4 &lhs, RValue<Short4> rhs)
3002 	{
3003 		return lhs = lhs + rhs;
3004 	}
3005 
operator -=(const Short4 & lhs,RValue<Short4> rhs)3006 	RValue<Short4> operator-=(const Short4 &lhs, RValue<Short4> rhs)
3007 	{
3008 		return lhs = lhs - rhs;
3009 	}
3010 
operator *=(const Short4 & lhs,RValue<Short4> rhs)3011 	RValue<Short4> operator*=(const Short4 &lhs, RValue<Short4> rhs)
3012 	{
3013 		return lhs = lhs * rhs;
3014 	}
3015 
3016 //	RValue<Short4> operator/=(const Short4 &lhs, RValue<Short4> rhs)
3017 //	{
3018 //		return lhs = lhs / rhs;
3019 //	}
3020 
3021 //	RValue<Short4> operator%=(const Short4 &lhs, RValue<Short4> rhs)
3022 //	{
3023 //		return lhs = lhs % rhs;
3024 //	}
3025 
operator &=(const Short4 & lhs,RValue<Short4> rhs)3026 	RValue<Short4> operator&=(const Short4 &lhs, RValue<Short4> rhs)
3027 	{
3028 		return lhs = lhs & rhs;
3029 	}
3030 
operator |=(const Short4 & lhs,RValue<Short4> rhs)3031 	RValue<Short4> operator|=(const Short4 &lhs, RValue<Short4> rhs)
3032 	{
3033 		return lhs = lhs | rhs;
3034 	}
3035 
operator ^=(const Short4 & lhs,RValue<Short4> rhs)3036 	RValue<Short4> operator^=(const Short4 &lhs, RValue<Short4> rhs)
3037 	{
3038 		return lhs = lhs ^ rhs;
3039 	}
3040 
operator <<=(const Short4 & lhs,unsigned char rhs)3041 	RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs)
3042 	{
3043 		return lhs = lhs << rhs;
3044 	}
3045 
operator >>=(const Short4 & lhs,unsigned char rhs)3046 	RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs)
3047 	{
3048 		return lhs = lhs >> rhs;
3049 	}
3050 
operator <<=(const Short4 & lhs,RValue<Long1> rhs)3051 	RValue<Short4> operator<<=(const Short4 &lhs, RValue<Long1> rhs)
3052 	{
3053 		return lhs = lhs << rhs;
3054 	}
3055 
operator >>=(const Short4 & lhs,RValue<Long1> rhs)3056 	RValue<Short4> operator>>=(const Short4 &lhs, RValue<Long1> rhs)
3057 	{
3058 		return lhs = lhs >> rhs;
3059 	}
3060 
3061 //	RValue<Short4> operator+(RValue<Short4> val)
3062 //	{
3063 //		return val;
3064 //	}
3065 
operator -(RValue<Short4> val)3066 	RValue<Short4> operator-(RValue<Short4> val)
3067 	{
3068 		if(CPUID::supportsMMX2())
3069 		{
3070 			return Short4(0, 0, 0, 0) - val;
3071 		}
3072 		else
3073 		{
3074 			return RValue<Short4>(Nucleus::createNeg(val.value));
3075 		}
3076 	}
3077 
operator ~(RValue<Short4> val)3078 	RValue<Short4> operator~(RValue<Short4> val)
3079 	{
3080 		if(CPUID::supportsMMX2())
3081 		{
3082 			return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
3083 		}
3084 		else
3085 		{
3086 			return RValue<Short4>(Nucleus::createNot(val.value));
3087 		}
3088 	}
3089 
RoundShort4(RValue<Float4> cast)3090 	RValue<Short4> RoundShort4(RValue<Float4> cast)
3091 	{
3092 		RValue<Int4> v4i32 = x86::cvtps2dq(cast);
3093 		RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
3094 
3095 		return As<Short4>(Int2(As<Int4>(v8i16)));
3096 	}
3097 
Max(RValue<Short4> x,RValue<Short4> y)3098 	RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3099 	{
3100 		return x86::pmaxsw(x, y);
3101 	}
3102 
Min(RValue<Short4> x,RValue<Short4> y)3103 	RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3104 	{
3105 		return x86::pminsw(x, y);
3106 	}
3107 
AddSat(RValue<Short4> x,RValue<Short4> y)3108 	RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3109 	{
3110 		return x86::paddsw(x, y);
3111 	}
3112 
SubSat(RValue<Short4> x,RValue<Short4> y)3113 	RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3114 	{
3115 		return x86::psubsw(x, y);
3116 	}
3117 
MulHigh(RValue<Short4> x,RValue<Short4> y)3118 	RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3119 	{
3120 		return x86::pmulhw(x, y);
3121 	}
3122 
MulAdd(RValue<Short4> x,RValue<Short4> y)3123 	RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3124 	{
3125 		return x86::pmaddwd(x, y);
3126 	}
3127 
Pack(RValue<Short4> x,RValue<Short4> y)3128 	RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3129 	{
3130 		return x86::packsswb(x, y);
3131 	}
3132 
UnpackLow(RValue<Short4> x,RValue<Short4> y)3133 	RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3134 	{
3135 		if(CPUID::supportsMMX2())
3136 		{
3137 			return x86::punpcklwd(x, y);
3138 		}
3139 		else
3140 		{
3141 			Constant *shuffle[4];
3142 			shuffle[0] = Nucleus::createConstantInt(0);
3143 			shuffle[1] = Nucleus::createConstantInt(4);
3144 			shuffle[2] = Nucleus::createConstantInt(1);
3145 			shuffle[3] = Nucleus::createConstantInt(5);
3146 
3147 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4));
3148 
3149 			return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
3150 		}
3151 	}
3152 
UnpackHigh(RValue<Short4> x,RValue<Short4> y)3153 	RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3154 	{
3155 		if(CPUID::supportsMMX2())
3156 		{
3157 			return x86::punpckhwd(x, y);
3158 		}
3159 		else
3160 		{
3161 			Constant *shuffle[4];
3162 			shuffle[0] = Nucleus::createConstantInt(2);
3163 			shuffle[1] = Nucleus::createConstantInt(6);
3164 			shuffle[2] = Nucleus::createConstantInt(3);
3165 			shuffle[3] = Nucleus::createConstantInt(7);
3166 
3167 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4));
3168 
3169 			return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
3170 		}
3171 	}
3172 
Swizzle(RValue<Short4> x,unsigned char select)3173 	RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3174 	{
3175 		if(CPUID::supportsMMX2())
3176 		{
3177 			return x86::pshufw(x, select);
3178 		}
3179 		else
3180 		{
3181 			return RValue<Short4>(Nucleus::createSwizzle(x.value, select));
3182 		}
3183 	}
3184 
Insert(RValue<Short4> val,RValue<Short> element,int i)3185 	RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3186 	{
3187 		if(CPUID::supportsMMX2())
3188 		{
3189 			return x86::pinsrw(val, Int(element), i);
3190 		}
3191 		else
3192 		{
3193 			return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3194 		}
3195 	}
3196 
Extract(RValue<Short4> val,int i)3197 	RValue<Short> Extract(RValue<Short4> val, int i)
3198 	{
3199 		if(CPUID::supportsMMX2())
3200 		{
3201 			return Short(x86::pextrw(val, i));
3202 		}
3203 		else
3204 		{
3205 			return RValue<Short>(Nucleus::createExtractElement(val.value, i));
3206 		}
3207 	}
3208 
CmpGT(RValue<Short4> x,RValue<Short4> y)3209 	RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3210 	{
3211 		return x86::pcmpgtw(x, y);
3212 	}
3213 
CmpEQ(RValue<Short4> x,RValue<Short4> y)3214 	RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3215 	{
3216 		return x86::pcmpeqw(x, y);
3217 	}
3218 
getType()3219 	Type *Short4::getType()
3220 	{
3221 		if(CPUID::supportsMMX2())
3222 		{
3223 			return MMX::getType();
3224 		}
3225 		else
3226 		{
3227 			return VectorType::get(Short::getType(), 4);
3228 		}
3229 	}
3230 
UShort4(RValue<Int4> cast)3231 	UShort4::UShort4(RValue<Int4> cast)
3232 	{
3233 		*this = Short4(cast);
3234 	}
3235 
UShort4(RValue<Float4> cast,bool saturate)3236 	UShort4::UShort4(RValue<Float4> cast, bool saturate)
3237 	{
3238 		Float4 sat;
3239 
3240 		if(saturate)
3241 		{
3242 			if(CPUID::supportsSSE4_1())
3243 			{
3244 				sat = Min(cast, Float4(0xFFFF));   // packusdw takes care of 0x0000 saturation
3245 			}
3246 			else
3247 			{
3248 				sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
3249 			}
3250 		}
3251 		else
3252 		{
3253 			sat = cast;
3254 		}
3255 
3256 		Int4 int4(sat);
3257 
3258 		if(!saturate || !CPUID::supportsSSE4_1())
3259 		{
3260 			*this = Short4(Int4(int4));
3261 		}
3262 		else
3263 		{
3264 			*this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
3265 		}
3266 	}
3267 
	// Default constructor: stores no value. The only statement in the body is
	// commented out.
	UShort4::UShort4()
	{
	//	xyzw.parent = this;
	}
3272 
UShort4(unsigned short x,unsigned short y,unsigned short z,unsigned short w)3273 	UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3274 	{
3275 	//	xyzw.parent = this;
3276 
3277 		Constant *constantVector[4];
3278 		constantVector[0] = Nucleus::createConstantShort(x);
3279 		constantVector[1] = Nucleus::createConstantShort(y);
3280 		constantVector[2] = Nucleus::createConstantShort(z);
3281 		constantVector[3] = Nucleus::createConstantShort(w);
3282 		Value *vector = Nucleus::createConstantVector(constantVector, 4);
3283 
3284 		storeValue(Nucleus::createBitCast(vector, getType()));
3285 	}
3286 
UShort4(RValue<UShort4> rhs)3287 	UShort4::UShort4(RValue<UShort4> rhs)
3288 	{
3289 	//	xyzw.parent = this;
3290 
3291 		storeValue(rhs.value);
3292 	}
3293 
UShort4(const UShort4 & rhs)3294 	UShort4::UShort4(const UShort4 &rhs)
3295 	{
3296 	//	xyzw.parent = this;
3297 
3298 		Value *value = rhs.loadValue();
3299 		storeValue(value);
3300 	}
3301 
UShort4(const Reference<UShort4> & rhs)3302 	UShort4::UShort4(const Reference<UShort4> &rhs)
3303 	{
3304 	//	xyzw.parent = this;
3305 
3306 		Value *value = rhs.loadValue();
3307 		storeValue(value);
3308 	}
3309 
UShort4(RValue<Short4> rhs)3310 	UShort4::UShort4(RValue<Short4> rhs)
3311 	{
3312 	//	xyzw.parent = this;
3313 
3314 		storeValue(rhs.value);
3315 	}
3316 
UShort4(const Short4 & rhs)3317 	UShort4::UShort4(const Short4 &rhs)
3318 	{
3319 	//	xyzw.parent = this;
3320 
3321 		Value *value = rhs.loadValue();
3322 		storeValue(value);
3323 	}
3324 
UShort4(const Reference<Short4> & rhs)3325 	UShort4::UShort4(const Reference<Short4> &rhs)
3326 	{
3327 	//	xyzw.parent = this;
3328 
3329 		Value *value = rhs.loadValue();
3330 		storeValue(value);
3331 	}
3332 
operator =(RValue<UShort4> rhs) const3333 	RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) const
3334 	{
3335 		storeValue(rhs.value);
3336 
3337 		return rhs;
3338 	}
3339 
operator =(const UShort4 & rhs) const3340 	RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const
3341 	{
3342 		Value *value = rhs.loadValue();
3343 		storeValue(value);
3344 
3345 		return RValue<UShort4>(value);
3346 	}
3347 
operator =(const Reference<UShort4> & rhs) const3348 	RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) const
3349 	{
3350 		Value *value = rhs.loadValue();
3351 		storeValue(value);
3352 
3353 		return RValue<UShort4>(value);
3354 	}
3355 
operator =(RValue<Short4> rhs) const3356 	RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) const
3357 	{
3358 		storeValue(rhs.value);
3359 
3360 		return RValue<UShort4>(rhs);
3361 	}
3362 
operator =(const Short4 & rhs) const3363 	RValue<UShort4> UShort4::operator=(const Short4 &rhs) const
3364 	{
3365 		Value *value = rhs.loadValue();
3366 		storeValue(value);
3367 
3368 		return RValue<UShort4>(value);
3369 	}
3370 
operator =(const Reference<Short4> & rhs) const3371 	RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) const
3372 	{
3373 		Value *value = rhs.loadValue();
3374 		storeValue(value);
3375 
3376 		return RValue<UShort4>(value);
3377 	}
3378 
operator +(RValue<UShort4> lhs,RValue<UShort4> rhs)3379 	RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3380 	{
3381 		if(CPUID::supportsMMX2())
3382 		{
3383 			return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
3384 		}
3385 		else
3386 		{
3387 			return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3388 		}
3389 	}
3390 
operator -(RValue<UShort4> lhs,RValue<UShort4> rhs)3391 	RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3392 	{
3393 		if(CPUID::supportsMMX2())
3394 		{
3395 			return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
3396 		}
3397 		else
3398 		{
3399 			return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3400 		}
3401 	}
3402 
3403 
operator *(RValue<UShort4> lhs,RValue<UShort4> rhs)3404 	RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3405 	{
3406 		if(CPUID::supportsMMX2())
3407 		{
3408 			return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
3409 		}
3410 		else
3411 		{
3412 			return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3413 		}
3414 	}
3415 
operator <<(RValue<UShort4> lhs,unsigned char rhs)3416 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3417 	{
3418 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3419 
3420 		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3421 	}
3422 
operator >>(RValue<UShort4> lhs,unsigned char rhs)3423 	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3424 	{
3425 	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3426 
3427 		return x86::psrlw(lhs, rhs);
3428 	}
3429 
operator <<(RValue<UShort4> lhs,RValue<Long1> rhs)3430 	RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs)
3431 	{
3432 	//	return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3433 
3434 		return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3435 	}
3436 
operator >>(RValue<UShort4> lhs,RValue<Long1> rhs)3437 	RValue<UShort4> operator>>(RValue<UShort4> lhs, RValue<Long1> rhs)
3438 	{
3439 	//	return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3440 
3441 		return x86::psrlw(lhs, rhs);
3442 	}
3443 
operator <<=(const UShort4 & lhs,unsigned char rhs)3444 	RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs)
3445 	{
3446 		return lhs = lhs << rhs;
3447 	}
3448 
operator >>=(const UShort4 & lhs,unsigned char rhs)3449 	RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs)
3450 	{
3451 		return lhs = lhs >> rhs;
3452 	}
3453 
operator <<=(const UShort4 & lhs,RValue<Long1> rhs)3454 	RValue<UShort4> operator<<=(const UShort4 &lhs, RValue<Long1> rhs)
3455 	{
3456 		return lhs = lhs << rhs;
3457 	}
3458 
operator >>=(const UShort4 & lhs,RValue<Long1> rhs)3459 	RValue<UShort4> operator>>=(const UShort4 &lhs, RValue<Long1> rhs)
3460 	{
3461 		return lhs = lhs >> rhs;
3462 	}
3463 
operator ~(RValue<UShort4> val)3464 	RValue<UShort4> operator~(RValue<UShort4> val)
3465 	{
3466 		if(CPUID::supportsMMX2())
3467 		{
3468 			return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
3469 		}
3470 		else
3471 		{
3472 			return RValue<UShort4>(Nucleus::createNot(val.value));
3473 		}
3474 	}
3475 
Max(RValue<UShort4> x,RValue<UShort4> y)3476 	RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3477 	{
3478 		return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3479 	}
3480 
Min(RValue<UShort4> x,RValue<UShort4> y)3481 	RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3482 	{
3483 		return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3484 	}
3485 
AddSat(RValue<UShort4> x,RValue<UShort4> y)3486 	RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3487 	{
3488 		return x86::paddusw(x, y);
3489 	}
3490 
SubSat(RValue<UShort4> x,RValue<UShort4> y)3491 	RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3492 	{
3493 		return x86::psubusw(x, y);
3494 	}
3495 
MulHigh(RValue<UShort4> x,RValue<UShort4> y)3496 	RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3497 	{
3498 		return x86::pmulhuw(x, y);
3499 	}
3500 
Average(RValue<UShort4> x,RValue<UShort4> y)3501 	RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3502 	{
3503 		return x86::pavgw(x, y);
3504 	}
3505 
Pack(RValue<UShort4> x,RValue<UShort4> y)3506 	RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3507 	{
3508 		return x86::packuswb(x, y);
3509 	}
3510 
getType()3511 	Type *UShort4::getType()
3512 	{
3513 		if(CPUID::supportsMMX2())
3514 		{
3515 			return MMX::getType();
3516 		}
3517 		else
3518 		{
3519 			return VectorType::get(UShort::getType(), 4);
3520 		}
3521 	}
3522 
Short8(short c0,short c1,short c2,short c3,short c4,short c5,short c6,short c7)3523 	Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3524 	{
3525 	//	xyzw.parent = this;
3526 
3527 		Constant *constantVector[8];
3528 		constantVector[0] = Nucleus::createConstantShort(c0);
3529 		constantVector[1] = Nucleus::createConstantShort(c1);
3530 		constantVector[2] = Nucleus::createConstantShort(c2);
3531 		constantVector[3] = Nucleus::createConstantShort(c3);
3532 		constantVector[4] = Nucleus::createConstantShort(c4);
3533 		constantVector[5] = Nucleus::createConstantShort(c5);
3534 		constantVector[6] = Nucleus::createConstantShort(c6);
3535 		constantVector[7] = Nucleus::createConstantShort(c7);
3536 
3537 		storeValue(Nucleus::createConstantVector(constantVector, 8));
3538 	}
3539 
Short8(RValue<Short8> rhs)3540 	Short8::Short8(RValue<Short8> rhs)
3541 	{
3542 	//	xyzw.parent = this;
3543 
3544 		storeValue(rhs.value);
3545 	}
3546 
Short8(RValue<Short4> lo,RValue<Short4> hi)3547 	Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3548 	{
3549 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3550 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3551 
3552 		Value *long2 = UndefValue::get(Long2::getType());
3553 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
3554 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3555 		Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
3556 
3557 		storeValue(short8);
3558 	}
3559 
operator +(RValue<Short8> lhs,RValue<Short8> rhs)3560 	RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3561 	{
3562 		return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3563 	}
3564 
operator &(RValue<Short8> lhs,RValue<Short8> rhs)3565 	RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3566 	{
3567 		return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3568 	}
3569 
operator <<(RValue<Short8> lhs,unsigned char rhs)3570 	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3571 	{
3572 		return x86::psllw(lhs, rhs);   // FIXME: Fallback required
3573 	}
3574 
operator >>(RValue<Short8> lhs,unsigned char rhs)3575 	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3576 	{
3577 		return x86::psraw(lhs, rhs);   // FIXME: Fallback required
3578 	}
3579 
MulAdd(RValue<Short8> x,RValue<Short8> y)3580 	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3581 	{
3582 		return x86::pmaddwd(x, y);   // FIXME: Fallback required
3583 	}
3584 
Abs(RValue<Int4> x)3585 	RValue<Int4> Abs(RValue<Int4> x)
3586 	{
3587 		if(CPUID::supportsSSSE3())
3588 		{
3589 			return x86::pabsd(x);
3590 		}
3591 		else
3592 		{
3593 			Int4 mask = (x >> 31);
3594 			return (mask ^ x) - mask;
3595 		}
3596 	}
3597 
MulHigh(RValue<Short8> x,RValue<Short8> y)3598 	RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3599 	{
3600 		return x86::pmulhw(x, y);   // FIXME: Fallback required
3601 	}
3602 
getType()3603 	Type *Short8::getType()
3604 	{
3605 		return VectorType::get(Short::getType(), 8);
3606 	}
3607 
UShort8(unsigned short c0,unsigned short c1,unsigned short c2,unsigned short c3,unsigned short c4,unsigned short c5,unsigned short c6,unsigned short c7)3608 	UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3609 	{
3610 	//	xyzw.parent = this;
3611 
3612 		Constant *constantVector[8];
3613 		constantVector[0] = Nucleus::createConstantShort(c0);
3614 		constantVector[1] = Nucleus::createConstantShort(c1);
3615 		constantVector[2] = Nucleus::createConstantShort(c2);
3616 		constantVector[3] = Nucleus::createConstantShort(c3);
3617 		constantVector[4] = Nucleus::createConstantShort(c4);
3618 		constantVector[5] = Nucleus::createConstantShort(c5);
3619 		constantVector[6] = Nucleus::createConstantShort(c6);
3620 		constantVector[7] = Nucleus::createConstantShort(c7);
3621 
3622 		storeValue(Nucleus::createConstantVector(constantVector, 8));
3623 	}
3624 
UShort8(RValue<UShort8> rhs)3625 	UShort8::UShort8(RValue<UShort8> rhs)
3626 	{
3627 	//	xyzw.parent = this;
3628 
3629 		storeValue(rhs.value);
3630 	}
3631 
UShort8(RValue<UShort4> lo,RValue<UShort4> hi)3632 	UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3633 	{
3634 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3635 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3636 
3637 		Value *long2 = UndefValue::get(Long2::getType());
3638 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
3639 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3640 		Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
3641 
3642 		storeValue(short8);
3643 	}
3644 
operator =(RValue<UShort8> rhs) const3645 	RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) const
3646 	{
3647 		storeValue(rhs.value);
3648 
3649 		return rhs;
3650 	}
3651 
operator =(const UShort8 & rhs) const3652 	RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const
3653 	{
3654 		Value *value = rhs.loadValue();
3655 		storeValue(value);
3656 
3657 		return RValue<UShort8>(value);
3658 	}
3659 
operator =(const Reference<UShort8> & rhs) const3660 	RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) const
3661 	{
3662 		Value *value = rhs.loadValue();
3663 		storeValue(value);
3664 
3665 		return RValue<UShort8>(value);
3666 	}
3667 
operator &(RValue<UShort8> lhs,RValue<UShort8> rhs)3668 	RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3669 	{
3670 		return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3671 	}
3672 
operator <<(RValue<UShort8> lhs,unsigned char rhs)3673 	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3674 	{
3675 		return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));   // FIXME: Fallback required
3676 	}
3677 
operator >>(RValue<UShort8> lhs,unsigned char rhs)3678 	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3679 	{
3680 		return x86::psrlw(lhs, rhs);   // FIXME: Fallback required
3681 	}
3682 
operator +(RValue<UShort8> lhs,RValue<UShort8> rhs)3683 	RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3684 	{
3685 		return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3686 	}
3687 
operator *(RValue<UShort8> lhs,RValue<UShort8> rhs)3688 	RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3689 	{
3690 		return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3691 	}
3692 
operator +=(const UShort8 & lhs,RValue<UShort8> rhs)3693 	RValue<UShort8> operator+=(const UShort8 &lhs, RValue<UShort8> rhs)
3694 	{
3695 		return lhs = lhs + rhs;
3696 	}
3697 
operator ~(RValue<UShort8> val)3698 	RValue<UShort8> operator~(RValue<UShort8> val)
3699 	{
3700 		return RValue<UShort8>(Nucleus::createNot(val.value));
3701 	}
3702 
Swizzle(RValue<UShort8> x,char select0,char select1,char select2,char select3,char select4,char select5,char select6,char select7)3703 	RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3704 	{
3705 		Constant *pshufb[16];
3706 		pshufb[0] = Nucleus::createConstantInt(select0 + 0);
3707 		pshufb[1] = Nucleus::createConstantInt(select0 + 1);
3708 		pshufb[2] = Nucleus::createConstantInt(select1 + 0);
3709 		pshufb[3] = Nucleus::createConstantInt(select1 + 1);
3710 		pshufb[4] = Nucleus::createConstantInt(select2 + 0);
3711 		pshufb[5] = Nucleus::createConstantInt(select2 + 1);
3712 		pshufb[6] = Nucleus::createConstantInt(select3 + 0);
3713 		pshufb[7] = Nucleus::createConstantInt(select3 + 1);
3714 		pshufb[8] = Nucleus::createConstantInt(select4 + 0);
3715 		pshufb[9] = Nucleus::createConstantInt(select4 + 1);
3716 		pshufb[10] = Nucleus::createConstantInt(select5 + 0);
3717 		pshufb[11] = Nucleus::createConstantInt(select5 + 1);
3718 		pshufb[12] = Nucleus::createConstantInt(select6 + 0);
3719 		pshufb[13] = Nucleus::createConstantInt(select6 + 1);
3720 		pshufb[14] = Nucleus::createConstantInt(select7 + 0);
3721 		pshufb[15] = Nucleus::createConstantInt(select7 + 1);
3722 
3723 		Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
3724 		Value *shuffle = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
3725 		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3726 
3727 		return RValue<UShort8>(short8);
3728 	}
3729 
MulHigh(RValue<UShort8> x,RValue<UShort8> y)3730 	RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3731 	{
3732 		return x86::pmulhuw(x, y);   // FIXME: Fallback required
3733 	}
3734 
3735 	// FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3736 //	RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3737 //	{
3738 //		Constant *pshufb[16];
3739 //		pshufb[0] = Nucleus::createConstantInt(element + 0);
3740 //		pshufb[1] = Nucleus::createConstantInt(element + 0);
3741 //		pshufb[2] = Nucleus::createConstantInt(element + 4);
3742 //		pshufb[3] = Nucleus::createConstantInt(element + 4);
3743 //		pshufb[4] = Nucleus::createConstantInt(element + 8);
3744 //		pshufb[5] = Nucleus::createConstantInt(element + 8);
3745 //		pshufb[6] = Nucleus::createConstantInt(element + 12);
3746 //		pshufb[7] = Nucleus::createConstantInt(element + 12);
3747 //		pshufb[8] = Nucleus::createConstantInt(element + 16);
3748 //		pshufb[9] = Nucleus::createConstantInt(element + 16);
3749 //		pshufb[10] = Nucleus::createConstantInt(element + 20);
3750 //		pshufb[11] = Nucleus::createConstantInt(element + 20);
3751 //		pshufb[12] = Nucleus::createConstantInt(element + 24);
3752 //		pshufb[13] = Nucleus::createConstantInt(element + 24);
3753 //		pshufb[14] = Nucleus::createConstantInt(element + 28);
3754 //		pshufb[15] = Nucleus::createConstantInt(element + 28);
3755 //
3756 //		Value *shuffle = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(pshufb, 16));
3757 //		Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3758 //
3759 //		return RValue<UShort8>(short8);
3760 //	}
3761 
getType()3762 	Type *UShort8::getType()
3763 	{
3764 		return VectorType::get(UShort::getType(), 8);
3765 	}
3766 
Int(Argument<Int> argument)3767 	Int::Int(Argument<Int> argument)
3768 	{
3769 		storeValue(argument.value);
3770 	}
3771 
Int(RValue<Byte> cast)3772 	Int::Int(RValue<Byte> cast)
3773 	{
3774 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3775 
3776 		storeValue(integer);
3777 	}
3778 
Int(RValue<SByte> cast)3779 	Int::Int(RValue<SByte> cast)
3780 	{
3781 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3782 
3783 		storeValue(integer);
3784 	}
3785 
Int(RValue<Short> cast)3786 	Int::Int(RValue<Short> cast)
3787 	{
3788 		Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3789 
3790 		storeValue(integer);
3791 	}
3792 
Int(RValue<UShort> cast)3793 	Int::Int(RValue<UShort> cast)
3794 	{
3795 		Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3796 
3797 		storeValue(integer);
3798 	}
3799 
Int(RValue<Int2> cast)3800 	Int::Int(RValue<Int2> cast)
3801 	{
3802 		*this = Extract(cast, 0);
3803 	}
3804 
Int(RValue<Long> cast)3805 	Int::Int(RValue<Long> cast)
3806 	{
3807 		Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3808 
3809 		storeValue(integer);
3810 	}
3811 
Int(RValue<Float> cast)3812 	Int::Int(RValue<Float> cast)
3813 	{
3814 		Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3815 
3816 		storeValue(integer);
3817 	}
3818 
	// Default constructor: the body is empty, so no value is stored.
	Int::Int()
	{
	}
3822 
Int(int x)3823 	Int::Int(int x)
3824 	{
3825 		storeValue(Nucleus::createConstantInt(x));
3826 	}
3827 
Int(RValue<Int> rhs)3828 	Int::Int(RValue<Int> rhs)
3829 	{
3830 		storeValue(rhs.value);
3831 	}
3832 
Int(RValue<UInt> rhs)3833 	Int::Int(RValue<UInt> rhs)
3834 	{
3835 		storeValue(rhs.value);
3836 	}
3837 
Int(const Int & rhs)3838 	Int::Int(const Int &rhs)
3839 	{
3840 		Value *value = rhs.loadValue();
3841 		storeValue(value);
3842 	}
3843 
Int(const Reference<Int> & rhs)3844 	Int::Int(const Reference<Int> &rhs)
3845 	{
3846 		Value *value = rhs.loadValue();
3847 		storeValue(value);
3848 	}
3849 
Int(const UInt & rhs)3850 	Int::Int(const UInt &rhs)
3851 	{
3852 		Value *value = rhs.loadValue();
3853 		storeValue(value);
3854 	}
3855 
Int(const Reference<UInt> & rhs)3856 	Int::Int(const Reference<UInt> &rhs)
3857 	{
3858 		Value *value = rhs.loadValue();
3859 		storeValue(value);
3860 	}
3861 
operator =(int rhs) const3862 	RValue<Int> Int::operator=(int rhs) const
3863 	{
3864 		return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3865 	}
3866 
operator =(RValue<Int> rhs) const3867 	RValue<Int> Int::operator=(RValue<Int> rhs) const
3868 	{
3869 		storeValue(rhs.value);
3870 
3871 		return rhs;
3872 	}
3873 
operator =(RValue<UInt> rhs) const3874 	RValue<Int> Int::operator=(RValue<UInt> rhs) const
3875 	{
3876 		storeValue(rhs.value);
3877 
3878 		return RValue<Int>(rhs);
3879 	}
3880 
operator =(const Int & rhs) const3881 	RValue<Int> Int::operator=(const Int &rhs) const
3882 	{
3883 		Value *value = rhs.loadValue();
3884 		storeValue(value);
3885 
3886 		return RValue<Int>(value);
3887 	}
3888 
operator =(const Reference<Int> & rhs) const3889 	RValue<Int> Int::operator=(const Reference<Int> &rhs) const
3890 	{
3891 		Value *value = rhs.loadValue();
3892 		storeValue(value);
3893 
3894 		return RValue<Int>(value);
3895 	}
3896 
operator =(const UInt & rhs) const3897 	RValue<Int> Int::operator=(const UInt &rhs) const
3898 	{
3899 		Value *value = rhs.loadValue();
3900 		storeValue(value);
3901 
3902 		return RValue<Int>(value);
3903 	}
3904 
operator =(const Reference<UInt> & rhs) const3905 	RValue<Int> Int::operator=(const Reference<UInt> &rhs) const
3906 	{
3907 		Value *value = rhs.loadValue();
3908 		storeValue(value);
3909 
3910 		return RValue<Int>(value);
3911 	}
3912 
operator +(RValue<Int> lhs,RValue<Int> rhs)3913 	RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3914 	{
3915 		return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3916 	}
3917 
operator -(RValue<Int> lhs,RValue<Int> rhs)3918 	RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3919 	{
3920 		return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3921 	}
3922 
operator *(RValue<Int> lhs,RValue<Int> rhs)3923 	RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3924 	{
3925 		return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3926 	}
3927 
operator /(RValue<Int> lhs,RValue<Int> rhs)3928 	RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3929 	{
3930 		return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3931 	}
3932 
operator %(RValue<Int> lhs,RValue<Int> rhs)3933 	RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3934 	{
3935 		return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3936 	}
3937 
operator &(RValue<Int> lhs,RValue<Int> rhs)3938 	RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3939 	{
3940 		return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3941 	}
3942 
operator |(RValue<Int> lhs,RValue<Int> rhs)3943 	RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3944 	{
3945 		return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3946 	}
3947 
operator ^(RValue<Int> lhs,RValue<Int> rhs)3948 	RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3949 	{
3950 		return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3951 	}
3952 
operator <<(RValue<Int> lhs,RValue<Int> rhs)3953 	RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3954 	{
3955 		return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3956 	}
3957 
operator >>(RValue<Int> lhs,RValue<Int> rhs)3958 	RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3959 	{
3960 		return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3961 	}
3962 
operator +=(const Int & lhs,RValue<Int> rhs)3963 	RValue<Int> operator+=(const Int &lhs, RValue<Int> rhs)
3964 	{
3965 		return lhs = lhs + rhs;
3966 	}
3967 
operator -=(const Int & lhs,RValue<Int> rhs)3968 	RValue<Int> operator-=(const Int &lhs, RValue<Int> rhs)
3969 	{
3970 		return lhs = lhs - rhs;
3971 	}
3972 
operator *=(const Int & lhs,RValue<Int> rhs)3973 	RValue<Int> operator*=(const Int &lhs, RValue<Int> rhs)
3974 	{
3975 		return lhs = lhs * rhs;
3976 	}
3977 
operator /=(const Int & lhs,RValue<Int> rhs)3978 	RValue<Int> operator/=(const Int &lhs, RValue<Int> rhs)
3979 	{
3980 		return lhs = lhs / rhs;
3981 	}
3982 
operator %=(const Int & lhs,RValue<Int> rhs)3983 	RValue<Int> operator%=(const Int &lhs, RValue<Int> rhs)
3984 	{
3985 		return lhs = lhs % rhs;
3986 	}
3987 
operator &=(const Int & lhs,RValue<Int> rhs)3988 	RValue<Int> operator&=(const Int &lhs, RValue<Int> rhs)
3989 	{
3990 		return lhs = lhs & rhs;
3991 	}
3992 
operator |=(const Int & lhs,RValue<Int> rhs)3993 	RValue<Int> operator|=(const Int &lhs, RValue<Int> rhs)
3994 	{
3995 		return lhs = lhs | rhs;
3996 	}
3997 
operator ^=(const Int & lhs,RValue<Int> rhs)3998 	RValue<Int> operator^=(const Int &lhs, RValue<Int> rhs)
3999 	{
4000 		return lhs = lhs ^ rhs;
4001 	}
4002 
operator <<=(const Int & lhs,RValue<Int> rhs)4003 	RValue<Int> operator<<=(const Int &lhs, RValue<Int> rhs)
4004 	{
4005 		return lhs = lhs << rhs;
4006 	}
4007 
operator >>=(const Int & lhs,RValue<Int> rhs)4008 	RValue<Int> operator>>=(const Int &lhs, RValue<Int> rhs)
4009 	{
4010 		return lhs = lhs >> rhs;
4011 	}
4012 
	// Unary plus: returns the operand unchanged.
	RValue<Int> operator+(RValue<Int> val)
	{
		return val;
	}
4017 
operator -(RValue<Int> val)4018 	RValue<Int> operator-(RValue<Int> val)
4019 	{
4020 		return RValue<Int>(Nucleus::createNeg(val.value));
4021 	}
4022 
operator ~(RValue<Int> val)4023 	RValue<Int> operator~(RValue<Int> val)
4024 	{
4025 		return RValue<Int>(Nucleus::createNot(val.value));
4026 	}
4027 
operator ++(const Int & val,int)4028 	RValue<Int> operator++(const Int &val, int)   // Post-increment
4029 	{
4030 		RValue<Int> res = val;
4031 
4032 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4033 		val.storeValue(inc);
4034 
4035 		return res;
4036 	}
4037 
operator ++(const Int & val)4038 	const Int &operator++(const Int &val)   // Pre-increment
4039 	{
4040 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
4041 		val.storeValue(inc);
4042 
4043 		return val;
4044 	}
4045 
operator --(const Int & val,int)4046 	RValue<Int> operator--(const Int &val, int)   // Post-decrement
4047 	{
4048 		RValue<Int> res = val;
4049 
4050 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4051 		val.storeValue(inc);
4052 
4053 		return res;
4054 	}
4055 
operator --(const Int & val)4056 	const Int &operator--(const Int &val)   // Pre-decrement
4057 	{
4058 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
4059 		val.storeValue(inc);
4060 
4061 		return val;
4062 	}
4063 
operator <(RValue<Int> lhs,RValue<Int> rhs)4064 	RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4065 	{
4066 		return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4067 	}
4068 
operator <=(RValue<Int> lhs,RValue<Int> rhs)4069 	RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4070 	{
4071 		return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4072 	}
4073 
operator >(RValue<Int> lhs,RValue<Int> rhs)4074 	RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4075 	{
4076 		return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4077 	}
4078 
operator >=(RValue<Int> lhs,RValue<Int> rhs)4079 	RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4080 	{
4081 		return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4082 	}
4083 
operator !=(RValue<Int> lhs,RValue<Int> rhs)4084 	RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4085 	{
4086 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4087 	}
4088 
operator ==(RValue<Int> lhs,RValue<Int> rhs)4089 	RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4090 	{
4091 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4092 	}
4093 
Max(RValue<Int> x,RValue<Int> y)4094 	RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4095 	{
4096 		return IfThenElse(x > y, x, y);
4097 	}
4098 
Min(RValue<Int> x,RValue<Int> y)4099 	RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4100 	{
4101 		return IfThenElse(x < y, x, y);
4102 	}
4103 
Clamp(RValue<Int> x,RValue<Int> min,RValue<Int> max)4104 	RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4105 	{
4106 		return Min(Max(x, min), max);
4107 	}
4108 
	// Converts a Float to an Int by delegating to x86::cvtss2si.
	// NOTE(review): presumably rounds according to the current SSE rounding mode - confirm against x86::cvtss2si.
	RValue<Int> RoundInt(RValue<Float> cast)
	{
		return x86::cvtss2si(cast);

	// Disabled alternative kept for reference (it refers to the parameter by an older name, 'val'):
	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
	}
4115 
getType()4116 	Type *Int::getType()
4117 	{
4118 		return Type::getInt32Ty(*Nucleus::getContext());
4119 	}
4120 
Long(RValue<Int> cast)4121 	Long::Long(RValue<Int> cast)
4122 	{
4123 
4124 
4125 		Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4126 
4127 		storeValue(integer);
4128 	}
4129 
Long(RValue<UInt> cast)4130 	Long::Long(RValue<UInt> cast)
4131 	{
4132 		Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4133 
4134 		storeValue(integer);
4135 	}
4136 
	// Default constructor. The body is empty, so no value is stored here.
	Long::Long()
	{
	}
4140 
Long(RValue<Long> rhs)4141 	Long::Long(RValue<Long> rhs)
4142 	{
4143 		storeValue(rhs.value);
4144 	}
4145 
operator =(int64_t rhs) const4146 	RValue<Long> Long::operator=(int64_t rhs) const
4147 	{
4148 		return RValue<Long>(storeValue(Nucleus::createConstantInt(rhs)));
4149 	}
4150 
operator =(RValue<Long> rhs) const4151 	RValue<Long> Long::operator=(RValue<Long> rhs) const
4152 	{
4153 		storeValue(rhs.value);
4154 
4155 		return rhs;
4156 	}
4157 
operator =(const Long & rhs) const4158 	RValue<Long> Long::operator=(const Long &rhs) const
4159 	{
4160 		Value *value = rhs.loadValue();
4161 		storeValue(value);
4162 
4163 		return RValue<Long>(value);
4164 	}
4165 
operator =(const Reference<Long> & rhs) const4166 	RValue<Long> Long::operator=(const Reference<Long> &rhs) const
4167 	{
4168 		Value *value = rhs.loadValue();
4169 		storeValue(value);
4170 
4171 		return RValue<Long>(value);
4172 	}
4173 
operator +(RValue<Long> lhs,RValue<Long> rhs)4174 	RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4175 	{
4176 		return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4177 	}
4178 
operator -(RValue<Long> lhs,RValue<Long> rhs)4179 	RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4180 	{
4181 		return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4182 	}
4183 
operator +=(const Long & lhs,RValue<Long> rhs)4184 	RValue<Long> operator+=(const Long &lhs, RValue<Long> rhs)
4185 	{
4186 		return lhs = lhs + rhs;
4187 	}
4188 
operator -=(const Long & lhs,RValue<Long> rhs)4189 	RValue<Long> operator-=(const Long &lhs, RValue<Long> rhs)
4190 	{
4191 		return lhs = lhs - rhs;
4192 	}
4193 
AddAtomic(RValue<Pointer<Long>> x,RValue<Long> y)4194 	RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4195 	{
4196 		return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4197 	}
4198 
getType()4199 	Type *Long::getType()
4200 	{
4201 		return Type::getInt64Ty(*Nucleus::getContext());
4202 	}
4203 
Long1(const RValue<UInt> cast)4204 	Long1::Long1(const RValue<UInt> cast)
4205 	{
4206 		Value *undefCast = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), cast.value, 0);
4207 		Value *zeroCast = Nucleus::createInsertElement(undefCast, Nucleus::createConstantInt(0), 1);
4208 
4209 		storeValue(Nucleus::createBitCast(zeroCast, Long1::getType()));
4210 	}
4211 
Long1(RValue<Long1> rhs)4212 	Long1::Long1(RValue<Long1> rhs)
4213 	{
4214 		storeValue(rhs.value);
4215 	}
4216 
getType()4217 	Type *Long1::getType()
4218 	{
4219 		if(CPUID::supportsMMX2())
4220 		{
4221 			return MMX::getType();
4222 		}
4223 		else
4224 		{
4225 			return VectorType::get(Long::getType(), 1);
4226 		}
4227 	}
4228 
UnpackHigh(RValue<Long2> x,RValue<Long2> y)4229 	RValue<Long2> UnpackHigh(RValue<Long2> x, RValue<Long2> y)
4230 	{
4231 		Constant *shuffle[2];
4232 		shuffle[0] = Nucleus::createConstantInt(1);
4233 		shuffle[1] = Nucleus::createConstantInt(3);
4234 
4235 		Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
4236 
4237 		return RValue<Long2>(packed);
4238 	}
4239 
getType()4240 	Type *Long2::getType()
4241 	{
4242 		return VectorType::get(Long::getType(), 2);
4243 	}
4244 
UInt(Argument<UInt> argument)4245 	UInt::UInt(Argument<UInt> argument)
4246 	{
4247 		storeValue(argument.value);
4248 	}
4249 
UInt(RValue<UShort> cast)4250 	UInt::UInt(RValue<UShort> cast)
4251 	{
4252 		Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4253 
4254 		storeValue(integer);
4255 	}
4256 
UInt(RValue<Long> cast)4257 	UInt::UInt(RValue<Long> cast)
4258 	{
4259 		Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4260 
4261 		storeValue(integer);
4262 	}
4263 
UInt(RValue<Float> cast)4264 	UInt::UInt(RValue<Float> cast)
4265 	{
4266 		Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
4267 
4268 		storeValue(integer);
4269 	}
4270 
	// Default constructor. The body is empty, so no value is stored here.
	UInt::UInt()
	{
	}
4274 
UInt(int x)4275 	UInt::UInt(int x)
4276 	{
4277 		storeValue(Nucleus::createConstantInt(x));
4278 	}
4279 
UInt(unsigned int x)4280 	UInt::UInt(unsigned int x)
4281 	{
4282 		storeValue(Nucleus::createConstantInt(x));
4283 	}
4284 
UInt(RValue<UInt> rhs)4285 	UInt::UInt(RValue<UInt> rhs)
4286 	{
4287 		storeValue(rhs.value);
4288 	}
4289 
UInt(RValue<Int> rhs)4290 	UInt::UInt(RValue<Int> rhs)
4291 	{
4292 		storeValue(rhs.value);
4293 	}
4294 
UInt(const UInt & rhs)4295 	UInt::UInt(const UInt &rhs)
4296 	{
4297 		Value *value = rhs.loadValue();
4298 		storeValue(value);
4299 	}
4300 
UInt(const Reference<UInt> & rhs)4301 	UInt::UInt(const Reference<UInt> &rhs)
4302 	{
4303 		Value *value = rhs.loadValue();
4304 		storeValue(value);
4305 	}
4306 
UInt(const Int & rhs)4307 	UInt::UInt(const Int &rhs)
4308 	{
4309 		Value *value = rhs.loadValue();
4310 		storeValue(value);
4311 	}
4312 
UInt(const Reference<Int> & rhs)4313 	UInt::UInt(const Reference<Int> &rhs)
4314 	{
4315 		Value *value = rhs.loadValue();
4316 		storeValue(value);
4317 	}
4318 
operator =(unsigned int rhs) const4319 	RValue<UInt> UInt::operator=(unsigned int rhs) const
4320 	{
4321 		return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4322 	}
4323 
operator =(RValue<UInt> rhs) const4324 	RValue<UInt> UInt::operator=(RValue<UInt> rhs) const
4325 	{
4326 		storeValue(rhs.value);
4327 
4328 		return rhs;
4329 	}
4330 
operator =(RValue<Int> rhs) const4331 	RValue<UInt> UInt::operator=(RValue<Int> rhs) const
4332 	{
4333 		storeValue(rhs.value);
4334 
4335 		return RValue<UInt>(rhs);
4336 	}
4337 
operator =(const UInt & rhs) const4338 	RValue<UInt> UInt::operator=(const UInt &rhs) const
4339 	{
4340 		Value *value = rhs.loadValue();
4341 		storeValue(value);
4342 
4343 		return RValue<UInt>(value);
4344 	}
4345 
operator =(const Reference<UInt> & rhs) const4346 	RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) const
4347 	{
4348 		Value *value = rhs.loadValue();
4349 		storeValue(value);
4350 
4351 		return RValue<UInt>(value);
4352 	}
4353 
operator =(const Int & rhs) const4354 	RValue<UInt> UInt::operator=(const Int &rhs) const
4355 	{
4356 		Value *value = rhs.loadValue();
4357 		storeValue(value);
4358 
4359 		return RValue<UInt>(value);
4360 	}
4361 
operator =(const Reference<Int> & rhs) const4362 	RValue<UInt> UInt::operator=(const Reference<Int> &rhs) const
4363 	{
4364 		Value *value = rhs.loadValue();
4365 		storeValue(value);
4366 
4367 		return RValue<UInt>(value);
4368 	}
4369 
operator +(RValue<UInt> lhs,RValue<UInt> rhs)4370 	RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4371 	{
4372 		return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4373 	}
4374 
operator -(RValue<UInt> lhs,RValue<UInt> rhs)4375 	RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4376 	{
4377 		return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4378 	}
4379 
operator *(RValue<UInt> lhs,RValue<UInt> rhs)4380 	RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4381 	{
4382 		return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4383 	}
4384 
operator /(RValue<UInt> lhs,RValue<UInt> rhs)4385 	RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4386 	{
4387 		return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4388 	}
4389 
operator %(RValue<UInt> lhs,RValue<UInt> rhs)4390 	RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4391 	{
4392 		return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4393 	}
4394 
operator &(RValue<UInt> lhs,RValue<UInt> rhs)4395 	RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4396 	{
4397 		return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4398 	}
4399 
operator |(RValue<UInt> lhs,RValue<UInt> rhs)4400 	RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4401 	{
4402 		return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4403 	}
4404 
operator ^(RValue<UInt> lhs,RValue<UInt> rhs)4405 	RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4406 	{
4407 		return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4408 	}
4409 
operator <<(RValue<UInt> lhs,RValue<UInt> rhs)4410 	RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4411 	{
4412 		return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4413 	}
4414 
operator >>(RValue<UInt> lhs,RValue<UInt> rhs)4415 	RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4416 	{
4417 		return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4418 	}
4419 
operator +=(const UInt & lhs,RValue<UInt> rhs)4420 	RValue<UInt> operator+=(const UInt &lhs, RValue<UInt> rhs)
4421 	{
4422 		return lhs = lhs + rhs;
4423 	}
4424 
operator -=(const UInt & lhs,RValue<UInt> rhs)4425 	RValue<UInt> operator-=(const UInt &lhs, RValue<UInt> rhs)
4426 	{
4427 		return lhs = lhs - rhs;
4428 	}
4429 
operator *=(const UInt & lhs,RValue<UInt> rhs)4430 	RValue<UInt> operator*=(const UInt &lhs, RValue<UInt> rhs)
4431 	{
4432 		return lhs = lhs * rhs;
4433 	}
4434 
operator /=(const UInt & lhs,RValue<UInt> rhs)4435 	RValue<UInt> operator/=(const UInt &lhs, RValue<UInt> rhs)
4436 	{
4437 		return lhs = lhs / rhs;
4438 	}
4439 
operator %=(const UInt & lhs,RValue<UInt> rhs)4440 	RValue<UInt> operator%=(const UInt &lhs, RValue<UInt> rhs)
4441 	{
4442 		return lhs = lhs % rhs;
4443 	}
4444 
operator &=(const UInt & lhs,RValue<UInt> rhs)4445 	RValue<UInt> operator&=(const UInt &lhs, RValue<UInt> rhs)
4446 	{
4447 		return lhs = lhs & rhs;
4448 	}
4449 
operator |=(const UInt & lhs,RValue<UInt> rhs)4450 	RValue<UInt> operator|=(const UInt &lhs, RValue<UInt> rhs)
4451 	{
4452 		return lhs = lhs | rhs;
4453 	}
4454 
operator ^=(const UInt & lhs,RValue<UInt> rhs)4455 	RValue<UInt> operator^=(const UInt &lhs, RValue<UInt> rhs)
4456 	{
4457 		return lhs = lhs ^ rhs;
4458 	}
4459 
operator <<=(const UInt & lhs,RValue<UInt> rhs)4460 	RValue<UInt> operator<<=(const UInt &lhs, RValue<UInt> rhs)
4461 	{
4462 		return lhs = lhs << rhs;
4463 	}
4464 
operator >>=(const UInt & lhs,RValue<UInt> rhs)4465 	RValue<UInt> operator>>=(const UInt &lhs, RValue<UInt> rhs)
4466 	{
4467 		return lhs = lhs >> rhs;
4468 	}
4469 
	// Unary plus: returns the operand unchanged.
	RValue<UInt> operator+(RValue<UInt> val)
	{
		return val;
	}
4474 
operator -(RValue<UInt> val)4475 	RValue<UInt> operator-(RValue<UInt> val)
4476 	{
4477 		return RValue<UInt>(Nucleus::createNeg(val.value));
4478 	}
4479 
operator ~(RValue<UInt> val)4480 	RValue<UInt> operator~(RValue<UInt> val)
4481 	{
4482 		return RValue<UInt>(Nucleus::createNot(val.value));
4483 	}
4484 
operator ++(const UInt & val,int)4485 	RValue<UInt> operator++(const UInt &val, int)   // Post-increment
4486 	{
4487 		RValue<UInt> res = val;
4488 
4489 		Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4490 		val.storeValue(inc);
4491 
4492 		return res;
4493 	}
4494 
operator ++(const UInt & val)4495 	const UInt &operator++(const UInt &val)   // Pre-increment
4496 	{
4497 		Value *inc = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));
4498 		val.storeValue(inc);
4499 
4500 		return val;
4501 	}
4502 
operator --(const UInt & val,int)4503 	RValue<UInt> operator--(const UInt &val, int)   // Post-decrement
4504 	{
4505 		RValue<UInt> res = val;
4506 
4507 		Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4508 		val.storeValue(inc);
4509 
4510 		return res;
4511 	}
4512 
operator --(const UInt & val)4513 	const UInt &operator--(const UInt &val)   // Pre-decrement
4514 	{
4515 		Value *inc = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));
4516 		val.storeValue(inc);
4517 
4518 		return val;
4519 	}
4520 
Max(RValue<UInt> x,RValue<UInt> y)4521 	RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4522 	{
4523 		return IfThenElse(x > y, x, y);
4524 	}
4525 
Min(RValue<UInt> x,RValue<UInt> y)4526 	RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4527 	{
4528 		return IfThenElse(x < y, x, y);
4529 	}
4530 
Clamp(RValue<UInt> x,RValue<UInt> min,RValue<UInt> max)4531 	RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4532 	{
4533 		return Min(Max(x, min), max);
4534 	}
4535 
operator <(RValue<UInt> lhs,RValue<UInt> rhs)4536 	RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4537 	{
4538 		return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4539 	}
4540 
operator <=(RValue<UInt> lhs,RValue<UInt> rhs)4541 	RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4542 	{
4543 		return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4544 	}
4545 
operator >(RValue<UInt> lhs,RValue<UInt> rhs)4546 	RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4547 	{
4548 		return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4549 	}
4550 
operator >=(RValue<UInt> lhs,RValue<UInt> rhs)4551 	RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4552 	{
4553 		return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4554 	}
4555 
operator !=(RValue<UInt> lhs,RValue<UInt> rhs)4556 	RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4557 	{
4558 		return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4559 	}
4560 
operator ==(RValue<UInt> lhs,RValue<UInt> rhs)4561 	RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4562 	{
4563 		return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4564 	}
4565 
4566 //	RValue<UInt> RoundUInt(RValue<Float> cast)
4567 //	{
4568 //		return x86::cvtss2si(val);   // FIXME: Unsigned
4569 //
4570 //	//	return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4571 //	}
4572 
getType()4573 	Type *UInt::getType()
4574 	{
4575 		return Type::getInt32Ty(*Nucleus::getContext());
4576 	}
4577 
4578 //	Int2::Int2(RValue<Int> cast)
4579 //	{
4580 //		Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4581 //		Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4582 //
4583 //		Constant *shuffle[2];
4584 //		shuffle[0] = Nucleus::createConstantInt(0);
4585 //		shuffle[1] = Nucleus::createConstantInt(0);
4586 //
4587 //		Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4588 //
4589 //		storeValue(replicate);
4590 //	}
4591 
Int2(RValue<Int4> cast)4592 	Int2::Int2(RValue<Int4> cast)
4593 	{
4594 		Value *long2 = Nucleus::createBitCast(cast.value, Long2::getType());
4595 		Value *element = Nucleus::createExtractElement(long2, 0);
4596 		Value *int2 = Nucleus::createBitCast(element, Int2::getType());
4597 
4598 		storeValue(int2);
4599 	}
4600 
	// Default constructor. No value is stored here; the only statement is a disabled assignment.
	Int2::Int2()
	{
	//	xy.parent = this;
	}
4605 
Int2(int x,int y)4606 	Int2::Int2(int x, int y)
4607 	{
4608 	//	xy.parent = this;
4609 
4610 		Constant *constantVector[2];
4611 		constantVector[0] = Nucleus::createConstantInt(x);
4612 		constantVector[1] = Nucleus::createConstantInt(y);
4613 		Value *vector = Nucleus::createConstantVector(constantVector, 2);
4614 
4615 		storeValue(Nucleus::createBitCast(vector, getType()));
4616 	}
4617 
Int2(RValue<Int2> rhs)4618 	Int2::Int2(RValue<Int2> rhs)
4619 	{
4620 	//	xy.parent = this;
4621 
4622 		storeValue(rhs.value);
4623 	}
4624 
Int2(const Int2 & rhs)4625 	Int2::Int2(const Int2 &rhs)
4626 	{
4627 	//	xy.parent = this;
4628 
4629 		Value *value = rhs.loadValue();
4630 		storeValue(value);
4631 	}
4632 
Int2(const Reference<Int2> & rhs)4633 	Int2::Int2(const Reference<Int2> &rhs)
4634 	{
4635 	//	xy.parent = this;
4636 
4637 		Value *value = rhs.loadValue();
4638 		storeValue(value);
4639 	}
4640 
Int2(RValue<Int> lo,RValue<Int> hi)4641 	Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4642 	{
4643 		if(CPUID::supportsMMX2())
4644 		{
4645 			// movd mm0, lo
4646 			// movd mm1, hi
4647 			// punpckldq mm0, mm1
4648 			storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
4649 		}
4650 		else
4651 		{
4652 			Constant *shuffle[2];
4653 			shuffle[0] = Nucleus::createConstantInt(0);
4654 			shuffle[1] = Nucleus::createConstantInt(1);
4655 
4656 			Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
4657 
4658 			storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4659 		}
4660 	}
4661 
operator =(RValue<Int2> rhs) const4662 	RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
4663 	{
4664 		storeValue(rhs.value);
4665 
4666 		return rhs;
4667 	}
4668 
operator =(const Int2 & rhs) const4669 	RValue<Int2> Int2::operator=(const Int2 &rhs) const
4670 	{
4671 		Value *value = rhs.loadValue();
4672 		storeValue(value);
4673 
4674 		return RValue<Int2>(value);
4675 	}
4676 
operator =(const Reference<Int2> & rhs) const4677 	RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) const
4678 	{
4679 		Value *value = rhs.loadValue();
4680 		storeValue(value);
4681 
4682 		return RValue<Int2>(value);
4683 	}
4684 
operator +(RValue<Int2> lhs,RValue<Int2> rhs)4685 	RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4686 	{
4687 		if(CPUID::supportsMMX2())
4688 		{
4689 			return x86::paddd(lhs, rhs);
4690 		}
4691 		else
4692 		{
4693 			return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4694 		}
4695 	}
4696 
operator -(RValue<Int2> lhs,RValue<Int2> rhs)4697 	RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4698 	{
4699 		if(CPUID::supportsMMX2())
4700 		{
4701 			return x86::psubd(lhs, rhs);
4702 		}
4703 		else
4704 		{
4705 			return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4706 		}
4707 	}
4708 
4709 //	RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4710 //	{
4711 //		return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4712 //	}
4713 
4714 //	RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4715 //	{
4716 //		return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4717 //	}
4718 
4719 //	RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4720 //	{
4721 //		return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4722 //	}
4723 
operator &(RValue<Int2> lhs,RValue<Int2> rhs)4724 	RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4725 	{
4726 		if(CPUID::supportsMMX2())
4727 		{
4728 			return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4729 		}
4730 		else
4731 		{
4732 			return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4733 		}
4734 	}
4735 
operator |(RValue<Int2> lhs,RValue<Int2> rhs)4736 	RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4737 	{
4738 		if(CPUID::supportsMMX2())
4739 		{
4740 			return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4741 		}
4742 		else
4743 		{
4744 			return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4745 		}
4746 	}
4747 
operator ^(RValue<Int2> lhs,RValue<Int2> rhs)4748 	RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4749 	{
4750 		if(CPUID::supportsMMX2())
4751 		{
4752 			return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4753 		}
4754 		else
4755 		{
4756 			return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4757 		}
4758 	}
4759 
	// Left shift of an Int2 by an immediate count, delegated to x86::pslld.
	// NOTE(review): unlike operator+ there is no CPUID::supportsMMX2() check here; the
	// generic Nucleus::createShl form below is disabled - confirm this is intended.
	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
	{
	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));

		return x86::pslld(lhs, rhs);
	}
4766 
	// Right shift of an Int2 by an immediate count, delegated to x86::psrad.
	// NOTE(review): unlike operator+ there is no CPUID::supportsMMX2() check here; the
	// generic Nucleus::createAShr form below is disabled - confirm this is intended.
	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
	{
	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));

		return x86::psrad(lhs, rhs);
	}
4773 
	// Left shift of an Int2 by a count held in a Long1, delegated to x86::pslld.
	// NOTE(review): unlike operator+ there is no CPUID::supportsMMX2() check here; the
	// generic Nucleus::createShl form below is disabled - confirm this is intended.
	RValue<Int2> operator<<(RValue<Int2> lhs, RValue<Long1> rhs)
	{
	//	return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));

		return x86::pslld(lhs, rhs);
	}
4780 
	// Right shift of an Int2 by a count held in a Long1, delegated to x86::psrad.
	// NOTE(review): unlike operator+ there is no CPUID::supportsMMX2() check here; the
	// generic Nucleus::createAShr form below is disabled - confirm this is intended.
	RValue<Int2> operator>>(RValue<Int2> lhs, RValue<Long1> rhs)
	{
	//	return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));

		return x86::psrad(lhs, rhs);
	}
4787 
operator +=(const Int2 & lhs,RValue<Int2> rhs)4788 	RValue<Int2> operator+=(const Int2 &lhs, RValue<Int2> rhs)
4789 	{
4790 		return lhs = lhs + rhs;
4791 	}
4792 
operator -=(const Int2 & lhs,RValue<Int2> rhs)4793 	RValue<Int2> operator-=(const Int2 &lhs, RValue<Int2> rhs)
4794 	{
4795 		return lhs = lhs - rhs;
4796 	}
4797 
4798 //	RValue<Int2> operator*=(const Int2 &lhs, RValue<Int2> rhs)
4799 //	{
4800 //		return lhs = lhs * rhs;
4801 //	}
4802 
4803 //	RValue<Int2> operator/=(const Int2 &lhs, RValue<Int2> rhs)
4804 //	{
4805 //		return lhs = lhs / rhs;
4806 //	}
4807 
4808 //	RValue<Int2> operator%=(const Int2 &lhs, RValue<Int2> rhs)
4809 //	{
4810 //		return lhs = lhs % rhs;
4811 //	}
4812 
operator &=(const Int2 & lhs,RValue<Int2> rhs)4813 	RValue<Int2> operator&=(const Int2 &lhs, RValue<Int2> rhs)
4814 	{
4815 		return lhs = lhs & rhs;
4816 	}
4817 
operator |=(const Int2 & lhs,RValue<Int2> rhs)4818 	RValue<Int2> operator|=(const Int2 &lhs, RValue<Int2> rhs)
4819 	{
4820 		return lhs = lhs | rhs;
4821 	}
4822 
operator ^=(const Int2 & lhs,RValue<Int2> rhs)4823 	RValue<Int2> operator^=(const Int2 &lhs, RValue<Int2> rhs)
4824 	{
4825 		return lhs = lhs ^ rhs;
4826 	}
4827 
operator <<=(const Int2 & lhs,unsigned char rhs)4828 	RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs)
4829 	{
4830 		return lhs = lhs << rhs;
4831 	}
4832 
operator >>=(const Int2 & lhs,unsigned char rhs)4833 	RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs)
4834 	{
4835 		return lhs = lhs >> rhs;
4836 	}
4837 
operator <<=(const Int2 & lhs,RValue<Long1> rhs)4838 	RValue<Int2> operator<<=(const Int2 &lhs, RValue<Long1> rhs)
4839 	{
4840 		return lhs = lhs << rhs;
4841 	}
4842 
operator >>=(const Int2 & lhs,RValue<Long1> rhs)4843 	RValue<Int2> operator>>=(const Int2 &lhs, RValue<Long1> rhs)
4844 	{
4845 		return lhs = lhs >> rhs;
4846 	}
4847 
4848 //	RValue<Int2> operator+(RValue<Int2> val)
4849 //	{
4850 //		return val;
4851 //	}
4852 
4853 //	RValue<Int2> operator-(RValue<Int2> val)
4854 //	{
4855 //		return RValue<Int2>(Nucleus::createNeg(val.value));
4856 //	}
4857 
operator ~(RValue<Int2> val)4858 	RValue<Int2> operator~(RValue<Int2> val)
4859 	{
4860 		if(CPUID::supportsMMX2())
4861 		{
4862 			return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
4863 		}
4864 		else
4865 		{
4866 			return RValue<Int2>(Nucleus::createNot(val.value));
4867 		}
4868 	}
4869 
UnpackLow(RValue<Int2> x,RValue<Int2> y)4870 	RValue<Long1> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4871 	{
4872 		if(CPUID::supportsMMX2())
4873 		{
4874 			return x86::punpckldq(x, y);
4875 		}
4876 		else
4877 		{
4878 			Constant *shuffle[2];
4879 			shuffle[0] = Nucleus::createConstantInt(0);
4880 			shuffle[1] = Nucleus::createConstantInt(2);
4881 
4882 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
4883 
4884 			return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
4885 		}
4886 	}
4887 
UnpackHigh(RValue<Int2> x,RValue<Int2> y)4888 	RValue<Long1> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4889 	{
4890 		if(CPUID::supportsMMX2())
4891 		{
4892 			return x86::punpckhdq(x, y);
4893 		}
4894 		else
4895 		{
4896 			Constant *shuffle[2];
4897 			shuffle[0] = Nucleus::createConstantInt(1);
4898 			shuffle[1] = Nucleus::createConstantInt(3);
4899 
4900 			Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
4901 
4902 			return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
4903 		}
4904 	}
4905 
Extract(RValue<Int2> val,int i)4906 	RValue<Int> Extract(RValue<Int2> val, int i)
4907 	{
4908 		if(false)   // FIXME: LLVM does not generate optimal code
4909 		{
4910 			return RValue<Int>(Nucleus::createExtractElement(val.value, i));
4911 		}
4912 		else
4913 		{
4914 			if(i == 0)
4915 			{
4916 				return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), 0));
4917 			}
4918 			else
4919 			{
4920 				Int2 val2 = As<Int2>(UnpackHigh(val, val));
4921 
4922 				return Extract(val2, 0);
4923 			}
4924 		}
4925 	}
4926 
Insert(RValue<Int2> val,RValue<Int> element,int i)4927 	RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4928 	{
4929 		return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), element.value, i), Int2::getType()));
4930 	}
4931 
getType()4932 	Type *Int2::getType()
4933 	{
4934 		if(CPUID::supportsMMX2())
4935 		{
4936 			return MMX::getType();
4937 		}
4938 		else
4939 		{
4940 			return VectorType::get(Int::getType(), 2);
4941 		}
4942 	}
4943 
	// Default constructor. The body stores no value.
	UInt2::UInt2()
	{
	//	xy.parent = this;
	}
4948 
UInt2(unsigned int x,unsigned int y)4949 	UInt2::UInt2(unsigned int x, unsigned int y)
4950 	{
4951 	//	xy.parent = this;
4952 
4953 		Constant *constantVector[2];
4954 		constantVector[0] = Nucleus::createConstantInt(x);
4955 		constantVector[1] = Nucleus::createConstantInt(y);
4956 		Value *vector = Nucleus::createConstantVector(constantVector, 2);
4957 
4958 		storeValue(Nucleus::createBitCast(vector, getType()));
4959 	}
4960 
	// Constructs a UInt2 by storing the value carried by rhs.
	UInt2::UInt2(RValue<UInt2> rhs)
	{
	//	xy.parent = this;

		storeValue(rhs.value);
	}
4967 
UInt2(const UInt2 & rhs)4968 	UInt2::UInt2(const UInt2 &rhs)
4969 	{
4970 	//	xy.parent = this;
4971 
4972 		Value *value = rhs.loadValue();
4973 		storeValue(value);
4974 	}
4975 
UInt2(const Reference<UInt2> & rhs)4976 	UInt2::UInt2(const Reference<UInt2> &rhs)
4977 	{
4978 	//	xy.parent = this;
4979 
4980 		Value *value = rhs.loadValue();
4981 		storeValue(value);
4982 	}
4983 
	// Assigns an rvalue: stores rhs into this variable and returns rhs.
	RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) const
	{
		storeValue(rhs.value);

		return rhs;
	}
4990 
operator =(const UInt2 & rhs) const4991 	RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const
4992 	{
4993 		Value *value = rhs.loadValue();
4994 		storeValue(value);
4995 
4996 		return RValue<UInt2>(value);
4997 	}
4998 
operator =(const Reference<UInt2> & rhs) const4999 	RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) const
5000 	{
5001 		Value *value = rhs.loadValue();
5002 		storeValue(value);
5003 
5004 		return RValue<UInt2>(value);
5005 	}
5006 
operator +(RValue<UInt2> lhs,RValue<UInt2> rhs)5007 	RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5008 	{
5009 		if(CPUID::supportsMMX2())
5010 		{
5011 			return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
5012 		}
5013 		else
5014 		{
5015 			return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5016 		}
5017 	}
5018 
operator -(RValue<UInt2> lhs,RValue<UInt2> rhs)5019 	RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5020 	{
5021 		if(CPUID::supportsMMX2())
5022 		{
5023 			return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
5024 		}
5025 		else
5026 		{
5027 			return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5028 		}
5029 	}
5030 
5031 //	RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5032 //	{
5033 //		return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5034 //	}
5035 
5036 //	RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5037 //	{
5038 //		return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5039 //	}
5040 
5041 //	RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5042 //	{
5043 //		return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5044 //	}
5045 
operator &(RValue<UInt2> lhs,RValue<UInt2> rhs)5046 	RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5047 	{
5048 		if(CPUID::supportsMMX2())
5049 		{
5050 			return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
5051 		}
5052 		else
5053 		{
5054 			return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5055 		}
5056 	}
5057 
operator |(RValue<UInt2> lhs,RValue<UInt2> rhs)5058 	RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5059 	{
5060 		if(CPUID::supportsMMX2())
5061 		{
5062 			return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
5063 		}
5064 		else
5065 		{
5066 			return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5067 		}
5068 	}
5069 
operator ^(RValue<UInt2> lhs,RValue<UInt2> rhs)5070 	RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5071 	{
5072 		if(CPUID::supportsMMX2())
5073 		{
5074 			return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
5075 		}
5076 		else
5077 		{
5078 			return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5079 		}
5080 	}
5081 
operator <<(RValue<UInt2> lhs,unsigned char rhs)5082 	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5083 	{
5084 	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5085 
5086 		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5087 	}
5088 
operator >>(RValue<UInt2> lhs,unsigned char rhs)5089 	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5090 	{
5091 	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5092 
5093 		return x86::psrld(lhs, rhs);
5094 	}
5095 
operator <<(RValue<UInt2> lhs,RValue<Long1> rhs)5096 	RValue<UInt2> operator<<(RValue<UInt2> lhs, RValue<Long1> rhs)
5097 	{
5098 	//	return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5099 
5100 		return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5101 	}
5102 
operator >>(RValue<UInt2> lhs,RValue<Long1> rhs)5103 	RValue<UInt2> operator>>(RValue<UInt2> lhs, RValue<Long1> rhs)
5104 	{
5105 	//	return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5106 
5107 		return x86::psrld(lhs, rhs);
5108 	}
5109 
operator +=(const UInt2 & lhs,RValue<UInt2> rhs)5110 	RValue<UInt2> operator+=(const UInt2 &lhs, RValue<UInt2> rhs)
5111 	{
5112 		return lhs = lhs + rhs;
5113 	}
5114 
operator -=(const UInt2 & lhs,RValue<UInt2> rhs)5115 	RValue<UInt2> operator-=(const UInt2 &lhs, RValue<UInt2> rhs)
5116 	{
5117 		return lhs = lhs - rhs;
5118 	}
5119 
5120 //	RValue<UInt2> operator*=(const UInt2 &lhs, RValue<UInt2> rhs)
5121 //	{
5122 //		return lhs = lhs * rhs;
5123 //	}
5124 
5125 //	RValue<UInt2> operator/=(const UInt2 &lhs, RValue<UInt2> rhs)
5126 //	{
5127 //		return lhs = lhs / rhs;
5128 //	}
5129 
5130 //	RValue<UInt2> operator%=(const UInt2 &lhs, RValue<UInt2> rhs)
5131 //	{
5132 //		return lhs = lhs % rhs;
5133 //	}
5134 
operator &=(const UInt2 & lhs,RValue<UInt2> rhs)5135 	RValue<UInt2> operator&=(const UInt2 &lhs, RValue<UInt2> rhs)
5136 	{
5137 		return lhs = lhs & rhs;
5138 	}
5139 
operator |=(const UInt2 & lhs,RValue<UInt2> rhs)5140 	RValue<UInt2> operator|=(const UInt2 &lhs, RValue<UInt2> rhs)
5141 	{
5142 		return lhs = lhs | rhs;
5143 	}
5144 
operator ^=(const UInt2 & lhs,RValue<UInt2> rhs)5145 	RValue<UInt2> operator^=(const UInt2 &lhs, RValue<UInt2> rhs)
5146 	{
5147 		return lhs = lhs ^ rhs;
5148 	}
5149 
operator <<=(const UInt2 & lhs,unsigned char rhs)5150 	RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs)
5151 	{
5152 		return lhs = lhs << rhs;
5153 	}
5154 
operator >>=(const UInt2 & lhs,unsigned char rhs)5155 	RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs)
5156 	{
5157 		return lhs = lhs >> rhs;
5158 	}
5159 
operator <<=(const UInt2 & lhs,RValue<Long1> rhs)5160 	RValue<UInt2> operator<<=(const UInt2 &lhs, RValue<Long1> rhs)
5161 	{
5162 		return lhs = lhs << rhs;
5163 	}
5164 
operator >>=(const UInt2 & lhs,RValue<Long1> rhs)5165 	RValue<UInt2> operator>>=(const UInt2 &lhs, RValue<Long1> rhs)
5166 	{
5167 		return lhs = lhs >> rhs;
5168 	}
5169 
5170 //	RValue<UInt2> operator+(RValue<UInt2> val)
5171 //	{
5172 //		return val;
5173 //	}
5174 
5175 //	RValue<UInt2> operator-(RValue<UInt2> val)
5176 //	{
5177 //		return RValue<UInt2>(Nucleus::createNeg(val.value));
5178 //	}
5179 
operator ~(RValue<UInt2> val)5180 	RValue<UInt2> operator~(RValue<UInt2> val)
5181 	{
5182 		if(CPUID::supportsMMX2())
5183 		{
5184 			return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
5185 		}
5186 		else
5187 		{
5188 			return RValue<UInt2>(Nucleus::createNot(val.value));
5189 		}
5190 	}
5191 
getType()5192 	Type *UInt2::getType()
5193 	{
5194 		if(CPUID::supportsMMX2())
5195 		{
5196 			return MMX::getType();
5197 		}
5198 		else
5199 		{
5200 			return VectorType::get(UInt::getType(), 2);
5201 		}
5202 	}
5203 
Int4(RValue<Float4> cast)5204 	Int4::Int4(RValue<Float4> cast)
5205 	{
5206 	//	xyzw.parent = this;
5207 
5208 		Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5209 
5210 		storeValue(xyzw);
5211 	}
5212 
Int4(RValue<Short4> cast)5213 	Int4::Int4(RValue<Short4> cast)
5214 	{
5215 		Value *long2 = UndefValue::get(Long2::getType());
5216 		Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5217 		long2 = Nucleus::createInsertElement(long2, element, 0);
5218 		RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5219 
5220 		if(CPUID::supportsSSE4_1())
5221 		{
5222 			storeValue(x86::pmovsxwd(vector).value);
5223 		}
5224 		else
5225 		{
5226 			Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5227 
5228 			Constant *swizzle[8];
5229 			swizzle[0] = Nucleus::createConstantInt(0);
5230 			swizzle[1] = Nucleus::createConstantInt(0);
5231 			swizzle[2] = Nucleus::createConstantInt(1);
5232 			swizzle[3] = Nucleus::createConstantInt(1);
5233 			swizzle[4] = Nucleus::createConstantInt(2);
5234 			swizzle[5] = Nucleus::createConstantInt(2);
5235 			swizzle[6] = Nucleus::createConstantInt(3);
5236 			swizzle[7] = Nucleus::createConstantInt(3);
5237 
5238 			Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 8));
5239 			Value *d = Nucleus::createBitCast(c, Int4::getType());
5240 			storeValue(d);
5241 
5242 			// Each Short is packed into each Int in the (Short | Short) format.
5243 			// Shifting by 16 will retrieve the original Short value.
5244 			// Shitfing an Int will propagate the sign bit, which will work
5245 			// for both positive and negative values of a Short.
5246 			*this >>= 16;
5247 		}
5248 	}
5249 
Int4(RValue<UShort4> cast)5250 	Int4::Int4(RValue<UShort4> cast)
5251 	{
5252 		Value *long2 = UndefValue::get(Long2::getType());
5253 		Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5254 		long2 = Nucleus::createInsertElement(long2, element, 0);
5255 		RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5256 
5257 		if(CPUID::supportsSSE4_1())
5258 		{
5259 			storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
5260 		}
5261 		else
5262 		{
5263 			Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5264 
5265 			Constant *swizzle[8];
5266 			swizzle[0] = Nucleus::createConstantInt(0);
5267 			swizzle[1] = Nucleus::createConstantInt(8);
5268 			swizzle[2] = Nucleus::createConstantInt(1);
5269 			swizzle[3] = Nucleus::createConstantInt(9);
5270 			swizzle[4] = Nucleus::createConstantInt(2);
5271 			swizzle[5] = Nucleus::createConstantInt(10);
5272 			swizzle[6] = Nucleus::createConstantInt(3);
5273 			swizzle[7] = Nucleus::createConstantInt(11);
5274 
5275 			Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle, 8));
5276 			Value *d = Nucleus::createBitCast(c, Int4::getType());
5277 			storeValue(d);
5278 		}
5279 	}
5280 
	// Default constructor. The body stores no value.
	Int4::Int4()
	{
	//	xyzw.parent = this;
	}
5285 
	// Constructs a constant Int4 with all four elements set to xyzw.
	Int4::Int4(int xyzw)
	{
		constant(xyzw, xyzw, xyzw, xyzw);
	}
5290 
	// Constructs a constant Int4 with first element x and the remaining three set to yzw.
	Int4::Int4(int x, int yzw)
	{
		constant(x, yzw, yzw, yzw);
	}
5295 
	// Constructs a constant Int4 with elements x, y and the last two set to zw.
	Int4::Int4(int x, int y, int zw)
	{
		constant(x, y, zw, zw);
	}
5300 
	// Constructs a constant Int4 with elements (x, y, z, w).
	Int4::Int4(int x, int y, int z, int w)
	{
		constant(x, y, z, w);
	}
5305 
constant(int x,int y,int z,int w)5306 	void Int4::constant(int x, int y, int z, int w)
5307 	{
5308 	//	xyzw.parent = this;
5309 
5310 		Constant *constantVector[4];
5311 		constantVector[0] = Nucleus::createConstantInt(x);
5312 		constantVector[1] = Nucleus::createConstantInt(y);
5313 		constantVector[2] = Nucleus::createConstantInt(z);
5314 		constantVector[3] = Nucleus::createConstantInt(w);
5315 
5316 		storeValue(Nucleus::createConstantVector(constantVector, 4));
5317 	}
5318 
	// Constructs an Int4 by storing the value carried by rhs.
	Int4::Int4(RValue<Int4> rhs)
	{
	//	xyzw.parent = this;

		storeValue(rhs.value);
	}
5325 
Int4(const Int4 & rhs)5326 	Int4::Int4(const Int4 &rhs)
5327 	{
5328 	//	xyzw.parent = this;
5329 
5330 		Value *value = rhs.loadValue();
5331 		storeValue(value);
5332 	}
5333 
Int4(const Reference<Int4> & rhs)5334 	Int4::Int4(const Reference<Int4> &rhs)
5335 	{
5336 	//	xyzw.parent = this;
5337 
5338 		Value *value = rhs.loadValue();
5339 		storeValue(value);
5340 	}
5341 
	// Constructs an Int4 from a UInt4 rvalue by storing its value as-is (no conversion is emitted here).
	Int4::Int4(RValue<UInt4> rhs)
	{
	//	xyzw.parent = this;

		storeValue(rhs.value);
	}
5348 
Int4(const UInt4 & rhs)5349 	Int4::Int4(const UInt4 &rhs)
5350 	{
5351 	//	xyzw.parent = this;
5352 
5353 		Value *value = rhs.loadValue();
5354 		storeValue(value);
5355 	}
5356 
Int4(const Reference<UInt4> & rhs)5357 	Int4::Int4(const Reference<UInt4> &rhs)
5358 	{
5359 	//	xyzw.parent = this;
5360 
5361 		Value *value = rhs.loadValue();
5362 		storeValue(value);
5363 	}
5364 
Int4(RValue<Int2> lo,RValue<Int2> hi)5365 	Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5366 	{
5367 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5368 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5369 
5370 		Value *long2 = UndefValue::get(Long2::getType());
5371 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
5372 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5373 		Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
5374 
5375 		storeValue(int4);
5376 	}
5377 
	// Assigns an rvalue: stores rhs into this variable and returns rhs.
	RValue<Int4> Int4::operator=(RValue<Int4> rhs) const
	{
		storeValue(rhs.value);

		return rhs;
	}
5384 
operator =(const Int4 & rhs) const5385 	RValue<Int4> Int4::operator=(const Int4 &rhs) const
5386 	{
5387 		Value *value = rhs.loadValue();
5388 		storeValue(value);
5389 
5390 		return RValue<Int4>(value);
5391 	}
5392 
operator =(const Reference<Int4> & rhs) const5393 	RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) const
5394 	{
5395 		Value *value = rhs.loadValue();
5396 		storeValue(value);
5397 
5398 		return RValue<Int4>(value);
5399 	}
5400 
operator +(RValue<Int4> lhs,RValue<Int4> rhs)5401 	RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5402 	{
5403 		return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5404 	}
5405 
operator -(RValue<Int4> lhs,RValue<Int4> rhs)5406 	RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5407 	{
5408 		return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5409 	}
5410 
operator *(RValue<Int4> lhs,RValue<Int4> rhs)5411 	RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5412 	{
5413 		return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5414 	}
5415 
operator /(RValue<Int4> lhs,RValue<Int4> rhs)5416 	RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5417 	{
5418 		return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5419 	}
5420 
operator %(RValue<Int4> lhs,RValue<Int4> rhs)5421 	RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5422 	{
5423 		return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5424 	}
5425 
operator &(RValue<Int4> lhs,RValue<Int4> rhs)5426 	RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5427 	{
5428 		return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5429 	}
5430 
operator |(RValue<Int4> lhs,RValue<Int4> rhs)5431 	RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5432 	{
5433 		return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5434 	}
5435 
operator ^(RValue<Int4> lhs,RValue<Int4> rhs)5436 	RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5437 	{
5438 		return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5439 	}
5440 
operator <<(RValue<Int4> lhs,unsigned char rhs)5441 	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5442 	{
5443 		return x86::pslld(lhs, rhs);
5444 	}
5445 
operator >>(RValue<Int4> lhs,unsigned char rhs)5446 	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5447 	{
5448 		return x86::psrad(lhs, rhs);
5449 	}
5450 
operator <<(RValue<Int4> lhs,RValue<Int4> rhs)5451 	RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5452 	{
5453 		return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5454 	}
5455 
operator >>(RValue<Int4> lhs,RValue<Int4> rhs)5456 	RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5457 	{
5458 		return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5459 	}
5460 
operator +=(const Int4 & lhs,RValue<Int4> rhs)5461 	RValue<Int4> operator+=(const Int4 &lhs, RValue<Int4> rhs)
5462 	{
5463 		return lhs = lhs + rhs;
5464 	}
5465 
operator -=(const Int4 & lhs,RValue<Int4> rhs)5466 	RValue<Int4> operator-=(const Int4 &lhs, RValue<Int4> rhs)
5467 	{
5468 		return lhs = lhs - rhs;
5469 	}
5470 
operator *=(const Int4 & lhs,RValue<Int4> rhs)5471 	RValue<Int4> operator*=(const Int4 &lhs, RValue<Int4> rhs)
5472 	{
5473 		return lhs = lhs * rhs;
5474 	}
5475 
5476 //	RValue<Int4> operator/=(const Int4 &lhs, RValue<Int4> rhs)
5477 //	{
5478 //		return lhs = lhs / rhs;
5479 //	}
5480 
5481 //	RValue<Int4> operator%=(const Int4 &lhs, RValue<Int4> rhs)
5482 //	{
5483 //		return lhs = lhs % rhs;
5484 //	}
5485 
operator &=(const Int4 & lhs,RValue<Int4> rhs)5486 	RValue<Int4> operator&=(const Int4 &lhs, RValue<Int4> rhs)
5487 	{
5488 		return lhs = lhs & rhs;
5489 	}
5490 
operator |=(const Int4 & lhs,RValue<Int4> rhs)5491 	RValue<Int4> operator|=(const Int4 &lhs, RValue<Int4> rhs)
5492 	{
5493 		return lhs = lhs | rhs;
5494 	}
5495 
operator ^=(const Int4 & lhs,RValue<Int4> rhs)5496 	RValue<Int4> operator^=(const Int4 &lhs, RValue<Int4> rhs)
5497 	{
5498 		return lhs = lhs ^ rhs;
5499 	}
5500 
operator <<=(const Int4 & lhs,unsigned char rhs)5501 	RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs)
5502 	{
5503 		return lhs = lhs << rhs;
5504 	}
5505 
operator >>=(const Int4 & lhs,unsigned char rhs)5506 	RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs)
5507 	{
5508 		return lhs = lhs >> rhs;
5509 	}
5510 
	// Unary plus: returns val unchanged.
	RValue<Int4> operator+(RValue<Int4> val)
	{
		return val;
	}
5515 
operator -(RValue<Int4> val)5516 	RValue<Int4> operator-(RValue<Int4> val)
5517 	{
5518 		return RValue<Int4>(Nucleus::createNeg(val.value));
5519 	}
5520 
operator ~(RValue<Int4> val)5521 	RValue<Int4> operator~(RValue<Int4> val)
5522 	{
5523 		return RValue<Int4>(Nucleus::createNot(val.value));
5524 	}
5525 
CmpEQ(RValue<Int4> x,RValue<Int4> y)5526 	RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5527 	{
5528 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5529 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5530 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5531 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5532 	}
5533 
CmpLT(RValue<Int4> x,RValue<Int4> y)5534 	RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5535 	{
5536 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
5537 	}
5538 
CmpLE(RValue<Int4> x,RValue<Int4> y)5539 	RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5540 	{
5541 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5542 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5543 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5544 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5545 	}
5546 
CmpNEQ(RValue<Int4> x,RValue<Int4> y)5547 	RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5548 	{
5549 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5550 	}
5551 
CmpNLT(RValue<Int4> x,RValue<Int4> y)5552 	RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5553 	{
5554 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5555 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5556 		// return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5557 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5558 	}
5559 
CmpNLE(RValue<Int4> x,RValue<Int4> y)5560 	RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5561 	{
5562 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
5563 	}
5564 
Max(RValue<Int4> x,RValue<Int4> y)5565 	RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5566 	{
5567 		if(CPUID::supportsSSE4_1())
5568 		{
5569 			return x86::pmaxsd(x, y);
5570 		}
5571 		else
5572 		{
5573 			RValue<Int4> greater = CmpNLE(x, y);
5574 			return x & greater | y & ~greater;
5575 		}
5576 	}
5577 
Min(RValue<Int4> x,RValue<Int4> y)5578 	RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5579 	{
5580 		if(CPUID::supportsSSE4_1())
5581 		{
5582 			return x86::pminsd(x, y);
5583 		}
5584 		else
5585 		{
5586 			RValue<Int4> less = CmpLT(x, y);
5587 			return x & less | y & ~less;
5588 		}
5589 	}
5590 
RoundInt(RValue<Float4> cast)5591 	RValue<Int4> RoundInt(RValue<Float4> cast)
5592 	{
5593 		return x86::cvtps2dq(cast);
5594 	}
5595 
Pack(RValue<Int4> x,RValue<Int4> y)5596 	RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5597 	{
5598 		return x86::packssdw(x, y);
5599 	}
5600 
Extract(RValue<Int4> x,int i)5601 	RValue<Int> Extract(RValue<Int4> x, int i)
5602 	{
5603 		return RValue<Int>(Nucleus::createExtractElement(x.value, i));
5604 	}
5605 
Insert(RValue<Int4> x,RValue<Int> element,int i)5606 	RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5607 	{
5608 		return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5609 	}
5610 
SignMask(RValue<Int4> x)5611 	RValue<Int> SignMask(RValue<Int4> x)
5612 	{
5613 		return x86::movmskps(As<Float4>(x));
5614 	}
5615 
Swizzle(RValue<Int4> x,unsigned char select)5616 	RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5617 	{
5618 		return RValue<Int4>(Nucleus::createSwizzle(x.value, select));
5619 	}
5620 
getType()5621 	Type *Int4::getType()
5622 	{
5623 		return VectorType::get(Int::getType(), 4);
5624 	}
5625 
UInt4(RValue<Float4> cast)5626 	UInt4::UInt4(RValue<Float4> cast)
5627 	{
5628 	//	xyzw.parent = this;
5629 
5630 		Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
5631 
5632 		storeValue(xyzw);
5633 	}
5634 
	// Default constructor. The body stores no value.
	UInt4::UInt4()
	{
	//	xyzw.parent = this;
	}
5639 
	// Constructs a constant UInt4 with all four elements set to xyzw.
	UInt4::UInt4(int xyzw)
	{
		constant(xyzw, xyzw, xyzw, xyzw);
	}
5644 
	// Constructs a constant UInt4 with first element x and the remaining three set to yzw.
	UInt4::UInt4(int x, int yzw)
	{
		constant(x, yzw, yzw, yzw);
	}
5649 
	// Constructs a constant UInt4 with elements x, y and the last two set to zw.
	UInt4::UInt4(int x, int y, int zw)
	{
		constant(x, y, zw, zw);
	}
5654 
	// Constructs a constant UInt4 with elements (x, y, z, w).
	UInt4::UInt4(int x, int y, int z, int w)
	{
		constant(x, y, z, w);
	}
5659 
constant(int x,int y,int z,int w)5660 	void UInt4::constant(int x, int y, int z, int w)
5661 	{
5662 	//	xyzw.parent = this;
5663 
5664 		Constant *constantVector[4];
5665 		constantVector[0] = Nucleus::createConstantInt(x);
5666 		constantVector[1] = Nucleus::createConstantInt(y);
5667 		constantVector[2] = Nucleus::createConstantInt(z);
5668 		constantVector[3] = Nucleus::createConstantInt(w);
5669 
5670 		storeValue(Nucleus::createConstantVector(constantVector, 4));
5671 	}
5672 
	// Constructs a UInt4 by storing the value carried by rhs.
	UInt4::UInt4(RValue<UInt4> rhs)
	{
	//	xyzw.parent = this;

		storeValue(rhs.value);
	}
5679 
UInt4(const UInt4 & rhs)5680 	UInt4::UInt4(const UInt4 &rhs)
5681 	{
5682 	//	xyzw.parent = this;
5683 
5684 		Value *value = rhs.loadValue();
5685 		storeValue(value);
5686 	}
5687 
UInt4(const Reference<UInt4> & rhs)5688 	UInt4::UInt4(const Reference<UInt4> &rhs)
5689 	{
5690 	//	xyzw.parent = this;
5691 
5692 		Value *value = rhs.loadValue();
5693 		storeValue(value);
5694 	}
5695 
	// Constructs a UInt4 from an Int4 rvalue by storing its value as-is (no conversion is emitted here).
	UInt4::UInt4(RValue<Int4> rhs)
	{
	//	xyzw.parent = this;

		storeValue(rhs.value);
	}
5702 
UInt4(const Int4 & rhs)5703 	UInt4::UInt4(const Int4 &rhs)
5704 	{
5705 	//	xyzw.parent = this;
5706 
5707 		Value *value = rhs.loadValue();
5708 		storeValue(value);
5709 	}
5710 
UInt4(const Reference<Int4> & rhs)5711 	UInt4::UInt4(const Reference<Int4> &rhs)
5712 	{
5713 	//	xyzw.parent = this;
5714 
5715 		Value *value = rhs.loadValue();
5716 		storeValue(value);
5717 	}
5718 
UInt4(RValue<UInt2> lo,RValue<UInt2> hi)5719 	UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5720 	{
5721 		Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5722 		Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5723 
5724 		Value *long2 = UndefValue::get(Long2::getType());
5725 		long2 = Nucleus::createInsertElement(long2, loLong, 0);
5726 		long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5727 		Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
5728 
5729 		storeValue(uint4);
5730 	}
5731 
	// Assigns an rvalue: stores rhs into this variable and returns rhs.
	RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) const
	{
		storeValue(rhs.value);

		return rhs;
	}
5738 
operator =(const UInt4 & rhs) const5739 	RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const
5740 	{
5741 		Value *value = rhs.loadValue();
5742 		storeValue(value);
5743 
5744 		return RValue<UInt4>(value);
5745 	}
5746 
operator =(const Reference<UInt4> & rhs) const5747 	RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) const
5748 	{
5749 		Value *value = rhs.loadValue();
5750 		storeValue(value);
5751 
5752 		return RValue<UInt4>(value);
5753 	}
5754 
operator +(RValue<UInt4> lhs,RValue<UInt4> rhs)5755 	RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5756 	{
5757 		return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5758 	}
5759 
operator -(RValue<UInt4> lhs,RValue<UInt4> rhs)5760 	RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5761 	{
5762 		return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5763 	}
5764 
operator *(RValue<UInt4> lhs,RValue<UInt4> rhs)5765 	RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5766 	{
5767 		return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5768 	}
5769 
operator /(RValue<UInt4> lhs,RValue<UInt4> rhs)5770 	RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5771 	{
5772 		return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5773 	}
5774 
operator %(RValue<UInt4> lhs,RValue<UInt4> rhs)5775 	RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5776 	{
5777 		return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5778 	}
5779 
operator &(RValue<UInt4> lhs,RValue<UInt4> rhs)5780 	RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5781 	{
5782 		return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5783 	}
5784 
operator |(RValue<UInt4> lhs,RValue<UInt4> rhs)5785 	RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5786 	{
5787 		return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5788 	}
5789 
operator ^(RValue<UInt4> lhs,RValue<UInt4> rhs)5790 	RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5791 	{
5792 		return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5793 	}
5794 
operator <<(RValue<UInt4> lhs,unsigned char rhs)5795 	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5796 	{
5797 		return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
5798 	}
5799 
operator >>(RValue<UInt4> lhs,unsigned char rhs)5800 	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5801 	{
5802 		return x86::psrld(lhs, rhs);
5803 	}
5804 
operator <<(RValue<UInt4> lhs,RValue<UInt4> rhs)5805 	RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5806 	{
5807 		return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5808 	}
5809 
operator >>(RValue<UInt4> lhs,RValue<UInt4> rhs)5810 	RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5811 	{
5812 		return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5813 	}
5814 
operator +=(const UInt4 & lhs,RValue<UInt4> rhs)5815 	RValue<UInt4> operator+=(const UInt4 &lhs, RValue<UInt4> rhs)
5816 	{
5817 		return lhs = lhs + rhs;
5818 	}
5819 
operator -=(const UInt4 & lhs,RValue<UInt4> rhs)5820 	RValue<UInt4> operator-=(const UInt4 &lhs, RValue<UInt4> rhs)
5821 	{
5822 		return lhs = lhs - rhs;
5823 	}
5824 
operator *=(const UInt4 & lhs,RValue<UInt4> rhs)5825 	RValue<UInt4> operator*=(const UInt4 &lhs, RValue<UInt4> rhs)
5826 	{
5827 		return lhs = lhs * rhs;
5828 	}
5829 
5830 //	RValue<UInt4> operator/=(const UInt4 &lhs, RValue<UInt4> rhs)
5831 //	{
5832 //		return lhs = lhs / rhs;
5833 //	}
5834 
5835 //	RValue<UInt4> operator%=(const UInt4 &lhs, RValue<UInt4> rhs)
5836 //	{
5837 //		return lhs = lhs % rhs;
5838 //	}
5839 
operator &=(const UInt4 & lhs,RValue<UInt4> rhs)5840 	RValue<UInt4> operator&=(const UInt4 &lhs, RValue<UInt4> rhs)
5841 	{
5842 		return lhs = lhs & rhs;
5843 	}
5844 
operator |=(const UInt4 & lhs,RValue<UInt4> rhs)5845 	RValue<UInt4> operator|=(const UInt4 &lhs, RValue<UInt4> rhs)
5846 	{
5847 		return lhs = lhs | rhs;
5848 	}
5849 
operator ^=(const UInt4 & lhs,RValue<UInt4> rhs)5850 	RValue<UInt4> operator^=(const UInt4 &lhs, RValue<UInt4> rhs)
5851 	{
5852 		return lhs = lhs ^ rhs;
5853 	}
5854 
operator <<=(const UInt4 & lhs,unsigned char rhs)5855 	RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs)
5856 	{
5857 		return lhs = lhs << rhs;
5858 	}
5859 
operator >>=(const UInt4 & lhs,unsigned char rhs)5860 	RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs)
5861 	{
5862 		return lhs = lhs >> rhs;
5863 	}
5864 
operator +(RValue<UInt4> val)5865 	RValue<UInt4> operator+(RValue<UInt4> val)
5866 	{
5867 		return val;
5868 	}
5869 
operator -(RValue<UInt4> val)5870 	RValue<UInt4> operator-(RValue<UInt4> val)
5871 	{
5872 		return RValue<UInt4>(Nucleus::createNeg(val.value));
5873 	}
5874 
operator ~(RValue<UInt4> val)5875 	RValue<UInt4> operator~(RValue<UInt4> val)
5876 	{
5877 		return RValue<UInt4>(Nucleus::createNot(val.value));
5878 	}
5879 
CmpEQ(RValue<UInt4> x,RValue<UInt4> y)5880 	RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5881 	{
5882 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5883 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5884 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5885 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5886 	}
5887 
CmpLT(RValue<UInt4> x,RValue<UInt4> y)5888 	RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5889 	{
5890 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
5891 	}
5892 
CmpLE(RValue<UInt4> x,RValue<UInt4> y)5893 	RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5894 	{
5895 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5896 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5897 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
5898 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5899 	}
5900 
CmpNEQ(RValue<UInt4> x,RValue<UInt4> y)5901 	RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5902 	{
5903 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5904 	}
5905 
CmpNLT(RValue<UInt4> x,RValue<UInt4> y)5906 	RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5907 	{
5908 		// FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5909 		//        Restore the following line when LLVM is updated to a version where this issue is fixed.
5910 		// return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
5911 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5912 	}
5913 
CmpNLE(RValue<UInt4> x,RValue<UInt4> y)5914 	RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5915 	{
5916 		return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
5917 	}
5918 
Max(RValue<UInt4> x,RValue<UInt4> y)5919 	RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5920 	{
5921 		if(CPUID::supportsSSE4_1())
5922 		{
5923 			return x86::pmaxud(x, y);
5924 		}
5925 		else
5926 		{
5927 			RValue<UInt4> greater = CmpNLE(x, y);
5928 			return x & greater | y & ~greater;
5929 		}
5930 	}
5931 
Min(RValue<UInt4> x,RValue<UInt4> y)5932 	RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5933 	{
5934 		if(CPUID::supportsSSE4_1())
5935 		{
5936 			return x86::pminud(x, y);
5937 		}
5938 		else
5939 		{
5940 			RValue<UInt4> less = CmpLT(x, y);
5941 			return x & less | y & ~less;
5942 		}
5943 	}
5944 
Pack(RValue<UInt4> x,RValue<UInt4> y)5945 	RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5946 	{
5947 		return x86::packusdw(x, y);   // FIXME: Fallback required
5948 	}
5949 
getType()5950 	Type *UInt4::getType()
5951 	{
5952 		return VectorType::get(UInt::getType(), 4);
5953 	}
5954 
Float(RValue<Int> cast)5955 	Float::Float(RValue<Int> cast)
5956 	{
5957 		Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5958 
5959 		storeValue(integer);
5960 	}
5961 
Float()5962 	Float::Float()
5963 	{
5964 
5965 	}
5966 
Float(float x)5967 	Float::Float(float x)
5968 	{
5969 		storeValue(Nucleus::createConstantFloat(x));
5970 	}
5971 
Float(RValue<Float> rhs)5972 	Float::Float(RValue<Float> rhs)
5973 	{
5974 		storeValue(rhs.value);
5975 	}
5976 
Float(const Float & rhs)5977 	Float::Float(const Float &rhs)
5978 	{
5979 		Value *value = rhs.loadValue();
5980 		storeValue(value);
5981 	}
5982 
Float(const Reference<Float> & rhs)5983 	Float::Float(const Reference<Float> &rhs)
5984 	{
5985 		Value *value = rhs.loadValue();
5986 		storeValue(value);
5987 	}
5988 
operator =(RValue<Float> rhs) const5989 	RValue<Float> Float::operator=(RValue<Float> rhs) const
5990 	{
5991 		storeValue(rhs.value);
5992 
5993 		return rhs;
5994 	}
5995 
operator =(const Float & rhs) const5996 	RValue<Float> Float::operator=(const Float &rhs) const
5997 	{
5998 		Value *value = rhs.loadValue();
5999 		storeValue(value);
6000 
6001 		return RValue<Float>(value);
6002 	}
6003 
operator =(const Reference<Float> & rhs) const6004 	RValue<Float> Float::operator=(const Reference<Float> &rhs) const
6005 	{
6006 		Value *value = rhs.loadValue();
6007 		storeValue(value);
6008 
6009 		return RValue<Float>(value);
6010 	}
6011 
operator +(RValue<Float> lhs,RValue<Float> rhs)6012 	RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6013 	{
6014 		return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6015 	}
6016 
operator -(RValue<Float> lhs,RValue<Float> rhs)6017 	RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6018 	{
6019 		return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6020 	}
6021 
operator *(RValue<Float> lhs,RValue<Float> rhs)6022 	RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6023 	{
6024 		return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6025 	}
6026 
operator /(RValue<Float> lhs,RValue<Float> rhs)6027 	RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6028 	{
6029 		return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
6030 	}
6031 
operator +=(const Float & lhs,RValue<Float> rhs)6032 	RValue<Float> operator+=(const Float &lhs, RValue<Float> rhs)
6033 	{
6034 		return lhs = lhs + rhs;
6035 	}
6036 
operator -=(const Float & lhs,RValue<Float> rhs)6037 	RValue<Float> operator-=(const Float &lhs, RValue<Float> rhs)
6038 	{
6039 		return lhs = lhs - rhs;
6040 	}
6041 
operator *=(const Float & lhs,RValue<Float> rhs)6042 	RValue<Float> operator*=(const Float &lhs, RValue<Float> rhs)
6043 	{
6044 		return lhs = lhs * rhs;
6045 	}
6046 
operator /=(const Float & lhs,RValue<Float> rhs)6047 	RValue<Float> operator/=(const Float &lhs, RValue<Float> rhs)
6048 	{
6049 		return lhs = lhs / rhs;
6050 	}
6051 
operator +(RValue<Float> val)6052 	RValue<Float> operator+(RValue<Float> val)
6053 	{
6054 		return val;
6055 	}
6056 
operator -(RValue<Float> val)6057 	RValue<Float> operator-(RValue<Float> val)
6058 	{
6059 		return RValue<Float>(Nucleus::createFNeg(val.value));
6060 	}
6061 
operator <(RValue<Float> lhs,RValue<Float> rhs)6062 	RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6063 	{
6064 		return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6065 	}
6066 
operator <=(RValue<Float> lhs,RValue<Float> rhs)6067 	RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6068 	{
6069 		return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6070 	}
6071 
operator >(RValue<Float> lhs,RValue<Float> rhs)6072 	RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6073 	{
6074 		return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6075 	}
6076 
operator >=(RValue<Float> lhs,RValue<Float> rhs)6077 	RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6078 	{
6079 		return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6080 	}
6081 
operator !=(RValue<Float> lhs,RValue<Float> rhs)6082 	RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6083 	{
6084 		return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6085 	}
6086 
operator ==(RValue<Float> lhs,RValue<Float> rhs)6087 	RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6088 	{
6089 		return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6090 	}
6091 
Abs(RValue<Float> x)6092 	RValue<Float> Abs(RValue<Float> x)
6093 	{
6094 		return IfThenElse(x > 0.0f, x, -x);
6095 	}
6096 
Max(RValue<Float> x,RValue<Float> y)6097 	RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6098 	{
6099 		return IfThenElse(x > y, x, y);
6100 	}
6101 
Min(RValue<Float> x,RValue<Float> y)6102 	RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6103 	{
6104 		return IfThenElse(x < y, x, y);
6105 	}
6106 
Rcp_pp(RValue<Float> x,bool exactAtPow2)6107 	RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6108 	{
6109 		if(exactAtPow2)
6110 		{
6111 			// rcpss uses a piecewise-linear approximation which minimizes the relative error
6112 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6113 			return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6114 		}
6115 		else
6116 		{
6117 			return x86::rcpss(x);
6118 		}
6119 	}
6120 
RcpSqrt_pp(RValue<Float> x)6121 	RValue<Float> RcpSqrt_pp(RValue<Float> x)
6122 	{
6123 		return x86::rsqrtss(x);
6124 	}
6125 
Sqrt(RValue<Float> x)6126 	RValue<Float> Sqrt(RValue<Float> x)
6127 	{
6128 		return x86::sqrtss(x);
6129 	}
6130 
Round(RValue<Float> x)6131 	RValue<Float> Round(RValue<Float> x)
6132 	{
6133 		if(CPUID::supportsSSE4_1())
6134 		{
6135 			return x86::roundss(x, 0);
6136 		}
6137 		else
6138 		{
6139 			return Float4(Round(Float4(x))).x;
6140 		}
6141 	}
6142 
Trunc(RValue<Float> x)6143 	RValue<Float> Trunc(RValue<Float> x)
6144 	{
6145 		if(CPUID::supportsSSE4_1())
6146 		{
6147 			return x86::roundss(x, 3);
6148 		}
6149 		else
6150 		{
6151 			return Float(Int(x));   // Rounded toward zero
6152 		}
6153 	}
6154 
Frac(RValue<Float> x)6155 	RValue<Float> Frac(RValue<Float> x)
6156 	{
6157 		if(CPUID::supportsSSE4_1())
6158 		{
6159 			return x - x86::floorss(x);
6160 		}
6161 		else
6162 		{
6163 			return Float4(Frac(Float4(x))).x;
6164 		}
6165 	}
6166 
Floor(RValue<Float> x)6167 	RValue<Float> Floor(RValue<Float> x)
6168 	{
6169 		if(CPUID::supportsSSE4_1())
6170 		{
6171 			return x86::floorss(x);
6172 		}
6173 		else
6174 		{
6175 			return Float4(Floor(Float4(x))).x;
6176 		}
6177 	}
6178 
Ceil(RValue<Float> x)6179 	RValue<Float> Ceil(RValue<Float> x)
6180 	{
6181 		if(CPUID::supportsSSE4_1())
6182 		{
6183 			return x86::ceilss(x);
6184 		}
6185 		else
6186 		{
6187 			return Float4(Ceil(Float4(x))).x;
6188 		}
6189 	}
6190 
getType()6191 	Type *Float::getType()
6192 	{
6193 		return Type::getFloatTy(*Nucleus::getContext());
6194 	}
6195 
Float2(RValue<Float4> cast)6196 	Float2::Float2(RValue<Float4> cast)
6197 	{
6198 	//	xyzw.parent = this;
6199 
6200 		Value *int64x2 = Nucleus::createBitCast(cast.value, Long2::getType());
6201 		Value *int64 = Nucleus::createExtractElement(int64x2, 0);
6202 		Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
6203 
6204 		storeValue(float2);
6205 	}
6206 
getType()6207 	Type *Float2::getType()
6208 	{
6209 		return VectorType::get(Float::getType(), 2);
6210 	}
6211 
Float4(RValue<Byte4> cast)6212 	Float4::Float4(RValue<Byte4> cast)
6213 	{
6214 		xyzw.parent = this;
6215 
6216 		#if 0
6217 			Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());   // FIXME: Crashes
6218 		#elif 0
6219 			Value *vector = loadValue();
6220 
6221 			Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6222 			Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
6223 			Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6224 
6225 			Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1));
6226 			Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
6227 			Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1));
6228 
6229 			Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6230 			Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
6231 			Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6232 
6233 			Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6234 			Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
6235 			Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
6236 		#else
6237 			Value *x = Nucleus::createBitCast(cast.value, Int::getType());
6238 			Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0);
6239 
6240 			Value *e;
6241 
6242 			if(CPUID::supportsSSE4_1())
6243 			{
6244 				e = x86::pmovzxbd(RValue<Int4>(a)).value;
6245 			}
6246 			else
6247 			{
6248 				Constant *swizzle[16];
6249 				swizzle[0] = Nucleus::createConstantInt(0);
6250 				swizzle[1] = Nucleus::createConstantInt(16);
6251 				swizzle[2] = Nucleus::createConstantInt(1);
6252 				swizzle[3] = Nucleus::createConstantInt(17);
6253 				swizzle[4] = Nucleus::createConstantInt(2);
6254 				swizzle[5] = Nucleus::createConstantInt(18);
6255 				swizzle[6] = Nucleus::createConstantInt(3);
6256 				swizzle[7] = Nucleus::createConstantInt(19);
6257 				swizzle[8] = Nucleus::createConstantInt(4);
6258 				swizzle[9] = Nucleus::createConstantInt(20);
6259 				swizzle[10] = Nucleus::createConstantInt(5);
6260 				swizzle[11] = Nucleus::createConstantInt(21);
6261 				swizzle[12] = Nucleus::createConstantInt(6);
6262 				swizzle[13] = Nucleus::createConstantInt(22);
6263 				swizzle[14] = Nucleus::createConstantInt(7);
6264 				swizzle[15] = Nucleus::createConstantInt(23);
6265 
6266 				Value *b = Nucleus::createBitCast(a, Byte16::getType());
6267 				Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), Nucleus::createConstantVector(swizzle, 16));
6268 
6269 				Constant *swizzle2[8];
6270 				swizzle2[0] = Nucleus::createConstantInt(0);
6271 				swizzle2[1] = Nucleus::createConstantInt(8);
6272 				swizzle2[2] = Nucleus::createConstantInt(1);
6273 				swizzle2[3] = Nucleus::createConstantInt(9);
6274 				swizzle2[4] = Nucleus::createConstantInt(2);
6275 				swizzle2[5] = Nucleus::createConstantInt(10);
6276 				swizzle2[6] = Nucleus::createConstantInt(3);
6277 				swizzle2[7] = Nucleus::createConstantInt(11);
6278 
6279 				Value *d = Nucleus::createBitCast(c, Short8::getType());
6280 				e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle2, 8));
6281 			}
6282 
6283 			Value *f = Nucleus::createBitCast(e, Int4::getType());
6284 			Value *g = Nucleus::createSIToFP(f, Float4::getType());
6285 			Value *xyzw = g;
6286 		#endif
6287 
6288 		storeValue(xyzw);
6289 	}
6290 
Float4(RValue<SByte4> cast)6291 	Float4::Float4(RValue<SByte4> cast)
6292 	{
6293 		xyzw.parent = this;
6294 
6295 		#if 0
6296 			Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());   // FIXME: Crashes
6297 		#elif 0
6298 			Value *vector = loadValue();
6299 
6300 			Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6301 			Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
6302 			Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6303 
6304 			Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1));
6305 			Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
6306 			Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1));
6307 
6308 			Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6309 			Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
6310 			Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6311 
6312 			Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6313 			Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
6314 			Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
6315 		#else
6316 			Value *x = Nucleus::createBitCast(cast.value, Int::getType());
6317 			Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0);
6318 
6319 			Value *g;
6320 
6321 			if(CPUID::supportsSSE4_1())
6322 			{
6323 				g = x86::pmovsxbd(RValue<Int4>(a)).value;
6324 			}
6325 			else
6326 			{
6327 				Constant *swizzle[16];
6328 				swizzle[0] = Nucleus::createConstantInt(0);
6329 				swizzle[1] = Nucleus::createConstantInt(0);
6330 				swizzle[2] = Nucleus::createConstantInt(1);
6331 				swizzle[3] = Nucleus::createConstantInt(1);
6332 				swizzle[4] = Nucleus::createConstantInt(2);
6333 				swizzle[5] = Nucleus::createConstantInt(2);
6334 				swizzle[6] = Nucleus::createConstantInt(3);
6335 				swizzle[7] = Nucleus::createConstantInt(3);
6336 				swizzle[8] = Nucleus::createConstantInt(4);
6337 				swizzle[9] = Nucleus::createConstantInt(4);
6338 				swizzle[10] = Nucleus::createConstantInt(5);
6339 				swizzle[11] = Nucleus::createConstantInt(5);
6340 				swizzle[12] = Nucleus::createConstantInt(6);
6341 				swizzle[13] = Nucleus::createConstantInt(6);
6342 				swizzle[14] = Nucleus::createConstantInt(7);
6343 				swizzle[15] = Nucleus::createConstantInt(7);
6344 
6345 				Value *b = Nucleus::createBitCast(a, Byte16::getType());
6346 				Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 16));
6347 
6348 				Constant *swizzle2[8];
6349 				swizzle2[0] = Nucleus::createConstantInt(0);
6350 				swizzle2[1] = Nucleus::createConstantInt(0);
6351 				swizzle2[2] = Nucleus::createConstantInt(1);
6352 				swizzle2[3] = Nucleus::createConstantInt(1);
6353 				swizzle2[4] = Nucleus::createConstantInt(2);
6354 				swizzle2[5] = Nucleus::createConstantInt(2);
6355 				swizzle2[6] = Nucleus::createConstantInt(3);
6356 				swizzle2[7] = Nucleus::createConstantInt(3);
6357 
6358 				Value *d = Nucleus::createBitCast(c, Short8::getType());
6359 				Value *e = Nucleus::createShuffleVector(d, d, Nucleus::createConstantVector(swizzle2, 8));
6360 
6361 				Value *f = Nucleus::createBitCast(e, Int4::getType());
6362 			//	g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
6363 				g = x86::psrad(RValue<Int4>(f), 24).value;
6364 			}
6365 
6366 			Value *xyzw = Nucleus::createSIToFP(g, Float4::getType());
6367 		#endif
6368 
6369 		storeValue(xyzw);
6370 	}
6371 
Float4(RValue<Short4> cast)6372 	Float4::Float4(RValue<Short4> cast)
6373 	{
6374 		xyzw.parent = this;
6375 
6376 		Int4 c(cast);
6377 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6378 	}
6379 
Float4(RValue<UShort4> cast)6380 	Float4::Float4(RValue<UShort4> cast)
6381 	{
6382 		xyzw.parent = this;
6383 
6384 		Int4 c(cast);
6385 		storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6386 	}
6387 
Float4(RValue<Int4> cast)6388 	Float4::Float4(RValue<Int4> cast)
6389 	{
6390 		xyzw.parent = this;
6391 
6392 		Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
6393 
6394 		storeValue(xyzw);
6395 	}
6396 
Float4(RValue<UInt4> cast)6397 	Float4::Float4(RValue<UInt4> cast)
6398 	{
6399 		xyzw.parent = this;
6400 
6401 		Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());
6402 
6403 		storeValue(xyzw);
6404 	}
6405 
Float4()6406 	Float4::Float4()
6407 	{
6408 		xyzw.parent = this;
6409 	}
6410 
Float4(float xyzw)6411 	Float4::Float4(float xyzw)
6412 	{
6413 		constant(xyzw, xyzw, xyzw, xyzw);
6414 	}
6415 
Float4(float x,float yzw)6416 	Float4::Float4(float x, float yzw)
6417 	{
6418 		constant(x, yzw, yzw, yzw);
6419 	}
6420 
Float4(float x,float y,float zw)6421 	Float4::Float4(float x, float y, float zw)
6422 	{
6423 		constant(x, y, zw, zw);
6424 	}
6425 
Float4(float x,float y,float z,float w)6426 	Float4::Float4(float x, float y, float z, float w)
6427 	{
6428 		constant(x, y, z, w);
6429 	}
6430 
constant(float x,float y,float z,float w)6431 	void Float4::constant(float x, float y, float z, float w)
6432 	{
6433 		xyzw.parent = this;
6434 
6435 		Constant *constantVector[4];
6436 		constantVector[0] = Nucleus::createConstantFloat(x);
6437 		constantVector[1] = Nucleus::createConstantFloat(y);
6438 		constantVector[2] = Nucleus::createConstantFloat(z);
6439 		constantVector[3] = Nucleus::createConstantFloat(w);
6440 
6441 		storeValue(Nucleus::createConstantVector(constantVector, 4));
6442 	}
6443 
Float4(RValue<Float4> rhs)6444 	Float4::Float4(RValue<Float4> rhs)
6445 	{
6446 		xyzw.parent = this;
6447 
6448 		storeValue(rhs.value);
6449 	}
6450 
Float4(const Float4 & rhs)6451 	Float4::Float4(const Float4 &rhs)
6452 	{
6453 		xyzw.parent = this;
6454 
6455 		Value *value = rhs.loadValue();
6456 		storeValue(value);
6457 	}
6458 
Float4(const Reference<Float4> & rhs)6459 	Float4::Float4(const Reference<Float4> &rhs)
6460 	{
6461 		xyzw.parent = this;
6462 
6463 		Value *value = rhs.loadValue();
6464 		storeValue(value);
6465 	}
6466 
Float4(RValue<Float> rhs)6467 	Float4::Float4(RValue<Float> rhs)
6468 	{
6469 		xyzw.parent = this;
6470 
6471 		Value *vector = loadValue();
6472 		Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
6473 
6474 		Constant *swizzle[4];
6475 		swizzle[0] = Nucleus::createConstantInt(0);
6476 		swizzle[1] = Nucleus::createConstantInt(0);
6477 		swizzle[2] = Nucleus::createConstantInt(0);
6478 		swizzle[3] = Nucleus::createConstantInt(0);
6479 
6480 		Value *replicate = Nucleus::createShuffleVector(insert, UndefValue::get(Float4::getType()), Nucleus::createConstantVector(swizzle, 4));
6481 
6482 		storeValue(replicate);
6483 	}
6484 
Float4(const Float & rhs)6485 	Float4::Float4(const Float &rhs)
6486 	{
6487 		xyzw.parent = this;
6488 
6489 		*this = RValue<Float>(rhs.loadValue());
6490 	}
6491 
Float4(const Reference<Float> & rhs)6492 	Float4::Float4(const Reference<Float> &rhs)
6493 	{
6494 		xyzw.parent = this;
6495 
6496 		*this = RValue<Float>(rhs.loadValue());
6497 	}
6498 
operator =(float x) const6499 	RValue<Float4> Float4::operator=(float x) const
6500 	{
6501 		return *this = Float4(x, x, x, x);
6502 	}
6503 
operator =(RValue<Float4> rhs) const6504 	RValue<Float4> Float4::operator=(RValue<Float4> rhs) const
6505 	{
6506 		storeValue(rhs.value);
6507 
6508 		return rhs;
6509 	}
6510 
operator =(const Float4 & rhs) const6511 	RValue<Float4> Float4::operator=(const Float4 &rhs) const
6512 	{
6513 		Value *value = rhs.loadValue();
6514 		storeValue(value);
6515 
6516 		return RValue<Float4>(value);
6517 	}
6518 
operator =(const Reference<Float4> & rhs) const6519 	RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) const
6520 	{
6521 		Value *value = rhs.loadValue();
6522 		storeValue(value);
6523 
6524 		return RValue<Float4>(value);
6525 	}
6526 
operator =(RValue<Float> rhs) const6527 	RValue<Float4> Float4::operator=(RValue<Float> rhs) const
6528 	{
6529 		return *this = Float4(rhs);
6530 	}
6531 
operator =(const Float & rhs) const6532 	RValue<Float4> Float4::operator=(const Float &rhs) const
6533 	{
6534 		return *this = Float4(rhs);
6535 	}
6536 
operator =(const Reference<Float> & rhs) const6537 	RValue<Float4> Float4::operator=(const Reference<Float> &rhs) const
6538 	{
6539 		return *this = Float4(rhs);
6540 	}
6541 
operator +(RValue<Float4> lhs,RValue<Float4> rhs)6542 	RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6543 	{
6544 		return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6545 	}
6546 
operator -(RValue<Float4> lhs,RValue<Float4> rhs)6547 	RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6548 	{
6549 		return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6550 	}
6551 
operator *(RValue<Float4> lhs,RValue<Float4> rhs)6552 	RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6553 	{
6554 		return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6555 	}
6556 
operator /(RValue<Float4> lhs,RValue<Float4> rhs)6557 	RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6558 	{
6559 		return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6560 	}
6561 
operator %(RValue<Float4> lhs,RValue<Float4> rhs)6562 	RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6563 	{
6564 		return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6565 	}
6566 
operator +=(const Float4 & lhs,RValue<Float4> rhs)6567 	RValue<Float4> operator+=(const Float4 &lhs, RValue<Float4> rhs)
6568 	{
6569 		return lhs = lhs + rhs;
6570 	}
6571 
operator -=(const Float4 & lhs,RValue<Float4> rhs)6572 	RValue<Float4> operator-=(const Float4 &lhs, RValue<Float4> rhs)
6573 	{
6574 		return lhs = lhs - rhs;
6575 	}
6576 
operator *=(const Float4 & lhs,RValue<Float4> rhs)6577 	RValue<Float4> operator*=(const Float4 &lhs, RValue<Float4> rhs)
6578 	{
6579 		return lhs = lhs * rhs;
6580 	}
6581 
operator /=(const Float4 & lhs,RValue<Float4> rhs)6582 	RValue<Float4> operator/=(const Float4 &lhs, RValue<Float4> rhs)
6583 	{
6584 		return lhs = lhs / rhs;
6585 	}
6586 
operator %=(const Float4 & lhs,RValue<Float4> rhs)6587 	RValue<Float4> operator%=(const Float4 &lhs, RValue<Float4> rhs)
6588 	{
6589 		return lhs = lhs % rhs;
6590 	}
6591 
operator +(RValue<Float4> val)6592 	RValue<Float4> operator+(RValue<Float4> val)
6593 	{
6594 		return val;
6595 	}
6596 
operator -(RValue<Float4> val)6597 	RValue<Float4> operator-(RValue<Float4> val)
6598 	{
6599 		return RValue<Float4>(Nucleus::createFNeg(val.value));
6600 	}
6601 
Abs(RValue<Float4> x)6602 	RValue<Float4> Abs(RValue<Float4> x)
6603 	{
6604 		Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6605 
6606 		Constant *constantVector[4];
6607 		constantVector[0] = Nucleus::createConstantInt(0x7FFFFFFF);
6608 		constantVector[1] = Nucleus::createConstantInt(0x7FFFFFFF);
6609 		constantVector[2] = Nucleus::createConstantInt(0x7FFFFFFF);
6610 		constantVector[3] = Nucleus::createConstantInt(0x7FFFFFFF);
6611 
6612 		Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, 4));
6613 
6614 		return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
6615 	}
6616 
Max(RValue<Float4> x,RValue<Float4> y)6617 	RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6618 	{
6619 		return x86::maxps(x, y);
6620 	}
6621 
Min(RValue<Float4> x,RValue<Float4> y)6622 	RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6623 	{
6624 		return x86::minps(x, y);
6625 	}
6626 
Rcp_pp(RValue<Float4> x,bool exactAtPow2)6627 	RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6628 	{
6629 		if(exactAtPow2)
6630 		{
6631 			// rcpps uses a piecewise-linear approximation which minimizes the relative error
6632 			// but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6633 			return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6634 		}
6635 		else
6636 		{
6637 			return x86::rcpps(x);
6638 		}
6639 	}
6640 
RcpSqrt_pp(RValue<Float4> x)6641 	RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6642 	{
6643 		return x86::rsqrtps(x);
6644 	}
6645 
Sqrt(RValue<Float4> x)6646 	RValue<Float4> Sqrt(RValue<Float4> x)
6647 	{
6648 		return x86::sqrtps(x);
6649 	}
6650 
Insert(const Float4 & val,RValue<Float> element,int i)6651 	RValue<Float4> Insert(const Float4 &val, RValue<Float> element, int i)
6652 	{
6653 		llvm::Value *value = val.loadValue();
6654 		llvm::Value *insert = Nucleus::createInsertElement(value, element.value, i);
6655 
6656 		val = RValue<Float4>(insert);
6657 
6658 		return val;
6659 	}
6660 
Extract(RValue<Float4> x,int i)6661 	RValue<Float> Extract(RValue<Float4> x, int i)
6662 	{
6663 		return RValue<Float>(Nucleus::createExtractElement(x.value, i));
6664 	}
6665 
Swizzle(RValue<Float4> x,unsigned char select)6666 	RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6667 	{
6668 		return RValue<Float4>(Nucleus::createSwizzle(x.value, select));
6669 	}
6670 
ShuffleLowHigh(RValue<Float4> x,RValue<Float4> y,unsigned char imm)6671 	RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6672 	{
6673 		Constant *shuffle[4];
6674 		shuffle[0] = Nucleus::createConstantInt(((imm >> 0) & 0x03) + 0);
6675 		shuffle[1] = Nucleus::createConstantInt(((imm >> 2) & 0x03) + 0);
6676 		shuffle[2] = Nucleus::createConstantInt(((imm >> 4) & 0x03) + 4);
6677 		shuffle[3] = Nucleus::createConstantInt(((imm >> 6) & 0x03) + 4);
6678 
6679 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
6680 	}
6681 
UnpackLow(RValue<Float4> x,RValue<Float4> y)6682 	RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6683 	{
6684 		Constant *shuffle[4];
6685 		shuffle[0] = Nucleus::createConstantInt(0);
6686 		shuffle[1] = Nucleus::createConstantInt(4);
6687 		shuffle[2] = Nucleus::createConstantInt(1);
6688 		shuffle[3] = Nucleus::createConstantInt(5);
6689 
6690 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
6691 	}
6692 
UnpackHigh(RValue<Float4> x,RValue<Float4> y)6693 	RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6694 	{
6695 		Constant *shuffle[4];
6696 		shuffle[0] = Nucleus::createConstantInt(2);
6697 		shuffle[1] = Nucleus::createConstantInt(6);
6698 		shuffle[2] = Nucleus::createConstantInt(3);
6699 		shuffle[3] = Nucleus::createConstantInt(7);
6700 
6701 		return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
6702 	}
6703 
Mask(Float4 & lhs,RValue<Float4> rhs,unsigned char select)6704 	RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6705 	{
6706 		Value *vector = lhs.loadValue();
6707 		Value *shuffle = Nucleus::createMask(vector, rhs.value, select);
6708 		lhs.storeValue(shuffle);
6709 
6710 		return RValue<Float4>(shuffle);
6711 	}
6712 
SignMask(RValue<Float4> x)6713 	RValue<Int> SignMask(RValue<Float4> x)
6714 	{
6715 		return x86::movmskps(x);
6716 	}
6717 
CmpEQ(RValue<Float4> x,RValue<Float4> y)6718 	RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6719 	{
6720 	//	return As<Int4>(x86::cmpeqps(x, y));
6721 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
6722 	}
6723 
CmpLT(RValue<Float4> x,RValue<Float4> y)6724 	RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6725 	{
6726 	//	return As<Int4>(x86::cmpltps(x, y));
6727 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
6728 	}
6729 
CmpLE(RValue<Float4> x,RValue<Float4> y)6730 	RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6731 	{
6732 	//	return As<Int4>(x86::cmpleps(x, y));
6733 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
6734 	}
6735 
CmpNEQ(RValue<Float4> x,RValue<Float4> y)6736 	RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6737 	{
6738 	//	return As<Int4>(x86::cmpneqps(x, y));
6739 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
6740 	}
6741 
CmpNLT(RValue<Float4> x,RValue<Float4> y)6742 	RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6743 	{
6744 	//	return As<Int4>(x86::cmpnltps(x, y));
6745 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
6746 	}
6747 
CmpNLE(RValue<Float4> x,RValue<Float4> y)6748 	RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6749 	{
6750 	//	return As<Int4>(x86::cmpnleps(x, y));
6751 		return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
6752 	}
6753 
Round(RValue<Float4> x)6754 	RValue<Float4> Round(RValue<Float4> x)
6755 	{
6756 		if(CPUID::supportsSSE4_1())
6757 		{
6758 			return x86::roundps(x, 0);
6759 		}
6760 		else
6761 		{
6762 			return Float4(RoundInt(x));
6763 		}
6764 	}
6765 
Trunc(RValue<Float4> x)6766 	RValue<Float4> Trunc(RValue<Float4> x)
6767 	{
6768 		if(CPUID::supportsSSE4_1())
6769 		{
6770 			return x86::roundps(x, 3);
6771 		}
6772 		else
6773 		{
6774 			return Float4(Int4(x));   // Rounded toward zero
6775 		}
6776 	}
6777 
Frac(RValue<Float4> x)6778 	RValue<Float4> Frac(RValue<Float4> x)
6779 	{
6780 		if(CPUID::supportsSSE4_1())
6781 		{
6782 			return x - x86::floorps(x);
6783 		}
6784 		else
6785 		{
6786 			Float4 frc = x - Float4(Int4(x));   // Signed fractional part
6787 
6788 			return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
6789 		}
6790 	}
6791 
Floor(RValue<Float4> x)6792 	RValue<Float4> Floor(RValue<Float4> x)
6793 	{
6794 		if(CPUID::supportsSSE4_1())
6795 		{
6796 			return x86::floorps(x);
6797 		}
6798 		else
6799 		{
6800 			return x - Frac(x);
6801 		}
6802 	}
6803 
Ceil(RValue<Float4> x)6804 	RValue<Float4> Ceil(RValue<Float4> x)
6805 	{
6806 		if(CPUID::supportsSSE4_1())
6807 		{
6808 			return x86::ceilps(x);
6809 		}
6810 		else
6811 		{
6812 			return -Floor(-x);
6813 		}
6814 	}
6815 
getType()6816 	Type *Float4::getType()
6817 	{
6818 		return VectorType::get(Float::getType(), 4);
6819 	}
6820 
operator +(RValue<Pointer<Byte>> lhs,int offset)6821 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6822 	{
6823 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Nucleus::createConstantInt(offset)));
6824 	}
6825 
operator +(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6826 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6827 	{
6828 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value));
6829 	}
6830 
operator +(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6831 	RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6832 	{
6833 		return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value));
6834 	}
6835 
operator +=(const Pointer<Byte> & lhs,int offset)6836 	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, int offset)
6837 	{
6838 		return lhs = lhs + offset;
6839 	}
6840 
operator +=(const Pointer<Byte> & lhs,RValue<Int> offset)6841 	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<Int> offset)
6842 	{
6843 		return lhs = lhs + offset;
6844 	}
6845 
operator +=(const Pointer<Byte> & lhs,RValue<UInt> offset)6846 	RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6847 	{
6848 		return lhs = lhs + offset;
6849 	}
6850 
operator -(RValue<Pointer<Byte>> lhs,int offset)6851 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6852 	{
6853 		return lhs + -offset;
6854 	}
6855 
operator -(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6856 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6857 	{
6858 		return lhs + -offset;
6859 	}
6860 
operator -(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6861 	RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6862 	{
6863 		return lhs + -offset;
6864 	}
6865 
operator -=(const Pointer<Byte> & lhs,int offset)6866 	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset)
6867 	{
6868 		return lhs = lhs - offset;
6869 	}
6870 
operator -=(const Pointer<Byte> & lhs,RValue<Int> offset)6871 	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<Int> offset)
6872 	{
6873 		return lhs = lhs - offset;
6874 	}
6875 
operator -=(const Pointer<Byte> & lhs,RValue<UInt> offset)6876 	RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6877 	{
6878 		return lhs = lhs - offset;
6879 	}
6880 
Return()6881 	void Return()
6882 	{
6883 		Nucleus::createRetVoid();
6884 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6885 		Nucleus::createUnreachable();
6886 	}
6887 
Return(bool ret)6888 	void Return(bool ret)
6889 	{
6890 		Nucleus::createRet(Nucleus::createConstantBool(ret));
6891 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6892 		Nucleus::createUnreachable();
6893 	}
6894 
Return(const Int & ret)6895 	void Return(const Int &ret)
6896 	{
6897 		Nucleus::createRet(ret.loadValue());
6898 		Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6899 		Nucleus::createUnreachable();
6900 	}
6901 
beginLoop()6902 	BasicBlock *beginLoop()
6903 	{
6904 		BasicBlock *loopBB = Nucleus::createBasicBlock();
6905 
6906 		Nucleus::createBr(loopBB);
6907 		Nucleus::setInsertBlock(loopBB);
6908 
6909 		return loopBB;
6910 	}
6911 
branch(RValue<Bool> cmp,BasicBlock * bodyBB,BasicBlock * endBB)6912 	bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6913 	{
6914 		Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6915 		Nucleus::setInsertBlock(bodyBB);
6916 
6917 		return true;
6918 	}
6919 
elseBlock(BasicBlock * falseBB)6920 	bool elseBlock(BasicBlock *falseBB)
6921 	{
6922 		falseBB->back().eraseFromParent();
6923 		Nucleus::setInsertBlock(falseBB);
6924 
6925 		return true;
6926 	}
6927 
Ticks()6928 	RValue<Long> Ticks()
6929 	{
6930 		Module *module = Nucleus::getModule();
6931 		llvm::Function *rdtsc = Intrinsic::getDeclaration(module, Intrinsic::readcyclecounter);
6932 
6933 		return RValue<Long>(Nucleus::createCall(rdtsc));
6934 	}
6935 }
6936 
6937 namespace sw
6938 {
6939 	namespace x86
6940 	{
cvtss2si(RValue<Float> val)6941 		RValue<Int> cvtss2si(RValue<Float> val)
6942 		{
6943 			Module *module = Nucleus::getModule();
6944 			llvm::Function *cvtss2si = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtss2si);
6945 
6946 			Float4 vector;
6947 			vector.x = val;
6948 
6949 			return RValue<Int>(Nucleus::createCall(cvtss2si, RValue<Float4>(vector).value));
6950 		}
6951 
cvtps2pi(RValue<Float4> val)6952 		RValue<Int2> cvtps2pi(RValue<Float4> val)
6953 		{
6954 			Module *module = Nucleus::getModule();
6955 			llvm::Function *cvtps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtps2pi);
6956 
6957 			return RValue<Int2>(Nucleus::createCall(cvtps2pi, val.value));
6958 		}
6959 
cvttps2pi(RValue<Float4> val)6960 		RValue<Int2> cvttps2pi(RValue<Float4> val)
6961 		{
6962 			Module *module = Nucleus::getModule();
6963 			llvm::Function *cvttps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvttps2pi);
6964 
6965 			return RValue<Int2>(Nucleus::createCall(cvttps2pi, val.value));
6966 		}
6967 
cvtps2dq(RValue<Float4> val)6968 		RValue<Int4> cvtps2dq(RValue<Float4> val)
6969 		{
6970 			if(CPUID::supportsSSE2())
6971 			{
6972 				Module *module = Nucleus::getModule();
6973 				llvm::Function *cvtps2dq = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_cvtps2dq);
6974 
6975 				return RValue<Int4>(Nucleus::createCall(cvtps2dq, val.value));
6976 			}
6977 			else
6978 			{
6979 				Int2 lo = x86::cvtps2pi(val);
6980 				Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
6981 
6982 				return Int4(lo, hi);
6983 			}
6984 		}
6985 
rcpss(RValue<Float> val)6986 		RValue<Float> rcpss(RValue<Float> val)
6987 		{
6988 			Module *module = Nucleus::getModule();
6989 			llvm::Function *rcpss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ss);
6990 
6991 			Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
6992 
6993 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rcpss, vector), 0));
6994 		}
6995 
sqrtss(RValue<Float> val)6996 		RValue<Float> sqrtss(RValue<Float> val)
6997 		{
6998 			Module *module = Nucleus::getModule();
6999 			llvm::Function *sqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ss);
7000 
7001 			Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
7002 
7003 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(sqrtss, vector), 0));
7004 		}
7005 
rsqrtss(RValue<Float> val)7006 		RValue<Float> rsqrtss(RValue<Float> val)
7007 		{
7008 			Module *module = Nucleus::getModule();
7009 			llvm::Function *rsqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ss);
7010 
7011 			Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
7012 
7013 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rsqrtss, vector), 0));
7014 		}
7015 
rcpps(RValue<Float4> val)7016 		RValue<Float4> rcpps(RValue<Float4> val)
7017 		{
7018 			Module *module = Nucleus::getModule();
7019 			llvm::Function *rcpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ps);
7020 
7021 			return RValue<Float4>(Nucleus::createCall(rcpps, val.value));
7022 		}
7023 
sqrtps(RValue<Float4> val)7024 		RValue<Float4> sqrtps(RValue<Float4> val)
7025 		{
7026 			Module *module = Nucleus::getModule();
7027 			llvm::Function *sqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ps);
7028 
7029 			return RValue<Float4>(Nucleus::createCall(sqrtps, val.value));
7030 		}
7031 
rsqrtps(RValue<Float4> val)7032 		RValue<Float4> rsqrtps(RValue<Float4> val)
7033 		{
7034 			Module *module = Nucleus::getModule();
7035 			llvm::Function *rsqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ps);
7036 
7037 			return RValue<Float4>(Nucleus::createCall(rsqrtps, val.value));
7038 		}
7039 
maxps(RValue<Float4> x,RValue<Float4> y)7040 		RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
7041 		{
7042 			Module *module = Nucleus::getModule();
7043 			llvm::Function *maxps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_max_ps);
7044 
7045 			return RValue<Float4>(Nucleus::createCall(maxps, x.value, y.value));
7046 		}
7047 
minps(RValue<Float4> x,RValue<Float4> y)7048 		RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
7049 		{
7050 			Module *module = Nucleus::getModule();
7051 			llvm::Function *minps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_min_ps);
7052 
7053 			return RValue<Float4>(Nucleus::createCall(minps, x.value, y.value));
7054 		}
7055 
roundss(RValue<Float> val,unsigned char imm)7056 		RValue<Float> roundss(RValue<Float> val, unsigned char imm)
7057 		{
7058 			Module *module = Nucleus::getModule();
7059 			llvm::Function *roundss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ss);
7060 
7061 			Value *undef = UndefValue::get(Float4::getType());
7062 			Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
7063 
7064 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(roundss, undef, vector, Nucleus::createConstantInt(imm)), 0));
7065 		}
7066 
floorss(RValue<Float> val)7067 		RValue<Float> floorss(RValue<Float> val)
7068 		{
7069 			return roundss(val, 1);
7070 		}
7071 
ceilss(RValue<Float> val)7072 		RValue<Float> ceilss(RValue<Float> val)
7073 		{
7074 			return roundss(val, 2);
7075 		}
7076 
roundps(RValue<Float4> val,unsigned char imm)7077 		RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
7078 		{
7079 			Module *module = Nucleus::getModule();
7080 			llvm::Function *roundps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ps);
7081 
7082 			return RValue<Float4>(Nucleus::createCall(roundps, val.value, Nucleus::createConstantInt(imm)));
7083 		}
7084 
floorps(RValue<Float4> val)7085 		RValue<Float4> floorps(RValue<Float4> val)
7086 		{
7087 			return roundps(val, 1);
7088 		}
7089 
ceilps(RValue<Float4> val)7090 		RValue<Float4> ceilps(RValue<Float4> val)
7091 		{
7092 			return roundps(val, 2);
7093 		}
7094 
cmpps(RValue<Float4> x,RValue<Float4> y,unsigned char imm)7095 		RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
7096 		{
7097 			Module *module = Nucleus::getModule();
7098 			llvm::Function *cmpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cmp_ps);
7099 
7100 			return RValue<Float4>(Nucleus::createCall(cmpps, x.value, y.value, Nucleus::createConstantByte(imm)));
7101 		}
7102 
cmpeqps(RValue<Float4> x,RValue<Float4> y)7103 		RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
7104 		{
7105 			return cmpps(x, y, 0);
7106 		}
7107 
cmpltps(RValue<Float4> x,RValue<Float4> y)7108 		RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
7109 		{
7110 			return cmpps(x, y, 1);
7111 		}
7112 
cmpleps(RValue<Float4> x,RValue<Float4> y)7113 		RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
7114 		{
7115 			return cmpps(x, y, 2);
7116 		}
7117 
cmpunordps(RValue<Float4> x,RValue<Float4> y)7118 		RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
7119 		{
7120 			return cmpps(x, y, 3);
7121 		}
7122 
cmpneqps(RValue<Float4> x,RValue<Float4> y)7123 		RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
7124 		{
7125 			return cmpps(x, y, 4);
7126 		}
7127 
cmpnltps(RValue<Float4> x,RValue<Float4> y)7128 		RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
7129 		{
7130 			return cmpps(x, y, 5);
7131 		}
7132 
cmpnleps(RValue<Float4> x,RValue<Float4> y)7133 		RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
7134 		{
7135 			return cmpps(x, y, 6);
7136 		}
7137 
cmpordps(RValue<Float4> x,RValue<Float4> y)7138 		RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
7139 		{
7140 			return cmpps(x, y, 7);
7141 		}
7142 
cmpss(RValue<Float> x,RValue<Float> y,unsigned char imm)7143 		RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
7144 		{
7145 			Module *module = Nucleus::getModule();
7146 			llvm::Function *cmpss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cmp_ss);
7147 
7148 			Value *vector1 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), x.value, 0);
7149 			Value *vector2 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), y.value, 0);
7150 
7151 			return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(cmpss, vector1, vector2, Nucleus::createConstantByte(imm)), 0));
7152 		}
7153 
cmpeqss(RValue<Float> x,RValue<Float> y)7154 		RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
7155 		{
7156 			return cmpss(x, y, 0);
7157 		}
7158 
cmpltss(RValue<Float> x,RValue<Float> y)7159 		RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
7160 		{
7161 			return cmpss(x, y, 1);
7162 		}
7163 
cmpless(RValue<Float> x,RValue<Float> y)7164 		RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
7165 		{
7166 			return cmpss(x, y, 2);
7167 		}
7168 
cmpunordss(RValue<Float> x,RValue<Float> y)7169 		RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
7170 		{
7171 			return cmpss(x, y, 3);
7172 		}
7173 
cmpneqss(RValue<Float> x,RValue<Float> y)7174 		RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
7175 		{
7176 			return cmpss(x, y, 4);
7177 		}
7178 
cmpnltss(RValue<Float> x,RValue<Float> y)7179 		RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
7180 		{
7181 			return cmpss(x, y, 5);
7182 		}
7183 
cmpnless(RValue<Float> x,RValue<Float> y)7184 		RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
7185 		{
7186 			return cmpss(x, y, 6);
7187 		}
7188 
cmpordss(RValue<Float> x,RValue<Float> y)7189 		RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
7190 		{
7191 			return cmpss(x, y, 7);
7192 		}
7193 
pabsd(RValue<Int4> x)7194 		RValue<Int4> pabsd(RValue<Int4> x)
7195 		{
7196 			Module *module = Nucleus::getModule();
7197 			llvm::Function *pabsd = Intrinsic::getDeclaration(module, Intrinsic::x86_ssse3_pabs_d_128);
7198 
7199 			return RValue<Int4>(Nucleus::createCall(pabsd, x.value));
7200 		}
7201 
paddsw(RValue<Short4> x,RValue<Short4> y)7202 		RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
7203 		{
7204 			Module *module = Nucleus::getModule();
7205 			llvm::Function *paddsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_w);
7206 
7207 			return As<Short4>(RValue<MMX>(Nucleus::createCall(paddsw, As<MMX>(x).value, As<MMX>(y).value)));
7208 		}
7209 
psubsw(RValue<Short4> x,RValue<Short4> y)7210 		RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
7211 		{
7212 			Module *module = Nucleus::getModule();
7213 			llvm::Function *psubsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_w);
7214 
7215 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psubsw, As<MMX>(x).value, As<MMX>(y).value)));
7216 		}
7217 
paddusw(RValue<UShort4> x,RValue<UShort4> y)7218 		RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
7219 		{
7220 			Module *module = Nucleus::getModule();
7221 			llvm::Function *paddusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_w);
7222 
7223 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(paddusw, As<MMX>(x).value, As<MMX>(y).value)));
7224 		}
7225 
psubusw(RValue<UShort4> x,RValue<UShort4> y)7226 		RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
7227 		{
7228 			Module *module = Nucleus::getModule();
7229 			llvm::Function *psubusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_w);
7230 
7231 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(psubusw, As<MMX>(x).value, As<MMX>(y).value)));
7232 		}
7233 
paddsb(RValue<SByte8> x,RValue<SByte8> y)7234 		RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
7235 		{
7236 			Module *module = Nucleus::getModule();
7237 			llvm::Function *paddsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_b);
7238 
7239 			return As<SByte8>(RValue<MMX>(Nucleus::createCall(paddsb, As<MMX>(x).value, As<MMX>(y).value)));
7240 		}
7241 
psubsb(RValue<SByte8> x,RValue<SByte8> y)7242 		RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
7243 		{
7244 			Module *module = Nucleus::getModule();
7245 			llvm::Function *psubsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_b);
7246 
7247 			return As<SByte8>(RValue<MMX>(Nucleus::createCall(psubsb, As<MMX>(x).value, As<MMX>(y).value)));
7248 		}
7249 
paddusb(RValue<Byte8> x,RValue<Byte8> y)7250 		RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
7251 		{
7252 			Module *module = Nucleus::getModule();
7253 			llvm::Function *paddusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_b);
7254 
7255 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddusb, As<MMX>(x).value, As<MMX>(y).value)));
7256 		}
7257 
psubusb(RValue<Byte8> x,RValue<Byte8> y)7258 		RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
7259 		{
7260 			Module *module = Nucleus::getModule();
7261 			llvm::Function *psubusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_b);
7262 
7263 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubusb, As<MMX>(x).value, As<MMX>(y).value)));
7264 		}
7265 
paddw(RValue<Short4> x,RValue<Short4> y)7266 		RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
7267 		{
7268 			Module *module = Nucleus::getModule();
7269 			llvm::Function *paddw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_w);
7270 
7271 			return As<Short4>(RValue<MMX>(Nucleus::createCall(paddw, As<MMX>(x).value, As<MMX>(y).value)));
7272 		}
7273 
psubw(RValue<Short4> x,RValue<Short4> y)7274 		RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
7275 		{
7276 			Module *module = Nucleus::getModule();
7277 			llvm::Function *psubw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_w);
7278 
7279 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psubw, As<MMX>(x).value, As<MMX>(y).value)));
7280 		}
7281 
pmullw(RValue<Short4> x,RValue<Short4> y)7282 		RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
7283 		{
7284 			Module *module = Nucleus::getModule();
7285 			llvm::Function *pmullw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmull_w);
7286 
7287 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
7288 		}
7289 
pand(RValue<Short4> x,RValue<Short4> y)7290 		RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
7291 		{
7292 			Module *module = Nucleus::getModule();
7293 			llvm::Function *pand = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pand);
7294 
7295 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pand, As<MMX>(x).value, As<MMX>(y).value)));
7296 		}
7297 
por(RValue<Short4> x,RValue<Short4> y)7298 		RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
7299 		{
7300 			Module *module = Nucleus::getModule();
7301 			llvm::Function *por = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_por);
7302 
7303 			return As<Short4>(RValue<MMX>(Nucleus::createCall(por, As<MMX>(x).value, As<MMX>(y).value)));
7304 		}
7305 
pxor(RValue<Short4> x,RValue<Short4> y)7306 		RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
7307 		{
7308 			Module *module = Nucleus::getModule();
7309 			llvm::Function *pxor = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pxor);
7310 
7311 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pxor, As<MMX>(x).value, As<MMX>(y).value)));
7312 		}
7313 
pshufw(RValue<Short4> x,unsigned char y)7314 		RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
7315 		{
7316 			Module *module = Nucleus::getModule();
7317 			llvm::Function *pshufw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_pshuf_w);
7318 
7319 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pshufw, As<MMX>(x).value, Nucleus::createConstantByte(y))));
7320 		}
7321 
punpcklwd(RValue<Short4> x,RValue<Short4> y)7322 		RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
7323 		{
7324 			Module *module = Nucleus::getModule();
7325 			llvm::Function *punpcklwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklwd);
7326 
7327 			return As<Int2>(RValue<MMX>(Nucleus::createCall(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
7328 		}
7329 
punpckhwd(RValue<Short4> x,RValue<Short4> y)7330 		RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
7331 		{
7332 			Module *module = Nucleus::getModule();
7333 			llvm::Function *punpckhwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhwd);
7334 
7335 			return As<Int2>(RValue<MMX>(Nucleus::createCall(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
7336 		}
7337 
pinsrw(RValue<Short4> x,RValue<Int> y,unsigned int i)7338 		RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
7339 		{
7340 			Module *module = Nucleus::getModule();
7341 			llvm::Function *pinsrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pinsr_w);
7342 
7343 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pinsrw, As<MMX>(x).value, y.value, Nucleus::createConstantInt(i))));
7344 		}
7345 
pextrw(RValue<Short4> x,unsigned int i)7346 		RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
7347 		{
7348 			Module *module = Nucleus::getModule();
7349 			llvm::Function *pextrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pextr_w);
7350 
7351 			return RValue<Int>(Nucleus::createCall(pextrw, As<MMX>(x).value, Nucleus::createConstantInt(i)));
7352 		}
7353 
punpckldq(RValue<Int2> x,RValue<Int2> y)7354 		RValue<Long1> punpckldq(RValue<Int2> x, RValue<Int2> y)
7355 		{
7356 			Module *module = Nucleus::getModule();
7357 			llvm::Function *punpckldq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckldq);
7358 
7359 			return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
7360 		}
7361 
punpckhdq(RValue<Int2> x,RValue<Int2> y)7362 		RValue<Long1> punpckhdq(RValue<Int2> x, RValue<Int2> y)
7363 		{
7364 			Module *module = Nucleus::getModule();
7365 			llvm::Function *punpckhdq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhdq);
7366 
7367 			return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
7368 		}
7369 
punpcklbw(RValue<Byte8> x,RValue<Byte8> y)7370 		RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
7371 		{
7372 			Module *module = Nucleus::getModule();
7373 			llvm::Function *punpcklbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklbw);
7374 
7375 			return As<Short4>(RValue<MMX>(Nucleus::createCall(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
7376 		}
7377 
punpckhbw(RValue<Byte8> x,RValue<Byte8> y)7378 		RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
7379 		{
7380 			Module *module = Nucleus::getModule();
7381 			llvm::Function *punpckhbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhbw);
7382 
7383 			return As<Short4>(RValue<MMX>(Nucleus::createCall(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
7384 		}
7385 
paddb(RValue<Byte8> x,RValue<Byte8> y)7386 		RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
7387 		{
7388 			Module *module = Nucleus::getModule();
7389 			llvm::Function *paddb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_b);
7390 
7391 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddb, As<MMX>(x).value, As<MMX>(y).value)));
7392 		}
7393 
psubb(RValue<Byte8> x,RValue<Byte8> y)7394 		RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
7395 		{
7396 			Module *module = Nucleus::getModule();
7397 			llvm::Function *psubb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_b);
7398 
7399 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubb, As<MMX>(x).value, As<MMX>(y).value)));
7400 		}
7401 
paddd(RValue<Int2> x,RValue<Int2> y)7402 		RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
7403 		{
7404 			Module *module = Nucleus::getModule();
7405 			llvm::Function *paddd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_d);
7406 
7407 			return As<Int2>(RValue<MMX>(Nucleus::createCall(paddd, As<MMX>(x).value, As<MMX>(y).value)));
7408 		}
7409 
psubd(RValue<Int2> x,RValue<Int2> y)7410 		RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
7411 		{
7412 			Module *module = Nucleus::getModule();
7413 			llvm::Function *psubd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_d);
7414 
7415 			return As<Int2>(RValue<MMX>(Nucleus::createCall(psubd, As<MMX>(x).value, As<MMX>(y).value)));
7416 		}
7417 
pavgw(RValue<UShort4> x,RValue<UShort4> y)7418 		RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
7419 		{
7420 			Module *module = Nucleus::getModule();
7421 			llvm::Function *pavgw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pavg_w);
7422 
7423 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(pavgw, As<MMX>(x).value, As<MMX>(y).value)));
7424 		}
7425 
pmaxsw(RValue<Short4> x,RValue<Short4> y)7426 		RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
7427 		{
7428 			Module *module = Nucleus::getModule();
7429 			llvm::Function *pmaxsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmaxs_w);
7430 
7431 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pmaxsw, As<MMX>(x).value, As<MMX>(y).value)));
7432 		}
7433 
pminsw(RValue<Short4> x,RValue<Short4> y)7434 		RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
7435 		{
7436 			Module *module = Nucleus::getModule();
7437 			llvm::Function *pminsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmins_w);
7438 
7439 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pminsw,  As<MMX>(x).value, As<MMX>(y).value)));
7440 		}
7441 
pcmpgtw(RValue<Short4> x,RValue<Short4> y)7442 		RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
7443 		{
7444 			Module *module = Nucleus::getModule();
7445 			llvm::Function *pcmpgtw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_w);
7446 
7447 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value)));
7448 		}
7449 
pcmpeqw(RValue<Short4> x,RValue<Short4> y)7450 		RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
7451 		{
7452 			Module *module = Nucleus::getModule();
7453 			llvm::Function *pcmpeqw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_w);
7454 
7455 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value)));
7456 		}
7457 
pcmpgtb(RValue<SByte8> x,RValue<SByte8> y)7458 		RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
7459 		{
7460 			Module *module = Nucleus::getModule();
7461 			llvm::Function *pcmpgtb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_b);
7462 
7463 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value)));
7464 		}
7465 
pcmpeqb(RValue<Byte8> x,RValue<Byte8> y)7466 		RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
7467 		{
7468 			Module *module = Nucleus::getModule();
7469 			llvm::Function *pcmpeqb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_b);
7470 
7471 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value)));
7472 		}
7473 
packssdw(RValue<Int2> x,RValue<Int2> y)7474 		RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
7475 		{
7476 			Module *module = Nucleus::getModule();
7477 			llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packssdw);
7478 
7479 			return As<Short4>(RValue<MMX>(Nucleus::createCall(packssdw, As<MMX>(x).value, As<MMX>(y).value)));
7480 		}
7481 
packssdw(RValue<Int4> x,RValue<Int4> y)7482 		RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
7483 		{
7484 			if(CPUID::supportsSSE2())
7485 			{
7486 				Module *module = Nucleus::getModule();
7487 				llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_packssdw_128);
7488 
7489 				return RValue<Short8>(Nucleus::createCall(packssdw, x.value, y.value));
7490 			}
7491 			else
7492 			{
7493 				Int2 loX = Int2(x);
7494 				Int2 hiX = Int2(Swizzle(x, 0xEE));
7495 
7496 				Int2 loY = Int2(y);
7497 				Int2 hiY = Int2(Swizzle(y, 0xEE));
7498 
7499 				Short4 lo = x86::packssdw(loX, hiX);
7500 				Short4 hi = x86::packssdw(loY, hiY);
7501 
7502 				return Short8(lo, hi);
7503 			}
7504 		}
7505 
packsswb(RValue<Short4> x,RValue<Short4> y)7506 		RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
7507 		{
7508 			Module *module = Nucleus::getModule();
7509 			llvm::Function *packsswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packsswb);
7510 
7511 			return As<SByte8>(RValue<MMX>(Nucleus::createCall(packsswb, As<MMX>(x).value, As<MMX>(y).value)));
7512 		}
7513 
packuswb(RValue<UShort4> x,RValue<UShort4> y)7514 		RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
7515 		{
7516 			Module *module = Nucleus::getModule();
7517 			llvm::Function *packuswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packuswb);
7518 
7519 			return As<Byte8>(RValue<MMX>(Nucleus::createCall(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
7520 		}
7521 
packusdw(RValue<UInt4> x,RValue<UInt4> y)7522 		RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
7523 		{
7524 			if(CPUID::supportsSSE4_1())
7525 			{
7526 				Module *module = Nucleus::getModule();
7527 				llvm::Function *packusdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_packusdw);
7528 
7529 				return RValue<UShort8>(Nucleus::createCall(packusdw, x.value, y.value));
7530 			}
7531 			else
7532 			{
7533 				// FIXME: Not an exact replacement!
7534 				return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
7535 			}
7536 		}
7537 
psrlw(RValue<UShort4> x,unsigned char y)7538 		RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
7539 		{
7540 			Module *module = Nucleus::getModule();
7541 			llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_w);
7542 
7543 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7544 		}
7545 
psrlw(RValue<UShort8> x,unsigned char y)7546 		RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
7547 		{
7548 			Module *module = Nucleus::getModule();
7549 			llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_w);
7550 
7551 			return RValue<UShort8>(Nucleus::createCall(psrlw, x.value, Nucleus::createConstantInt(y)));
7552 		}
7553 
psraw(RValue<Short4> x,unsigned char y)7554 		RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
7555 		{
7556 			Module *module = Nucleus::getModule();
7557 			llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_w);
7558 
7559 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7560 		}
7561 
psraw(RValue<Short8> x,unsigned char y)7562 		RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
7563 		{
7564 			Module *module = Nucleus::getModule();
7565 			llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_w);
7566 
7567 			return RValue<Short8>(Nucleus::createCall(psraw, x.value, Nucleus::createConstantInt(y)));
7568 		}
7569 
psllw(RValue<Short4> x,unsigned char y)7570 		RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
7571 		{
7572 			Module *module = Nucleus::getModule();
7573 			llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_w);
7574 
7575 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7576 		}
7577 
psllw(RValue<Short8> x,unsigned char y)7578 		RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
7579 		{
7580 			Module *module = Nucleus::getModule();
7581 			llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_w);
7582 
7583 			return RValue<Short8>(Nucleus::createCall(psllw, x.value, Nucleus::createConstantInt(y)));
7584 		}
7585 
pslld(RValue<Int2> x,unsigned char y)7586 		RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
7587 		{
7588 			Module *module = Nucleus::getModule();
7589 			llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_d);
7590 
7591 			return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7592 		}
7593 
pslld(RValue<Int4> x,unsigned char y)7594 		RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
7595 		{
7596 			if(CPUID::supportsSSE2())
7597 			{
7598 				Module *module = Nucleus::getModule();
7599 				llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_d);
7600 
7601 				return RValue<Int4>(Nucleus::createCall(pslld, x.value, Nucleus::createConstantInt(y)));
7602 			}
7603 			else
7604 			{
7605 				Int2 lo = Int2(x);
7606 				Int2 hi = Int2(Swizzle(x, 0xEE));
7607 
7608 				lo = x86::pslld(lo, y);
7609 				hi = x86::pslld(hi, y);
7610 
7611 				return Int4(lo, hi);
7612 			}
7613 		}
7614 
psrad(RValue<Int2> x,unsigned char y)7615 		RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
7616 		{
7617 			Module *module = Nucleus::getModule();
7618 			llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_d);
7619 
7620 			return As<Int2>(RValue<MMX>(Nucleus::createCall(psrad, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7621 		}
7622 
psrad(RValue<Int4> x,unsigned char y)7623 		RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
7624 		{
7625 			if(CPUID::supportsSSE2())
7626 			{
7627 				Module *module = Nucleus::getModule();
7628 				llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_d);
7629 
7630 				return RValue<Int4>(Nucleus::createCall(psrad, x.value, Nucleus::createConstantInt(y)));
7631 			}
7632 			else
7633 			{
7634 				Int2 lo = Int2(x);
7635 				Int2 hi = Int2(Swizzle(x, 0xEE));
7636 
7637 				lo = x86::psrad(lo, y);
7638 				hi = x86::psrad(hi, y);
7639 
7640 				return Int4(lo, hi);
7641 			}
7642 		}
7643 
psrld(RValue<UInt2> x,unsigned char y)7644 		RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7645 		{
7646 			Module *module = Nucleus::getModule();
7647 			llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_d);
7648 
7649 			return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7650 		}
7651 
psrld(RValue<UInt4> x,unsigned char y)7652 		RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7653 		{
7654 			if(CPUID::supportsSSE2())
7655 			{
7656 				Module *module = Nucleus::getModule();
7657 				llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_d);
7658 
7659 				return RValue<UInt4>(Nucleus::createCall(psrld, x.value, Nucleus::createConstantInt(y)));
7660 			}
7661 			else
7662 			{
7663 				UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
7664 				UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
7665 
7666 				lo = x86::psrld(lo, y);
7667 				hi = x86::psrld(hi, y);
7668 
7669 				return UInt4(lo, hi);
7670 			}
7671 		}
7672 
psrlw(RValue<UShort4> x,RValue<Long1> y)7673 		RValue<UShort4> psrlw(RValue<UShort4> x, RValue<Long1> y)
7674 		{
7675 			Module *module = Nucleus::getModule();
7676 			llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_w);
7677 
7678 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, As<MMX>(y).value)));
7679 		}
7680 
psraw(RValue<Short4> x,RValue<Long1> y)7681 		RValue<Short4> psraw(RValue<Short4> x, RValue<Long1> y)
7682 		{
7683 			Module *module = Nucleus::getModule();
7684 			llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_w);
7685 
7686 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, As<MMX>(y).value)));
7687 		}
7688 
psllw(RValue<Short4> x,RValue<Long1> y)7689 		RValue<Short4> psllw(RValue<Short4> x, RValue<Long1> y)
7690 		{
7691 			Module *module = Nucleus::getModule();
7692 			llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_w);
7693 
7694 			return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, As<MMX>(y).value)));
7695 		}
7696 
pslld(RValue<Int2> x,RValue<Long1> y)7697 		RValue<Int2> pslld(RValue<Int2> x, RValue<Long1> y)
7698 		{
7699 			Module *module = Nucleus::getModule();
7700 			llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_d);
7701 
7702 			return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, As<MMX>(y).value)));
7703 		}
7704 
psrld(RValue<UInt2> x,RValue<Long1> y)7705 		RValue<UInt2> psrld(RValue<UInt2> x, RValue<Long1> y)
7706 		{
7707 			Module *module = Nucleus::getModule();
7708 			llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_d);
7709 
7710 			return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value)));
7711 		}
7712 
psrad(RValue<Int2> x,RValue<Long1> y)7713 		RValue<Int2> psrad(RValue<Int2> x, RValue<Long1> y)
7714 		{
7715 			Module *module = Nucleus::getModule();
7716 			llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_d);
7717 
7718 			return As<Int2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value)));
7719 		}
7720 
pmaxsd(RValue<Int4> x,RValue<Int4> y)7721 		RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7722 		{
7723 			Module *module = Nucleus::getModule();
7724 			llvm::Function *pmaxsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxsd);
7725 
7726 			return RValue<Int4>(Nucleus::createCall(pmaxsd, x.value, y.value));
7727 		}
7728 
pminsd(RValue<Int4> x,RValue<Int4> y)7729 		RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7730 		{
7731 			Module *module = Nucleus::getModule();
7732 			llvm::Function *pminsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminsd);
7733 
7734 			return RValue<Int4>(Nucleus::createCall(pminsd, x.value, y.value));
7735 		}
7736 
pmaxud(RValue<UInt4> x,RValue<UInt4> y)7737 		RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7738 		{
7739 			Module *module = Nucleus::getModule();
7740 			llvm::Function *pmaxud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxud);
7741 
7742 			return RValue<UInt4>(Nucleus::createCall(pmaxud, x.value, y.value));
7743 		}
7744 
pminud(RValue<UInt4> x,RValue<UInt4> y)7745 		RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7746 		{
7747 			Module *module = Nucleus::getModule();
7748 			llvm::Function *pminud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminud);
7749 
7750 			return RValue<UInt4>(Nucleus::createCall(pminud, x.value, y.value));
7751 		}
7752 
pmulhw(RValue<Short4> x,RValue<Short4> y)7753 		RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7754 		{
7755 			Module *module = Nucleus::getModule();
7756 			llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulh_w);
7757 
7758 			return As<Short4>(RValue<MMX>(Nucleus::createCall(pmulhw, As<MMX>(x).value, As<MMX>(y).value)));
7759 		}
7760 
pmulhuw(RValue<UShort4> x,RValue<UShort4> y)7761 		RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7762 		{
7763 			Module *module = Nucleus::getModule();
7764 			llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulhu_w);
7765 
7766 			return As<UShort4>(RValue<MMX>(Nucleus::createCall(pmulhuw, As<MMX>(x).value, As<MMX>(y).value)));
7767 		}
7768 
pmaddwd(RValue<Short4> x,RValue<Short4> y)7769 		RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7770 		{
7771 			Module *module = Nucleus::getModule();
7772 			llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmadd_wd);
7773 
7774 			return As<Int2>(RValue<MMX>(Nucleus::createCall(pmaddwd, As<MMX>(x).value, As<MMX>(y).value)));
7775 		}
7776 
pmulhw(RValue<Short8> x,RValue<Short8> y)7777 		RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7778 		{
7779 			Module *module = Nucleus::getModule();
7780 			llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulh_w);
7781 
7782 			return RValue<Short8>(Nucleus::createCall(pmulhw, x.value, y.value));
7783 		}
7784 
pmulhuw(RValue<UShort8> x,RValue<UShort8> y)7785 		RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7786 		{
7787 			Module *module = Nucleus::getModule();
7788 			llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulhu_w);
7789 
7790 			return RValue<UShort8>(Nucleus::createCall(pmulhuw, x.value, y.value));
7791 		}
7792 
pmaddwd(RValue<Short8> x,RValue<Short8> y)7793 		RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7794 		{
7795 			Module *module = Nucleus::getModule();
7796 			llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmadd_wd);
7797 
7798 			return RValue<Int4>(Nucleus::createCall(pmaddwd, x.value, y.value));
7799 		}
7800 
movmskps(RValue<Float4> x)7801 		RValue<Int> movmskps(RValue<Float4> x)
7802 		{
7803 			Module *module = Nucleus::getModule();
7804 			llvm::Function *movmskps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_movmsk_ps);
7805 
7806 			return RValue<Int>(Nucleus::createCall(movmskps, x.value));
7807 		}
7808 
pmovmskb(RValue<Byte8> x)7809 		RValue<Int> pmovmskb(RValue<Byte8> x)
7810 		{
7811 			Module *module = Nucleus::getModule();
7812 			llvm::Function *pmovmskb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmovmskb);
7813 
7814 			return RValue<Int>(Nucleus::createCall(pmovmskb, As<MMX>(x).value));
7815 		}
7816 
7817 		//RValue<Int2> movd(RValue<Pointer<Int>> x)
7818 		//{
7819 		//	Value *element = Nucleus::createLoad(x.value);
7820 
7821 		////	Value *int2 = UndefValue::get(Int2::getType());
7822 		////	int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
7823 
7824 		//	Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
7825 
7826 		//	return RValue<Int2>(int2);
7827 		//}
7828 
7829 		//RValue<Int2> movdq2q(RValue<Int4> x)
7830 		//{
7831 		//	Value *long2 = Nucleus::createBitCast(x.value, Long2::getType());
7832 		//	Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
7833 
7834 		//	return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
7835 		//}
7836 
pmovzxbd(RValue<Int4> x)7837 		RValue<Int4> pmovzxbd(RValue<Int4> x)
7838 		{
7839 			Module *module = Nucleus::getModule();
7840 			llvm::Function *pmovzxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxbd);
7841 
7842 			return RValue<Int4>(Nucleus::createCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType())));
7843 		}
7844 
pmovsxbd(RValue<Int4> x)7845 		RValue<Int4> pmovsxbd(RValue<Int4> x)
7846 		{
7847 			Module *module = Nucleus::getModule();
7848 			llvm::Function *pmovsxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxbd);
7849 
7850 			return RValue<Int4>(Nucleus::createCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType())));
7851 		}
7852 
pmovzxwd(RValue<Int4> x)7853 		RValue<Int4> pmovzxwd(RValue<Int4> x)
7854 		{
7855 			Module *module = Nucleus::getModule();
7856 			llvm::Function *pmovzxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxwd);
7857 
7858 			return RValue<Int4>(Nucleus::createCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType())));
7859 		}
7860 
pmovsxwd(RValue<Int4> x)7861 		RValue<Int4> pmovsxwd(RValue<Int4> x)
7862 		{
7863 			Module *module = Nucleus::getModule();
7864 			llvm::Function *pmovsxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxwd);
7865 
7866 			return RValue<Int4>(Nucleus::createCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType())));
7867 		}
7868 
emms()7869 		void emms()
7870 		{
7871 			Module *module = Nucleus::getModule();
7872 			llvm::Function *emms = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_emms);
7873 
7874 			Nucleus::createCall(emms);
7875 		}
7876 	}
7877 }
7878