1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Nucleus.hpp"
16
17 #include "llvm/Support/IRBuilder.h"
18 #include "llvm/Function.h"
19 #include "llvm/GlobalVariable.h"
20 #include "llvm/Module.h"
21 #include "llvm/LLVMContext.h"
22 #include "llvm/Constants.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/PassManager.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "../lib/ExecutionEngine/JIT/JIT.h"
31
32 #include "Routine.hpp"
33 #include "RoutineManager.hpp"
34 #include "x86.hpp"
35 #include "CPUID.hpp"
36 #include "Thread.hpp"
37 #include "Memory.hpp"
38
39 #include <xmmintrin.h>
40 #include <fstream>
41
#if defined(_WIN32) && defined(__x86_64__)
	// Stub definition of X86CompilationCallback, only compiled for 64-bit
	// Windows builds. It is not implemented: reaching it trips the assert.
	extern "C" void X86CompilationCallback()
	{
		assert(false);   // UNIMPLEMENTED
	}
#endif
48
// Entry points of the CodeAnalyst JIT notification library. All three start
// out null; the Nucleus constructor fills them in on Windows when
// CAJitNtfyLib.dll can be loaded.
extern "C"
{
	// Resolved from the "CAJIT_Initialize" export.
	bool (*CodeAnalystInitialize)() = 0;

	// Resolved from the "CAJIT_CompleteJITLog" export.
	void (*CodeAnalystCompleteJITLog)() = 0;

	// Resolved from the "CAJIT_LogJITCode" export.
	bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr,
	                              unsigned int jitCodeSize,
	                              const wchar_t *functionName) = 0;
}
55
// Declaration of an LLVM global flag that is defined outside this file; the
// Nucleus constructor sets it to false.
namespace llvm
{
	extern bool JITEmitDebugInfo;
}
60
61 namespace sw
62 {
63 Optimization optimization[10] = {InstructionCombining, Disabled};
64
65 using namespace llvm;
66
67 RoutineManager *Nucleus::routineManager = 0;
68 ExecutionEngine *Nucleus::executionEngine = 0;
69 Builder *Nucleus::builder = 0;
70 LLVMContext *Nucleus::context = 0;
71 Module *Nucleus::module = 0;
72 llvm::Function *Nucleus::function = 0;
73 BackoffLock Nucleus::codegenMutex;
74
75 class Builder : public IRBuilder<>
76 {
77 };
78
Nucleus()79 Nucleus::Nucleus()
80 {
81 codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
82
83 InitializeNativeTarget();
84 JITEmitDebugInfo = false;
85
86 if(!context)
87 {
88 context = new LLVMContext();
89 }
90
91 module = new Module("", *context);
92 routineManager = new RoutineManager();
93
94 #if defined(__x86_64__)
95 const char *architecture = "x86-64";
96 #else
97 const char *architecture = "x86";
98 #endif
99
100 SmallVector<std::string, 1> MAttrs;
101 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
102 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
103 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
104 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
105 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
106 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
107 MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
108
109 std::string error;
110 TargetMachine *targetMachine = EngineBuilder::selectTarget(module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
111 executionEngine = JIT::createJIT(module, 0, routineManager, CodeGenOpt::Aggressive, true, targetMachine);
112
113 if(!builder)
114 {
115 builder = static_cast<Builder*>(new IRBuilder<>(*context));
116
117 #if defined(_WIN32)
118 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
119 if(CodeAnalyst)
120 {
121 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
122 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
123 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
124
125 CodeAnalystInitialize();
126 }
127 #endif
128 }
129 }
130
~Nucleus()131 Nucleus::~Nucleus()
132 {
133 delete executionEngine;
134 executionEngine = 0;
135
136 routineManager = 0;
137 function = 0;
138 module = 0;
139
140 codegenMutex.unlock();
141 }
142
acquireRoutine(const wchar_t * name,bool runOptimizations)143 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
144 {
145 if(builder->GetInsertBlock()->empty() || !builder->GetInsertBlock()->back().isTerminator())
146 {
147 Type *type = function->getReturnType();
148
149 if(type->isVoidTy())
150 {
151 createRetVoid();
152 }
153 else
154 {
155 createRet(UndefValue::get(type));
156 }
157 }
158
159 if(false)
160 {
161 std::string error;
162 raw_fd_ostream file("llvm-dump-unopt.txt", error);
163 module->print(file, 0);
164 }
165
166 if(runOptimizations)
167 {
168 optimize();
169 }
170
171 if(false)
172 {
173 std::string error;
174 raw_fd_ostream file("llvm-dump-opt.txt", error);
175 module->print(file, 0);
176 }
177
178 void *entry = executionEngine->getPointerToFunction(function);
179 Routine *routine = routineManager->acquireRoutine(entry);
180
181 if(CodeAnalystLogJITCode)
182 {
183 CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
184 }
185
186 return routine;
187 }
188
optimize()189 void Nucleus::optimize()
190 {
191 static PassManager *passManager = 0;
192
193 if(!passManager)
194 {
195 passManager = new PassManager();
196
197 UnsafeFPMath = true;
198 // NoInfsFPMath = true;
199 // NoNaNsFPMath = true;
200
201 passManager->add(new TargetData(*executionEngine->getTargetData()));
202 passManager->add(createScalarReplAggregatesPass());
203
204 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
205 {
206 switch(optimization[pass])
207 {
208 case Disabled: break;
209 case CFGSimplification: passManager->add(createCFGSimplificationPass()); break;
210 case LICM: passManager->add(createLICMPass()); break;
211 case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break;
212 case GVN: passManager->add(createGVNPass()); break;
213 case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
214 case Reassociate: passManager->add(createReassociatePass()); break;
215 case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
216 case SCCP: passManager->add(createSCCPPass()); break;
217 case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
218 default:
219 assert(false);
220 }
221 }
222 }
223
224 passManager->run(*module);
225 }
226
setFunction(llvm::Function * function)227 void Nucleus::setFunction(llvm::Function *function)
228 {
229 Nucleus::function = function;
230
231 builder->SetInsertPoint(BasicBlock::Create(*context, "", function));
232 }
233
getModule()234 Module *Nucleus::getModule()
235 {
236 return module;
237 }
238
getFunction()239 llvm::Function *Nucleus::getFunction()
240 {
241 return function;
242 }
243
getContext()244 llvm::LLVMContext *Nucleus::getContext()
245 {
246 return context;
247 }
248
allocateStackVariable(Type * type,int arraySize)249 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
250 {
251 // Need to allocate it in the entry block for mem2reg to work
252 llvm::Function *function = getFunction();
253 BasicBlock &entryBlock = function->getEntryBlock();
254
255 Instruction *declaration;
256
257 if(arraySize)
258 {
259 declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
260 }
261 else
262 {
263 declaration = new AllocaInst(type, (Value*)0);
264 }
265
266 entryBlock.getInstList().push_front(declaration);
267
268 return declaration;
269 }
270
createBasicBlock()271 BasicBlock *Nucleus::createBasicBlock()
272 {
273 return BasicBlock::Create(*context, "", Nucleus::getFunction());
274 }
275
getInsertBlock()276 BasicBlock *Nucleus::getInsertBlock()
277 {
278 return builder->GetInsertBlock();
279 }
280
setInsertBlock(BasicBlock * basicBlock)281 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
282 {
283 // assert(builder->GetInsertBlock()->back().isTerminator());
284 return builder->SetInsertPoint(basicBlock);
285 }
286
getPredecessor(BasicBlock * basicBlock)287 BasicBlock *Nucleus::getPredecessor(BasicBlock *basicBlock)
288 {
289 return *pred_begin(basicBlock);
290 }
291
createFunction(llvm::Type * ReturnType,std::vector<llvm::Type * > & Params)292 llvm::Function *Nucleus::createFunction(llvm::Type *ReturnType, std::vector<llvm::Type*> &Params)
293 {
294 llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, Params, false);
295 llvm::Function *function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", Nucleus::getModule());
296 function->setCallingConv(llvm::CallingConv::C);
297
298 return function;
299 }
300
getArgument(llvm::Function * function,unsigned int index)301 llvm::Value *Nucleus::getArgument(llvm::Function *function, unsigned int index)
302 {
303 llvm::Function::arg_iterator args = function->arg_begin();
304
305 while(index)
306 {
307 args++;
308 index--;
309 }
310
311 return &*args;
312 }
313
createRetVoid()314 Value *Nucleus::createRetVoid()
315 {
316 x86::emms();
317
318 return builder->CreateRetVoid();
319 }
320
createRet(Value * V)321 Value *Nucleus::createRet(Value *V)
322 {
323 x86::emms();
324
325 return builder->CreateRet(V);
326 }
327
createBr(BasicBlock * dest)328 Value *Nucleus::createBr(BasicBlock *dest)
329 {
330 return builder->CreateBr(dest);
331 }
332
createCondBr(Value * cond,BasicBlock * ifTrue,BasicBlock * ifFalse)333 Value *Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
334 {
335 return builder->CreateCondBr(cond, ifTrue, ifFalse);
336 }
337
createAdd(Value * lhs,Value * rhs)338 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
339 {
340 return builder->CreateAdd(lhs, rhs);
341 }
342
createSub(Value * lhs,Value * rhs)343 Value *Nucleus::createSub(Value *lhs, Value *rhs)
344 {
345 return builder->CreateSub(lhs, rhs);
346 }
347
createMul(Value * lhs,Value * rhs)348 Value *Nucleus::createMul(Value *lhs, Value *rhs)
349 {
350 return builder->CreateMul(lhs, rhs);
351 }
352
createUDiv(Value * lhs,Value * rhs)353 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
354 {
355 return builder->CreateUDiv(lhs, rhs);
356 }
357
createSDiv(Value * lhs,Value * rhs)358 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
359 {
360 return builder->CreateSDiv(lhs, rhs);
361 }
362
createFAdd(Value * lhs,Value * rhs)363 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
364 {
365 return builder->CreateFAdd(lhs, rhs);
366 }
367
createFSub(Value * lhs,Value * rhs)368 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
369 {
370 return builder->CreateFSub(lhs, rhs);
371 }
372
createFMul(Value * lhs,Value * rhs)373 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
374 {
375 return builder->CreateFMul(lhs, rhs);
376 }
377
createFDiv(Value * lhs,Value * rhs)378 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
379 {
380 return builder->CreateFDiv(lhs, rhs);
381 }
382
createURem(Value * lhs,Value * rhs)383 Value *Nucleus::createURem(Value *lhs, Value *rhs)
384 {
385 return builder->CreateURem(lhs, rhs);
386 }
387
createSRem(Value * lhs,Value * rhs)388 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
389 {
390 return builder->CreateSRem(lhs, rhs);
391 }
392
createFRem(Value * lhs,Value * rhs)393 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
394 {
395 return builder->CreateFRem(lhs, rhs);
396 }
397
createShl(Value * lhs,Value * rhs)398 Value *Nucleus::createShl(Value *lhs, Value *rhs)
399 {
400 return builder->CreateShl(lhs, rhs);
401 }
402
createLShr(Value * lhs,Value * rhs)403 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
404 {
405 return builder->CreateLShr(lhs, rhs);
406 }
407
createAShr(Value * lhs,Value * rhs)408 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
409 {
410 return builder->CreateAShr(lhs, rhs);
411 }
412
createAnd(Value * lhs,Value * rhs)413 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
414 {
415 return builder->CreateAnd(lhs, rhs);
416 }
417
createOr(Value * lhs,Value * rhs)418 Value *Nucleus::createOr(Value *lhs, Value *rhs)
419 {
420 return builder->CreateOr(lhs, rhs);
421 }
422
createXor(Value * lhs,Value * rhs)423 Value *Nucleus::createXor(Value *lhs, Value *rhs)
424 {
425 return builder->CreateXor(lhs, rhs);
426 }
427
createNeg(Value * V)428 Value *Nucleus::createNeg(Value *V)
429 {
430 return builder->CreateNeg(V);
431 }
432
createFNeg(Value * V)433 Value *Nucleus::createFNeg(Value *V)
434 {
435 return builder->CreateFNeg(V);
436 }
437
createNot(Value * V)438 Value *Nucleus::createNot(Value *V)
439 {
440 return builder->CreateNot(V);
441 }
442
createLoad(Value * ptr,bool isVolatile,unsigned int align)443 Value *Nucleus::createLoad(Value *ptr, bool isVolatile, unsigned int align)
444 {
445 return builder->Insert(new LoadInst(ptr, "", isVolatile, align));
446 }
447
createStore(Value * value,Value * ptr,bool isVolatile,unsigned int align)448 Value *Nucleus::createStore(Value *value, Value *ptr, bool isVolatile, unsigned int align)
449 {
450 return builder->Insert(new StoreInst(value, ptr, isVolatile, align));
451 }
452
createGEP(Value * ptr,Value * index)453 Value *Nucleus::createGEP(Value *ptr, Value *index)
454 {
455 return builder->CreateGEP(ptr, index);
456 }
457
createAtomicAdd(Value * ptr,Value * value)458 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
459 {
460 return builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent);
461 }
462
createTrunc(Value * V,Type * destType)463 Value *Nucleus::createTrunc(Value *V, Type *destType)
464 {
465 return builder->CreateTrunc(V, destType);
466 }
467
createZExt(Value * V,Type * destType)468 Value *Nucleus::createZExt(Value *V, Type *destType)
469 {
470 return builder->CreateZExt(V, destType);
471 }
472
createSExt(Value * V,Type * destType)473 Value *Nucleus::createSExt(Value *V, Type *destType)
474 {
475 return builder->CreateSExt(V, destType);
476 }
477
createFPToUI(Value * V,Type * destType)478 Value *Nucleus::createFPToUI(Value *V, Type *destType)
479 {
480 return builder->CreateFPToUI(V, destType);
481 }
482
createFPToSI(Value * V,Type * destType)483 Value *Nucleus::createFPToSI(Value *V, Type *destType)
484 {
485 return builder->CreateFPToSI(V, destType);
486 }
487
createUIToFP(Value * V,Type * destType)488 Value *Nucleus::createUIToFP(Value *V, Type *destType)
489 {
490 return builder->CreateUIToFP(V, destType);
491 }
492
createSIToFP(Value * V,Type * destType)493 Value *Nucleus::createSIToFP(Value *V, Type *destType)
494 {
495 return builder->CreateSIToFP(V, destType);
496 }
497
createFPTrunc(Value * V,Type * destType)498 Value *Nucleus::createFPTrunc(Value *V, Type *destType)
499 {
500 return builder->CreateFPTrunc(V, destType);
501 }
502
createFPExt(Value * V,Type * destType)503 Value *Nucleus::createFPExt(Value *V, Type *destType)
504 {
505 return builder->CreateFPExt(V, destType);
506 }
507
createPtrToInt(Value * V,Type * destType)508 Value *Nucleus::createPtrToInt(Value *V, Type *destType)
509 {
510 return builder->CreatePtrToInt(V, destType);
511 }
512
createIntToPtr(Value * V,Type * destType)513 Value *Nucleus::createIntToPtr(Value *V, Type *destType)
514 {
515 return builder->CreateIntToPtr(V, destType);
516 }
517
createBitCast(Value * V,Type * destType)518 Value *Nucleus::createBitCast(Value *V, Type *destType)
519 {
520 return builder->CreateBitCast(V, destType);
521 }
522
createIntCast(Value * V,Type * destType,bool isSigned)523 Value *Nucleus::createIntCast(Value *V, Type *destType, bool isSigned)
524 {
525 return builder->CreateIntCast(V, destType, isSigned);
526 }
527
createICmpEQ(Value * lhs,Value * rhs)528 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
529 {
530 return builder->CreateICmpEQ(lhs, rhs);
531 }
532
createICmpNE(Value * lhs,Value * rhs)533 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
534 {
535 return builder->CreateICmpNE(lhs, rhs);
536 }
537
createICmpUGT(Value * lhs,Value * rhs)538 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
539 {
540 return builder->CreateICmpUGT(lhs, rhs);
541 }
542
createICmpUGE(Value * lhs,Value * rhs)543 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
544 {
545 return builder->CreateICmpUGE(lhs, rhs);
546 }
547
createICmpULT(Value * lhs,Value * rhs)548 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
549 {
550 return builder->CreateICmpULT(lhs, rhs);
551 }
552
createICmpULE(Value * lhs,Value * rhs)553 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
554 {
555 return builder->CreateICmpULE(lhs, rhs);
556 }
557
createICmpSGT(Value * lhs,Value * rhs)558 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
559 {
560 return builder->CreateICmpSGT(lhs, rhs);
561 }
562
createICmpSGE(Value * lhs,Value * rhs)563 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
564 {
565 return builder->CreateICmpSGE(lhs, rhs);
566 }
567
createICmpSLT(Value * lhs,Value * rhs)568 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
569 {
570 return builder->CreateICmpSLT(lhs, rhs);
571 }
572
createICmpSLE(Value * lhs,Value * rhs)573 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
574 {
575 return builder->CreateICmpSLE(lhs, rhs);
576 }
577
createFCmpOEQ(Value * lhs,Value * rhs)578 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
579 {
580 return builder->CreateFCmpOEQ(lhs, rhs);
581 }
582
createFCmpOGT(Value * lhs,Value * rhs)583 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
584 {
585 return builder->CreateFCmpOGT(lhs, rhs);
586 }
587
createFCmpOGE(Value * lhs,Value * rhs)588 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
589 {
590 return builder->CreateFCmpOGE(lhs, rhs);
591 }
592
createFCmpOLT(Value * lhs,Value * rhs)593 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
594 {
595 return builder->CreateFCmpOLT(lhs, rhs);
596 }
597
createFCmpOLE(Value * lhs,Value * rhs)598 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
599 {
600 return builder->CreateFCmpOLE(lhs, rhs);
601 }
602
createFCmpONE(Value * lhs,Value * rhs)603 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
604 {
605 return builder->CreateFCmpONE(lhs, rhs);
606 }
607
createFCmpORD(Value * lhs,Value * rhs)608 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
609 {
610 return builder->CreateFCmpORD(lhs, rhs);
611 }
612
createFCmpUNO(Value * lhs,Value * rhs)613 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
614 {
615 return builder->CreateFCmpUNO(lhs, rhs);
616 }
617
createFCmpUEQ(Value * lhs,Value * rhs)618 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
619 {
620 return builder->CreateFCmpUEQ(lhs, rhs);
621 }
622
createFCmpUGT(Value * lhs,Value * rhs)623 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
624 {
625 return builder->CreateFCmpUGT(lhs, rhs);
626 }
627
createFCmpUGE(Value * lhs,Value * rhs)628 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
629 {
630 return builder->CreateFCmpUGE(lhs, rhs);
631 }
632
createFCmpULT(Value * lhs,Value * rhs)633 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
634 {
635 return builder->CreateFCmpULT(lhs, rhs);
636 }
637
createFCmpULE(Value * lhs,Value * rhs)638 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
639 {
640 return builder->CreateFCmpULE(lhs, rhs);
641 }
642
createFCmpUNE(Value * lhs,Value * rhs)643 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
644 {
645 return builder->CreateFCmpULE(lhs, rhs);
646 }
647
createCall(Value * callee)648 Value *Nucleus::createCall(Value *callee)
649 {
650 return builder->CreateCall(callee);
651 }
652
createCall(Value * callee,Value * arg)653 Value *Nucleus::createCall(Value *callee, Value *arg)
654 {
655 return builder->CreateCall(callee, arg);
656 }
657
createCall(Value * callee,Value * arg1,Value * arg2)658 Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2)
659 {
660 return builder->CreateCall2(callee, arg1, arg2);
661 }
662
createCall(Value * callee,Value * arg1,Value * arg2,Value * arg3)663 Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3)
664 {
665 return builder->CreateCall3(callee, arg1, arg2, arg3);
666 }
667
createCall(Value * callee,Value * arg1,Value * arg2,Value * arg3,Value * arg4)668 Value *Nucleus::createCall(Value *callee, Value *arg1, Value *arg2, Value *arg3, Value *arg4)
669 {
670 return builder->CreateCall4(callee, arg1, arg2, arg3, arg4);
671 }
672
createExtractElement(Value * vector,int index)673 Value *Nucleus::createExtractElement(Value *vector, int index)
674 {
675 return builder->CreateExtractElement(vector, createConstantInt(index));
676 }
677
createInsertElement(Value * vector,Value * element,int index)678 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
679 {
680 return builder->CreateInsertElement(vector, element, createConstantInt(index));
681 }
682
createShuffleVector(Value * V1,Value * V2,Value * mask)683 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, Value *mask)
684 {
685 return builder->CreateShuffleVector(V1, V2, mask);
686 }
687
createSelect(Value * C,Value * ifTrue,Value * ifFalse)688 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
689 {
690 return builder->CreateSelect(C, ifTrue, ifFalse);
691 }
692
createSwitch(llvm::Value * V,llvm::BasicBlock * Dest,unsigned NumCases)693 Value *Nucleus::createSwitch(llvm::Value *V, llvm::BasicBlock *Dest, unsigned NumCases)
694 {
695 return builder->CreateSwitch(V, Dest, NumCases);
696 }
697
addSwitchCase(llvm::Value * Switch,int Case,llvm::BasicBlock * Branch)698 void Nucleus::addSwitchCase(llvm::Value *Switch, int Case, llvm::BasicBlock *Branch)
699 {
700 static_cast<SwitchInst*>(Switch)->addCase(Nucleus::createConstantInt(Case), Branch);
701 }
702
createUnreachable()703 Value *Nucleus::createUnreachable()
704 {
705 return builder->CreateUnreachable();
706 }
707
createSwizzle(Value * val,unsigned char select)708 Value *Nucleus::createSwizzle(Value *val, unsigned char select)
709 {
710 Constant *swizzle[4];
711 swizzle[0] = Nucleus::createConstantInt((select >> 0) & 0x03);
712 swizzle[1] = Nucleus::createConstantInt((select >> 2) & 0x03);
713 swizzle[2] = Nucleus::createConstantInt((select >> 4) & 0x03);
714 swizzle[3] = Nucleus::createConstantInt((select >> 6) & 0x03);
715
716 Value *shuffle = Nucleus::createShuffleVector(val, UndefValue::get(val->getType()), Nucleus::createConstantVector(swizzle, 4));
717
718 return shuffle;
719 }
720
createMask(Value * lhs,Value * rhs,unsigned char select)721 Value *Nucleus::createMask(Value *lhs, Value *rhs, unsigned char select)
722 {
723 bool mask[4] = {false, false, false, false};
724
725 mask[(select >> 0) & 0x03] = true;
726 mask[(select >> 2) & 0x03] = true;
727 mask[(select >> 4) & 0x03] = true;
728 mask[(select >> 6) & 0x03] = true;
729
730 Constant *swizzle[4];
731 swizzle[0] = Nucleus::createConstantInt(mask[0] ? 4 : 0);
732 swizzle[1] = Nucleus::createConstantInt(mask[1] ? 5 : 1);
733 swizzle[2] = Nucleus::createConstantInt(mask[2] ? 6 : 2);
734 swizzle[3] = Nucleus::createConstantInt(mask[3] ? 7 : 3);
735
736 Value *shuffle = Nucleus::createShuffleVector(lhs, rhs, Nucleus::createConstantVector(swizzle, 4));
737
738 return shuffle;
739 }
740
getGlobalValueAtAddress(void * Addr)741 const llvm::GlobalValue *Nucleus::getGlobalValueAtAddress(void *Addr)
742 {
743 return executionEngine->getGlobalValueAtAddress(Addr);
744 }
745
addGlobalMapping(const llvm::GlobalValue * GV,void * Addr)746 void Nucleus::addGlobalMapping(const llvm::GlobalValue *GV, void *Addr)
747 {
748 executionEngine->addGlobalMapping(GV, Addr);
749 }
750
createGlobalValue(llvm::Type * Ty,bool isConstant,unsigned int Align)751 llvm::GlobalValue *Nucleus::createGlobalValue(llvm::Type *Ty, bool isConstant, unsigned int Align)
752 {
753 llvm::GlobalValue *global = new llvm::GlobalVariable(*Nucleus::getModule(), Ty, isConstant, llvm::GlobalValue::ExternalLinkage, 0, "");
754 global->setAlignment(Align);
755
756 return global;
757 }
758
getPointerType(llvm::Type * ElementType)759 llvm::Type *Nucleus::getPointerType(llvm::Type *ElementType)
760 {
761 return llvm::PointerType::get(ElementType, 0);
762 }
763
createNullValue(llvm::Type * Ty)764 llvm::Constant *Nucleus::createNullValue(llvm::Type *Ty)
765 {
766 return llvm::Constant::getNullValue(Ty);
767 }
768
createConstantInt(int64_t i)769 llvm::ConstantInt *Nucleus::createConstantInt(int64_t i)
770 {
771 return llvm::ConstantInt::get(Type::getInt64Ty(*context), i, true);
772 }
773
createConstantInt(int i)774 llvm::ConstantInt *Nucleus::createConstantInt(int i)
775 {
776 return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, true);
777 }
778
createConstantInt(unsigned int i)779 llvm::ConstantInt *Nucleus::createConstantInt(unsigned int i)
780 {
781 return llvm::ConstantInt::get(Type::getInt32Ty(*context), i, false);
782 }
783
createConstantBool(bool b)784 llvm::ConstantInt *Nucleus::createConstantBool(bool b)
785 {
786 return llvm::ConstantInt::get(Type::getInt1Ty(*context), b);
787 }
788
createConstantByte(signed char i)789 llvm::ConstantInt *Nucleus::createConstantByte(signed char i)
790 {
791 return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, true);
792 }
793
createConstantByte(unsigned char i)794 llvm::ConstantInt *Nucleus::createConstantByte(unsigned char i)
795 {
796 return llvm::ConstantInt::get(Type::getInt8Ty(*context), i, false);
797 }
798
createConstantShort(short i)799 llvm::ConstantInt *Nucleus::createConstantShort(short i)
800 {
801 return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, true);
802 }
803
createConstantShort(unsigned short i)804 llvm::ConstantInt *Nucleus::createConstantShort(unsigned short i)
805 {
806 return llvm::ConstantInt::get(Type::getInt16Ty(*context), i, false);
807 }
808
createConstantFloat(float x)809 llvm::Constant *Nucleus::createConstantFloat(float x)
810 {
811 return ConstantFP::get(Float::getType(), x);
812 }
813
createNullPointer(llvm::Type * Ty)814 llvm::Value *Nucleus::createNullPointer(llvm::Type *Ty)
815 {
816 return llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0));
817 }
818
createConstantVector(llvm::Constant * const * Vals,unsigned NumVals)819 llvm::Value *Nucleus::createConstantVector(llvm::Constant *const *Vals, unsigned NumVals)
820 {
821 return llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(Vals, NumVals));
822 }
823
getType()824 Type *Void::getType()
825 {
826 return Type::getVoidTy(*Nucleus::getContext());
827 }
828
LValue(llvm::Type * type,int arraySize)829 LValue::LValue(llvm::Type *type, int arraySize)
830 {
831 address = Nucleus::allocateStackVariable(type, arraySize);
832 }
833
loadValue(unsigned int alignment) const834 llvm::Value *LValue::loadValue(unsigned int alignment) const
835 {
836 return Nucleus::createLoad(address, false, alignment);
837 }
838
storeValue(llvm::Value * value,unsigned int alignment) const839 llvm::Value *LValue::storeValue(llvm::Value *value, unsigned int alignment) const
840 {
841 return Nucleus::createStore(value, address, false, alignment);
842 }
843
getAddress(llvm::Value * index) const844 llvm::Value *LValue::getAddress(llvm::Value *index) const
845 {
846 return Nucleus::createGEP(address, index);
847 }
848
getType()849 Type *MMX::getType()
850 {
851 return Type::getX86_MMXTy(*Nucleus::getContext());
852 }
853
Bool(Argument<Bool> argument)854 Bool::Bool(Argument<Bool> argument)
855 {
856 storeValue(argument.value);
857 }
858
Bool()859 Bool::Bool()
860 {
861 }
862
Bool(bool x)863 Bool::Bool(bool x)
864 {
865 storeValue(Nucleus::createConstantBool(x));
866 }
867
Bool(RValue<Bool> rhs)868 Bool::Bool(RValue<Bool> rhs)
869 {
870 storeValue(rhs.value);
871 }
872
Bool(const Bool & rhs)873 Bool::Bool(const Bool &rhs)
874 {
875 Value *value = rhs.loadValue();
876 storeValue(value);
877 }
878
Bool(const Reference<Bool> & rhs)879 Bool::Bool(const Reference<Bool> &rhs)
880 {
881 Value *value = rhs.loadValue();
882 storeValue(value);
883 }
884
operator =(RValue<Bool> rhs) const885 RValue<Bool> Bool::operator=(RValue<Bool> rhs) const
886 {
887 storeValue(rhs.value);
888
889 return rhs;
890 }
891
operator =(const Bool & rhs) const892 RValue<Bool> Bool::operator=(const Bool &rhs) const
893 {
894 Value *value = rhs.loadValue();
895 storeValue(value);
896
897 return RValue<Bool>(value);
898 }
899
operator =(const Reference<Bool> & rhs) const900 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) const
901 {
902 Value *value = rhs.loadValue();
903 storeValue(value);
904
905 return RValue<Bool>(value);
906 }
907
operator !(RValue<Bool> val)908 RValue<Bool> operator!(RValue<Bool> val)
909 {
910 return RValue<Bool>(Nucleus::createNot(val.value));
911 }
912
operator &&(RValue<Bool> lhs,RValue<Bool> rhs)913 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
914 {
915 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
916 }
917
operator ||(RValue<Bool> lhs,RValue<Bool> rhs)918 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
919 {
920 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
921 }
922
getType()923 Type *Bool::getType()
924 {
925 return Type::getInt1Ty(*Nucleus::getContext());
926 }
927
Byte(Argument<Byte> argument)928 Byte::Byte(Argument<Byte> argument)
929 {
930 storeValue(argument.value);
931 }
932
Byte(RValue<Int> cast)933 Byte::Byte(RValue<Int> cast)
934 {
935 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
936
937 storeValue(integer);
938 }
939
Byte(RValue<UInt> cast)940 Byte::Byte(RValue<UInt> cast)
941 {
942 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
943
944 storeValue(integer);
945 }
946
Byte(RValue<UShort> cast)947 Byte::Byte(RValue<UShort> cast)
948 {
949 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
950
951 storeValue(integer);
952 }
953
Byte()954 Byte::Byte()
955 {
956 }
957
Byte(int x)958 Byte::Byte(int x)
959 {
960 storeValue(Nucleus::createConstantByte((unsigned char)x));
961 }
962
Byte(unsigned char x)963 Byte::Byte(unsigned char x)
964 {
965 storeValue(Nucleus::createConstantByte(x));
966 }
967
Byte(RValue<Byte> rhs)968 Byte::Byte(RValue<Byte> rhs)
969 {
970 storeValue(rhs.value);
971 }
972
Byte(const Byte & rhs)973 Byte::Byte(const Byte &rhs)
974 {
975 Value *value = rhs.loadValue();
976 storeValue(value);
977 }
978
Byte(const Reference<Byte> & rhs)979 Byte::Byte(const Reference<Byte> &rhs)
980 {
981 Value *value = rhs.loadValue();
982 storeValue(value);
983 }
984
operator =(RValue<Byte> rhs) const985 RValue<Byte> Byte::operator=(RValue<Byte> rhs) const
986 {
987 storeValue(rhs.value);
988
989 return rhs;
990 }
991
operator =(const Byte & rhs) const992 RValue<Byte> Byte::operator=(const Byte &rhs) const
993 {
994 Value *value = rhs.loadValue();
995 storeValue(value);
996
997 return RValue<Byte>(value);
998 }
999
operator =(const Reference<Byte> & rhs) const1000 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) const
1001 {
1002 Value *value = rhs.loadValue();
1003 storeValue(value);
1004
1005 return RValue<Byte>(value);
1006 }
1007
operator +(RValue<Byte> lhs,RValue<Byte> rhs)1008 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1009 {
1010 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1011 }
1012
operator -(RValue<Byte> lhs,RValue<Byte> rhs)1013 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1014 {
1015 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1016 }
1017
operator *(RValue<Byte> lhs,RValue<Byte> rhs)1018 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1019 {
1020 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1021 }
1022
operator /(RValue<Byte> lhs,RValue<Byte> rhs)1023 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1024 {
1025 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1026 }
1027
operator %(RValue<Byte> lhs,RValue<Byte> rhs)1028 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1029 {
1030 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1031 }
1032
operator &(RValue<Byte> lhs,RValue<Byte> rhs)1033 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1034 {
1035 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1036 }
1037
operator |(RValue<Byte> lhs,RValue<Byte> rhs)1038 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1039 {
1040 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1041 }
1042
operator ^(RValue<Byte> lhs,RValue<Byte> rhs)1043 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1044 {
1045 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1046 }
1047
operator <<(RValue<Byte> lhs,RValue<Byte> rhs)1048 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1049 {
1050 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1051 }
1052
operator >>(RValue<Byte> lhs,RValue<Byte> rhs)1053 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1054 {
1055 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1056 }
1057
operator +=(const Byte & lhs,RValue<Byte> rhs)1058 RValue<Byte> operator+=(const Byte &lhs, RValue<Byte> rhs)
1059 {
1060 return lhs = lhs + rhs;
1061 }
1062
operator -=(const Byte & lhs,RValue<Byte> rhs)1063 RValue<Byte> operator-=(const Byte &lhs, RValue<Byte> rhs)
1064 {
1065 return lhs = lhs - rhs;
1066 }
1067
operator *=(const Byte & lhs,RValue<Byte> rhs)1068 RValue<Byte> operator*=(const Byte &lhs, RValue<Byte> rhs)
1069 {
1070 return lhs = lhs * rhs;
1071 }
1072
operator /=(const Byte & lhs,RValue<Byte> rhs)1073 RValue<Byte> operator/=(const Byte &lhs, RValue<Byte> rhs)
1074 {
1075 return lhs = lhs / rhs;
1076 }
1077
operator %=(const Byte & lhs,RValue<Byte> rhs)1078 RValue<Byte> operator%=(const Byte &lhs, RValue<Byte> rhs)
1079 {
1080 return lhs = lhs % rhs;
1081 }
1082
operator &=(const Byte & lhs,RValue<Byte> rhs)1083 RValue<Byte> operator&=(const Byte &lhs, RValue<Byte> rhs)
1084 {
1085 return lhs = lhs & rhs;
1086 }
1087
operator |=(const Byte & lhs,RValue<Byte> rhs)1088 RValue<Byte> operator|=(const Byte &lhs, RValue<Byte> rhs)
1089 {
1090 return lhs = lhs | rhs;
1091 }
1092
operator ^=(const Byte & lhs,RValue<Byte> rhs)1093 RValue<Byte> operator^=(const Byte &lhs, RValue<Byte> rhs)
1094 {
1095 return lhs = lhs ^ rhs;
1096 }
1097
operator <<=(const Byte & lhs,RValue<Byte> rhs)1098 RValue<Byte> operator<<=(const Byte &lhs, RValue<Byte> rhs)
1099 {
1100 return lhs = lhs << rhs;
1101 }
1102
operator >>=(const Byte & lhs,RValue<Byte> rhs)1103 RValue<Byte> operator>>=(const Byte &lhs, RValue<Byte> rhs)
1104 {
1105 return lhs = lhs >> rhs;
1106 }
1107
operator +(RValue<Byte> val)1108 RValue<Byte> operator+(RValue<Byte> val)
1109 {
1110 return val;
1111 }
1112
operator -(RValue<Byte> val)1113 RValue<Byte> operator-(RValue<Byte> val)
1114 {
1115 return RValue<Byte>(Nucleus::createNeg(val.value));
1116 }
1117
operator ~(RValue<Byte> val)1118 RValue<Byte> operator~(RValue<Byte> val)
1119 {
1120 return RValue<Byte>(Nucleus::createNot(val.value));
1121 }
1122
operator ++(const Byte & val,int)1123 RValue<Byte> operator++(const Byte &val, int) // Post-increment
1124 {
1125 RValue<Byte> res = val;
1126
1127 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((unsigned char)1));
1128 val.storeValue(inc);
1129
1130 return res;
1131 }
1132
// Pre-increment: stores val + 1 back into val and returns val itself.
const Byte &operator++(const Byte &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantByte(static_cast<unsigned char>(1));

	val.storeValue(Nucleus::createAdd(current, one));

	return val;
}
1140
operator --(const Byte & val,int)1141 RValue<Byte> operator--(const Byte &val, int) // Post-decrement
1142 {
1143 RValue<Byte> res = val;
1144
1145 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((unsigned char)1));
1146 val.storeValue(inc);
1147
1148 return res;
1149 }
1150
// Pre-decrement: stores val - 1 back into val and returns val itself.
const Byte &operator--(const Byte &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantByte(static_cast<unsigned char>(1));

	val.storeValue(Nucleus::createSub(current, one));

	return val;
}
1158
operator <(RValue<Byte> lhs,RValue<Byte> rhs)1159 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1160 {
1161 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1162 }
1163
operator <=(RValue<Byte> lhs,RValue<Byte> rhs)1164 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1165 {
1166 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1167 }
1168
operator >(RValue<Byte> lhs,RValue<Byte> rhs)1169 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1170 {
1171 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1172 }
1173
operator >=(RValue<Byte> lhs,RValue<Byte> rhs)1174 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1175 {
1176 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1177 }
1178
operator !=(RValue<Byte> lhs,RValue<Byte> rhs)1179 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1180 {
1181 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1182 }
1183
operator ==(RValue<Byte> lhs,RValue<Byte> rhs)1184 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1185 {
1186 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1187 }
1188
getType()1189 Type *Byte::getType()
1190 {
1191 return Type::getInt8Ty(*Nucleus::getContext());
1192 }
1193
SByte(Argument<SByte> argument)1194 SByte::SByte(Argument<SByte> argument)
1195 {
1196 storeValue(argument.value);
1197 }
1198
SByte(RValue<Int> cast)1199 SByte::SByte(RValue<Int> cast)
1200 {
1201 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1202
1203 storeValue(integer);
1204 }
1205
SByte(RValue<Short> cast)1206 SByte::SByte(RValue<Short> cast)
1207 {
1208 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1209
1210 storeValue(integer);
1211 }
1212
SByte()1213 SByte::SByte()
1214 {
1215 }
1216
SByte(signed char x)1217 SByte::SByte(signed char x)
1218 {
1219 storeValue(Nucleus::createConstantByte(x));
1220 }
1221
SByte(RValue<SByte> rhs)1222 SByte::SByte(RValue<SByte> rhs)
1223 {
1224 storeValue(rhs.value);
1225 }
1226
// Copy constructor: loads the value of rhs and stores it into this variable.
SByte::SByte(const SByte &rhs)
{
	storeValue(rhs.loadValue());
}
1232
SByte(const Reference<SByte> & rhs)1233 SByte::SByte(const Reference<SByte> &rhs)
1234 {
1235 Value *value = rhs.loadValue();
1236 storeValue(value);
1237 }
1238
operator =(RValue<SByte> rhs) const1239 RValue<SByte> SByte::operator=(RValue<SByte> rhs) const
1240 {
1241 storeValue(rhs.value);
1242
1243 return rhs;
1244 }
1245
operator =(const SByte & rhs) const1246 RValue<SByte> SByte::operator=(const SByte &rhs) const
1247 {
1248 Value *value = rhs.loadValue();
1249 storeValue(value);
1250
1251 return RValue<SByte>(value);
1252 }
1253
operator =(const Reference<SByte> & rhs) const1254 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) const
1255 {
1256 Value *value = rhs.loadValue();
1257 storeValue(value);
1258
1259 return RValue<SByte>(value);
1260 }
1261
operator +(RValue<SByte> lhs,RValue<SByte> rhs)1262 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1263 {
1264 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1265 }
1266
operator -(RValue<SByte> lhs,RValue<SByte> rhs)1267 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1268 {
1269 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1270 }
1271
operator *(RValue<SByte> lhs,RValue<SByte> rhs)1272 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1273 {
1274 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1275 }
1276
operator /(RValue<SByte> lhs,RValue<SByte> rhs)1277 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1278 {
1279 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1280 }
1281
operator %(RValue<SByte> lhs,RValue<SByte> rhs)1282 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1283 {
1284 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1285 }
1286
operator &(RValue<SByte> lhs,RValue<SByte> rhs)1287 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1288 {
1289 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1290 }
1291
operator |(RValue<SByte> lhs,RValue<SByte> rhs)1292 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1293 {
1294 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1295 }
1296
operator ^(RValue<SByte> lhs,RValue<SByte> rhs)1297 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1298 {
1299 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1300 }
1301
operator <<(RValue<SByte> lhs,RValue<SByte> rhs)1302 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1303 {
1304 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1305 }
1306
operator >>(RValue<SByte> lhs,RValue<SByte> rhs)1307 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1308 {
1309 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1310 }
1311
operator +=(const SByte & lhs,RValue<SByte> rhs)1312 RValue<SByte> operator+=(const SByte &lhs, RValue<SByte> rhs)
1313 {
1314 return lhs = lhs + rhs;
1315 }
1316
operator -=(const SByte & lhs,RValue<SByte> rhs)1317 RValue<SByte> operator-=(const SByte &lhs, RValue<SByte> rhs)
1318 {
1319 return lhs = lhs - rhs;
1320 }
1321
operator *=(const SByte & lhs,RValue<SByte> rhs)1322 RValue<SByte> operator*=(const SByte &lhs, RValue<SByte> rhs)
1323 {
1324 return lhs = lhs * rhs;
1325 }
1326
operator /=(const SByte & lhs,RValue<SByte> rhs)1327 RValue<SByte> operator/=(const SByte &lhs, RValue<SByte> rhs)
1328 {
1329 return lhs = lhs / rhs;
1330 }
1331
operator %=(const SByte & lhs,RValue<SByte> rhs)1332 RValue<SByte> operator%=(const SByte &lhs, RValue<SByte> rhs)
1333 {
1334 return lhs = lhs % rhs;
1335 }
1336
operator &=(const SByte & lhs,RValue<SByte> rhs)1337 RValue<SByte> operator&=(const SByte &lhs, RValue<SByte> rhs)
1338 {
1339 return lhs = lhs & rhs;
1340 }
1341
operator |=(const SByte & lhs,RValue<SByte> rhs)1342 RValue<SByte> operator|=(const SByte &lhs, RValue<SByte> rhs)
1343 {
1344 return lhs = lhs | rhs;
1345 }
1346
operator ^=(const SByte & lhs,RValue<SByte> rhs)1347 RValue<SByte> operator^=(const SByte &lhs, RValue<SByte> rhs)
1348 {
1349 return lhs = lhs ^ rhs;
1350 }
1351
operator <<=(const SByte & lhs,RValue<SByte> rhs)1352 RValue<SByte> operator<<=(const SByte &lhs, RValue<SByte> rhs)
1353 {
1354 return lhs = lhs << rhs;
1355 }
1356
operator >>=(const SByte & lhs,RValue<SByte> rhs)1357 RValue<SByte> operator>>=(const SByte &lhs, RValue<SByte> rhs)
1358 {
1359 return lhs = lhs >> rhs;
1360 }
1361
operator +(RValue<SByte> val)1362 RValue<SByte> operator+(RValue<SByte> val)
1363 {
1364 return val;
1365 }
1366
operator -(RValue<SByte> val)1367 RValue<SByte> operator-(RValue<SByte> val)
1368 {
1369 return RValue<SByte>(Nucleus::createNeg(val.value));
1370 }
1371
operator ~(RValue<SByte> val)1372 RValue<SByte> operator~(RValue<SByte> val)
1373 {
1374 return RValue<SByte>(Nucleus::createNot(val.value));
1375 }
1376
operator ++(const SByte & val,int)1377 RValue<SByte> operator++(const SByte &val, int) // Post-increment
1378 {
1379 RValue<SByte> res = val;
1380
1381 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantByte((signed char)1));
1382 val.storeValue(inc);
1383
1384 return res;
1385 }
1386
// Pre-increment: stores val + 1 back into val and returns val itself.
const SByte &operator++(const SByte &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantByte(static_cast<signed char>(1));

	val.storeValue(Nucleus::createAdd(current, one));

	return val;
}
1394
operator --(const SByte & val,int)1395 RValue<SByte> operator--(const SByte &val, int) // Post-decrement
1396 {
1397 RValue<SByte> res = val;
1398
1399 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantByte((signed char)1));
1400 val.storeValue(inc);
1401
1402 return res;
1403 }
1404
// Pre-decrement: stores val - 1 back into val and returns val itself.
const SByte &operator--(const SByte &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantByte(static_cast<signed char>(1));

	val.storeValue(Nucleus::createSub(current, one));

	return val;
}
1412
operator <(RValue<SByte> lhs,RValue<SByte> rhs)1413 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1414 {
1415 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1416 }
1417
operator <=(RValue<SByte> lhs,RValue<SByte> rhs)1418 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1419 {
1420 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1421 }
1422
operator >(RValue<SByte> lhs,RValue<SByte> rhs)1423 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1424 {
1425 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1426 }
1427
operator >=(RValue<SByte> lhs,RValue<SByte> rhs)1428 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1429 {
1430 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1431 }
1432
operator !=(RValue<SByte> lhs,RValue<SByte> rhs)1433 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1434 {
1435 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1436 }
1437
operator ==(RValue<SByte> lhs,RValue<SByte> rhs)1438 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1439 {
1440 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1441 }
1442
getType()1443 Type *SByte::getType()
1444 {
1445 return Type::getInt8Ty(*Nucleus::getContext());
1446 }
1447
Short(Argument<Short> argument)1448 Short::Short(Argument<Short> argument)
1449 {
1450 storeValue(argument.value);
1451 }
1452
Short(RValue<Int> cast)1453 Short::Short(RValue<Int> cast)
1454 {
1455 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1456
1457 storeValue(integer);
1458 }
1459
Short()1460 Short::Short()
1461 {
1462 }
1463
Short(short x)1464 Short::Short(short x)
1465 {
1466 storeValue(Nucleus::createConstantShort(x));
1467 }
1468
Short(RValue<Short> rhs)1469 Short::Short(RValue<Short> rhs)
1470 {
1471 storeValue(rhs.value);
1472 }
1473
// Copy constructor: loads the value of rhs and stores it into this variable.
Short::Short(const Short &rhs)
{
	storeValue(rhs.loadValue());
}
1479
Short(const Reference<Short> & rhs)1480 Short::Short(const Reference<Short> &rhs)
1481 {
1482 Value *value = rhs.loadValue();
1483 storeValue(value);
1484 }
1485
operator =(RValue<Short> rhs) const1486 RValue<Short> Short::operator=(RValue<Short> rhs) const
1487 {
1488 storeValue(rhs.value);
1489
1490 return rhs;
1491 }
1492
operator =(const Short & rhs) const1493 RValue<Short> Short::operator=(const Short &rhs) const
1494 {
1495 Value *value = rhs.loadValue();
1496 storeValue(value);
1497
1498 return RValue<Short>(value);
1499 }
1500
operator =(const Reference<Short> & rhs) const1501 RValue<Short> Short::operator=(const Reference<Short> &rhs) const
1502 {
1503 Value *value = rhs.loadValue();
1504 storeValue(value);
1505
1506 return RValue<Short>(value);
1507 }
1508
operator +(RValue<Short> lhs,RValue<Short> rhs)1509 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1510 {
1511 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1512 }
1513
operator -(RValue<Short> lhs,RValue<Short> rhs)1514 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1515 {
1516 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1517 }
1518
operator *(RValue<Short> lhs,RValue<Short> rhs)1519 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1520 {
1521 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1522 }
1523
operator /(RValue<Short> lhs,RValue<Short> rhs)1524 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1525 {
1526 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1527 }
1528
operator %(RValue<Short> lhs,RValue<Short> rhs)1529 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1530 {
1531 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1532 }
1533
operator &(RValue<Short> lhs,RValue<Short> rhs)1534 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1535 {
1536 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1537 }
1538
operator |(RValue<Short> lhs,RValue<Short> rhs)1539 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1540 {
1541 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1542 }
1543
operator ^(RValue<Short> lhs,RValue<Short> rhs)1544 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1545 {
1546 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1547 }
1548
operator <<(RValue<Short> lhs,RValue<Short> rhs)1549 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1550 {
1551 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1552 }
1553
operator >>(RValue<Short> lhs,RValue<Short> rhs)1554 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1555 {
1556 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1557 }
1558
operator +=(const Short & lhs,RValue<Short> rhs)1559 RValue<Short> operator+=(const Short &lhs, RValue<Short> rhs)
1560 {
1561 return lhs = lhs + rhs;
1562 }
1563
operator -=(const Short & lhs,RValue<Short> rhs)1564 RValue<Short> operator-=(const Short &lhs, RValue<Short> rhs)
1565 {
1566 return lhs = lhs - rhs;
1567 }
1568
operator *=(const Short & lhs,RValue<Short> rhs)1569 RValue<Short> operator*=(const Short &lhs, RValue<Short> rhs)
1570 {
1571 return lhs = lhs * rhs;
1572 }
1573
operator /=(const Short & lhs,RValue<Short> rhs)1574 RValue<Short> operator/=(const Short &lhs, RValue<Short> rhs)
1575 {
1576 return lhs = lhs / rhs;
1577 }
1578
operator %=(const Short & lhs,RValue<Short> rhs)1579 RValue<Short> operator%=(const Short &lhs, RValue<Short> rhs)
1580 {
1581 return lhs = lhs % rhs;
1582 }
1583
operator &=(const Short & lhs,RValue<Short> rhs)1584 RValue<Short> operator&=(const Short &lhs, RValue<Short> rhs)
1585 {
1586 return lhs = lhs & rhs;
1587 }
1588
operator |=(const Short & lhs,RValue<Short> rhs)1589 RValue<Short> operator|=(const Short &lhs, RValue<Short> rhs)
1590 {
1591 return lhs = lhs | rhs;
1592 }
1593
operator ^=(const Short & lhs,RValue<Short> rhs)1594 RValue<Short> operator^=(const Short &lhs, RValue<Short> rhs)
1595 {
1596 return lhs = lhs ^ rhs;
1597 }
1598
operator <<=(const Short & lhs,RValue<Short> rhs)1599 RValue<Short> operator<<=(const Short &lhs, RValue<Short> rhs)
1600 {
1601 return lhs = lhs << rhs;
1602 }
1603
operator >>=(const Short & lhs,RValue<Short> rhs)1604 RValue<Short> operator>>=(const Short &lhs, RValue<Short> rhs)
1605 {
1606 return lhs = lhs >> rhs;
1607 }
1608
operator +(RValue<Short> val)1609 RValue<Short> operator+(RValue<Short> val)
1610 {
1611 return val;
1612 }
1613
operator -(RValue<Short> val)1614 RValue<Short> operator-(RValue<Short> val)
1615 {
1616 return RValue<Short>(Nucleus::createNeg(val.value));
1617 }
1618
operator ~(RValue<Short> val)1619 RValue<Short> operator~(RValue<Short> val)
1620 {
1621 return RValue<Short>(Nucleus::createNot(val.value));
1622 }
1623
operator ++(const Short & val,int)1624 RValue<Short> operator++(const Short &val, int) // Post-increment
1625 {
1626 RValue<Short> res = val;
1627
1628 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((short)1));
1629 val.storeValue(inc);
1630
1631 return res;
1632 }
1633
// Pre-increment: stores val + 1 back into val and returns val itself.
const Short &operator++(const Short &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantShort(static_cast<short>(1));

	val.storeValue(Nucleus::createAdd(current, one));

	return val;
}
1641
operator --(const Short & val,int)1642 RValue<Short> operator--(const Short &val, int) // Post-decrement
1643 {
1644 RValue<Short> res = val;
1645
1646 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((short)1));
1647 val.storeValue(inc);
1648
1649 return res;
1650 }
1651
// Pre-decrement: stores val - 1 back into val and returns val itself.
const Short &operator--(const Short &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantShort(static_cast<short>(1));

	val.storeValue(Nucleus::createSub(current, one));

	return val;
}
1659
operator <(RValue<Short> lhs,RValue<Short> rhs)1660 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1661 {
1662 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1663 }
1664
operator <=(RValue<Short> lhs,RValue<Short> rhs)1665 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1666 {
1667 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1668 }
1669
operator >(RValue<Short> lhs,RValue<Short> rhs)1670 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1671 {
1672 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1673 }
1674
operator >=(RValue<Short> lhs,RValue<Short> rhs)1675 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1676 {
1677 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1678 }
1679
operator !=(RValue<Short> lhs,RValue<Short> rhs)1680 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1681 {
1682 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1683 }
1684
operator ==(RValue<Short> lhs,RValue<Short> rhs)1685 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1686 {
1687 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1688 }
1689
getType()1690 Type *Short::getType()
1691 {
1692 return Type::getInt16Ty(*Nucleus::getContext());
1693 }
1694
UShort(Argument<UShort> argument)1695 UShort::UShort(Argument<UShort> argument)
1696 {
1697 storeValue(argument.value);
1698 }
1699
UShort(RValue<UInt> cast)1700 UShort::UShort(RValue<UInt> cast)
1701 {
1702 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1703
1704 storeValue(integer);
1705 }
1706
UShort(RValue<Int> cast)1707 UShort::UShort(RValue<Int> cast)
1708 {
1709 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1710
1711 storeValue(integer);
1712 }
1713
UShort()1714 UShort::UShort()
1715 {
1716 }
1717
UShort(unsigned short x)1718 UShort::UShort(unsigned short x)
1719 {
1720 storeValue(Nucleus::createConstantShort(x));
1721 }
1722
UShort(RValue<UShort> rhs)1723 UShort::UShort(RValue<UShort> rhs)
1724 {
1725 storeValue(rhs.value);
1726 }
1727
// Copy constructor: loads the value of rhs and stores it into this variable.
UShort::UShort(const UShort &rhs)
{
	storeValue(rhs.loadValue());
}
1733
UShort(const Reference<UShort> & rhs)1734 UShort::UShort(const Reference<UShort> &rhs)
1735 {
1736 Value *value = rhs.loadValue();
1737 storeValue(value);
1738 }
1739
operator =(RValue<UShort> rhs) const1740 RValue<UShort> UShort::operator=(RValue<UShort> rhs) const
1741 {
1742 storeValue(rhs.value);
1743
1744 return rhs;
1745 }
1746
operator =(const UShort & rhs) const1747 RValue<UShort> UShort::operator=(const UShort &rhs) const
1748 {
1749 Value *value = rhs.loadValue();
1750 storeValue(value);
1751
1752 return RValue<UShort>(value);
1753 }
1754
operator =(const Reference<UShort> & rhs) const1755 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) const
1756 {
1757 Value *value = rhs.loadValue();
1758 storeValue(value);
1759
1760 return RValue<UShort>(value);
1761 }
1762
operator +(RValue<UShort> lhs,RValue<UShort> rhs)1763 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1764 {
1765 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1766 }
1767
operator -(RValue<UShort> lhs,RValue<UShort> rhs)1768 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1769 {
1770 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1771 }
1772
operator *(RValue<UShort> lhs,RValue<UShort> rhs)1773 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1774 {
1775 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
1776 }
1777
operator /(RValue<UShort> lhs,RValue<UShort> rhs)1778 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1779 {
1780 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1781 }
1782
operator %(RValue<UShort> lhs,RValue<UShort> rhs)1783 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1784 {
1785 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1786 }
1787
operator &(RValue<UShort> lhs,RValue<UShort> rhs)1788 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1789 {
1790 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1791 }
1792
operator |(RValue<UShort> lhs,RValue<UShort> rhs)1793 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1794 {
1795 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1796 }
1797
operator ^(RValue<UShort> lhs,RValue<UShort> rhs)1798 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1799 {
1800 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1801 }
1802
operator <<(RValue<UShort> lhs,RValue<UShort> rhs)1803 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1804 {
1805 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
1806 }
1807
operator >>(RValue<UShort> lhs,RValue<UShort> rhs)1808 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1809 {
1810 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
1811 }
1812
operator +=(const UShort & lhs,RValue<UShort> rhs)1813 RValue<UShort> operator+=(const UShort &lhs, RValue<UShort> rhs)
1814 {
1815 return lhs = lhs + rhs;
1816 }
1817
operator -=(const UShort & lhs,RValue<UShort> rhs)1818 RValue<UShort> operator-=(const UShort &lhs, RValue<UShort> rhs)
1819 {
1820 return lhs = lhs - rhs;
1821 }
1822
operator *=(const UShort & lhs,RValue<UShort> rhs)1823 RValue<UShort> operator*=(const UShort &lhs, RValue<UShort> rhs)
1824 {
1825 return lhs = lhs * rhs;
1826 }
1827
operator /=(const UShort & lhs,RValue<UShort> rhs)1828 RValue<UShort> operator/=(const UShort &lhs, RValue<UShort> rhs)
1829 {
1830 return lhs = lhs / rhs;
1831 }
1832
operator %=(const UShort & lhs,RValue<UShort> rhs)1833 RValue<UShort> operator%=(const UShort &lhs, RValue<UShort> rhs)
1834 {
1835 return lhs = lhs % rhs;
1836 }
1837
operator &=(const UShort & lhs,RValue<UShort> rhs)1838 RValue<UShort> operator&=(const UShort &lhs, RValue<UShort> rhs)
1839 {
1840 return lhs = lhs & rhs;
1841 }
1842
operator |=(const UShort & lhs,RValue<UShort> rhs)1843 RValue<UShort> operator|=(const UShort &lhs, RValue<UShort> rhs)
1844 {
1845 return lhs = lhs | rhs;
1846 }
1847
operator ^=(const UShort & lhs,RValue<UShort> rhs)1848 RValue<UShort> operator^=(const UShort &lhs, RValue<UShort> rhs)
1849 {
1850 return lhs = lhs ^ rhs;
1851 }
1852
operator <<=(const UShort & lhs,RValue<UShort> rhs)1853 RValue<UShort> operator<<=(const UShort &lhs, RValue<UShort> rhs)
1854 {
1855 return lhs = lhs << rhs;
1856 }
1857
operator >>=(const UShort & lhs,RValue<UShort> rhs)1858 RValue<UShort> operator>>=(const UShort &lhs, RValue<UShort> rhs)
1859 {
1860 return lhs = lhs >> rhs;
1861 }
1862
operator +(RValue<UShort> val)1863 RValue<UShort> operator+(RValue<UShort> val)
1864 {
1865 return val;
1866 }
1867
operator -(RValue<UShort> val)1868 RValue<UShort> operator-(RValue<UShort> val)
1869 {
1870 return RValue<UShort>(Nucleus::createNeg(val.value));
1871 }
1872
operator ~(RValue<UShort> val)1873 RValue<UShort> operator~(RValue<UShort> val)
1874 {
1875 return RValue<UShort>(Nucleus::createNot(val.value));
1876 }
1877
operator ++(const UShort & val,int)1878 RValue<UShort> operator++(const UShort &val, int) // Post-increment
1879 {
1880 RValue<UShort> res = val;
1881
1882 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantShort((unsigned short)1));
1883 val.storeValue(inc);
1884
1885 return res;
1886 }
1887
// Pre-increment: stores val + 1 back into val and returns val itself.
const UShort &operator++(const UShort &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantShort(static_cast<unsigned short>(1));

	val.storeValue(Nucleus::createAdd(current, one));

	return val;
}
1895
operator --(const UShort & val,int)1896 RValue<UShort> operator--(const UShort &val, int) // Post-decrement
1897 {
1898 RValue<UShort> res = val;
1899
1900 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantShort((unsigned short)1));
1901 val.storeValue(inc);
1902
1903 return res;
1904 }
1905
// Pre-decrement: stores val - 1 back into val and returns val itself.
const UShort &operator--(const UShort &val)
{
	Value *current = val.loadValue();
	Value *one = Nucleus::createConstantShort(static_cast<unsigned short>(1));

	val.storeValue(Nucleus::createSub(current, one));

	return val;
}
1913
operator <(RValue<UShort> lhs,RValue<UShort> rhs)1914 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
1915 {
1916 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1917 }
1918
operator <=(RValue<UShort> lhs,RValue<UShort> rhs)1919 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
1920 {
1921 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1922 }
1923
operator >(RValue<UShort> lhs,RValue<UShort> rhs)1924 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
1925 {
1926 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1927 }
1928
operator >=(RValue<UShort> lhs,RValue<UShort> rhs)1929 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
1930 {
1931 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1932 }
1933
operator !=(RValue<UShort> lhs,RValue<UShort> rhs)1934 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
1935 {
1936 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1937 }
1938
operator ==(RValue<UShort> lhs,RValue<UShort> rhs)1939 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
1940 {
1941 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1942 }
1943
getType()1944 Type *UShort::getType()
1945 {
1946 return Type::getInt16Ty(*Nucleus::getContext());
1947 }
1948
getType()1949 Type *Byte4::getType()
1950 {
1951 #if 0
1952 return VectorType::get(Byte::getType(), 4);
1953 #else
1954 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1955 #endif
1956 }
1957
getType()1958 Type *SByte4::getType()
1959 {
1960 #if 0
1961 return VectorType::get(SByte::getType(), 4);
1962 #else
1963 return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1964 #endif
1965 }
1966
Byte8()1967 Byte8::Byte8()
1968 {
1969 // xyzw.parent = this;
1970 }
1971
Byte8(byte x0,byte x1,byte x2,byte x3,byte x4,byte x5,byte x6,byte x7)1972 Byte8::Byte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
1973 {
1974 // xyzw.parent = this;
1975
1976 Constant *constantVector[8];
1977 constantVector[0] = Nucleus::createConstantByte(x0);
1978 constantVector[1] = Nucleus::createConstantByte(x1);
1979 constantVector[2] = Nucleus::createConstantByte(x2);
1980 constantVector[3] = Nucleus::createConstantByte(x3);
1981 constantVector[4] = Nucleus::createConstantByte(x4);
1982 constantVector[5] = Nucleus::createConstantByte(x5);
1983 constantVector[6] = Nucleus::createConstantByte(x6);
1984 constantVector[7] = Nucleus::createConstantByte(x7);
1985 Value *vector = Nucleus::createConstantVector(constantVector, 8);
1986
1987 storeValue(Nucleus::createBitCast(vector, getType()));
1988 }
1989
Byte8(int64_t x)1990 Byte8::Byte8(int64_t x)
1991 {
1992 // xyzw.parent = this;
1993
1994 Constant *constantVector[8];
1995 constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >> 0));
1996 constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >> 8));
1997 constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
1998 constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
1999 constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
2000 constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
2001 constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
2002 constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
2003 Value *vector = Nucleus::createConstantVector(constantVector, 8);
2004
2005 storeValue(Nucleus::createBitCast(vector, getType()));
2006 }
2007
Byte8(RValue<Byte8> rhs)2008 Byte8::Byte8(RValue<Byte8> rhs)
2009 {
2010 // xyzw.parent = this;
2011
2012 storeValue(rhs.value);
2013 }
2014
// Copy constructor: loads the value of rhs and stores it into this variable.
Byte8::Byte8(const Byte8 &rhs)
{
	storeValue(rhs.loadValue());
}
2022
Byte8(const Reference<Byte8> & rhs)2023 Byte8::Byte8(const Reference<Byte8> &rhs)
2024 {
2025 // xyzw.parent = this;
2026
2027 Value *value = rhs.loadValue();
2028 storeValue(value);
2029 }
2030
operator =(RValue<Byte8> rhs) const2031 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) const
2032 {
2033 storeValue(rhs.value);
2034
2035 return rhs;
2036 }
2037
operator =(const Byte8 & rhs) const2038 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const
2039 {
2040 Value *value = rhs.loadValue();
2041 storeValue(value);
2042
2043 return RValue<Byte8>(value);
2044 }
2045
operator =(const Reference<Byte8> & rhs) const2046 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) const
2047 {
2048 Value *value = rhs.loadValue();
2049 storeValue(value);
2050
2051 return RValue<Byte8>(value);
2052 }
2053
operator +(RValue<Byte8> lhs,RValue<Byte8> rhs)2054 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2055 {
2056 if(CPUID::supportsMMX2())
2057 {
2058 return x86::paddb(lhs, rhs);
2059 }
2060 else
2061 {
2062 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2063 }
2064 }
2065
operator -(RValue<Byte8> lhs,RValue<Byte8> rhs)2066 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2067 {
2068 if(CPUID::supportsMMX2())
2069 {
2070 return x86::psubb(lhs, rhs);
2071 }
2072 else
2073 {
2074 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2075 }
2076 }
2077
2078 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2079 // {
2080 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2081 // }
2082
2083 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2084 // {
2085 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2086 // }
2087
2088 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2089 // {
2090 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2091 // }
2092
operator &(RValue<Byte8> lhs,RValue<Byte8> rhs)2093 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2094 {
2095 if(CPUID::supportsMMX2())
2096 {
2097 return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
2098 }
2099 else
2100 {
2101 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2102 }
2103 }
2104
operator |(RValue<Byte8> lhs,RValue<Byte8> rhs)2105 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2106 {
2107 if(CPUID::supportsMMX2())
2108 {
2109 return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
2110 }
2111 else
2112 {
2113 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2114 }
2115 }
2116
operator ^(RValue<Byte8> lhs,RValue<Byte8> rhs)2117 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2118 {
2119 if(CPUID::supportsMMX2())
2120 {
2121 return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
2122 }
2123 else
2124 {
2125 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2126 }
2127 }
2128
2129 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2130 // {
2131 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2132 // }
2133
2134 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2135 // {
2136 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2137 // }
2138
operator +=(const Byte8 & lhs,RValue<Byte8> rhs)2139 RValue<Byte8> operator+=(const Byte8 &lhs, RValue<Byte8> rhs)
2140 {
2141 return lhs = lhs + rhs;
2142 }
2143
operator -=(const Byte8 & lhs,RValue<Byte8> rhs)2144 RValue<Byte8> operator-=(const Byte8 &lhs, RValue<Byte8> rhs)
2145 {
2146 return lhs = lhs - rhs;
2147 }
2148
2149 // RValue<Byte8> operator*=(const Byte8 &lhs, RValue<Byte8> rhs)
2150 // {
2151 // return lhs = lhs * rhs;
2152 // }
2153
2154 // RValue<Byte8> operator/=(const Byte8 &lhs, RValue<Byte8> rhs)
2155 // {
2156 // return lhs = lhs / rhs;
2157 // }
2158
2159 // RValue<Byte8> operator%=(const Byte8 &lhs, RValue<Byte8> rhs)
2160 // {
2161 // return lhs = lhs % rhs;
2162 // }
2163
operator &=(const Byte8 & lhs,RValue<Byte8> rhs)2164 RValue<Byte8> operator&=(const Byte8 &lhs, RValue<Byte8> rhs)
2165 {
2166 return lhs = lhs & rhs;
2167 }
2168
operator |=(const Byte8 & lhs,RValue<Byte8> rhs)2169 RValue<Byte8> operator|=(const Byte8 &lhs, RValue<Byte8> rhs)
2170 {
2171 return lhs = lhs | rhs;
2172 }
2173
operator ^=(const Byte8 & lhs,RValue<Byte8> rhs)2174 RValue<Byte8> operator^=(const Byte8 &lhs, RValue<Byte8> rhs)
2175 {
2176 return lhs = lhs ^ rhs;
2177 }
2178
2179 // RValue<Byte8> operator<<=(const Byte8 &lhs, RValue<Byte8> rhs)
2180 // {
2181 // return lhs = lhs << rhs;
2182 // }
2183
2184 // RValue<Byte8> operator>>=(const Byte8 &lhs, RValue<Byte8> rhs)
2185 // {
2186 // return lhs = lhs >> rhs;
2187 // }
2188
2189 // RValue<Byte8> operator+(RValue<Byte8> val)
2190 // {
2191 // return val;
2192 // }
2193
2194 // RValue<Byte8> operator-(RValue<Byte8> val)
2195 // {
2196 // return RValue<Byte8>(Nucleus::createNeg(val.value));
2197 // }
2198
operator ~(RValue<Byte8> val)2199 RValue<Byte8> operator~(RValue<Byte8> val)
2200 {
2201 if(CPUID::supportsMMX2())
2202 {
2203 return val ^ Byte8(0xFFFFFFFFFFFFFFFF);
2204 }
2205 else
2206 {
2207 return RValue<Byte8>(Nucleus::createNot(val.value));
2208 }
2209 }
2210
AddSat(RValue<Byte8> x,RValue<Byte8> y)2211 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2212 {
2213 return x86::paddusb(x, y);
2214 }
2215
SubSat(RValue<Byte8> x,RValue<Byte8> y)2216 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2217 {
2218 return x86::psubusb(x, y);
2219 }
2220
Unpack(RValue<Byte4> x)2221 RValue<Short4> Unpack(RValue<Byte4> x)
2222 {
2223 Value *int2 = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), x.value, 0);
2224 Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
2225
2226 return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
2227 }
2228
UnpackLow(RValue<Byte8> x,RValue<Byte8> y)2229 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2230 {
2231 if(CPUID::supportsMMX2())
2232 {
2233 return x86::punpcklbw(x, y);
2234 }
2235 else
2236 {
2237 Constant *shuffle[8];
2238 shuffle[0] = Nucleus::createConstantInt(0);
2239 shuffle[1] = Nucleus::createConstantInt(8);
2240 shuffle[2] = Nucleus::createConstantInt(1);
2241 shuffle[3] = Nucleus::createConstantInt(9);
2242 shuffle[4] = Nucleus::createConstantInt(2);
2243 shuffle[5] = Nucleus::createConstantInt(10);
2244 shuffle[6] = Nucleus::createConstantInt(3);
2245 shuffle[7] = Nucleus::createConstantInt(11);
2246
2247 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2248
2249 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2250 }
2251 }
2252
UnpackHigh(RValue<Byte8> x,RValue<Byte8> y)2253 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2254 {
2255 if(CPUID::supportsMMX2())
2256 {
2257 return x86::punpckhbw(x, y);
2258 }
2259 else
2260 {
2261 Constant *shuffle[8];
2262 shuffle[0] = Nucleus::createConstantInt(4);
2263 shuffle[1] = Nucleus::createConstantInt(12);
2264 shuffle[2] = Nucleus::createConstantInt(5);
2265 shuffle[3] = Nucleus::createConstantInt(13);
2266 shuffle[4] = Nucleus::createConstantInt(6);
2267 shuffle[5] = Nucleus::createConstantInt(14);
2268 shuffle[6] = Nucleus::createConstantInt(7);
2269 shuffle[7] = Nucleus::createConstantInt(15);
2270
2271 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2272
2273 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2274 }
2275 }
2276
SignMask(RValue<Byte8> x)2277 RValue<Int> SignMask(RValue<Byte8> x)
2278 {
2279 return x86::pmovmskb(x);
2280 }
2281
2282 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2283 // {
2284 // return x86::pcmpgtb(x, y); // FIXME: Signedness
2285 // }
2286
CmpEQ(RValue<Byte8> x,RValue<Byte8> y)2287 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2288 {
2289 return x86::pcmpeqb(x, y);
2290 }
2291
getType()2292 Type *Byte8::getType()
2293 {
2294 if(CPUID::supportsMMX2())
2295 {
2296 return MMX::getType();
2297 }
2298 else
2299 {
2300 return VectorType::get(Byte::getType(), 8);
2301 }
2302 }
2303
SByte8()2304 SByte8::SByte8()
2305 {
2306 // xyzw.parent = this;
2307 }
2308
SByte8(byte x0,byte x1,byte x2,byte x3,byte x4,byte x5,byte x6,byte x7)2309 SByte8::SByte8(byte x0, byte x1, byte x2, byte x3, byte x4, byte x5, byte x6, byte x7)
2310 {
2311 // xyzw.parent = this;
2312
2313 Constant *constantVector[8];
2314 constantVector[0] = Nucleus::createConstantByte(x0);
2315 constantVector[1] = Nucleus::createConstantByte(x1);
2316 constantVector[2] = Nucleus::createConstantByte(x2);
2317 constantVector[3] = Nucleus::createConstantByte(x3);
2318 constantVector[4] = Nucleus::createConstantByte(x4);
2319 constantVector[5] = Nucleus::createConstantByte(x5);
2320 constantVector[6] = Nucleus::createConstantByte(x6);
2321 constantVector[7] = Nucleus::createConstantByte(x7);
2322 Value *vector = Nucleus::createConstantVector(constantVector, 8);
2323
2324 storeValue(Nucleus::createBitCast(vector, getType()));
2325 }
2326
SByte8(int64_t x)2327 SByte8::SByte8(int64_t x)
2328 {
2329 // xyzw.parent = this;
2330
2331 Constant *constantVector[8];
2332 constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >> 0));
2333 constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >> 8));
2334 constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
2335 constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
2336 constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
2337 constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
2338 constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
2339 constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
2340 Value *vector = Nucleus::createConstantVector(constantVector, 8);
2341
2342 storeValue(Nucleus::createBitCast(vector, getType()));
2343 }
2344
SByte8(RValue<SByte8> rhs)2345 SByte8::SByte8(RValue<SByte8> rhs)
2346 {
2347 // xyzw.parent = this;
2348
2349 storeValue(rhs.value);
2350 }
2351
// Copy constructor: loads the value of rhs and stores it into this variable.
SByte8::SByte8(const SByte8 &rhs)
{
	storeValue(rhs.loadValue());
}
2359
SByte8(const Reference<SByte8> & rhs)2360 SByte8::SByte8(const Reference<SByte8> &rhs)
2361 {
2362 // xyzw.parent = this;
2363
2364 Value *value = rhs.loadValue();
2365 storeValue(value);
2366 }
2367
operator =(RValue<SByte8> rhs) const2368 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) const
2369 {
2370 storeValue(rhs.value);
2371
2372 return rhs;
2373 }
2374
operator =(const SByte8 & rhs) const2375 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const
2376 {
2377 Value *value = rhs.loadValue();
2378 storeValue(value);
2379
2380 return RValue<SByte8>(value);
2381 }
2382
operator =(const Reference<SByte8> & rhs) const2383 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) const
2384 {
2385 Value *value = rhs.loadValue();
2386 storeValue(value);
2387
2388 return RValue<SByte8>(value);
2389 }
2390
operator +(RValue<SByte8> lhs,RValue<SByte8> rhs)2391 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2392 {
2393 if(CPUID::supportsMMX2())
2394 {
2395 return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
2396 }
2397 else
2398 {
2399 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2400 }
2401 }
2402
operator -(RValue<SByte8> lhs,RValue<SByte8> rhs)2403 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2404 {
2405 if(CPUID::supportsMMX2())
2406 {
2407 return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
2408 }
2409 else
2410 {
2411 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2412 }
2413 }
2414
2415 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2416 // {
2417 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2418 // }
2419
2420 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2421 // {
2422 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2423 // }
2424
2425 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2426 // {
2427 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2428 // }
2429
operator &(RValue<SByte8> lhs,RValue<SByte8> rhs)2430 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2431 {
2432 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2433 }
2434
operator |(RValue<SByte8> lhs,RValue<SByte8> rhs)2435 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2436 {
2437 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2438 }
2439
operator ^(RValue<SByte8> lhs,RValue<SByte8> rhs)2440 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2441 {
2442 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2443 }
2444
2445 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2446 // {
2447 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2448 // }
2449
2450 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2451 // {
2452 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
2453 // }
2454
operator +=(const SByte8 & lhs,RValue<SByte8> rhs)2455 RValue<SByte8> operator+=(const SByte8 &lhs, RValue<SByte8> rhs)
2456 {
2457 return lhs = lhs + rhs;
2458 }
2459
operator -=(const SByte8 & lhs,RValue<SByte8> rhs)2460 RValue<SByte8> operator-=(const SByte8 &lhs, RValue<SByte8> rhs)
2461 {
2462 return lhs = lhs - rhs;
2463 }
2464
2465 // RValue<SByte8> operator*=(const SByte8 &lhs, RValue<SByte8> rhs)
2466 // {
2467 // return lhs = lhs * rhs;
2468 // }
2469
2470 // RValue<SByte8> operator/=(const SByte8 &lhs, RValue<SByte8> rhs)
2471 // {
2472 // return lhs = lhs / rhs;
2473 // }
2474
2475 // RValue<SByte8> operator%=(const SByte8 &lhs, RValue<SByte8> rhs)
2476 // {
2477 // return lhs = lhs % rhs;
2478 // }
2479
operator &=(const SByte8 & lhs,RValue<SByte8> rhs)2480 RValue<SByte8> operator&=(const SByte8 &lhs, RValue<SByte8> rhs)
2481 {
2482 return lhs = lhs & rhs;
2483 }
2484
operator |=(const SByte8 & lhs,RValue<SByte8> rhs)2485 RValue<SByte8> operator|=(const SByte8 &lhs, RValue<SByte8> rhs)
2486 {
2487 return lhs = lhs | rhs;
2488 }
2489
operator ^=(const SByte8 & lhs,RValue<SByte8> rhs)2490 RValue<SByte8> operator^=(const SByte8 &lhs, RValue<SByte8> rhs)
2491 {
2492 return lhs = lhs ^ rhs;
2493 }
2494
2495 // RValue<SByte8> operator<<=(const SByte8 &lhs, RValue<SByte8> rhs)
2496 // {
2497 // return lhs = lhs << rhs;
2498 // }
2499
2500 // RValue<SByte8> operator>>=(const SByte8 &lhs, RValue<SByte8> rhs)
2501 // {
2502 // return lhs = lhs >> rhs;
2503 // }
2504
2505 // RValue<SByte8> operator+(RValue<SByte8> val)
2506 // {
2507 // return val;
2508 // }
2509
2510 // RValue<SByte8> operator-(RValue<SByte8> val)
2511 // {
2512 // return RValue<SByte8>(Nucleus::createNeg(val.value));
2513 // }
2514
operator ~(RValue<SByte8> val)2515 RValue<SByte8> operator~(RValue<SByte8> val)
2516 {
2517 if(CPUID::supportsMMX2())
2518 {
2519 return val ^ SByte8(0xFFFFFFFFFFFFFFFF);
2520 }
2521 else
2522 {
2523 return RValue<SByte8>(Nucleus::createNot(val.value));
2524 }
2525 }
2526
AddSat(RValue<SByte8> x,RValue<SByte8> y)2527 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2528 {
2529 return x86::paddsb(x, y);
2530 }
2531
SubSat(RValue<SByte8> x,RValue<SByte8> y)2532 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2533 {
2534 return x86::psubsb(x, y);
2535 }
2536
UnpackLow(RValue<SByte8> x,RValue<SByte8> y)2537 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2538 {
2539 if(CPUID::supportsMMX2())
2540 {
2541 return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
2542 }
2543 else
2544 {
2545 Constant *shuffle[8];
2546 shuffle[0] = Nucleus::createConstantInt(0);
2547 shuffle[1] = Nucleus::createConstantInt(8);
2548 shuffle[2] = Nucleus::createConstantInt(1);
2549 shuffle[3] = Nucleus::createConstantInt(9);
2550 shuffle[4] = Nucleus::createConstantInt(2);
2551 shuffle[5] = Nucleus::createConstantInt(10);
2552 shuffle[6] = Nucleus::createConstantInt(3);
2553 shuffle[7] = Nucleus::createConstantInt(11);
2554
2555 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2556
2557 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2558 }
2559 }
2560
UnpackHigh(RValue<SByte8> x,RValue<SByte8> y)2561 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2562 {
2563 if(CPUID::supportsMMX2())
2564 {
2565 return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
2566 }
2567 else
2568 {
2569 Constant *shuffle[8];
2570 shuffle[0] = Nucleus::createConstantInt(4);
2571 shuffle[1] = Nucleus::createConstantInt(12);
2572 shuffle[2] = Nucleus::createConstantInt(5);
2573 shuffle[3] = Nucleus::createConstantInt(13);
2574 shuffle[4] = Nucleus::createConstantInt(6);
2575 shuffle[5] = Nucleus::createConstantInt(14);
2576 shuffle[6] = Nucleus::createConstantInt(7);
2577 shuffle[7] = Nucleus::createConstantInt(15);
2578
2579 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 8));
2580
2581 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2582 }
2583 }
2584
SignMask(RValue<SByte8> x)2585 RValue<Int> SignMask(RValue<SByte8> x)
2586 {
2587 return x86::pmovmskb(As<Byte8>(x));
2588 }
2589
CmpGT(RValue<SByte8> x,RValue<SByte8> y)2590 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2591 {
2592 return x86::pcmpgtb(x, y);
2593 }
2594
CmpEQ(RValue<SByte8> x,RValue<SByte8> y)2595 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2596 {
2597 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
2598 }
2599
getType()2600 Type *SByte8::getType()
2601 {
2602 if(CPUID::supportsMMX2())
2603 {
2604 return MMX::getType();
2605 }
2606 else
2607 {
2608 return VectorType::get(SByte::getType(), 8);
2609 }
2610 }
2611
Byte16(RValue<Byte16> rhs)2612 Byte16::Byte16(RValue<Byte16> rhs)
2613 {
2614 // xyzw.parent = this;
2615
2616 storeValue(rhs.value);
2617 }
2618
// Copy constructor: loads the value of rhs and stores it into this variable.
Byte16::Byte16(const Byte16 &rhs)
{
	storeValue(rhs.loadValue());
}
2626
Byte16(const Reference<Byte16> & rhs)2627 Byte16::Byte16(const Reference<Byte16> &rhs)
2628 {
2629 // xyzw.parent = this;
2630
2631 Value *value = rhs.loadValue();
2632 storeValue(value);
2633 }
2634
operator =(RValue<Byte16> rhs) const2635 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) const
2636 {
2637 storeValue(rhs.value);
2638
2639 return rhs;
2640 }
2641
operator =(const Byte16 & rhs) const2642 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const
2643 {
2644 Value *value = rhs.loadValue();
2645 storeValue(value);
2646
2647 return RValue<Byte16>(value);
2648 }
2649
operator =(const Reference<Byte16> & rhs) const2650 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) const
2651 {
2652 Value *value = rhs.loadValue();
2653 storeValue(value);
2654
2655 return RValue<Byte16>(value);
2656 }
2657
getType()2658 Type *Byte16::getType()
2659 {
2660 return VectorType::get(Byte::getType(), 16);
2661 }
2662
getType()2663 Type *SByte16::getType()
2664 {
2665 return VectorType::get(SByte::getType(), 16);
2666 }
2667
Short4(RValue<Int> cast)2668 Short4::Short4(RValue<Int> cast)
2669 {
2670 Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2671 Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
2672
2673 storeValue(swizzle);
2674 }
2675
// Constructs a Short4 from an Int4. The source is reinterpreted as 16-bit (or
// 8-bit) elements and shuffled; the shuffle masks below are built around the
// even-indexed 16-bit elements (0, 2, 4, 6) of the source. Which shuffle
// sequence is used depends on CPUID::supportsSSSE3().
Short4::Short4(RValue<Int4> cast)
{
	// View the Int4 as a Short8 (eight 16-bit elements).
	Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());

	#if 0   // FIXME: Check codegen (pshuflw phshufhw pshufd)
		// Disabled alternative: a single 4-element shuffle selecting elements 0, 2, 4, 6.
		Constant *pack[8];
		pack[0] = Nucleus::createConstantInt(0);
		pack[1] = Nucleus::createConstantInt(2);
		pack[2] = Nucleus::createConstantInt(4);
		pack[3] = Nucleus::createConstantInt(6);

		Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
	#else
		Value *packed;

		// FIXME: Use Swizzle<Short8>
		if(!CPUID::supportsSSSE3())
		{
			// Without SSSE3: two 16-bit shuffles followed by a swizzle of the Int4 view.

			// First mask: positions 0-3 take elements 0, 2, 0, 2; positions 4-7 are left in place.
			Constant *pshuflw[8];
			pshuflw[0] = Nucleus::createConstantInt(0);
			pshuflw[1] = Nucleus::createConstantInt(2);
			pshuflw[2] = Nucleus::createConstantInt(0);
			pshuflw[3] = Nucleus::createConstantInt(2);
			pshuflw[4] = Nucleus::createConstantInt(4);
			pshuflw[5] = Nucleus::createConstantInt(5);
			pshuflw[6] = Nucleus::createConstantInt(6);
			pshuflw[7] = Nucleus::createConstantInt(7);

			// Second mask: positions 0-3 are left in place; positions 4-7 take elements 4, 6, 4, 6.
			Constant *pshufhw[8];
			pshufhw[0] = Nucleus::createConstantInt(0);
			pshufhw[1] = Nucleus::createConstantInt(1);
			pshufhw[2] = Nucleus::createConstantInt(2);
			pshufhw[3] = Nucleus::createConstantInt(3);
			pshufhw[4] = Nucleus::createConstantInt(4);
			pshufhw[5] = Nucleus::createConstantInt(6);
			pshufhw[6] = Nucleus::createConstantInt(4);
			pshufhw[7] = Nucleus::createConstantInt(6);

			// The second shuffle is applied to the result of the first.
			Value *shuffle1 = Nucleus::createShuffleVector(short8, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshuflw, 8));
			Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, UndefValue::get(Short8::getType()), Nucleus::createConstantVector(pshufhw, 8));
			Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
			// NOTE(review): 0x88 presumably selects 32-bit elements 0, 2, 0, 2 (pshufd-style
			// encoding) - confirm against Nucleus::createSwizzle.
			packed = Nucleus::createSwizzle(int4, 0x88);
		}
		else
		{
			// With SSSE3: a single byte-level shuffle. Bytes 0-1, 4-5, 8-9 and 12-13 of the
			// source are selected, and the same eight indices are repeated for positions 8-15.
			Constant *pshufb[16];
			pshufb[0] = Nucleus::createConstantInt(0);
			pshufb[1] = Nucleus::createConstantInt(1);
			pshufb[2] = Nucleus::createConstantInt(4);
			pshufb[3] = Nucleus::createConstantInt(5);
			pshufb[4] = Nucleus::createConstantInt(8);
			pshufb[5] = Nucleus::createConstantInt(9);
			pshufb[6] = Nucleus::createConstantInt(12);
			pshufb[7] = Nucleus::createConstantInt(13);
			pshufb[8] = Nucleus::createConstantInt(0);
			pshufb[9] = Nucleus::createConstantInt(1);
			pshufb[10] = Nucleus::createConstantInt(4);
			pshufb[11] = Nucleus::createConstantInt(5);
			pshufb[12] = Nucleus::createConstantInt(8);
			pshufb[13] = Nucleus::createConstantInt(9);
			pshufb[14] = Nucleus::createConstantInt(12);
			pshufb[15] = Nucleus::createConstantInt(13);

			Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
			packed = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
		}

		#if 0   // FIXME: No optimal instruction selection
			// Disabled alternative: extract element 0 of the Long2 view and bitcast it to Short4.
			Value *qword2 = Nucleus::createBitCast(packed, Long2::getType());
			Value *element = Nucleus::createExtractElement(qword2, 0);
			Value *short4 = Nucleus::createBitCast(element, Short4::getType());
		#else   // FIXME: Requires SSE
			// Wrap 'packed' as an Int4 rvalue, convert it through the Int2 constructor, and
			// reinterpret the resulting Int2 value as Short4.
			Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
			Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
		#endif
	#endif

	storeValue(short4);
}
2755
2756 // Short4::Short4(RValue<Float> cast)
2757 // {
2758 // }
2759
Short4(RValue<Float4> cast)2760 Short4::Short4(RValue<Float4> cast)
2761 {
2762 Int4 v4i32 = Int4(cast);
2763 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2764
2765 storeValue(As<Short4>(Int2(v4i32)).value);
2766 }
2767
Short4()2768 Short4::Short4()
2769 {
2770 // xyzw.parent = this;
2771 }
2772
Short4(short xyzw)2773 Short4::Short4(short xyzw)
2774 {
2775 // xyzw.parent = this;
2776
2777 Constant *constantVector[4];
2778 constantVector[0] = Nucleus::createConstantShort(xyzw);
2779 constantVector[1] = Nucleus::createConstantShort(xyzw);
2780 constantVector[2] = Nucleus::createConstantShort(xyzw);
2781 constantVector[3] = Nucleus::createConstantShort(xyzw);
2782 Value *vector = Nucleus::createConstantVector(constantVector, 4);
2783
2784 storeValue(Nucleus::createBitCast(vector, getType()));
2785 }
2786
Short4(short x,short y,short z,short w)2787 Short4::Short4(short x, short y, short z, short w)
2788 {
2789 // xyzw.parent = this;
2790
2791 Constant *constantVector[4];
2792 constantVector[0] = Nucleus::createConstantShort(x);
2793 constantVector[1] = Nucleus::createConstantShort(y);
2794 constantVector[2] = Nucleus::createConstantShort(z);
2795 constantVector[3] = Nucleus::createConstantShort(w);
2796 Value *vector = Nucleus::createConstantVector(constantVector, 4);
2797
2798 storeValue(Nucleus::createBitCast(vector, getType()));
2799 }
2800
Short4(RValue<Short4> rhs)2801 Short4::Short4(RValue<Short4> rhs)
2802 {
2803 // xyzw.parent = this;
2804
2805 storeValue(rhs.value);
2806 }
2807
// Copy constructor: loads the value of rhs and stores it into this variable.
Short4::Short4(const Short4 &rhs)
{
	storeValue(rhs.loadValue());
}
2815
Short4(const Reference<Short4> & rhs)2816 Short4::Short4(const Reference<Short4> &rhs)
2817 {
2818 // xyzw.parent = this;
2819
2820 Value *value = rhs.loadValue();
2821 storeValue(value);
2822 }
2823
Short4(RValue<UShort4> rhs)2824 Short4::Short4(RValue<UShort4> rhs)
2825 {
2826 // xyzw.parent = this;
2827
2828 storeValue(rhs.value);
2829 }
2830
Short4(const UShort4 & rhs)2831 Short4::Short4(const UShort4 &rhs)
2832 {
2833 // xyzw.parent = this;
2834
2835 storeValue(rhs.loadValue());
2836 }
2837
Short4(const Reference<UShort4> & rhs)2838 Short4::Short4(const Reference<UShort4> &rhs)
2839 {
2840 // xyzw.parent = this;
2841
2842 storeValue(rhs.loadValue());
2843 }
2844
operator =(RValue<Short4> rhs) const2845 RValue<Short4> Short4::operator=(RValue<Short4> rhs) const
2846 {
2847 storeValue(rhs.value);
2848
2849 return rhs;
2850 }
2851
operator =(const Short4 & rhs) const2852 RValue<Short4> Short4::operator=(const Short4 &rhs) const
2853 {
2854 Value *value = rhs.loadValue();
2855 storeValue(value);
2856
2857 return RValue<Short4>(value);
2858 }
2859
operator =(const Reference<Short4> & rhs) const2860 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) const
2861 {
2862 Value *value = rhs.loadValue();
2863 storeValue(value);
2864
2865 return RValue<Short4>(value);
2866 }
2867
operator =(RValue<UShort4> rhs) const2868 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) const
2869 {
2870 storeValue(rhs.value);
2871
2872 return RValue<Short4>(rhs);
2873 }
2874
operator =(const UShort4 & rhs) const2875 RValue<Short4> Short4::operator=(const UShort4 &rhs) const
2876 {
2877 Value *value = rhs.loadValue();
2878 storeValue(value);
2879
2880 return RValue<Short4>(value);
2881 }
2882
operator =(const Reference<UShort4> & rhs) const2883 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) const
2884 {
2885 Value *value = rhs.loadValue();
2886 storeValue(value);
2887
2888 return RValue<Short4>(value);
2889 }
2890
operator +(RValue<Short4> lhs,RValue<Short4> rhs)2891 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2892 {
2893 if(CPUID::supportsMMX2())
2894 {
2895 return x86::paddw(lhs, rhs);
2896 }
2897 else
2898 {
2899 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
2900 }
2901 }
2902
operator -(RValue<Short4> lhs,RValue<Short4> rhs)2903 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2904 {
2905 if(CPUID::supportsMMX2())
2906 {
2907 return x86::psubw(lhs, rhs);
2908 }
2909 else
2910 {
2911 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
2912 }
2913 }
2914
operator *(RValue<Short4> lhs,RValue<Short4> rhs)2915 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2916 {
2917 if(CPUID::supportsMMX2())
2918 {
2919 return x86::pmullw(lhs, rhs);
2920 }
2921 else
2922 {
2923 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2924 }
2925 }
2926
2927 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2928 // {
2929 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2930 // }
2931
2932 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2933 // {
2934 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
2935 // }
2936
operator &(RValue<Short4> lhs,RValue<Short4> rhs)2937 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2938 {
2939 if(CPUID::supportsMMX2())
2940 {
2941 return x86::pand(lhs, rhs);
2942 }
2943 else
2944 {
2945 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
2946 }
2947 }
2948
operator |(RValue<Short4> lhs,RValue<Short4> rhs)2949 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2950 {
2951 if(CPUID::supportsMMX2())
2952 {
2953 return x86::por(lhs, rhs);
2954 }
2955 else
2956 {
2957 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
2958 }
2959 }
2960
operator ^(RValue<Short4> lhs,RValue<Short4> rhs)2961 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2962 {
2963 if(CPUID::supportsMMX2())
2964 {
2965 return x86::pxor(lhs, rhs);
2966 }
2967 else
2968 {
2969 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
2970 }
2971 }
2972
operator <<(RValue<Short4> lhs,unsigned char rhs)2973 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2974 {
2975 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2976
2977 return x86::psllw(lhs, rhs);
2978 }
2979
operator >>(RValue<Short4> lhs,unsigned char rhs)2980 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2981 {
2982 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2983
2984 return x86::psraw(lhs, rhs);
2985 }
2986
operator <<(RValue<Short4> lhs,RValue<Long1> rhs)2987 RValue<Short4> operator<<(RValue<Short4> lhs, RValue<Long1> rhs)
2988 {
2989 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2990
2991 return x86::psllw(lhs, rhs);
2992 }
2993
operator >>(RValue<Short4> lhs,RValue<Long1> rhs)2994 RValue<Short4> operator>>(RValue<Short4> lhs, RValue<Long1> rhs)
2995 {
2996 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2997
2998 return x86::psraw(lhs, rhs);
2999 }
3000
operator +=(const Short4 & lhs,RValue<Short4> rhs)3001 RValue<Short4> operator+=(const Short4 &lhs, RValue<Short4> rhs)
3002 {
3003 return lhs = lhs + rhs;
3004 }
3005
operator -=(const Short4 & lhs,RValue<Short4> rhs)3006 RValue<Short4> operator-=(const Short4 &lhs, RValue<Short4> rhs)
3007 {
3008 return lhs = lhs - rhs;
3009 }
3010
operator *=(const Short4 & lhs,RValue<Short4> rhs)3011 RValue<Short4> operator*=(const Short4 &lhs, RValue<Short4> rhs)
3012 {
3013 return lhs = lhs * rhs;
3014 }
3015
3016 // RValue<Short4> operator/=(const Short4 &lhs, RValue<Short4> rhs)
3017 // {
3018 // return lhs = lhs / rhs;
3019 // }
3020
3021 // RValue<Short4> operator%=(const Short4 &lhs, RValue<Short4> rhs)
3022 // {
3023 // return lhs = lhs % rhs;
3024 // }
3025
operator &=(const Short4 & lhs,RValue<Short4> rhs)3026 RValue<Short4> operator&=(const Short4 &lhs, RValue<Short4> rhs)
3027 {
3028 return lhs = lhs & rhs;
3029 }
3030
operator |=(const Short4 & lhs,RValue<Short4> rhs)3031 RValue<Short4> operator|=(const Short4 &lhs, RValue<Short4> rhs)
3032 {
3033 return lhs = lhs | rhs;
3034 }
3035
operator ^=(const Short4 & lhs,RValue<Short4> rhs)3036 RValue<Short4> operator^=(const Short4 &lhs, RValue<Short4> rhs)
3037 {
3038 return lhs = lhs ^ rhs;
3039 }
3040
operator <<=(const Short4 & lhs,unsigned char rhs)3041 RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs)
3042 {
3043 return lhs = lhs << rhs;
3044 }
3045
operator >>=(const Short4 & lhs,unsigned char rhs)3046 RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs)
3047 {
3048 return lhs = lhs >> rhs;
3049 }
3050
operator <<=(const Short4 & lhs,RValue<Long1> rhs)3051 RValue<Short4> operator<<=(const Short4 &lhs, RValue<Long1> rhs)
3052 {
3053 return lhs = lhs << rhs;
3054 }
3055
operator >>=(const Short4 & lhs,RValue<Long1> rhs)3056 RValue<Short4> operator>>=(const Short4 &lhs, RValue<Long1> rhs)
3057 {
3058 return lhs = lhs >> rhs;
3059 }
3060
3061 // RValue<Short4> operator+(RValue<Short4> val)
3062 // {
3063 // return val;
3064 // }
3065
operator -(RValue<Short4> val)3066 RValue<Short4> operator-(RValue<Short4> val)
3067 {
3068 if(CPUID::supportsMMX2())
3069 {
3070 return Short4(0, 0, 0, 0) - val;
3071 }
3072 else
3073 {
3074 return RValue<Short4>(Nucleus::createNeg(val.value));
3075 }
3076 }
3077
operator ~(RValue<Short4> val)3078 RValue<Short4> operator~(RValue<Short4> val)
3079 {
3080 if(CPUID::supportsMMX2())
3081 {
3082 return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
3083 }
3084 else
3085 {
3086 return RValue<Short4>(Nucleus::createNot(val.value));
3087 }
3088 }
3089
RoundShort4(RValue<Float4> cast)3090 RValue<Short4> RoundShort4(RValue<Float4> cast)
3091 {
3092 RValue<Int4> v4i32 = x86::cvtps2dq(cast);
3093 RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
3094
3095 return As<Short4>(Int2(As<Int4>(v8i16)));
3096 }
3097
Max(RValue<Short4> x,RValue<Short4> y)3098 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3099 {
3100 return x86::pmaxsw(x, y);
3101 }
3102
Min(RValue<Short4> x,RValue<Short4> y)3103 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3104 {
3105 return x86::pminsw(x, y);
3106 }
3107
AddSat(RValue<Short4> x,RValue<Short4> y)3108 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3109 {
3110 return x86::paddsw(x, y);
3111 }
3112
SubSat(RValue<Short4> x,RValue<Short4> y)3113 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3114 {
3115 return x86::psubsw(x, y);
3116 }
3117
MulHigh(RValue<Short4> x,RValue<Short4> y)3118 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3119 {
3120 return x86::pmulhw(x, y);
3121 }
3122
MulAdd(RValue<Short4> x,RValue<Short4> y)3123 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3124 {
3125 return x86::pmaddwd(x, y);
3126 }
3127
Pack(RValue<Short4> x,RValue<Short4> y)3128 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3129 {
3130 return x86::packsswb(x, y);
3131 }
3132
UnpackLow(RValue<Short4> x,RValue<Short4> y)3133 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3134 {
3135 if(CPUID::supportsMMX2())
3136 {
3137 return x86::punpcklwd(x, y);
3138 }
3139 else
3140 {
3141 Constant *shuffle[4];
3142 shuffle[0] = Nucleus::createConstantInt(0);
3143 shuffle[1] = Nucleus::createConstantInt(4);
3144 shuffle[2] = Nucleus::createConstantInt(1);
3145 shuffle[3] = Nucleus::createConstantInt(5);
3146
3147 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4));
3148
3149 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
3150 }
3151 }
3152
UnpackHigh(RValue<Short4> x,RValue<Short4> y)3153 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3154 {
3155 if(CPUID::supportsMMX2())
3156 {
3157 return x86::punpckhwd(x, y);
3158 }
3159 else
3160 {
3161 Constant *shuffle[4];
3162 shuffle[0] = Nucleus::createConstantInt(2);
3163 shuffle[1] = Nucleus::createConstantInt(6);
3164 shuffle[2] = Nucleus::createConstantInt(3);
3165 shuffle[3] = Nucleus::createConstantInt(7);
3166
3167 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4));
3168
3169 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
3170 }
3171 }
3172
Swizzle(RValue<Short4> x,unsigned char select)3173 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3174 {
3175 if(CPUID::supportsMMX2())
3176 {
3177 return x86::pshufw(x, select);
3178 }
3179 else
3180 {
3181 return RValue<Short4>(Nucleus::createSwizzle(x.value, select));
3182 }
3183 }
3184
Insert(RValue<Short4> val,RValue<Short> element,int i)3185 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3186 {
3187 if(CPUID::supportsMMX2())
3188 {
3189 return x86::pinsrw(val, Int(element), i);
3190 }
3191 else
3192 {
3193 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3194 }
3195 }
3196
Extract(RValue<Short4> val,int i)3197 RValue<Short> Extract(RValue<Short4> val, int i)
3198 {
3199 if(CPUID::supportsMMX2())
3200 {
3201 return Short(x86::pextrw(val, i));
3202 }
3203 else
3204 {
3205 return RValue<Short>(Nucleus::createExtractElement(val.value, i));
3206 }
3207 }
3208
CmpGT(RValue<Short4> x,RValue<Short4> y)3209 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3210 {
3211 return x86::pcmpgtw(x, y);
3212 }
3213
CmpEQ(RValue<Short4> x,RValue<Short4> y)3214 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3215 {
3216 return x86::pcmpeqw(x, y);
3217 }
3218
getType()3219 Type *Short4::getType()
3220 {
3221 if(CPUID::supportsMMX2())
3222 {
3223 return MMX::getType();
3224 }
3225 else
3226 {
3227 return VectorType::get(Short::getType(), 4);
3228 }
3229 }
3230
UShort4(RValue<Int4> cast)3231 UShort4::UShort4(RValue<Int4> cast)
3232 {
3233 *this = Short4(cast);
3234 }
3235
UShort4(RValue<Float4> cast,bool saturate)3236 UShort4::UShort4(RValue<Float4> cast, bool saturate)
3237 {
3238 Float4 sat;
3239
3240 if(saturate)
3241 {
3242 if(CPUID::supportsSSE4_1())
3243 {
3244 sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation
3245 }
3246 else
3247 {
3248 sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
3249 }
3250 }
3251 else
3252 {
3253 sat = cast;
3254 }
3255
3256 Int4 int4(sat);
3257
3258 if(!saturate || !CPUID::supportsSSE4_1())
3259 {
3260 *this = Short4(Int4(int4));
3261 }
3262 else
3263 {
3264 *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
3265 }
3266 }
3267
UShort4()3268 UShort4::UShort4()
3269 {
3270 // xyzw.parent = this;
3271 }
3272
UShort4(unsigned short x,unsigned short y,unsigned short z,unsigned short w)3273 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3274 {
3275 // xyzw.parent = this;
3276
3277 Constant *constantVector[4];
3278 constantVector[0] = Nucleus::createConstantShort(x);
3279 constantVector[1] = Nucleus::createConstantShort(y);
3280 constantVector[2] = Nucleus::createConstantShort(z);
3281 constantVector[3] = Nucleus::createConstantShort(w);
3282 Value *vector = Nucleus::createConstantVector(constantVector, 4);
3283
3284 storeValue(Nucleus::createBitCast(vector, getType()));
3285 }
3286
UShort4(RValue<UShort4> rhs)3287 UShort4::UShort4(RValue<UShort4> rhs)
3288 {
3289 // xyzw.parent = this;
3290
3291 storeValue(rhs.value);
3292 }
3293
// Copy constructor: loads the current value of rhs and stores it here.
UShort4::UShort4(const UShort4 &rhs)
{
	storeValue(rhs.loadValue());
}
3301
UShort4(const Reference<UShort4> & rhs)3302 UShort4::UShort4(const Reference<UShort4> &rhs)
3303 {
3304 // xyzw.parent = this;
3305
3306 Value *value = rhs.loadValue();
3307 storeValue(value);
3308 }
3309
UShort4(RValue<Short4> rhs)3310 UShort4::UShort4(RValue<Short4> rhs)
3311 {
3312 // xyzw.parent = this;
3313
3314 storeValue(rhs.value);
3315 }
3316
UShort4(const Short4 & rhs)3317 UShort4::UShort4(const Short4 &rhs)
3318 {
3319 // xyzw.parent = this;
3320
3321 Value *value = rhs.loadValue();
3322 storeValue(value);
3323 }
3324
UShort4(const Reference<Short4> & rhs)3325 UShort4::UShort4(const Reference<Short4> &rhs)
3326 {
3327 // xyzw.parent = this;
3328
3329 Value *value = rhs.loadValue();
3330 storeValue(value);
3331 }
3332
operator =(RValue<UShort4> rhs) const3333 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) const
3334 {
3335 storeValue(rhs.value);
3336
3337 return rhs;
3338 }
3339
operator =(const UShort4 & rhs) const3340 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const
3341 {
3342 Value *value = rhs.loadValue();
3343 storeValue(value);
3344
3345 return RValue<UShort4>(value);
3346 }
3347
operator =(const Reference<UShort4> & rhs) const3348 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) const
3349 {
3350 Value *value = rhs.loadValue();
3351 storeValue(value);
3352
3353 return RValue<UShort4>(value);
3354 }
3355
operator =(RValue<Short4> rhs) const3356 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) const
3357 {
3358 storeValue(rhs.value);
3359
3360 return RValue<UShort4>(rhs);
3361 }
3362
operator =(const Short4 & rhs) const3363 RValue<UShort4> UShort4::operator=(const Short4 &rhs) const
3364 {
3365 Value *value = rhs.loadValue();
3366 storeValue(value);
3367
3368 return RValue<UShort4>(value);
3369 }
3370
operator =(const Reference<Short4> & rhs) const3371 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) const
3372 {
3373 Value *value = rhs.loadValue();
3374 storeValue(value);
3375
3376 return RValue<UShort4>(value);
3377 }
3378
operator +(RValue<UShort4> lhs,RValue<UShort4> rhs)3379 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3380 {
3381 if(CPUID::supportsMMX2())
3382 {
3383 return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
3384 }
3385 else
3386 {
3387 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3388 }
3389 }
3390
operator -(RValue<UShort4> lhs,RValue<UShort4> rhs)3391 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3392 {
3393 if(CPUID::supportsMMX2())
3394 {
3395 return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
3396 }
3397 else
3398 {
3399 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3400 }
3401 }
3402
3403
operator *(RValue<UShort4> lhs,RValue<UShort4> rhs)3404 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3405 {
3406 if(CPUID::supportsMMX2())
3407 {
3408 return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
3409 }
3410 else
3411 {
3412 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3413 }
3414 }
3415
operator <<(RValue<UShort4> lhs,unsigned char rhs)3416 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3417 {
3418 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3419
3420 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3421 }
3422
operator >>(RValue<UShort4> lhs,unsigned char rhs)3423 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3424 {
3425 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3426
3427 return x86::psrlw(lhs, rhs);
3428 }
3429
operator <<(RValue<UShort4> lhs,RValue<Long1> rhs)3430 RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs)
3431 {
3432 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3433
3434 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3435 }
3436
operator >>(RValue<UShort4> lhs,RValue<Long1> rhs)3437 RValue<UShort4> operator>>(RValue<UShort4> lhs, RValue<Long1> rhs)
3438 {
3439 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3440
3441 return x86::psrlw(lhs, rhs);
3442 }
3443
operator <<=(const UShort4 & lhs,unsigned char rhs)3444 RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs)
3445 {
3446 return lhs = lhs << rhs;
3447 }
3448
operator >>=(const UShort4 & lhs,unsigned char rhs)3449 RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs)
3450 {
3451 return lhs = lhs >> rhs;
3452 }
3453
operator <<=(const UShort4 & lhs,RValue<Long1> rhs)3454 RValue<UShort4> operator<<=(const UShort4 &lhs, RValue<Long1> rhs)
3455 {
3456 return lhs = lhs << rhs;
3457 }
3458
operator >>=(const UShort4 & lhs,RValue<Long1> rhs)3459 RValue<UShort4> operator>>=(const UShort4 &lhs, RValue<Long1> rhs)
3460 {
3461 return lhs = lhs >> rhs;
3462 }
3463
operator ~(RValue<UShort4> val)3464 RValue<UShort4> operator~(RValue<UShort4> val)
3465 {
3466 if(CPUID::supportsMMX2())
3467 {
3468 return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
3469 }
3470 else
3471 {
3472 return RValue<UShort4>(Nucleus::createNot(val.value));
3473 }
3474 }
3475
Max(RValue<UShort4> x,RValue<UShort4> y)3476 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3477 {
3478 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3479 }
3480
Min(RValue<UShort4> x,RValue<UShort4> y)3481 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3482 {
3483 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3484 }
3485
AddSat(RValue<UShort4> x,RValue<UShort4> y)3486 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3487 {
3488 return x86::paddusw(x, y);
3489 }
3490
SubSat(RValue<UShort4> x,RValue<UShort4> y)3491 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3492 {
3493 return x86::psubusw(x, y);
3494 }
3495
MulHigh(RValue<UShort4> x,RValue<UShort4> y)3496 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3497 {
3498 return x86::pmulhuw(x, y);
3499 }
3500
Average(RValue<UShort4> x,RValue<UShort4> y)3501 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3502 {
3503 return x86::pavgw(x, y);
3504 }
3505
Pack(RValue<UShort4> x,RValue<UShort4> y)3506 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3507 {
3508 return x86::packuswb(x, y);
3509 }
3510
getType()3511 Type *UShort4::getType()
3512 {
3513 if(CPUID::supportsMMX2())
3514 {
3515 return MMX::getType();
3516 }
3517 else
3518 {
3519 return VectorType::get(UShort::getType(), 4);
3520 }
3521 }
3522
Short8(short c0,short c1,short c2,short c3,short c4,short c5,short c6,short c7)3523 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3524 {
3525 // xyzw.parent = this;
3526
3527 Constant *constantVector[8];
3528 constantVector[0] = Nucleus::createConstantShort(c0);
3529 constantVector[1] = Nucleus::createConstantShort(c1);
3530 constantVector[2] = Nucleus::createConstantShort(c2);
3531 constantVector[3] = Nucleus::createConstantShort(c3);
3532 constantVector[4] = Nucleus::createConstantShort(c4);
3533 constantVector[5] = Nucleus::createConstantShort(c5);
3534 constantVector[6] = Nucleus::createConstantShort(c6);
3535 constantVector[7] = Nucleus::createConstantShort(c7);
3536
3537 storeValue(Nucleus::createConstantVector(constantVector, 8));
3538 }
3539
Short8(RValue<Short8> rhs)3540 Short8::Short8(RValue<Short8> rhs)
3541 {
3542 // xyzw.parent = this;
3543
3544 storeValue(rhs.value);
3545 }
3546
Short8(RValue<Short4> lo,RValue<Short4> hi)3547 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3548 {
3549 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3550 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3551
3552 Value *long2 = UndefValue::get(Long2::getType());
3553 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3554 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3555 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
3556
3557 storeValue(short8);
3558 }
3559
operator +(RValue<Short8> lhs,RValue<Short8> rhs)3560 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3561 {
3562 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3563 }
3564
operator &(RValue<Short8> lhs,RValue<Short8> rhs)3565 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3566 {
3567 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3568 }
3569
operator <<(RValue<Short8> lhs,unsigned char rhs)3570 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3571 {
3572 return x86::psllw(lhs, rhs); // FIXME: Fallback required
3573 }
3574
operator >>(RValue<Short8> lhs,unsigned char rhs)3575 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3576 {
3577 return x86::psraw(lhs, rhs); // FIXME: Fallback required
3578 }
3579
MulAdd(RValue<Short8> x,RValue<Short8> y)3580 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3581 {
3582 return x86::pmaddwd(x, y); // FIXME: Fallback required
3583 }
3584
Abs(RValue<Int4> x)3585 RValue<Int4> Abs(RValue<Int4> x)
3586 {
3587 if(CPUID::supportsSSSE3())
3588 {
3589 return x86::pabsd(x);
3590 }
3591 else
3592 {
3593 Int4 mask = (x >> 31);
3594 return (mask ^ x) - mask;
3595 }
3596 }
3597
MulHigh(RValue<Short8> x,RValue<Short8> y)3598 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3599 {
3600 return x86::pmulhw(x, y); // FIXME: Fallback required
3601 }
3602
getType()3603 Type *Short8::getType()
3604 {
3605 return VectorType::get(Short::getType(), 8);
3606 }
3607
UShort8(unsigned short c0,unsigned short c1,unsigned short c2,unsigned short c3,unsigned short c4,unsigned short c5,unsigned short c6,unsigned short c7)3608 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3609 {
3610 // xyzw.parent = this;
3611
3612 Constant *constantVector[8];
3613 constantVector[0] = Nucleus::createConstantShort(c0);
3614 constantVector[1] = Nucleus::createConstantShort(c1);
3615 constantVector[2] = Nucleus::createConstantShort(c2);
3616 constantVector[3] = Nucleus::createConstantShort(c3);
3617 constantVector[4] = Nucleus::createConstantShort(c4);
3618 constantVector[5] = Nucleus::createConstantShort(c5);
3619 constantVector[6] = Nucleus::createConstantShort(c6);
3620 constantVector[7] = Nucleus::createConstantShort(c7);
3621
3622 storeValue(Nucleus::createConstantVector(constantVector, 8));
3623 }
3624
UShort8(RValue<UShort8> rhs)3625 UShort8::UShort8(RValue<UShort8> rhs)
3626 {
3627 // xyzw.parent = this;
3628
3629 storeValue(rhs.value);
3630 }
3631
UShort8(RValue<UShort4> lo,RValue<UShort4> hi)3632 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3633 {
3634 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3635 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3636
3637 Value *long2 = UndefValue::get(Long2::getType());
3638 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3639 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3640 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
3641
3642 storeValue(short8);
3643 }
3644
operator =(RValue<UShort8> rhs) const3645 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) const
3646 {
3647 storeValue(rhs.value);
3648
3649 return rhs;
3650 }
3651
operator =(const UShort8 & rhs) const3652 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const
3653 {
3654 Value *value = rhs.loadValue();
3655 storeValue(value);
3656
3657 return RValue<UShort8>(value);
3658 }
3659
operator =(const Reference<UShort8> & rhs) const3660 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) const
3661 {
3662 Value *value = rhs.loadValue();
3663 storeValue(value);
3664
3665 return RValue<UShort8>(value);
3666 }
3667
operator &(RValue<UShort8> lhs,RValue<UShort8> rhs)3668 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3669 {
3670 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3671 }
3672
operator <<(RValue<UShort8> lhs,unsigned char rhs)3673 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3674 {
3675 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required
3676 }
3677
operator >>(RValue<UShort8> lhs,unsigned char rhs)3678 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3679 {
3680 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
3681 }
3682
operator +(RValue<UShort8> lhs,RValue<UShort8> rhs)3683 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3684 {
3685 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3686 }
3687
operator *(RValue<UShort8> lhs,RValue<UShort8> rhs)3688 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3689 {
3690 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3691 }
3692
operator +=(const UShort8 & lhs,RValue<UShort8> rhs)3693 RValue<UShort8> operator+=(const UShort8 &lhs, RValue<UShort8> rhs)
3694 {
3695 return lhs = lhs + rhs;
3696 }
3697
operator ~(RValue<UShort8> val)3698 RValue<UShort8> operator~(RValue<UShort8> val)
3699 {
3700 return RValue<UShort8>(Nucleus::createNot(val.value));
3701 }
3702
Swizzle(RValue<UShort8> x,char select0,char select1,char select2,char select3,char select4,char select5,char select6,char select7)3703 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3704 {
3705 Constant *pshufb[16];
3706 pshufb[0] = Nucleus::createConstantInt(select0 + 0);
3707 pshufb[1] = Nucleus::createConstantInt(select0 + 1);
3708 pshufb[2] = Nucleus::createConstantInt(select1 + 0);
3709 pshufb[3] = Nucleus::createConstantInt(select1 + 1);
3710 pshufb[4] = Nucleus::createConstantInt(select2 + 0);
3711 pshufb[5] = Nucleus::createConstantInt(select2 + 1);
3712 pshufb[6] = Nucleus::createConstantInt(select3 + 0);
3713 pshufb[7] = Nucleus::createConstantInt(select3 + 1);
3714 pshufb[8] = Nucleus::createConstantInt(select4 + 0);
3715 pshufb[9] = Nucleus::createConstantInt(select4 + 1);
3716 pshufb[10] = Nucleus::createConstantInt(select5 + 0);
3717 pshufb[11] = Nucleus::createConstantInt(select5 + 1);
3718 pshufb[12] = Nucleus::createConstantInt(select6 + 0);
3719 pshufb[13] = Nucleus::createConstantInt(select6 + 1);
3720 pshufb[14] = Nucleus::createConstantInt(select7 + 0);
3721 pshufb[15] = Nucleus::createConstantInt(select7 + 1);
3722
3723 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
3724 Value *shuffle = Nucleus::createShuffleVector(byte16, UndefValue::get(Byte16::getType()), Nucleus::createConstantVector(pshufb, 16));
3725 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3726
3727 return RValue<UShort8>(short8);
3728 }
3729
MulHigh(RValue<UShort8> x,RValue<UShort8> y)3730 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3731 {
3732 return x86::pmulhuw(x, y); // FIXME: Fallback required
3733 }
3734
3735 // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3736 // RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3737 // {
3738 // Constant *pshufb[16];
3739 // pshufb[0] = Nucleus::createConstantInt(element + 0);
3740 // pshufb[1] = Nucleus::createConstantInt(element + 0);
3741 // pshufb[2] = Nucleus::createConstantInt(element + 4);
3742 // pshufb[3] = Nucleus::createConstantInt(element + 4);
3743 // pshufb[4] = Nucleus::createConstantInt(element + 8);
3744 // pshufb[5] = Nucleus::createConstantInt(element + 8);
3745 // pshufb[6] = Nucleus::createConstantInt(element + 12);
3746 // pshufb[7] = Nucleus::createConstantInt(element + 12);
3747 // pshufb[8] = Nucleus::createConstantInt(element + 16);
3748 // pshufb[9] = Nucleus::createConstantInt(element + 16);
3749 // pshufb[10] = Nucleus::createConstantInt(element + 20);
3750 // pshufb[11] = Nucleus::createConstantInt(element + 20);
3751 // pshufb[12] = Nucleus::createConstantInt(element + 24);
3752 // pshufb[13] = Nucleus::createConstantInt(element + 24);
3753 // pshufb[14] = Nucleus::createConstantInt(element + 28);
3754 // pshufb[15] = Nucleus::createConstantInt(element + 28);
3755 //
3756 // Value *shuffle = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(pshufb, 16));
3757 // Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3758 //
3759 // return RValue<UShort8>(short8);
3760 // }
3761
getType()3762 Type *UShort8::getType()
3763 {
3764 return VectorType::get(UShort::getType(), 8);
3765 }
3766
Int(Argument<Int> argument)3767 Int::Int(Argument<Int> argument)
3768 {
3769 storeValue(argument.value);
3770 }
3771
Int(RValue<Byte> cast)3772 Int::Int(RValue<Byte> cast)
3773 {
3774 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3775
3776 storeValue(integer);
3777 }
3778
Int(RValue<SByte> cast)3779 Int::Int(RValue<SByte> cast)
3780 {
3781 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3782
3783 storeValue(integer);
3784 }
3785
Int(RValue<Short> cast)3786 Int::Int(RValue<Short> cast)
3787 {
3788 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3789
3790 storeValue(integer);
3791 }
3792
Int(RValue<UShort> cast)3793 Int::Int(RValue<UShort> cast)
3794 {
3795 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3796
3797 storeValue(integer);
3798 }
3799
Int(RValue<Int2> cast)3800 Int::Int(RValue<Int2> cast)
3801 {
3802 *this = Extract(cast, 0);
3803 }
3804
Int(RValue<Long> cast)3805 Int::Int(RValue<Long> cast)
3806 {
3807 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3808
3809 storeValue(integer);
3810 }
3811
Int(RValue<Float> cast)3812 Int::Int(RValue<Float> cast)
3813 {
3814 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3815
3816 storeValue(integer);
3817 }
3818
Int()3819 Int::Int()
3820 {
3821 }
3822
Int(int x)3823 Int::Int(int x)
3824 {
3825 storeValue(Nucleus::createConstantInt(x));
3826 }
3827
Int(RValue<Int> rhs)3828 Int::Int(RValue<Int> rhs)
3829 {
3830 storeValue(rhs.value);
3831 }
3832
Int(RValue<UInt> rhs)3833 Int::Int(RValue<UInt> rhs)
3834 {
3835 storeValue(rhs.value);
3836 }
3837
// Copy constructor: loads the current value of rhs and stores it here.
Int::Int(const Int &rhs)
{
	storeValue(rhs.loadValue());
}
3843
Int(const Reference<Int> & rhs)3844 Int::Int(const Reference<Int> &rhs)
3845 {
3846 Value *value = rhs.loadValue();
3847 storeValue(value);
3848 }
3849
Int(const UInt & rhs)3850 Int::Int(const UInt &rhs)
3851 {
3852 Value *value = rhs.loadValue();
3853 storeValue(value);
3854 }
3855
Int(const Reference<UInt> & rhs)3856 Int::Int(const Reference<UInt> &rhs)
3857 {
3858 Value *value = rhs.loadValue();
3859 storeValue(value);
3860 }
3861
operator =(int rhs) const3862 RValue<Int> Int::operator=(int rhs) const
3863 {
3864 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3865 }
3866
operator =(RValue<Int> rhs) const3867 RValue<Int> Int::operator=(RValue<Int> rhs) const
3868 {
3869 storeValue(rhs.value);
3870
3871 return rhs;
3872 }
3873
operator =(RValue<UInt> rhs) const3874 RValue<Int> Int::operator=(RValue<UInt> rhs) const
3875 {
3876 storeValue(rhs.value);
3877
3878 return RValue<Int>(rhs);
3879 }
3880
operator =(const Int & rhs) const3881 RValue<Int> Int::operator=(const Int &rhs) const
3882 {
3883 Value *value = rhs.loadValue();
3884 storeValue(value);
3885
3886 return RValue<Int>(value);
3887 }
3888
operator =(const Reference<Int> & rhs) const3889 RValue<Int> Int::operator=(const Reference<Int> &rhs) const
3890 {
3891 Value *value = rhs.loadValue();
3892 storeValue(value);
3893
3894 return RValue<Int>(value);
3895 }
3896
operator =(const UInt & rhs) const3897 RValue<Int> Int::operator=(const UInt &rhs) const
3898 {
3899 Value *value = rhs.loadValue();
3900 storeValue(value);
3901
3902 return RValue<Int>(value);
3903 }
3904
operator =(const Reference<UInt> & rhs) const3905 RValue<Int> Int::operator=(const Reference<UInt> &rhs) const
3906 {
3907 Value *value = rhs.loadValue();
3908 storeValue(value);
3909
3910 return RValue<Int>(value);
3911 }
3912
operator +(RValue<Int> lhs,RValue<Int> rhs)3913 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3914 {
3915 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3916 }
3917
operator -(RValue<Int> lhs,RValue<Int> rhs)3918 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3919 {
3920 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3921 }
3922
operator *(RValue<Int> lhs,RValue<Int> rhs)3923 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3924 {
3925 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3926 }
3927
operator /(RValue<Int> lhs,RValue<Int> rhs)3928 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3929 {
3930 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3931 }
3932
operator %(RValue<Int> lhs,RValue<Int> rhs)3933 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3934 {
3935 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3936 }
3937
operator &(RValue<Int> lhs,RValue<Int> rhs)3938 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3939 {
3940 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3941 }
3942
operator |(RValue<Int> lhs,RValue<Int> rhs)3943 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3944 {
3945 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3946 }
3947
operator ^(RValue<Int> lhs,RValue<Int> rhs)3948 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3949 {
3950 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3951 }
3952
operator <<(RValue<Int> lhs,RValue<Int> rhs)3953 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3954 {
3955 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3956 }
3957
operator >>(RValue<Int> lhs,RValue<Int> rhs)3958 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3959 {
3960 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3961 }
3962
operator +=(const Int & lhs,RValue<Int> rhs)3963 RValue<Int> operator+=(const Int &lhs, RValue<Int> rhs)
3964 {
3965 return lhs = lhs + rhs;
3966 }
3967
operator -=(const Int & lhs,RValue<Int> rhs)3968 RValue<Int> operator-=(const Int &lhs, RValue<Int> rhs)
3969 {
3970 return lhs = lhs - rhs;
3971 }
3972
operator *=(const Int & lhs,RValue<Int> rhs)3973 RValue<Int> operator*=(const Int &lhs, RValue<Int> rhs)
3974 {
3975 return lhs = lhs * rhs;
3976 }
3977
operator /=(const Int & lhs,RValue<Int> rhs)3978 RValue<Int> operator/=(const Int &lhs, RValue<Int> rhs)
3979 {
3980 return lhs = lhs / rhs;
3981 }
3982
operator %=(const Int & lhs,RValue<Int> rhs)3983 RValue<Int> operator%=(const Int &lhs, RValue<Int> rhs)
3984 {
3985 return lhs = lhs % rhs;
3986 }
3987
operator &=(const Int & lhs,RValue<Int> rhs)3988 RValue<Int> operator&=(const Int &lhs, RValue<Int> rhs)
3989 {
3990 return lhs = lhs & rhs;
3991 }
3992
operator |=(const Int & lhs,RValue<Int> rhs)3993 RValue<Int> operator|=(const Int &lhs, RValue<Int> rhs)
3994 {
3995 return lhs = lhs | rhs;
3996 }
3997
operator ^=(const Int & lhs,RValue<Int> rhs)3998 RValue<Int> operator^=(const Int &lhs, RValue<Int> rhs)
3999 {
4000 return lhs = lhs ^ rhs;
4001 }
4002
operator <<=(const Int & lhs,RValue<Int> rhs)4003 RValue<Int> operator<<=(const Int &lhs, RValue<Int> rhs)
4004 {
4005 return lhs = lhs << rhs;
4006 }
4007
operator >>=(const Int & lhs,RValue<Int> rhs)4008 RValue<Int> operator>>=(const Int &lhs, RValue<Int> rhs)
4009 {
4010 return lhs = lhs >> rhs;
4011 }
4012
operator +(RValue<Int> val)4013 RValue<Int> operator+(RValue<Int> val)
4014 {
4015 return val;
4016 }
4017
operator -(RValue<Int> val)4018 RValue<Int> operator-(RValue<Int> val)
4019 {
4020 return RValue<Int>(Nucleus::createNeg(val.value));
4021 }
4022
operator ~(RValue<Int> val)4023 RValue<Int> operator~(RValue<Int> val)
4024 {
4025 return RValue<Int>(Nucleus::createNot(val.value));
4026 }
4027
operator ++(const Int & val,int)4028 RValue<Int> operator++(const Int &val, int) // Post-increment
4029 {
4030 RValue<Int> res = val;
4031
4032 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4033 val.storeValue(inc);
4034
4035 return res;
4036 }
4037
// Pre-increment: loads val, stores the loaded value + 1 back and returns the
// variable itself.
const Int &operator++(const Int &val)
{
	Value *next = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));

	val.storeValue(next);

	return val;
}
4045
operator --(const Int & val,int)4046 RValue<Int> operator--(const Int &val, int) // Post-decrement
4047 {
4048 RValue<Int> res = val;
4049
4050 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4051 val.storeValue(inc);
4052
4053 return res;
4054 }
4055
// Pre-decrement: loads val, stores value - 1 back, and returns val itself.
const Int &operator--(const Int &val)
{
	Value *decremented = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));

	val.storeValue(decremented);
	return val;
}
4063
operator <(RValue<Int> lhs,RValue<Int> rhs)4064 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4065 {
4066 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4067 }
4068
operator <=(RValue<Int> lhs,RValue<Int> rhs)4069 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4070 {
4071 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4072 }
4073
operator >(RValue<Int> lhs,RValue<Int> rhs)4074 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4075 {
4076 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4077 }
4078
operator >=(RValue<Int> lhs,RValue<Int> rhs)4079 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4080 {
4081 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4082 }
4083
operator !=(RValue<Int> lhs,RValue<Int> rhs)4084 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4085 {
4086 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4087 }
4088
operator ==(RValue<Int> lhs,RValue<Int> rhs)4089 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4090 {
4091 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4092 }
4093
Max(RValue<Int> x,RValue<Int> y)4094 RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4095 {
4096 return IfThenElse(x > y, x, y);
4097 }
4098
Min(RValue<Int> x,RValue<Int> y)4099 RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4100 {
4101 return IfThenElse(x < y, x, y);
4102 }
4103
Clamp(RValue<Int> x,RValue<Int> min,RValue<Int> max)4104 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4105 {
4106 return Min(Max(x, min), max);
4107 }
4108
RoundInt(RValue<Float> cast)4109 RValue<Int> RoundInt(RValue<Float> cast)
4110 {
4111 return x86::cvtss2si(cast);
4112
4113 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4114 }
4115
getType()4116 Type *Int::getType()
4117 {
4118 return Type::getInt32Ty(*Nucleus::getContext());
4119 }
4120
Long(RValue<Int> cast)4121 Long::Long(RValue<Int> cast)
4122 {
4123
4124
4125 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4126
4127 storeValue(integer);
4128 }
4129
Long(RValue<UInt> cast)4130 Long::Long(RValue<UInt> cast)
4131 {
4132 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4133
4134 storeValue(integer);
4135 }
4136
Long()4137 Long::Long()
4138 {
4139 }
4140
Long(RValue<Long> rhs)4141 Long::Long(RValue<Long> rhs)
4142 {
4143 storeValue(rhs.value);
4144 }
4145
operator =(int64_t rhs) const4146 RValue<Long> Long::operator=(int64_t rhs) const
4147 {
4148 return RValue<Long>(storeValue(Nucleus::createConstantInt(rhs)));
4149 }
4150
operator =(RValue<Long> rhs) const4151 RValue<Long> Long::operator=(RValue<Long> rhs) const
4152 {
4153 storeValue(rhs.value);
4154
4155 return rhs;
4156 }
4157
operator =(const Long & rhs) const4158 RValue<Long> Long::operator=(const Long &rhs) const
4159 {
4160 Value *value = rhs.loadValue();
4161 storeValue(value);
4162
4163 return RValue<Long>(value);
4164 }
4165
operator =(const Reference<Long> & rhs) const4166 RValue<Long> Long::operator=(const Reference<Long> &rhs) const
4167 {
4168 Value *value = rhs.loadValue();
4169 storeValue(value);
4170
4171 return RValue<Long>(value);
4172 }
4173
operator +(RValue<Long> lhs,RValue<Long> rhs)4174 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4175 {
4176 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4177 }
4178
operator -(RValue<Long> lhs,RValue<Long> rhs)4179 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4180 {
4181 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4182 }
4183
operator +=(const Long & lhs,RValue<Long> rhs)4184 RValue<Long> operator+=(const Long &lhs, RValue<Long> rhs)
4185 {
4186 return lhs = lhs + rhs;
4187 }
4188
operator -=(const Long & lhs,RValue<Long> rhs)4189 RValue<Long> operator-=(const Long &lhs, RValue<Long> rhs)
4190 {
4191 return lhs = lhs - rhs;
4192 }
4193
AddAtomic(RValue<Pointer<Long>> x,RValue<Long> y)4194 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4195 {
4196 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4197 }
4198
getType()4199 Type *Long::getType()
4200 {
4201 return Type::getInt64Ty(*Nucleus::getContext());
4202 }
4203
Long1(const RValue<UInt> cast)4204 Long1::Long1(const RValue<UInt> cast)
4205 {
4206 Value *undefCast = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), cast.value, 0);
4207 Value *zeroCast = Nucleus::createInsertElement(undefCast, Nucleus::createConstantInt(0), 1);
4208
4209 storeValue(Nucleus::createBitCast(zeroCast, Long1::getType()));
4210 }
4211
Long1(RValue<Long1> rhs)4212 Long1::Long1(RValue<Long1> rhs)
4213 {
4214 storeValue(rhs.value);
4215 }
4216
getType()4217 Type *Long1::getType()
4218 {
4219 if(CPUID::supportsMMX2())
4220 {
4221 return MMX::getType();
4222 }
4223 else
4224 {
4225 return VectorType::get(Long::getType(), 1);
4226 }
4227 }
4228
UnpackHigh(RValue<Long2> x,RValue<Long2> y)4229 RValue<Long2> UnpackHigh(RValue<Long2> x, RValue<Long2> y)
4230 {
4231 Constant *shuffle[2];
4232 shuffle[0] = Nucleus::createConstantInt(1);
4233 shuffle[1] = Nucleus::createConstantInt(3);
4234
4235 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
4236
4237 return RValue<Long2>(packed);
4238 }
4239
getType()4240 Type *Long2::getType()
4241 {
4242 return VectorType::get(Long::getType(), 2);
4243 }
4244
UInt(Argument<UInt> argument)4245 UInt::UInt(Argument<UInt> argument)
4246 {
4247 storeValue(argument.value);
4248 }
4249
UInt(RValue<UShort> cast)4250 UInt::UInt(RValue<UShort> cast)
4251 {
4252 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4253
4254 storeValue(integer);
4255 }
4256
UInt(RValue<Long> cast)4257 UInt::UInt(RValue<Long> cast)
4258 {
4259 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4260
4261 storeValue(integer);
4262 }
4263
UInt(RValue<Float> cast)4264 UInt::UInt(RValue<Float> cast)
4265 {
4266 Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
4267
4268 storeValue(integer);
4269 }
4270
UInt()4271 UInt::UInt()
4272 {
4273 }
4274
UInt(int x)4275 UInt::UInt(int x)
4276 {
4277 storeValue(Nucleus::createConstantInt(x));
4278 }
4279
UInt(unsigned int x)4280 UInt::UInt(unsigned int x)
4281 {
4282 storeValue(Nucleus::createConstantInt(x));
4283 }
4284
UInt(RValue<UInt> rhs)4285 UInt::UInt(RValue<UInt> rhs)
4286 {
4287 storeValue(rhs.value);
4288 }
4289
UInt(RValue<Int> rhs)4290 UInt::UInt(RValue<Int> rhs)
4291 {
4292 storeValue(rhs.value);
4293 }
4294
// Copy constructor: loads the other UInt's value and stores it here.
UInt::UInt(const UInt &rhs)
{
	storeValue(rhs.loadValue());
}
4300
UInt(const Reference<UInt> & rhs)4301 UInt::UInt(const Reference<UInt> &rhs)
4302 {
4303 Value *value = rhs.loadValue();
4304 storeValue(value);
4305 }
4306
UInt(const Int & rhs)4307 UInt::UInt(const Int &rhs)
4308 {
4309 Value *value = rhs.loadValue();
4310 storeValue(value);
4311 }
4312
UInt(const Reference<Int> & rhs)4313 UInt::UInt(const Reference<Int> &rhs)
4314 {
4315 Value *value = rhs.loadValue();
4316 storeValue(value);
4317 }
4318
operator =(unsigned int rhs) const4319 RValue<UInt> UInt::operator=(unsigned int rhs) const
4320 {
4321 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4322 }
4323
operator =(RValue<UInt> rhs) const4324 RValue<UInt> UInt::operator=(RValue<UInt> rhs) const
4325 {
4326 storeValue(rhs.value);
4327
4328 return rhs;
4329 }
4330
operator =(RValue<Int> rhs) const4331 RValue<UInt> UInt::operator=(RValue<Int> rhs) const
4332 {
4333 storeValue(rhs.value);
4334
4335 return RValue<UInt>(rhs);
4336 }
4337
operator =(const UInt & rhs) const4338 RValue<UInt> UInt::operator=(const UInt &rhs) const
4339 {
4340 Value *value = rhs.loadValue();
4341 storeValue(value);
4342
4343 return RValue<UInt>(value);
4344 }
4345
operator =(const Reference<UInt> & rhs) const4346 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) const
4347 {
4348 Value *value = rhs.loadValue();
4349 storeValue(value);
4350
4351 return RValue<UInt>(value);
4352 }
4353
operator =(const Int & rhs) const4354 RValue<UInt> UInt::operator=(const Int &rhs) const
4355 {
4356 Value *value = rhs.loadValue();
4357 storeValue(value);
4358
4359 return RValue<UInt>(value);
4360 }
4361
operator =(const Reference<Int> & rhs) const4362 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) const
4363 {
4364 Value *value = rhs.loadValue();
4365 storeValue(value);
4366
4367 return RValue<UInt>(value);
4368 }
4369
operator +(RValue<UInt> lhs,RValue<UInt> rhs)4370 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4371 {
4372 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4373 }
4374
operator -(RValue<UInt> lhs,RValue<UInt> rhs)4375 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4376 {
4377 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4378 }
4379
operator *(RValue<UInt> lhs,RValue<UInt> rhs)4380 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4381 {
4382 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4383 }
4384
operator /(RValue<UInt> lhs,RValue<UInt> rhs)4385 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4386 {
4387 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4388 }
4389
operator %(RValue<UInt> lhs,RValue<UInt> rhs)4390 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4391 {
4392 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4393 }
4394
operator &(RValue<UInt> lhs,RValue<UInt> rhs)4395 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4396 {
4397 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4398 }
4399
operator |(RValue<UInt> lhs,RValue<UInt> rhs)4400 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4401 {
4402 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4403 }
4404
operator ^(RValue<UInt> lhs,RValue<UInt> rhs)4405 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4406 {
4407 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4408 }
4409
operator <<(RValue<UInt> lhs,RValue<UInt> rhs)4410 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4411 {
4412 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4413 }
4414
operator >>(RValue<UInt> lhs,RValue<UInt> rhs)4415 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4416 {
4417 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4418 }
4419
operator +=(const UInt & lhs,RValue<UInt> rhs)4420 RValue<UInt> operator+=(const UInt &lhs, RValue<UInt> rhs)
4421 {
4422 return lhs = lhs + rhs;
4423 }
4424
operator -=(const UInt & lhs,RValue<UInt> rhs)4425 RValue<UInt> operator-=(const UInt &lhs, RValue<UInt> rhs)
4426 {
4427 return lhs = lhs - rhs;
4428 }
4429
operator *=(const UInt & lhs,RValue<UInt> rhs)4430 RValue<UInt> operator*=(const UInt &lhs, RValue<UInt> rhs)
4431 {
4432 return lhs = lhs * rhs;
4433 }
4434
operator /=(const UInt & lhs,RValue<UInt> rhs)4435 RValue<UInt> operator/=(const UInt &lhs, RValue<UInt> rhs)
4436 {
4437 return lhs = lhs / rhs;
4438 }
4439
operator %=(const UInt & lhs,RValue<UInt> rhs)4440 RValue<UInt> operator%=(const UInt &lhs, RValue<UInt> rhs)
4441 {
4442 return lhs = lhs % rhs;
4443 }
4444
operator &=(const UInt & lhs,RValue<UInt> rhs)4445 RValue<UInt> operator&=(const UInt &lhs, RValue<UInt> rhs)
4446 {
4447 return lhs = lhs & rhs;
4448 }
4449
operator |=(const UInt & lhs,RValue<UInt> rhs)4450 RValue<UInt> operator|=(const UInt &lhs, RValue<UInt> rhs)
4451 {
4452 return lhs = lhs | rhs;
4453 }
4454
operator ^=(const UInt & lhs,RValue<UInt> rhs)4455 RValue<UInt> operator^=(const UInt &lhs, RValue<UInt> rhs)
4456 {
4457 return lhs = lhs ^ rhs;
4458 }
4459
operator <<=(const UInt & lhs,RValue<UInt> rhs)4460 RValue<UInt> operator<<=(const UInt &lhs, RValue<UInt> rhs)
4461 {
4462 return lhs = lhs << rhs;
4463 }
4464
operator >>=(const UInt & lhs,RValue<UInt> rhs)4465 RValue<UInt> operator>>=(const UInt &lhs, RValue<UInt> rhs)
4466 {
4467 return lhs = lhs >> rhs;
4468 }
4469
operator +(RValue<UInt> val)4470 RValue<UInt> operator+(RValue<UInt> val)
4471 {
4472 return val;
4473 }
4474
operator -(RValue<UInt> val)4475 RValue<UInt> operator-(RValue<UInt> val)
4476 {
4477 return RValue<UInt>(Nucleus::createNeg(val.value));
4478 }
4479
operator ~(RValue<UInt> val)4480 RValue<UInt> operator~(RValue<UInt> val)
4481 {
4482 return RValue<UInt>(Nucleus::createNot(val.value));
4483 }
4484
operator ++(const UInt & val,int)4485 RValue<UInt> operator++(const UInt &val, int) // Post-increment
4486 {
4487 RValue<UInt> res = val;
4488
4489 Value *inc = Nucleus::createAdd(res.value, Nucleus::createConstantInt(1));
4490 val.storeValue(inc);
4491
4492 return res;
4493 }
4494
// Pre-increment: loads val, stores value + 1 back, and returns val itself.
const UInt &operator++(const UInt &val)
{
	Value *incremented = Nucleus::createAdd(val.loadValue(), Nucleus::createConstantInt(1));

	val.storeValue(incremented);
	return val;
}
4502
operator --(const UInt & val,int)4503 RValue<UInt> operator--(const UInt &val, int) // Post-decrement
4504 {
4505 RValue<UInt> res = val;
4506
4507 Value *inc = Nucleus::createSub(res.value, Nucleus::createConstantInt(1));
4508 val.storeValue(inc);
4509
4510 return res;
4511 }
4512
// Pre-decrement: loads val, stores value - 1 back, and returns val itself.
const UInt &operator--(const UInt &val)
{
	Value *decremented = Nucleus::createSub(val.loadValue(), Nucleus::createConstantInt(1));

	val.storeValue(decremented);
	return val;
}
4520
Max(RValue<UInt> x,RValue<UInt> y)4521 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4522 {
4523 return IfThenElse(x > y, x, y);
4524 }
4525
Min(RValue<UInt> x,RValue<UInt> y)4526 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4527 {
4528 return IfThenElse(x < y, x, y);
4529 }
4530
Clamp(RValue<UInt> x,RValue<UInt> min,RValue<UInt> max)4531 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4532 {
4533 return Min(Max(x, min), max);
4534 }
4535
operator <(RValue<UInt> lhs,RValue<UInt> rhs)4536 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4537 {
4538 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4539 }
4540
operator <=(RValue<UInt> lhs,RValue<UInt> rhs)4541 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4542 {
4543 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4544 }
4545
operator >(RValue<UInt> lhs,RValue<UInt> rhs)4546 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4547 {
4548 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4549 }
4550
operator >=(RValue<UInt> lhs,RValue<UInt> rhs)4551 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4552 {
4553 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4554 }
4555
operator !=(RValue<UInt> lhs,RValue<UInt> rhs)4556 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4557 {
4558 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4559 }
4560
operator ==(RValue<UInt> lhs,RValue<UInt> rhs)4561 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4562 {
4563 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4564 }
4565
4566 // RValue<UInt> RoundUInt(RValue<Float> cast)
4567 // {
4568 // return x86::cvtss2si(val); // FIXME: Unsigned
4569 //
4570 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4571 // }
4572
getType()4573 Type *UInt::getType()
4574 {
4575 return Type::getInt32Ty(*Nucleus::getContext());
4576 }
4577
4578 // Int2::Int2(RValue<Int> cast)
4579 // {
4580 // Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4581 // Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4582 //
4583 // Constant *shuffle[2];
4584 // shuffle[0] = Nucleus::createConstantInt(0);
4585 // shuffle[1] = Nucleus::createConstantInt(0);
4586 //
4587 // Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4588 //
4589 // storeValue(replicate);
4590 // }
4591
Int2(RValue<Int4> cast)4592 Int2::Int2(RValue<Int4> cast)
4593 {
4594 Value *long2 = Nucleus::createBitCast(cast.value, Long2::getType());
4595 Value *element = Nucleus::createExtractElement(long2, 0);
4596 Value *int2 = Nucleus::createBitCast(element, Int2::getType());
4597
4598 storeValue(int2);
4599 }
4600
Int2()4601 Int2::Int2()
4602 {
4603 // xy.parent = this;
4604 }
4605
Int2(int x,int y)4606 Int2::Int2(int x, int y)
4607 {
4608 // xy.parent = this;
4609
4610 Constant *constantVector[2];
4611 constantVector[0] = Nucleus::createConstantInt(x);
4612 constantVector[1] = Nucleus::createConstantInt(y);
4613 Value *vector = Nucleus::createConstantVector(constantVector, 2);
4614
4615 storeValue(Nucleus::createBitCast(vector, getType()));
4616 }
4617
Int2(RValue<Int2> rhs)4618 Int2::Int2(RValue<Int2> rhs)
4619 {
4620 // xy.parent = this;
4621
4622 storeValue(rhs.value);
4623 }
4624
// Copy constructor: loads the other Int2's value and stores it here.
Int2::Int2(const Int2 &rhs)
{
	// Disabled: xy.parent = this;

	storeValue(rhs.loadValue());
}
4632
Int2(const Reference<Int2> & rhs)4633 Int2::Int2(const Reference<Int2> &rhs)
4634 {
4635 // xy.parent = this;
4636
4637 Value *value = rhs.loadValue();
4638 storeValue(value);
4639 }
4640
Int2(RValue<Int> lo,RValue<Int> hi)4641 Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4642 {
4643 if(CPUID::supportsMMX2())
4644 {
4645 // movd mm0, lo
4646 // movd mm1, hi
4647 // punpckldq mm0, mm1
4648 storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
4649 }
4650 else
4651 {
4652 Constant *shuffle[2];
4653 shuffle[0] = Nucleus::createConstantInt(0);
4654 shuffle[1] = Nucleus::createConstantInt(1);
4655
4656 Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, VectorType::get(Int::getType(), 1)), Nucleus::createBitCast(hi.value, VectorType::get(Int::getType(), 1)), Nucleus::createConstantVector(shuffle, 2));
4657
4658 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4659 }
4660 }
4661
operator =(RValue<Int2> rhs) const4662 RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
4663 {
4664 storeValue(rhs.value);
4665
4666 return rhs;
4667 }
4668
operator =(const Int2 & rhs) const4669 RValue<Int2> Int2::operator=(const Int2 &rhs) const
4670 {
4671 Value *value = rhs.loadValue();
4672 storeValue(value);
4673
4674 return RValue<Int2>(value);
4675 }
4676
operator =(const Reference<Int2> & rhs) const4677 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) const
4678 {
4679 Value *value = rhs.loadValue();
4680 storeValue(value);
4681
4682 return RValue<Int2>(value);
4683 }
4684
operator +(RValue<Int2> lhs,RValue<Int2> rhs)4685 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4686 {
4687 if(CPUID::supportsMMX2())
4688 {
4689 return x86::paddd(lhs, rhs);
4690 }
4691 else
4692 {
4693 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4694 }
4695 }
4696
operator -(RValue<Int2> lhs,RValue<Int2> rhs)4697 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4698 {
4699 if(CPUID::supportsMMX2())
4700 {
4701 return x86::psubd(lhs, rhs);
4702 }
4703 else
4704 {
4705 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4706 }
4707 }
4708
4709 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4710 // {
4711 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4712 // }
4713
4714 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4715 // {
4716 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4717 // }
4718
4719 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4720 // {
4721 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4722 // }
4723
operator &(RValue<Int2> lhs,RValue<Int2> rhs)4724 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4725 {
4726 if(CPUID::supportsMMX2())
4727 {
4728 return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4729 }
4730 else
4731 {
4732 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4733 }
4734 }
4735
operator |(RValue<Int2> lhs,RValue<Int2> rhs)4736 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4737 {
4738 if(CPUID::supportsMMX2())
4739 {
4740 return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4741 }
4742 else
4743 {
4744 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4745 }
4746 }
4747
operator ^(RValue<Int2> lhs,RValue<Int2> rhs)4748 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4749 {
4750 if(CPUID::supportsMMX2())
4751 {
4752 return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4753 }
4754 else
4755 {
4756 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4757 }
4758 }
4759
operator <<(RValue<Int2> lhs,unsigned char rhs)4760 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4761 {
4762 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4763
4764 return x86::pslld(lhs, rhs);
4765 }
4766
operator >>(RValue<Int2> lhs,unsigned char rhs)4767 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4768 {
4769 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4770
4771 return x86::psrad(lhs, rhs);
4772 }
4773
operator <<(RValue<Int2> lhs,RValue<Long1> rhs)4774 RValue<Int2> operator<<(RValue<Int2> lhs, RValue<Long1> rhs)
4775 {
4776 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4777
4778 return x86::pslld(lhs, rhs);
4779 }
4780
operator >>(RValue<Int2> lhs,RValue<Long1> rhs)4781 RValue<Int2> operator>>(RValue<Int2> lhs, RValue<Long1> rhs)
4782 {
4783 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4784
4785 return x86::psrad(lhs, rhs);
4786 }
4787
operator +=(const Int2 & lhs,RValue<Int2> rhs)4788 RValue<Int2> operator+=(const Int2 &lhs, RValue<Int2> rhs)
4789 {
4790 return lhs = lhs + rhs;
4791 }
4792
operator -=(const Int2 & lhs,RValue<Int2> rhs)4793 RValue<Int2> operator-=(const Int2 &lhs, RValue<Int2> rhs)
4794 {
4795 return lhs = lhs - rhs;
4796 }
4797
4798 // RValue<Int2> operator*=(const Int2 &lhs, RValue<Int2> rhs)
4799 // {
4800 // return lhs = lhs * rhs;
4801 // }
4802
4803 // RValue<Int2> operator/=(const Int2 &lhs, RValue<Int2> rhs)
4804 // {
4805 // return lhs = lhs / rhs;
4806 // }
4807
4808 // RValue<Int2> operator%=(const Int2 &lhs, RValue<Int2> rhs)
4809 // {
4810 // return lhs = lhs % rhs;
4811 // }
4812
operator &=(const Int2 & lhs,RValue<Int2> rhs)4813 RValue<Int2> operator&=(const Int2 &lhs, RValue<Int2> rhs)
4814 {
4815 return lhs = lhs & rhs;
4816 }
4817
operator |=(const Int2 & lhs,RValue<Int2> rhs)4818 RValue<Int2> operator|=(const Int2 &lhs, RValue<Int2> rhs)
4819 {
4820 return lhs = lhs | rhs;
4821 }
4822
operator ^=(const Int2 & lhs,RValue<Int2> rhs)4823 RValue<Int2> operator^=(const Int2 &lhs, RValue<Int2> rhs)
4824 {
4825 return lhs = lhs ^ rhs;
4826 }
4827
operator <<=(const Int2 & lhs,unsigned char rhs)4828 RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs)
4829 {
4830 return lhs = lhs << rhs;
4831 }
4832
operator >>=(const Int2 & lhs,unsigned char rhs)4833 RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs)
4834 {
4835 return lhs = lhs >> rhs;
4836 }
4837
operator <<=(const Int2 & lhs,RValue<Long1> rhs)4838 RValue<Int2> operator<<=(const Int2 &lhs, RValue<Long1> rhs)
4839 {
4840 return lhs = lhs << rhs;
4841 }
4842
operator >>=(const Int2 & lhs,RValue<Long1> rhs)4843 RValue<Int2> operator>>=(const Int2 &lhs, RValue<Long1> rhs)
4844 {
4845 return lhs = lhs >> rhs;
4846 }
4847
4848 // RValue<Int2> operator+(RValue<Int2> val)
4849 // {
4850 // return val;
4851 // }
4852
4853 // RValue<Int2> operator-(RValue<Int2> val)
4854 // {
4855 // return RValue<Int2>(Nucleus::createNeg(val.value));
4856 // }
4857
operator ~(RValue<Int2> val)4858 RValue<Int2> operator~(RValue<Int2> val)
4859 {
4860 if(CPUID::supportsMMX2())
4861 {
4862 return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
4863 }
4864 else
4865 {
4866 return RValue<Int2>(Nucleus::createNot(val.value));
4867 }
4868 }
4869
UnpackLow(RValue<Int2> x,RValue<Int2> y)4870 RValue<Long1> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4871 {
4872 if(CPUID::supportsMMX2())
4873 {
4874 return x86::punpckldq(x, y);
4875 }
4876 else
4877 {
4878 Constant *shuffle[2];
4879 shuffle[0] = Nucleus::createConstantInt(0);
4880 shuffle[1] = Nucleus::createConstantInt(2);
4881
4882 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
4883
4884 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
4885 }
4886 }
4887
UnpackHigh(RValue<Int2> x,RValue<Int2> y)4888 RValue<Long1> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4889 {
4890 if(CPUID::supportsMMX2())
4891 {
4892 return x86::punpckhdq(x, y);
4893 }
4894 else
4895 {
4896 Constant *shuffle[2];
4897 shuffle[0] = Nucleus::createConstantInt(1);
4898 shuffle[1] = Nucleus::createConstantInt(3);
4899
4900 Value *packed = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 2));
4901
4902 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
4903 }
4904 }
4905
Extract(RValue<Int2> val,int i)4906 RValue<Int> Extract(RValue<Int2> val, int i)
4907 {
4908 if(false) // FIXME: LLVM does not generate optimal code
4909 {
4910 return RValue<Int>(Nucleus::createExtractElement(val.value, i));
4911 }
4912 else
4913 {
4914 if(i == 0)
4915 {
4916 return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), 0));
4917 }
4918 else
4919 {
4920 Int2 val2 = As<Int2>(UnpackHigh(val, val));
4921
4922 return Extract(val2, 0);
4923 }
4924 }
4925 }
4926
Insert(RValue<Int2> val,RValue<Int> element,int i)4927 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4928 {
4929 return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, VectorType::get(Int::getType(), 2)), element.value, i), Int2::getType()));
4930 }
4931
getType()4932 Type *Int2::getType()
4933 {
4934 if(CPUID::supportsMMX2())
4935 {
4936 return MMX::getType();
4937 }
4938 else
4939 {
4940 return VectorType::get(Int::getType(), 2);
4941 }
4942 }
4943
UInt2()4944 UInt2::UInt2()
4945 {
4946 // xy.parent = this;
4947 }
4948
UInt2(unsigned int x,unsigned int y)4949 UInt2::UInt2(unsigned int x, unsigned int y)
4950 {
4951 // xy.parent = this;
4952
4953 Constant *constantVector[2];
4954 constantVector[0] = Nucleus::createConstantInt(x);
4955 constantVector[1] = Nucleus::createConstantInt(y);
4956 Value *vector = Nucleus::createConstantVector(constantVector, 2);
4957
4958 storeValue(Nucleus::createBitCast(vector, getType()));
4959 }
4960
UInt2(RValue<UInt2> rhs)4961 UInt2::UInt2(RValue<UInt2> rhs)
4962 {
4963 // xy.parent = this;
4964
4965 storeValue(rhs.value);
4966 }
4967
// Copy constructor: loads the other UInt2's value and stores it here.
UInt2::UInt2(const UInt2 &rhs)
{
	// Disabled: xy.parent = this;

	storeValue(rhs.loadValue());
}
4975
UInt2(const Reference<UInt2> & rhs)4976 UInt2::UInt2(const Reference<UInt2> &rhs)
4977 {
4978 // xy.parent = this;
4979
4980 Value *value = rhs.loadValue();
4981 storeValue(value);
4982 }
4983
operator =(RValue<UInt2> rhs) const4984 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) const
4985 {
4986 storeValue(rhs.value);
4987
4988 return rhs;
4989 }
4990
operator =(const UInt2 & rhs) const4991 RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const
4992 {
4993 Value *value = rhs.loadValue();
4994 storeValue(value);
4995
4996 return RValue<UInt2>(value);
4997 }
4998
operator =(const Reference<UInt2> & rhs) const4999 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) const
5000 {
5001 Value *value = rhs.loadValue();
5002 storeValue(value);
5003
5004 return RValue<UInt2>(value);
5005 }
5006
operator +(RValue<UInt2> lhs,RValue<UInt2> rhs)5007 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5008 {
5009 if(CPUID::supportsMMX2())
5010 {
5011 return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
5012 }
5013 else
5014 {
5015 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5016 }
5017 }
5018
operator -(RValue<UInt2> lhs,RValue<UInt2> rhs)5019 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5020 {
5021 if(CPUID::supportsMMX2())
5022 {
5023 return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
5024 }
5025 else
5026 {
5027 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5028 }
5029 }
5030
5031 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5032 // {
5033 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5034 // }
5035
5036 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5037 // {
5038 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5039 // }
5040
5041 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5042 // {
5043 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5044 // }
5045
operator &(RValue<UInt2> lhs,RValue<UInt2> rhs)5046 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5047 {
5048 if(CPUID::supportsMMX2())
5049 {
5050 return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
5051 }
5052 else
5053 {
5054 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5055 }
5056 }
5057
operator |(RValue<UInt2> lhs,RValue<UInt2> rhs)5058 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5059 {
5060 if(CPUID::supportsMMX2())
5061 {
5062 return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
5063 }
5064 else
5065 {
5066 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5067 }
5068 }
5069
operator ^(RValue<UInt2> lhs,RValue<UInt2> rhs)5070 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5071 {
5072 if(CPUID::supportsMMX2())
5073 {
5074 return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
5075 }
5076 else
5077 {
5078 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5079 }
5080 }
5081
operator <<(RValue<UInt2> lhs,unsigned char rhs)5082 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5083 {
5084 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5085
5086 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5087 }
5088
operator >>(RValue<UInt2> lhs,unsigned char rhs)5089 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5090 {
5091 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5092
5093 return x86::psrld(lhs, rhs);
5094 }
5095
operator <<(RValue<UInt2> lhs,RValue<Long1> rhs)5096 RValue<UInt2> operator<<(RValue<UInt2> lhs, RValue<Long1> rhs)
5097 {
5098 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5099
5100 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5101 }
5102
operator >>(RValue<UInt2> lhs,RValue<Long1> rhs)5103 RValue<UInt2> operator>>(RValue<UInt2> lhs, RValue<Long1> rhs)
5104 {
5105 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5106
5107 return x86::psrld(lhs, rhs);
5108 }
5109
operator +=(const UInt2 & lhs,RValue<UInt2> rhs)5110 RValue<UInt2> operator+=(const UInt2 &lhs, RValue<UInt2> rhs)
5111 {
5112 return lhs = lhs + rhs;
5113 }
5114
operator -=(const UInt2 & lhs,RValue<UInt2> rhs)5115 RValue<UInt2> operator-=(const UInt2 &lhs, RValue<UInt2> rhs)
5116 {
5117 return lhs = lhs - rhs;
5118 }
5119
5120 // RValue<UInt2> operator*=(const UInt2 &lhs, RValue<UInt2> rhs)
5121 // {
5122 // return lhs = lhs * rhs;
5123 // }
5124
5125 // RValue<UInt2> operator/=(const UInt2 &lhs, RValue<UInt2> rhs)
5126 // {
5127 // return lhs = lhs / rhs;
5128 // }
5129
5130 // RValue<UInt2> operator%=(const UInt2 &lhs, RValue<UInt2> rhs)
5131 // {
5132 // return lhs = lhs % rhs;
5133 // }
5134
operator &=(const UInt2 & lhs,RValue<UInt2> rhs)5135 RValue<UInt2> operator&=(const UInt2 &lhs, RValue<UInt2> rhs)
5136 {
5137 return lhs = lhs & rhs;
5138 }
5139
operator |=(const UInt2 & lhs,RValue<UInt2> rhs)5140 RValue<UInt2> operator|=(const UInt2 &lhs, RValue<UInt2> rhs)
5141 {
5142 return lhs = lhs | rhs;
5143 }
5144
operator ^=(const UInt2 & lhs,RValue<UInt2> rhs)5145 RValue<UInt2> operator^=(const UInt2 &lhs, RValue<UInt2> rhs)
5146 {
5147 return lhs = lhs ^ rhs;
5148 }
5149
operator <<=(const UInt2 & lhs,unsigned char rhs)5150 RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs)
5151 {
5152 return lhs = lhs << rhs;
5153 }
5154
operator >>=(const UInt2 & lhs,unsigned char rhs)5155 RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs)
5156 {
5157 return lhs = lhs >> rhs;
5158 }
5159
operator <<=(const UInt2 & lhs,RValue<Long1> rhs)5160 RValue<UInt2> operator<<=(const UInt2 &lhs, RValue<Long1> rhs)
5161 {
5162 return lhs = lhs << rhs;
5163 }
5164
operator >>=(const UInt2 & lhs,RValue<Long1> rhs)5165 RValue<UInt2> operator>>=(const UInt2 &lhs, RValue<Long1> rhs)
5166 {
5167 return lhs = lhs >> rhs;
5168 }
5169
5170 // RValue<UInt2> operator+(RValue<UInt2> val)
5171 // {
5172 // return val;
5173 // }
5174
5175 // RValue<UInt2> operator-(RValue<UInt2> val)
5176 // {
5177 // return RValue<UInt2>(Nucleus::createNeg(val.value));
5178 // }
5179
operator ~(RValue<UInt2> val)5180 RValue<UInt2> operator~(RValue<UInt2> val)
5181 {
5182 if(CPUID::supportsMMX2())
5183 {
5184 return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
5185 }
5186 else
5187 {
5188 return RValue<UInt2>(Nucleus::createNot(val.value));
5189 }
5190 }
5191
getType()5192 Type *UInt2::getType()
5193 {
5194 if(CPUID::supportsMMX2())
5195 {
5196 return MMX::getType();
5197 }
5198 else
5199 {
5200 return VectorType::get(UInt::getType(), 2);
5201 }
5202 }
5203
Int4(RValue<Float4> cast)5204 Int4::Int4(RValue<Float4> cast)
5205 {
5206 // xyzw.parent = this;
5207
5208 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5209
5210 storeValue(xyzw);
5211 }
5212
Int4(RValue<Short4> cast)5213 Int4::Int4(RValue<Short4> cast)
5214 {
5215 Value *long2 = UndefValue::get(Long2::getType());
5216 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5217 long2 = Nucleus::createInsertElement(long2, element, 0);
5218 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5219
5220 if(CPUID::supportsSSE4_1())
5221 {
5222 storeValue(x86::pmovsxwd(vector).value);
5223 }
5224 else
5225 {
5226 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5227
5228 Constant *swizzle[8];
5229 swizzle[0] = Nucleus::createConstantInt(0);
5230 swizzle[1] = Nucleus::createConstantInt(0);
5231 swizzle[2] = Nucleus::createConstantInt(1);
5232 swizzle[3] = Nucleus::createConstantInt(1);
5233 swizzle[4] = Nucleus::createConstantInt(2);
5234 swizzle[5] = Nucleus::createConstantInt(2);
5235 swizzle[6] = Nucleus::createConstantInt(3);
5236 swizzle[7] = Nucleus::createConstantInt(3);
5237
5238 Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 8));
5239 Value *d = Nucleus::createBitCast(c, Int4::getType());
5240 storeValue(d);
5241
5242 // Each Short is packed into each Int in the (Short | Short) format.
5243 // Shifting by 16 will retrieve the original Short value.
5244 // Shitfing an Int will propagate the sign bit, which will work
5245 // for both positive and negative values of a Short.
5246 *this >>= 16;
5247 }
5248 }
5249
Int4(RValue<UShort4> cast)5250 Int4::Int4(RValue<UShort4> cast)
5251 {
5252 Value *long2 = UndefValue::get(Long2::getType());
5253 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5254 long2 = Nucleus::createInsertElement(long2, element, 0);
5255 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5256
5257 if(CPUID::supportsSSE4_1())
5258 {
5259 storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
5260 }
5261 else
5262 {
5263 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5264
5265 Constant *swizzle[8];
5266 swizzle[0] = Nucleus::createConstantInt(0);
5267 swizzle[1] = Nucleus::createConstantInt(8);
5268 swizzle[2] = Nucleus::createConstantInt(1);
5269 swizzle[3] = Nucleus::createConstantInt(9);
5270 swizzle[4] = Nucleus::createConstantInt(2);
5271 swizzle[5] = Nucleus::createConstantInt(10);
5272 swizzle[6] = Nucleus::createConstantInt(3);
5273 swizzle[7] = Nucleus::createConstantInt(11);
5274
5275 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle, 8));
5276 Value *d = Nucleus::createBitCast(c, Int4::getType());
5277 storeValue(d);
5278 }
5279 }
5280
Int4()5281 Int4::Int4()
5282 {
5283 // xyzw.parent = this;
5284 }
5285
Int4(int xyzw)5286 Int4::Int4(int xyzw)
5287 {
5288 constant(xyzw, xyzw, xyzw, xyzw);
5289 }
5290
Int4(int x,int yzw)5291 Int4::Int4(int x, int yzw)
5292 {
5293 constant(x, yzw, yzw, yzw);
5294 }
5295
Int4(int x,int y,int zw)5296 Int4::Int4(int x, int y, int zw)
5297 {
5298 constant(x, y, zw, zw);
5299 }
5300
Int4(int x,int y,int z,int w)5301 Int4::Int4(int x, int y, int z, int w)
5302 {
5303 constant(x, y, z, w);
5304 }
5305
constant(int x,int y,int z,int w)5306 void Int4::constant(int x, int y, int z, int w)
5307 {
5308 // xyzw.parent = this;
5309
5310 Constant *constantVector[4];
5311 constantVector[0] = Nucleus::createConstantInt(x);
5312 constantVector[1] = Nucleus::createConstantInt(y);
5313 constantVector[2] = Nucleus::createConstantInt(z);
5314 constantVector[3] = Nucleus::createConstantInt(w);
5315
5316 storeValue(Nucleus::createConstantVector(constantVector, 4));
5317 }
5318
Int4(RValue<Int4> rhs)5319 Int4::Int4(RValue<Int4> rhs)
5320 {
5321 // xyzw.parent = this;
5322
5323 storeValue(rhs.value);
5324 }
5325
// Copy constructor: loads the value of rhs and stores it into this variable.
Int4::Int4(const Int4 &rhs)
{
	//	xyzw.parent = this;

	storeValue(rhs.loadValue());
}
5333
Int4(const Reference<Int4> & rhs)5334 Int4::Int4(const Reference<Int4> &rhs)
5335 {
5336 // xyzw.parent = this;
5337
5338 Value *value = rhs.loadValue();
5339 storeValue(value);
5340 }
5341
Int4(RValue<UInt4> rhs)5342 Int4::Int4(RValue<UInt4> rhs)
5343 {
5344 // xyzw.parent = this;
5345
5346 storeValue(rhs.value);
5347 }
5348
Int4(const UInt4 & rhs)5349 Int4::Int4(const UInt4 &rhs)
5350 {
5351 // xyzw.parent = this;
5352
5353 Value *value = rhs.loadValue();
5354 storeValue(value);
5355 }
5356
Int4(const Reference<UInt4> & rhs)5357 Int4::Int4(const Reference<UInt4> &rhs)
5358 {
5359 // xyzw.parent = this;
5360
5361 Value *value = rhs.loadValue();
5362 storeValue(value);
5363 }
5364
Int4(RValue<Int2> lo,RValue<Int2> hi)5365 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5366 {
5367 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5368 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5369
5370 Value *long2 = UndefValue::get(Long2::getType());
5371 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5372 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5373 Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
5374
5375 storeValue(int4);
5376 }
5377
operator =(RValue<Int4> rhs) const5378 RValue<Int4> Int4::operator=(RValue<Int4> rhs) const
5379 {
5380 storeValue(rhs.value);
5381
5382 return rhs;
5383 }
5384
operator =(const Int4 & rhs) const5385 RValue<Int4> Int4::operator=(const Int4 &rhs) const
5386 {
5387 Value *value = rhs.loadValue();
5388 storeValue(value);
5389
5390 return RValue<Int4>(value);
5391 }
5392
operator =(const Reference<Int4> & rhs) const5393 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) const
5394 {
5395 Value *value = rhs.loadValue();
5396 storeValue(value);
5397
5398 return RValue<Int4>(value);
5399 }
5400
operator +(RValue<Int4> lhs,RValue<Int4> rhs)5401 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5402 {
5403 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5404 }
5405
operator -(RValue<Int4> lhs,RValue<Int4> rhs)5406 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5407 {
5408 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5409 }
5410
operator *(RValue<Int4> lhs,RValue<Int4> rhs)5411 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5412 {
5413 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5414 }
5415
operator /(RValue<Int4> lhs,RValue<Int4> rhs)5416 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5417 {
5418 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5419 }
5420
operator %(RValue<Int4> lhs,RValue<Int4> rhs)5421 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5422 {
5423 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5424 }
5425
operator &(RValue<Int4> lhs,RValue<Int4> rhs)5426 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5427 {
5428 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5429 }
5430
operator |(RValue<Int4> lhs,RValue<Int4> rhs)5431 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5432 {
5433 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5434 }
5435
operator ^(RValue<Int4> lhs,RValue<Int4> rhs)5436 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5437 {
5438 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5439 }
5440
operator <<(RValue<Int4> lhs,unsigned char rhs)5441 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5442 {
5443 return x86::pslld(lhs, rhs);
5444 }
5445
operator >>(RValue<Int4> lhs,unsigned char rhs)5446 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5447 {
5448 return x86::psrad(lhs, rhs);
5449 }
5450
operator <<(RValue<Int4> lhs,RValue<Int4> rhs)5451 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5452 {
5453 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5454 }
5455
operator >>(RValue<Int4> lhs,RValue<Int4> rhs)5456 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5457 {
5458 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5459 }
5460
operator +=(const Int4 & lhs,RValue<Int4> rhs)5461 RValue<Int4> operator+=(const Int4 &lhs, RValue<Int4> rhs)
5462 {
5463 return lhs = lhs + rhs;
5464 }
5465
operator -=(const Int4 & lhs,RValue<Int4> rhs)5466 RValue<Int4> operator-=(const Int4 &lhs, RValue<Int4> rhs)
5467 {
5468 return lhs = lhs - rhs;
5469 }
5470
operator *=(const Int4 & lhs,RValue<Int4> rhs)5471 RValue<Int4> operator*=(const Int4 &lhs, RValue<Int4> rhs)
5472 {
5473 return lhs = lhs * rhs;
5474 }
5475
5476 // RValue<Int4> operator/=(const Int4 &lhs, RValue<Int4> rhs)
5477 // {
5478 // return lhs = lhs / rhs;
5479 // }
5480
5481 // RValue<Int4> operator%=(const Int4 &lhs, RValue<Int4> rhs)
5482 // {
5483 // return lhs = lhs % rhs;
5484 // }
5485
operator &=(const Int4 & lhs,RValue<Int4> rhs)5486 RValue<Int4> operator&=(const Int4 &lhs, RValue<Int4> rhs)
5487 {
5488 return lhs = lhs & rhs;
5489 }
5490
operator |=(const Int4 & lhs,RValue<Int4> rhs)5491 RValue<Int4> operator|=(const Int4 &lhs, RValue<Int4> rhs)
5492 {
5493 return lhs = lhs | rhs;
5494 }
5495
operator ^=(const Int4 & lhs,RValue<Int4> rhs)5496 RValue<Int4> operator^=(const Int4 &lhs, RValue<Int4> rhs)
5497 {
5498 return lhs = lhs ^ rhs;
5499 }
5500
operator <<=(const Int4 & lhs,unsigned char rhs)5501 RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs)
5502 {
5503 return lhs = lhs << rhs;
5504 }
5505
operator >>=(const Int4 & lhs,unsigned char rhs)5506 RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs)
5507 {
5508 return lhs = lhs >> rhs;
5509 }
5510
operator +(RValue<Int4> val)5511 RValue<Int4> operator+(RValue<Int4> val)
5512 {
5513 return val;
5514 }
5515
operator -(RValue<Int4> val)5516 RValue<Int4> operator-(RValue<Int4> val)
5517 {
5518 return RValue<Int4>(Nucleus::createNeg(val.value));
5519 }
5520
operator ~(RValue<Int4> val)5521 RValue<Int4> operator~(RValue<Int4> val)
5522 {
5523 return RValue<Int4>(Nucleus::createNot(val.value));
5524 }
5525
CmpEQ(RValue<Int4> x,RValue<Int4> y)5526 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5527 {
5528 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5529 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5530 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5531 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5532 }
5533
CmpLT(RValue<Int4> x,RValue<Int4> y)5534 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5535 {
5536 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
5537 }
5538
CmpLE(RValue<Int4> x,RValue<Int4> y)5539 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5540 {
5541 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5542 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5543 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5544 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5545 }
5546
CmpNEQ(RValue<Int4> x,RValue<Int4> y)5547 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5548 {
5549 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5550 }
5551
CmpNLT(RValue<Int4> x,RValue<Int4> y)5552 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5553 {
5554 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5555 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5556 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5557 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
5558 }
5559
CmpNLE(RValue<Int4> x,RValue<Int4> y)5560 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5561 {
5562 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
5563 }
5564
Max(RValue<Int4> x,RValue<Int4> y)5565 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5566 {
5567 if(CPUID::supportsSSE4_1())
5568 {
5569 return x86::pmaxsd(x, y);
5570 }
5571 else
5572 {
5573 RValue<Int4> greater = CmpNLE(x, y);
5574 return x & greater | y & ~greater;
5575 }
5576 }
5577
Min(RValue<Int4> x,RValue<Int4> y)5578 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5579 {
5580 if(CPUID::supportsSSE4_1())
5581 {
5582 return x86::pminsd(x, y);
5583 }
5584 else
5585 {
5586 RValue<Int4> less = CmpLT(x, y);
5587 return x & less | y & ~less;
5588 }
5589 }
5590
RoundInt(RValue<Float4> cast)5591 RValue<Int4> RoundInt(RValue<Float4> cast)
5592 {
5593 return x86::cvtps2dq(cast);
5594 }
5595
Pack(RValue<Int4> x,RValue<Int4> y)5596 RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5597 {
5598 return x86::packssdw(x, y);
5599 }
5600
Extract(RValue<Int4> x,int i)5601 RValue<Int> Extract(RValue<Int4> x, int i)
5602 {
5603 return RValue<Int>(Nucleus::createExtractElement(x.value, i));
5604 }
5605
Insert(RValue<Int4> x,RValue<Int> element,int i)5606 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5607 {
5608 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
5609 }
5610
SignMask(RValue<Int4> x)5611 RValue<Int> SignMask(RValue<Int4> x)
5612 {
5613 return x86::movmskps(As<Float4>(x));
5614 }
5615
Swizzle(RValue<Int4> x,unsigned char select)5616 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5617 {
5618 return RValue<Int4>(Nucleus::createSwizzle(x.value, select));
5619 }
5620
getType()5621 Type *Int4::getType()
5622 {
5623 return VectorType::get(Int::getType(), 4);
5624 }
5625
UInt4(RValue<Float4> cast)5626 UInt4::UInt4(RValue<Float4> cast)
5627 {
5628 // xyzw.parent = this;
5629
5630 Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
5631
5632 storeValue(xyzw);
5633 }
5634
UInt4()5635 UInt4::UInt4()
5636 {
5637 // xyzw.parent = this;
5638 }
5639
UInt4(int xyzw)5640 UInt4::UInt4(int xyzw)
5641 {
5642 constant(xyzw, xyzw, xyzw, xyzw);
5643 }
5644
UInt4(int x,int yzw)5645 UInt4::UInt4(int x, int yzw)
5646 {
5647 constant(x, yzw, yzw, yzw);
5648 }
5649
UInt4(int x,int y,int zw)5650 UInt4::UInt4(int x, int y, int zw)
5651 {
5652 constant(x, y, zw, zw);
5653 }
5654
UInt4(int x,int y,int z,int w)5655 UInt4::UInt4(int x, int y, int z, int w)
5656 {
5657 constant(x, y, z, w);
5658 }
5659
constant(int x,int y,int z,int w)5660 void UInt4::constant(int x, int y, int z, int w)
5661 {
5662 // xyzw.parent = this;
5663
5664 Constant *constantVector[4];
5665 constantVector[0] = Nucleus::createConstantInt(x);
5666 constantVector[1] = Nucleus::createConstantInt(y);
5667 constantVector[2] = Nucleus::createConstantInt(z);
5668 constantVector[3] = Nucleus::createConstantInt(w);
5669
5670 storeValue(Nucleus::createConstantVector(constantVector, 4));
5671 }
5672
UInt4(RValue<UInt4> rhs)5673 UInt4::UInt4(RValue<UInt4> rhs)
5674 {
5675 // xyzw.parent = this;
5676
5677 storeValue(rhs.value);
5678 }
5679
// Copy constructor: loads the value of rhs and stores it into this variable.
UInt4::UInt4(const UInt4 &rhs)
{
	//	xyzw.parent = this;

	storeValue(rhs.loadValue());
}
5687
UInt4(const Reference<UInt4> & rhs)5688 UInt4::UInt4(const Reference<UInt4> &rhs)
5689 {
5690 // xyzw.parent = this;
5691
5692 Value *value = rhs.loadValue();
5693 storeValue(value);
5694 }
5695
UInt4(RValue<Int4> rhs)5696 UInt4::UInt4(RValue<Int4> rhs)
5697 {
5698 // xyzw.parent = this;
5699
5700 storeValue(rhs.value);
5701 }
5702
UInt4(const Int4 & rhs)5703 UInt4::UInt4(const Int4 &rhs)
5704 {
5705 // xyzw.parent = this;
5706
5707 Value *value = rhs.loadValue();
5708 storeValue(value);
5709 }
5710
UInt4(const Reference<Int4> & rhs)5711 UInt4::UInt4(const Reference<Int4> &rhs)
5712 {
5713 // xyzw.parent = this;
5714
5715 Value *value = rhs.loadValue();
5716 storeValue(value);
5717 }
5718
UInt4(RValue<UInt2> lo,RValue<UInt2> hi)5719 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5720 {
5721 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5722 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5723
5724 Value *long2 = UndefValue::get(Long2::getType());
5725 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5726 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5727 Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
5728
5729 storeValue(uint4);
5730 }
5731
operator =(RValue<UInt4> rhs) const5732 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) const
5733 {
5734 storeValue(rhs.value);
5735
5736 return rhs;
5737 }
5738
operator =(const UInt4 & rhs) const5739 RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const
5740 {
5741 Value *value = rhs.loadValue();
5742 storeValue(value);
5743
5744 return RValue<UInt4>(value);
5745 }
5746
operator =(const Reference<UInt4> & rhs) const5747 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) const
5748 {
5749 Value *value = rhs.loadValue();
5750 storeValue(value);
5751
5752 return RValue<UInt4>(value);
5753 }
5754
operator +(RValue<UInt4> lhs,RValue<UInt4> rhs)5755 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5756 {
5757 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5758 }
5759
operator -(RValue<UInt4> lhs,RValue<UInt4> rhs)5760 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5761 {
5762 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5763 }
5764
operator *(RValue<UInt4> lhs,RValue<UInt4> rhs)5765 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5766 {
5767 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5768 }
5769
operator /(RValue<UInt4> lhs,RValue<UInt4> rhs)5770 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5771 {
5772 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5773 }
5774
operator %(RValue<UInt4> lhs,RValue<UInt4> rhs)5775 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5776 {
5777 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5778 }
5779
operator &(RValue<UInt4> lhs,RValue<UInt4> rhs)5780 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5781 {
5782 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5783 }
5784
operator |(RValue<UInt4> lhs,RValue<UInt4> rhs)5785 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5786 {
5787 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5788 }
5789
operator ^(RValue<UInt4> lhs,RValue<UInt4> rhs)5790 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5791 {
5792 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5793 }
5794
operator <<(RValue<UInt4> lhs,unsigned char rhs)5795 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5796 {
5797 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
5798 }
5799
operator >>(RValue<UInt4> lhs,unsigned char rhs)5800 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5801 {
5802 return x86::psrld(lhs, rhs);
5803 }
5804
operator <<(RValue<UInt4> lhs,RValue<UInt4> rhs)5805 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5806 {
5807 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5808 }
5809
operator >>(RValue<UInt4> lhs,RValue<UInt4> rhs)5810 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5811 {
5812 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5813 }
5814
operator +=(const UInt4 & lhs,RValue<UInt4> rhs)5815 RValue<UInt4> operator+=(const UInt4 &lhs, RValue<UInt4> rhs)
5816 {
5817 return lhs = lhs + rhs;
5818 }
5819
operator -=(const UInt4 & lhs,RValue<UInt4> rhs)5820 RValue<UInt4> operator-=(const UInt4 &lhs, RValue<UInt4> rhs)
5821 {
5822 return lhs = lhs - rhs;
5823 }
5824
operator *=(const UInt4 & lhs,RValue<UInt4> rhs)5825 RValue<UInt4> operator*=(const UInt4 &lhs, RValue<UInt4> rhs)
5826 {
5827 return lhs = lhs * rhs;
5828 }
5829
5830 // RValue<UInt4> operator/=(const UInt4 &lhs, RValue<UInt4> rhs)
5831 // {
5832 // return lhs = lhs / rhs;
5833 // }
5834
5835 // RValue<UInt4> operator%=(const UInt4 &lhs, RValue<UInt4> rhs)
5836 // {
5837 // return lhs = lhs % rhs;
5838 // }
5839
operator &=(const UInt4 & lhs,RValue<UInt4> rhs)5840 RValue<UInt4> operator&=(const UInt4 &lhs, RValue<UInt4> rhs)
5841 {
5842 return lhs = lhs & rhs;
5843 }
5844
operator |=(const UInt4 & lhs,RValue<UInt4> rhs)5845 RValue<UInt4> operator|=(const UInt4 &lhs, RValue<UInt4> rhs)
5846 {
5847 return lhs = lhs | rhs;
5848 }
5849
operator ^=(const UInt4 & lhs,RValue<UInt4> rhs)5850 RValue<UInt4> operator^=(const UInt4 &lhs, RValue<UInt4> rhs)
5851 {
5852 return lhs = lhs ^ rhs;
5853 }
5854
operator <<=(const UInt4 & lhs,unsigned char rhs)5855 RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs)
5856 {
5857 return lhs = lhs << rhs;
5858 }
5859
operator >>=(const UInt4 & lhs,unsigned char rhs)5860 RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs)
5861 {
5862 return lhs = lhs >> rhs;
5863 }
5864
operator +(RValue<UInt4> val)5865 RValue<UInt4> operator+(RValue<UInt4> val)
5866 {
5867 return val;
5868 }
5869
operator -(RValue<UInt4> val)5870 RValue<UInt4> operator-(RValue<UInt4> val)
5871 {
5872 return RValue<UInt4>(Nucleus::createNeg(val.value));
5873 }
5874
operator ~(RValue<UInt4> val)5875 RValue<UInt4> operator~(RValue<UInt4> val)
5876 {
5877 return RValue<UInt4>(Nucleus::createNot(val.value));
5878 }
5879
CmpEQ(RValue<UInt4> x,RValue<UInt4> y)5880 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5881 {
5882 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5883 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5884 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5885 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5886 }
5887
CmpLT(RValue<UInt4> x,RValue<UInt4> y)5888 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5889 {
5890 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
5891 }
5892
CmpLE(RValue<UInt4> x,RValue<UInt4> y)5893 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5894 {
5895 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5896 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5897 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
5898 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5899 }
5900
CmpNEQ(RValue<UInt4> x,RValue<UInt4> y)5901 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5902 {
5903 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5904 }
5905
CmpNLT(RValue<UInt4> x,RValue<UInt4> y)5906 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5907 {
5908 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5909 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5910 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
5911 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5912 }
5913
CmpNLE(RValue<UInt4> x,RValue<UInt4> y)5914 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5915 {
5916 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
5917 }
5918
Max(RValue<UInt4> x,RValue<UInt4> y)5919 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5920 {
5921 if(CPUID::supportsSSE4_1())
5922 {
5923 return x86::pmaxud(x, y);
5924 }
5925 else
5926 {
5927 RValue<UInt4> greater = CmpNLE(x, y);
5928 return x & greater | y & ~greater;
5929 }
5930 }
5931
Min(RValue<UInt4> x,RValue<UInt4> y)5932 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5933 {
5934 if(CPUID::supportsSSE4_1())
5935 {
5936 return x86::pminud(x, y);
5937 }
5938 else
5939 {
5940 RValue<UInt4> less = CmpLT(x, y);
5941 return x & less | y & ~less;
5942 }
5943 }
5944
Pack(RValue<UInt4> x,RValue<UInt4> y)5945 RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5946 {
5947 return x86::packusdw(x, y); // FIXME: Fallback required
5948 }
5949
getType()5950 Type *UInt4::getType()
5951 {
5952 return VectorType::get(UInt::getType(), 4);
5953 }
5954
Float(RValue<Int> cast)5955 Float::Float(RValue<Int> cast)
5956 {
5957 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5958
5959 storeValue(integer);
5960 }
5961
Float()5962 Float::Float()
5963 {
5964
5965 }
5966
Float(float x)5967 Float::Float(float x)
5968 {
5969 storeValue(Nucleus::createConstantFloat(x));
5970 }
5971
Float(RValue<Float> rhs)5972 Float::Float(RValue<Float> rhs)
5973 {
5974 storeValue(rhs.value);
5975 }
5976
// Copy constructor: loads the value of rhs and stores it into this variable.
Float::Float(const Float &rhs)
{
	storeValue(rhs.loadValue());
}
5982
Float(const Reference<Float> & rhs)5983 Float::Float(const Reference<Float> &rhs)
5984 {
5985 Value *value = rhs.loadValue();
5986 storeValue(value);
5987 }
5988
operator =(RValue<Float> rhs) const5989 RValue<Float> Float::operator=(RValue<Float> rhs) const
5990 {
5991 storeValue(rhs.value);
5992
5993 return rhs;
5994 }
5995
operator =(const Float & rhs) const5996 RValue<Float> Float::operator=(const Float &rhs) const
5997 {
5998 Value *value = rhs.loadValue();
5999 storeValue(value);
6000
6001 return RValue<Float>(value);
6002 }
6003
operator =(const Reference<Float> & rhs) const6004 RValue<Float> Float::operator=(const Reference<Float> &rhs) const
6005 {
6006 Value *value = rhs.loadValue();
6007 storeValue(value);
6008
6009 return RValue<Float>(value);
6010 }
6011
operator +(RValue<Float> lhs,RValue<Float> rhs)6012 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6013 {
6014 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6015 }
6016
operator -(RValue<Float> lhs,RValue<Float> rhs)6017 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6018 {
6019 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6020 }
6021
operator *(RValue<Float> lhs,RValue<Float> rhs)6022 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6023 {
6024 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6025 }
6026
operator /(RValue<Float> lhs,RValue<Float> rhs)6027 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6028 {
6029 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
6030 }
6031
operator +=(const Float & lhs,RValue<Float> rhs)6032 RValue<Float> operator+=(const Float &lhs, RValue<Float> rhs)
6033 {
6034 return lhs = lhs + rhs;
6035 }
6036
operator -=(const Float & lhs,RValue<Float> rhs)6037 RValue<Float> operator-=(const Float &lhs, RValue<Float> rhs)
6038 {
6039 return lhs = lhs - rhs;
6040 }
6041
operator *=(const Float & lhs,RValue<Float> rhs)6042 RValue<Float> operator*=(const Float &lhs, RValue<Float> rhs)
6043 {
6044 return lhs = lhs * rhs;
6045 }
6046
operator /=(const Float & lhs,RValue<Float> rhs)6047 RValue<Float> operator/=(const Float &lhs, RValue<Float> rhs)
6048 {
6049 return lhs = lhs / rhs;
6050 }
6051
operator +(RValue<Float> val)6052 RValue<Float> operator+(RValue<Float> val)
6053 {
6054 return val;
6055 }
6056
operator -(RValue<Float> val)6057 RValue<Float> operator-(RValue<Float> val)
6058 {
6059 return RValue<Float>(Nucleus::createFNeg(val.value));
6060 }
6061
operator <(RValue<Float> lhs,RValue<Float> rhs)6062 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6063 {
6064 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6065 }
6066
operator <=(RValue<Float> lhs,RValue<Float> rhs)6067 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6068 {
6069 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6070 }
6071
operator >(RValue<Float> lhs,RValue<Float> rhs)6072 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6073 {
6074 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6075 }
6076
operator >=(RValue<Float> lhs,RValue<Float> rhs)6077 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6078 {
6079 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6080 }
6081
operator !=(RValue<Float> lhs,RValue<Float> rhs)6082 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6083 {
6084 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6085 }
6086
operator ==(RValue<Float> lhs,RValue<Float> rhs)6087 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6088 {
6089 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6090 }
6091
Abs(RValue<Float> x)6092 RValue<Float> Abs(RValue<Float> x)
6093 {
6094 return IfThenElse(x > 0.0f, x, -x);
6095 }
6096
Max(RValue<Float> x,RValue<Float> y)6097 RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6098 {
6099 return IfThenElse(x > y, x, y);
6100 }
6101
Min(RValue<Float> x,RValue<Float> y)6102 RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6103 {
6104 return IfThenElse(x < y, x, y);
6105 }
6106
Rcp_pp(RValue<Float> x,bool exactAtPow2)6107 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6108 {
6109 if(exactAtPow2)
6110 {
6111 // rcpss uses a piecewise-linear approximation which minimizes the relative error
6112 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6113 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6114 }
6115 else
6116 {
6117 return x86::rcpss(x);
6118 }
6119 }
6120
RcpSqrt_pp(RValue<Float> x)6121 RValue<Float> RcpSqrt_pp(RValue<Float> x)
6122 {
6123 return x86::rsqrtss(x);
6124 }
6125
Sqrt(RValue<Float> x)6126 RValue<Float> Sqrt(RValue<Float> x)
6127 {
6128 return x86::sqrtss(x);
6129 }
6130
Round(RValue<Float> x)6131 RValue<Float> Round(RValue<Float> x)
6132 {
6133 if(CPUID::supportsSSE4_1())
6134 {
6135 return x86::roundss(x, 0);
6136 }
6137 else
6138 {
6139 return Float4(Round(Float4(x))).x;
6140 }
6141 }
6142
Trunc(RValue<Float> x)6143 RValue<Float> Trunc(RValue<Float> x)
6144 {
6145 if(CPUID::supportsSSE4_1())
6146 {
6147 return x86::roundss(x, 3);
6148 }
6149 else
6150 {
6151 return Float(Int(x)); // Rounded toward zero
6152 }
6153 }
6154
Frac(RValue<Float> x)6155 RValue<Float> Frac(RValue<Float> x)
6156 {
6157 if(CPUID::supportsSSE4_1())
6158 {
6159 return x - x86::floorss(x);
6160 }
6161 else
6162 {
6163 return Float4(Frac(Float4(x))).x;
6164 }
6165 }
6166
Floor(RValue<Float> x)6167 RValue<Float> Floor(RValue<Float> x)
6168 {
6169 if(CPUID::supportsSSE4_1())
6170 {
6171 return x86::floorss(x);
6172 }
6173 else
6174 {
6175 return Float4(Floor(Float4(x))).x;
6176 }
6177 }
6178
Ceil(RValue<Float> x)6179 RValue<Float> Ceil(RValue<Float> x)
6180 {
6181 if(CPUID::supportsSSE4_1())
6182 {
6183 return x86::ceilss(x);
6184 }
6185 else
6186 {
6187 return Float4(Ceil(Float4(x))).x;
6188 }
6189 }
6190
getType()6191 Type *Float::getType()
6192 {
6193 return Type::getFloatTy(*Nucleus::getContext());
6194 }
6195
Float2(RValue<Float4> cast)6196 Float2::Float2(RValue<Float4> cast)
6197 {
6198 // xyzw.parent = this;
6199
6200 Value *int64x2 = Nucleus::createBitCast(cast.value, Long2::getType());
6201 Value *int64 = Nucleus::createExtractElement(int64x2, 0);
6202 Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
6203
6204 storeValue(float2);
6205 }
6206
getType()6207 Type *Float2::getType()
6208 {
6209 return VectorType::get(Float::getType(), 2);
6210 }
6211
Float4(RValue<Byte4> cast)6212 Float4::Float4(RValue<Byte4> cast)
6213 {
6214 xyzw.parent = this;
6215
6216 #if 0
6217 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes
6218 #elif 0
6219 Value *vector = loadValue();
6220
6221 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6222 Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
6223 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6224
6225 Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1));
6226 Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
6227 Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1));
6228
6229 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6230 Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
6231 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6232
6233 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6234 Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
6235 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
6236 #else
6237 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
6238 Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0);
6239
6240 Value *e;
6241
6242 if(CPUID::supportsSSE4_1())
6243 {
6244 e = x86::pmovzxbd(RValue<Int4>(a)).value;
6245 }
6246 else
6247 {
6248 Constant *swizzle[16];
6249 swizzle[0] = Nucleus::createConstantInt(0);
6250 swizzle[1] = Nucleus::createConstantInt(16);
6251 swizzle[2] = Nucleus::createConstantInt(1);
6252 swizzle[3] = Nucleus::createConstantInt(17);
6253 swizzle[4] = Nucleus::createConstantInt(2);
6254 swizzle[5] = Nucleus::createConstantInt(18);
6255 swizzle[6] = Nucleus::createConstantInt(3);
6256 swizzle[7] = Nucleus::createConstantInt(19);
6257 swizzle[8] = Nucleus::createConstantInt(4);
6258 swizzle[9] = Nucleus::createConstantInt(20);
6259 swizzle[10] = Nucleus::createConstantInt(5);
6260 swizzle[11] = Nucleus::createConstantInt(21);
6261 swizzle[12] = Nucleus::createConstantInt(6);
6262 swizzle[13] = Nucleus::createConstantInt(22);
6263 swizzle[14] = Nucleus::createConstantInt(7);
6264 swizzle[15] = Nucleus::createConstantInt(23);
6265
6266 Value *b = Nucleus::createBitCast(a, Byte16::getType());
6267 Value *c = Nucleus::createShuffleVector(b, Nucleus::createNullValue(Byte16::getType()), Nucleus::createConstantVector(swizzle, 16));
6268
6269 Constant *swizzle2[8];
6270 swizzle2[0] = Nucleus::createConstantInt(0);
6271 swizzle2[1] = Nucleus::createConstantInt(8);
6272 swizzle2[2] = Nucleus::createConstantInt(1);
6273 swizzle2[3] = Nucleus::createConstantInt(9);
6274 swizzle2[4] = Nucleus::createConstantInt(2);
6275 swizzle2[5] = Nucleus::createConstantInt(10);
6276 swizzle2[6] = Nucleus::createConstantInt(3);
6277 swizzle2[7] = Nucleus::createConstantInt(11);
6278
6279 Value *d = Nucleus::createBitCast(c, Short8::getType());
6280 e = Nucleus::createShuffleVector(d, Nucleus::createNullValue(Short8::getType()), Nucleus::createConstantVector(swizzle2, 8));
6281 }
6282
6283 Value *f = Nucleus::createBitCast(e, Int4::getType());
6284 Value *g = Nucleus::createSIToFP(f, Float4::getType());
6285 Value *xyzw = g;
6286 #endif
6287
6288 storeValue(xyzw);
6289 }
6290
Float4(RValue<SByte4> cast)6291 Float4::Float4(RValue<SByte4> cast)
6292 {
6293 xyzw.parent = this;
6294
6295 #if 0
6296 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes
6297 #elif 0
6298 Value *vector = loadValue();
6299
6300 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6301 Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
6302 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6303
6304 Value *i8y = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(1));
6305 Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
6306 Value *xy = Nucleus::createInsertElement(x, f32y, Nucleus::createConstantInt(1));
6307
6308 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6309 Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
6310 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6311
6312 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6313 Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
6314 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
6315 #else
6316 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
6317 Value *a = Nucleus::createInsertElement(UndefValue::get(Int4::getType()), x, 0);
6318
6319 Value *g;
6320
6321 if(CPUID::supportsSSE4_1())
6322 {
6323 g = x86::pmovsxbd(RValue<Int4>(a)).value;
6324 }
6325 else
6326 {
6327 Constant *swizzle[16];
6328 swizzle[0] = Nucleus::createConstantInt(0);
6329 swizzle[1] = Nucleus::createConstantInt(0);
6330 swizzle[2] = Nucleus::createConstantInt(1);
6331 swizzle[3] = Nucleus::createConstantInt(1);
6332 swizzle[4] = Nucleus::createConstantInt(2);
6333 swizzle[5] = Nucleus::createConstantInt(2);
6334 swizzle[6] = Nucleus::createConstantInt(3);
6335 swizzle[7] = Nucleus::createConstantInt(3);
6336 swizzle[8] = Nucleus::createConstantInt(4);
6337 swizzle[9] = Nucleus::createConstantInt(4);
6338 swizzle[10] = Nucleus::createConstantInt(5);
6339 swizzle[11] = Nucleus::createConstantInt(5);
6340 swizzle[12] = Nucleus::createConstantInt(6);
6341 swizzle[13] = Nucleus::createConstantInt(6);
6342 swizzle[14] = Nucleus::createConstantInt(7);
6343 swizzle[15] = Nucleus::createConstantInt(7);
6344
6345 Value *b = Nucleus::createBitCast(a, Byte16::getType());
6346 Value *c = Nucleus::createShuffleVector(b, b, Nucleus::createConstantVector(swizzle, 16));
6347
6348 Constant *swizzle2[8];
6349 swizzle2[0] = Nucleus::createConstantInt(0);
6350 swizzle2[1] = Nucleus::createConstantInt(0);
6351 swizzle2[2] = Nucleus::createConstantInt(1);
6352 swizzle2[3] = Nucleus::createConstantInt(1);
6353 swizzle2[4] = Nucleus::createConstantInt(2);
6354 swizzle2[5] = Nucleus::createConstantInt(2);
6355 swizzle2[6] = Nucleus::createConstantInt(3);
6356 swizzle2[7] = Nucleus::createConstantInt(3);
6357
6358 Value *d = Nucleus::createBitCast(c, Short8::getType());
6359 Value *e = Nucleus::createShuffleVector(d, d, Nucleus::createConstantVector(swizzle2, 8));
6360
6361 Value *f = Nucleus::createBitCast(e, Int4::getType());
6362 // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
6363 g = x86::psrad(RValue<Int4>(f), 24).value;
6364 }
6365
6366 Value *xyzw = Nucleus::createSIToFP(g, Float4::getType());
6367 #endif
6368
6369 storeValue(xyzw);
6370 }
6371
Float4(RValue<Short4> cast)6372 Float4::Float4(RValue<Short4> cast)
6373 {
6374 xyzw.parent = this;
6375
6376 Int4 c(cast);
6377 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6378 }
6379
Float4(RValue<UShort4> cast)6380 Float4::Float4(RValue<UShort4> cast)
6381 {
6382 xyzw.parent = this;
6383
6384 Int4 c(cast);
6385 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
6386 }
6387
Float4(RValue<Int4> cast)6388 Float4::Float4(RValue<Int4> cast)
6389 {
6390 xyzw.parent = this;
6391
6392 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
6393
6394 storeValue(xyzw);
6395 }
6396
Float4(RValue<UInt4> cast)6397 Float4::Float4(RValue<UInt4> cast)
6398 {
6399 xyzw.parent = this;
6400
6401 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());
6402
6403 storeValue(xyzw);
6404 }
6405
Float4()6406 Float4::Float4()
6407 {
6408 xyzw.parent = this;
6409 }
6410
Float4(float xyzw)6411 Float4::Float4(float xyzw)
6412 {
6413 constant(xyzw, xyzw, xyzw, xyzw);
6414 }
6415
Float4(float x,float yzw)6416 Float4::Float4(float x, float yzw)
6417 {
6418 constant(x, yzw, yzw, yzw);
6419 }
6420
Float4(float x,float y,float zw)6421 Float4::Float4(float x, float y, float zw)
6422 {
6423 constant(x, y, zw, zw);
6424 }
6425
Float4(float x,float y,float z,float w)6426 Float4::Float4(float x, float y, float z, float w)
6427 {
6428 constant(x, y, z, w);
6429 }
6430
constant(float x,float y,float z,float w)6431 void Float4::constant(float x, float y, float z, float w)
6432 {
6433 xyzw.parent = this;
6434
6435 Constant *constantVector[4];
6436 constantVector[0] = Nucleus::createConstantFloat(x);
6437 constantVector[1] = Nucleus::createConstantFloat(y);
6438 constantVector[2] = Nucleus::createConstantFloat(z);
6439 constantVector[3] = Nucleus::createConstantFloat(w);
6440
6441 storeValue(Nucleus::createConstantVector(constantVector, 4));
6442 }
6443
Float4(RValue<Float4> rhs)6444 Float4::Float4(RValue<Float4> rhs)
6445 {
6446 xyzw.parent = this;
6447
6448 storeValue(rhs.value);
6449 }
6450
// Copy constructor: loads the current value of rhs and stores it here.
Float4::Float4(const Float4 &rhs)
{
	xyzw.parent = this;

	storeValue(rhs.loadValue());
}
6458
Float4(const Reference<Float4> & rhs)6459 Float4::Float4(const Reference<Float4> &rhs)
6460 {
6461 xyzw.parent = this;
6462
6463 Value *value = rhs.loadValue();
6464 storeValue(value);
6465 }
6466
Float4(RValue<Float> rhs)6467 Float4::Float4(RValue<Float> rhs)
6468 {
6469 xyzw.parent = this;
6470
6471 Value *vector = loadValue();
6472 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
6473
6474 Constant *swizzle[4];
6475 swizzle[0] = Nucleus::createConstantInt(0);
6476 swizzle[1] = Nucleus::createConstantInt(0);
6477 swizzle[2] = Nucleus::createConstantInt(0);
6478 swizzle[3] = Nucleus::createConstantInt(0);
6479
6480 Value *replicate = Nucleus::createShuffleVector(insert, UndefValue::get(Float4::getType()), Nucleus::createConstantVector(swizzle, 4));
6481
6482 storeValue(replicate);
6483 }
6484
Float4(const Float & rhs)6485 Float4::Float4(const Float &rhs)
6486 {
6487 xyzw.parent = this;
6488
6489 *this = RValue<Float>(rhs.loadValue());
6490 }
6491
Float4(const Reference<Float> & rhs)6492 Float4::Float4(const Reference<Float> &rhs)
6493 {
6494 xyzw.parent = this;
6495
6496 *this = RValue<Float>(rhs.loadValue());
6497 }
6498
operator =(float x) const6499 RValue<Float4> Float4::operator=(float x) const
6500 {
6501 return *this = Float4(x, x, x, x);
6502 }
6503
operator =(RValue<Float4> rhs) const6504 RValue<Float4> Float4::operator=(RValue<Float4> rhs) const
6505 {
6506 storeValue(rhs.value);
6507
6508 return rhs;
6509 }
6510
operator =(const Float4 & rhs) const6511 RValue<Float4> Float4::operator=(const Float4 &rhs) const
6512 {
6513 Value *value = rhs.loadValue();
6514 storeValue(value);
6515
6516 return RValue<Float4>(value);
6517 }
6518
operator =(const Reference<Float4> & rhs) const6519 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) const
6520 {
6521 Value *value = rhs.loadValue();
6522 storeValue(value);
6523
6524 return RValue<Float4>(value);
6525 }
6526
operator =(RValue<Float> rhs) const6527 RValue<Float4> Float4::operator=(RValue<Float> rhs) const
6528 {
6529 return *this = Float4(rhs);
6530 }
6531
operator =(const Float & rhs) const6532 RValue<Float4> Float4::operator=(const Float &rhs) const
6533 {
6534 return *this = Float4(rhs);
6535 }
6536
operator =(const Reference<Float> & rhs) const6537 RValue<Float4> Float4::operator=(const Reference<Float> &rhs) const
6538 {
6539 return *this = Float4(rhs);
6540 }
6541
operator +(RValue<Float4> lhs,RValue<Float4> rhs)6542 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6543 {
6544 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6545 }
6546
operator -(RValue<Float4> lhs,RValue<Float4> rhs)6547 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6548 {
6549 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6550 }
6551
operator *(RValue<Float4> lhs,RValue<Float4> rhs)6552 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6553 {
6554 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6555 }
6556
operator /(RValue<Float4> lhs,RValue<Float4> rhs)6557 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6558 {
6559 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6560 }
6561
operator %(RValue<Float4> lhs,RValue<Float4> rhs)6562 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6563 {
6564 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6565 }
6566
operator +=(const Float4 & lhs,RValue<Float4> rhs)6567 RValue<Float4> operator+=(const Float4 &lhs, RValue<Float4> rhs)
6568 {
6569 return lhs = lhs + rhs;
6570 }
6571
operator -=(const Float4 & lhs,RValue<Float4> rhs)6572 RValue<Float4> operator-=(const Float4 &lhs, RValue<Float4> rhs)
6573 {
6574 return lhs = lhs - rhs;
6575 }
6576
operator *=(const Float4 & lhs,RValue<Float4> rhs)6577 RValue<Float4> operator*=(const Float4 &lhs, RValue<Float4> rhs)
6578 {
6579 return lhs = lhs * rhs;
6580 }
6581
operator /=(const Float4 & lhs,RValue<Float4> rhs)6582 RValue<Float4> operator/=(const Float4 &lhs, RValue<Float4> rhs)
6583 {
6584 return lhs = lhs / rhs;
6585 }
6586
operator %=(const Float4 & lhs,RValue<Float4> rhs)6587 RValue<Float4> operator%=(const Float4 &lhs, RValue<Float4> rhs)
6588 {
6589 return lhs = lhs % rhs;
6590 }
6591
operator +(RValue<Float4> val)6592 RValue<Float4> operator+(RValue<Float4> val)
6593 {
6594 return val;
6595 }
6596
operator -(RValue<Float4> val)6597 RValue<Float4> operator-(RValue<Float4> val)
6598 {
6599 return RValue<Float4>(Nucleus::createFNeg(val.value));
6600 }
6601
Abs(RValue<Float4> x)6602 RValue<Float4> Abs(RValue<Float4> x)
6603 {
6604 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6605
6606 Constant *constantVector[4];
6607 constantVector[0] = Nucleus::createConstantInt(0x7FFFFFFF);
6608 constantVector[1] = Nucleus::createConstantInt(0x7FFFFFFF);
6609 constantVector[2] = Nucleus::createConstantInt(0x7FFFFFFF);
6610 constantVector[3] = Nucleus::createConstantInt(0x7FFFFFFF);
6611
6612 Value *result = Nucleus::createAnd(vector, Nucleus::createConstantVector(constantVector, 4));
6613
6614 return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
6615 }
6616
Max(RValue<Float4> x,RValue<Float4> y)6617 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6618 {
6619 return x86::maxps(x, y);
6620 }
6621
Min(RValue<Float4> x,RValue<Float4> y)6622 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6623 {
6624 return x86::minps(x, y);
6625 }
6626
Rcp_pp(RValue<Float4> x,bool exactAtPow2)6627 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6628 {
6629 if(exactAtPow2)
6630 {
6631 // rcpps uses a piecewise-linear approximation which minimizes the relative error
6632 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6633 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6634 }
6635 else
6636 {
6637 return x86::rcpps(x);
6638 }
6639 }
6640
RcpSqrt_pp(RValue<Float4> x)6641 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6642 {
6643 return x86::rsqrtps(x);
6644 }
6645
Sqrt(RValue<Float4> x)6646 RValue<Float4> Sqrt(RValue<Float4> x)
6647 {
6648 return x86::sqrtps(x);
6649 }
6650
Insert(const Float4 & val,RValue<Float> element,int i)6651 RValue<Float4> Insert(const Float4 &val, RValue<Float> element, int i)
6652 {
6653 llvm::Value *value = val.loadValue();
6654 llvm::Value *insert = Nucleus::createInsertElement(value, element.value, i);
6655
6656 val = RValue<Float4>(insert);
6657
6658 return val;
6659 }
6660
Extract(RValue<Float4> x,int i)6661 RValue<Float> Extract(RValue<Float4> x, int i)
6662 {
6663 return RValue<Float>(Nucleus::createExtractElement(x.value, i));
6664 }
6665
Swizzle(RValue<Float4> x,unsigned char select)6666 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6667 {
6668 return RValue<Float4>(Nucleus::createSwizzle(x.value, select));
6669 }
6670
ShuffleLowHigh(RValue<Float4> x,RValue<Float4> y,unsigned char imm)6671 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6672 {
6673 Constant *shuffle[4];
6674 shuffle[0] = Nucleus::createConstantInt(((imm >> 0) & 0x03) + 0);
6675 shuffle[1] = Nucleus::createConstantInt(((imm >> 2) & 0x03) + 0);
6676 shuffle[2] = Nucleus::createConstantInt(((imm >> 4) & 0x03) + 4);
6677 shuffle[3] = Nucleus::createConstantInt(((imm >> 6) & 0x03) + 4);
6678
6679 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
6680 }
6681
UnpackLow(RValue<Float4> x,RValue<Float4> y)6682 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6683 {
6684 Constant *shuffle[4];
6685 shuffle[0] = Nucleus::createConstantInt(0);
6686 shuffle[1] = Nucleus::createConstantInt(4);
6687 shuffle[2] = Nucleus::createConstantInt(1);
6688 shuffle[3] = Nucleus::createConstantInt(5);
6689
6690 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
6691 }
6692
UnpackHigh(RValue<Float4> x,RValue<Float4> y)6693 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6694 {
6695 Constant *shuffle[4];
6696 shuffle[0] = Nucleus::createConstantInt(2);
6697 shuffle[1] = Nucleus::createConstantInt(6);
6698 shuffle[2] = Nucleus::createConstantInt(3);
6699 shuffle[3] = Nucleus::createConstantInt(7);
6700
6701 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(shuffle, 4)));
6702 }
6703
Mask(Float4 & lhs,RValue<Float4> rhs,unsigned char select)6704 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6705 {
6706 Value *vector = lhs.loadValue();
6707 Value *shuffle = Nucleus::createMask(vector, rhs.value, select);
6708 lhs.storeValue(shuffle);
6709
6710 return RValue<Float4>(shuffle);
6711 }
6712
SignMask(RValue<Float4> x)6713 RValue<Int> SignMask(RValue<Float4> x)
6714 {
6715 return x86::movmskps(x);
6716 }
6717
CmpEQ(RValue<Float4> x,RValue<Float4> y)6718 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6719 {
6720 // return As<Int4>(x86::cmpeqps(x, y));
6721 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
6722 }
6723
CmpLT(RValue<Float4> x,RValue<Float4> y)6724 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6725 {
6726 // return As<Int4>(x86::cmpltps(x, y));
6727 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
6728 }
6729
CmpLE(RValue<Float4> x,RValue<Float4> y)6730 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6731 {
6732 // return As<Int4>(x86::cmpleps(x, y));
6733 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
6734 }
6735
CmpNEQ(RValue<Float4> x,RValue<Float4> y)6736 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6737 {
6738 // return As<Int4>(x86::cmpneqps(x, y));
6739 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
6740 }
6741
CmpNLT(RValue<Float4> x,RValue<Float4> y)6742 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6743 {
6744 // return As<Int4>(x86::cmpnltps(x, y));
6745 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
6746 }
6747
CmpNLE(RValue<Float4> x,RValue<Float4> y)6748 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6749 {
6750 // return As<Int4>(x86::cmpnleps(x, y));
6751 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
6752 }
6753
Round(RValue<Float4> x)6754 RValue<Float4> Round(RValue<Float4> x)
6755 {
6756 if(CPUID::supportsSSE4_1())
6757 {
6758 return x86::roundps(x, 0);
6759 }
6760 else
6761 {
6762 return Float4(RoundInt(x));
6763 }
6764 }
6765
Trunc(RValue<Float4> x)6766 RValue<Float4> Trunc(RValue<Float4> x)
6767 {
6768 if(CPUID::supportsSSE4_1())
6769 {
6770 return x86::roundps(x, 3);
6771 }
6772 else
6773 {
6774 return Float4(Int4(x)); // Rounded toward zero
6775 }
6776 }
6777
Frac(RValue<Float4> x)6778 RValue<Float4> Frac(RValue<Float4> x)
6779 {
6780 if(CPUID::supportsSSE4_1())
6781 {
6782 return x - x86::floorps(x);
6783 }
6784 else
6785 {
6786 Float4 frc = x - Float4(Int4(x)); // Signed fractional part
6787
6788 return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
6789 }
6790 }
6791
Floor(RValue<Float4> x)6792 RValue<Float4> Floor(RValue<Float4> x)
6793 {
6794 if(CPUID::supportsSSE4_1())
6795 {
6796 return x86::floorps(x);
6797 }
6798 else
6799 {
6800 return x - Frac(x);
6801 }
6802 }
6803
Ceil(RValue<Float4> x)6804 RValue<Float4> Ceil(RValue<Float4> x)
6805 {
6806 if(CPUID::supportsSSE4_1())
6807 {
6808 return x86::ceilps(x);
6809 }
6810 else
6811 {
6812 return -Floor(-x);
6813 }
6814 }
6815
getType()6816 Type *Float4::getType()
6817 {
6818 return VectorType::get(Float::getType(), 4);
6819 }
6820
operator +(RValue<Pointer<Byte>> lhs,int offset)6821 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6822 {
6823 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Nucleus::createConstantInt(offset)));
6824 }
6825
operator +(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6826 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6827 {
6828 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value));
6829 }
6830
operator +(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6831 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6832 {
6833 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, offset.value));
6834 }
6835
operator +=(const Pointer<Byte> & lhs,int offset)6836 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, int offset)
6837 {
6838 return lhs = lhs + offset;
6839 }
6840
operator +=(const Pointer<Byte> & lhs,RValue<Int> offset)6841 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<Int> offset)
6842 {
6843 return lhs = lhs + offset;
6844 }
6845
operator +=(const Pointer<Byte> & lhs,RValue<UInt> offset)6846 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6847 {
6848 return lhs = lhs + offset;
6849 }
6850
operator -(RValue<Pointer<Byte>> lhs,int offset)6851 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6852 {
6853 return lhs + -offset;
6854 }
6855
operator -(RValue<Pointer<Byte>> lhs,RValue<Int> offset)6856 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6857 {
6858 return lhs + -offset;
6859 }
6860
operator -(RValue<Pointer<Byte>> lhs,RValue<UInt> offset)6861 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6862 {
6863 return lhs + -offset;
6864 }
6865
operator -=(const Pointer<Byte> & lhs,int offset)6866 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset)
6867 {
6868 return lhs = lhs - offset;
6869 }
6870
operator -=(const Pointer<Byte> & lhs,RValue<Int> offset)6871 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<Int> offset)
6872 {
6873 return lhs = lhs - offset;
6874 }
6875
operator -=(const Pointer<Byte> & lhs,RValue<UInt> offset)6876 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6877 {
6878 return lhs = lhs - offset;
6879 }
6880
Return()6881 void Return()
6882 {
6883 Nucleus::createRetVoid();
6884 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6885 Nucleus::createUnreachable();
6886 }
6887
Return(bool ret)6888 void Return(bool ret)
6889 {
6890 Nucleus::createRet(Nucleus::createConstantBool(ret));
6891 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6892 Nucleus::createUnreachable();
6893 }
6894
Return(const Int & ret)6895 void Return(const Int &ret)
6896 {
6897 Nucleus::createRet(ret.loadValue());
6898 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6899 Nucleus::createUnreachable();
6900 }
6901
beginLoop()6902 BasicBlock *beginLoop()
6903 {
6904 BasicBlock *loopBB = Nucleus::createBasicBlock();
6905
6906 Nucleus::createBr(loopBB);
6907 Nucleus::setInsertBlock(loopBB);
6908
6909 return loopBB;
6910 }
6911
branch(RValue<Bool> cmp,BasicBlock * bodyBB,BasicBlock * endBB)6912 bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6913 {
6914 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6915 Nucleus::setInsertBlock(bodyBB);
6916
6917 return true;
6918 }
6919
elseBlock(BasicBlock * falseBB)6920 bool elseBlock(BasicBlock *falseBB)
6921 {
6922 falseBB->back().eraseFromParent();
6923 Nucleus::setInsertBlock(falseBB);
6924
6925 return true;
6926 }
6927
Ticks()6928 RValue<Long> Ticks()
6929 {
6930 Module *module = Nucleus::getModule();
6931 llvm::Function *rdtsc = Intrinsic::getDeclaration(module, Intrinsic::readcyclecounter);
6932
6933 return RValue<Long>(Nucleus::createCall(rdtsc));
6934 }
6935 }
6936
6937 namespace sw
6938 {
6939 namespace x86
6940 {
cvtss2si(RValue<Float> val)6941 RValue<Int> cvtss2si(RValue<Float> val)
6942 {
6943 Module *module = Nucleus::getModule();
6944 llvm::Function *cvtss2si = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtss2si);
6945
6946 Float4 vector;
6947 vector.x = val;
6948
6949 return RValue<Int>(Nucleus::createCall(cvtss2si, RValue<Float4>(vector).value));
6950 }
6951
cvtps2pi(RValue<Float4> val)6952 RValue<Int2> cvtps2pi(RValue<Float4> val)
6953 {
6954 Module *module = Nucleus::getModule();
6955 llvm::Function *cvtps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvtps2pi);
6956
6957 return RValue<Int2>(Nucleus::createCall(cvtps2pi, val.value));
6958 }
6959
cvttps2pi(RValue<Float4> val)6960 RValue<Int2> cvttps2pi(RValue<Float4> val)
6961 {
6962 Module *module = Nucleus::getModule();
6963 llvm::Function *cvttps2pi = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cvttps2pi);
6964
6965 return RValue<Int2>(Nucleus::createCall(cvttps2pi, val.value));
6966 }
6967
cvtps2dq(RValue<Float4> val)6968 RValue<Int4> cvtps2dq(RValue<Float4> val)
6969 {
6970 if(CPUID::supportsSSE2())
6971 {
6972 Module *module = Nucleus::getModule();
6973 llvm::Function *cvtps2dq = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_cvtps2dq);
6974
6975 return RValue<Int4>(Nucleus::createCall(cvtps2dq, val.value));
6976 }
6977 else
6978 {
6979 Int2 lo = x86::cvtps2pi(val);
6980 Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
6981
6982 return Int4(lo, hi);
6983 }
6984 }
6985
rcpss(RValue<Float> val)6986 RValue<Float> rcpss(RValue<Float> val)
6987 {
6988 Module *module = Nucleus::getModule();
6989 llvm::Function *rcpss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ss);
6990
6991 Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
6992
6993 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rcpss, vector), 0));
6994 }
6995
sqrtss(RValue<Float> val)6996 RValue<Float> sqrtss(RValue<Float> val)
6997 {
6998 Module *module = Nucleus::getModule();
6999 llvm::Function *sqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ss);
7000
7001 Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
7002
7003 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(sqrtss, vector), 0));
7004 }
7005
rsqrtss(RValue<Float> val)7006 RValue<Float> rsqrtss(RValue<Float> val)
7007 {
7008 Module *module = Nucleus::getModule();
7009 llvm::Function *rsqrtss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ss);
7010
7011 Value *vector = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), val.value, 0);
7012
7013 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(rsqrtss, vector), 0));
7014 }
7015
rcpps(RValue<Float4> val)7016 RValue<Float4> rcpps(RValue<Float4> val)
7017 {
7018 Module *module = Nucleus::getModule();
7019 llvm::Function *rcpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rcp_ps);
7020
7021 return RValue<Float4>(Nucleus::createCall(rcpps, val.value));
7022 }
7023
sqrtps(RValue<Float4> val)7024 RValue<Float4> sqrtps(RValue<Float4> val)
7025 {
7026 Module *module = Nucleus::getModule();
7027 llvm::Function *sqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_sqrt_ps);
7028
7029 return RValue<Float4>(Nucleus::createCall(sqrtps, val.value));
7030 }
7031
rsqrtps(RValue<Float4> val)7032 RValue<Float4> rsqrtps(RValue<Float4> val)
7033 {
7034 Module *module = Nucleus::getModule();
7035 llvm::Function *rsqrtps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_rsqrt_ps);
7036
7037 return RValue<Float4>(Nucleus::createCall(rsqrtps, val.value));
7038 }
7039
maxps(RValue<Float4> x,RValue<Float4> y)7040 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
7041 {
7042 Module *module = Nucleus::getModule();
7043 llvm::Function *maxps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_max_ps);
7044
7045 return RValue<Float4>(Nucleus::createCall(maxps, x.value, y.value));
7046 }
7047
minps(RValue<Float4> x,RValue<Float4> y)7048 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
7049 {
7050 Module *module = Nucleus::getModule();
7051 llvm::Function *minps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_min_ps);
7052
7053 return RValue<Float4>(Nucleus::createCall(minps, x.value, y.value));
7054 }
7055
roundss(RValue<Float> val,unsigned char imm)7056 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
7057 {
7058 Module *module = Nucleus::getModule();
7059 llvm::Function *roundss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ss);
7060
7061 Value *undef = UndefValue::get(Float4::getType());
7062 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
7063
7064 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(roundss, undef, vector, Nucleus::createConstantInt(imm)), 0));
7065 }
7066
floorss(RValue<Float> val)7067 RValue<Float> floorss(RValue<Float> val)
7068 {
7069 return roundss(val, 1);
7070 }
7071
ceilss(RValue<Float> val)7072 RValue<Float> ceilss(RValue<Float> val)
7073 {
7074 return roundss(val, 2);
7075 }
7076
roundps(RValue<Float4> val,unsigned char imm)7077 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
7078 {
7079 Module *module = Nucleus::getModule();
7080 llvm::Function *roundps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_round_ps);
7081
7082 return RValue<Float4>(Nucleus::createCall(roundps, val.value, Nucleus::createConstantInt(imm)));
7083 }
7084
floorps(RValue<Float4> val)7085 RValue<Float4> floorps(RValue<Float4> val)
7086 {
7087 return roundps(val, 1);
7088 }
7089
ceilps(RValue<Float4> val)7090 RValue<Float4> ceilps(RValue<Float4> val)
7091 {
7092 return roundps(val, 2);
7093 }
7094
cmpps(RValue<Float4> x,RValue<Float4> y,unsigned char imm)7095 RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
7096 {
7097 Module *module = Nucleus::getModule();
7098 llvm::Function *cmpps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cmp_ps);
7099
7100 return RValue<Float4>(Nucleus::createCall(cmpps, x.value, y.value, Nucleus::createConstantByte(imm)));
7101 }
7102
cmpeqps(RValue<Float4> x,RValue<Float4> y)7103 RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
7104 {
7105 return cmpps(x, y, 0);
7106 }
7107
cmpltps(RValue<Float4> x,RValue<Float4> y)7108 RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
7109 {
7110 return cmpps(x, y, 1);
7111 }
7112
cmpleps(RValue<Float4> x,RValue<Float4> y)7113 RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
7114 {
7115 return cmpps(x, y, 2);
7116 }
7117
cmpunordps(RValue<Float4> x,RValue<Float4> y)7118 RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
7119 {
7120 return cmpps(x, y, 3);
7121 }
7122
cmpneqps(RValue<Float4> x,RValue<Float4> y)7123 RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
7124 {
7125 return cmpps(x, y, 4);
7126 }
7127
cmpnltps(RValue<Float4> x,RValue<Float4> y)7128 RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
7129 {
7130 return cmpps(x, y, 5);
7131 }
7132
cmpnleps(RValue<Float4> x,RValue<Float4> y)7133 RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
7134 {
7135 return cmpps(x, y, 6);
7136 }
7137
cmpordps(RValue<Float4> x,RValue<Float4> y)7138 RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
7139 {
7140 return cmpps(x, y, 7);
7141 }
7142
cmpss(RValue<Float> x,RValue<Float> y,unsigned char imm)7143 RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
7144 {
7145 Module *module = Nucleus::getModule();
7146 llvm::Function *cmpss = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_cmp_ss);
7147
7148 Value *vector1 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), x.value, 0);
7149 Value *vector2 = Nucleus::createInsertElement(UndefValue::get(Float4::getType()), y.value, 0);
7150
7151 return RValue<Float>(Nucleus::createExtractElement(Nucleus::createCall(cmpss, vector1, vector2, Nucleus::createConstantByte(imm)), 0));
7152 }
7153
cmpeqss(RValue<Float> x,RValue<Float> y)7154 RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
7155 {
7156 return cmpss(x, y, 0);
7157 }
7158
cmpltss(RValue<Float> x,RValue<Float> y)7159 RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
7160 {
7161 return cmpss(x, y, 1);
7162 }
7163
cmpless(RValue<Float> x,RValue<Float> y)7164 RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
7165 {
7166 return cmpss(x, y, 2);
7167 }
7168
cmpunordss(RValue<Float> x,RValue<Float> y)7169 RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
7170 {
7171 return cmpss(x, y, 3);
7172 }
7173
cmpneqss(RValue<Float> x,RValue<Float> y)7174 RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
7175 {
7176 return cmpss(x, y, 4);
7177 }
7178
cmpnltss(RValue<Float> x,RValue<Float> y)7179 RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
7180 {
7181 return cmpss(x, y, 5);
7182 }
7183
cmpnless(RValue<Float> x,RValue<Float> y)7184 RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
7185 {
7186 return cmpss(x, y, 6);
7187 }
7188
cmpordss(RValue<Float> x,RValue<Float> y)7189 RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
7190 {
7191 return cmpss(x, y, 7);
7192 }
7193
pabsd(RValue<Int4> x)7194 RValue<Int4> pabsd(RValue<Int4> x)
7195 {
7196 Module *module = Nucleus::getModule();
7197 llvm::Function *pabsd = Intrinsic::getDeclaration(module, Intrinsic::x86_ssse3_pabs_d_128);
7198
7199 return RValue<Int4>(Nucleus::createCall(pabsd, x.value));
7200 }
7201
paddsw(RValue<Short4> x,RValue<Short4> y)7202 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
7203 {
7204 Module *module = Nucleus::getModule();
7205 llvm::Function *paddsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_w);
7206
7207 return As<Short4>(RValue<MMX>(Nucleus::createCall(paddsw, As<MMX>(x).value, As<MMX>(y).value)));
7208 }
7209
psubsw(RValue<Short4> x,RValue<Short4> y)7210 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
7211 {
7212 Module *module = Nucleus::getModule();
7213 llvm::Function *psubsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_w);
7214
7215 return As<Short4>(RValue<MMX>(Nucleus::createCall(psubsw, As<MMX>(x).value, As<MMX>(y).value)));
7216 }
7217
paddusw(RValue<UShort4> x,RValue<UShort4> y)7218 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
7219 {
7220 Module *module = Nucleus::getModule();
7221 llvm::Function *paddusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_w);
7222
7223 return As<UShort4>(RValue<MMX>(Nucleus::createCall(paddusw, As<MMX>(x).value, As<MMX>(y).value)));
7224 }
7225
psubusw(RValue<UShort4> x,RValue<UShort4> y)7226 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
7227 {
7228 Module *module = Nucleus::getModule();
7229 llvm::Function *psubusw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_w);
7230
7231 return As<UShort4>(RValue<MMX>(Nucleus::createCall(psubusw, As<MMX>(x).value, As<MMX>(y).value)));
7232 }
7233
paddsb(RValue<SByte8> x,RValue<SByte8> y)7234 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
7235 {
7236 Module *module = Nucleus::getModule();
7237 llvm::Function *paddsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padds_b);
7238
7239 return As<SByte8>(RValue<MMX>(Nucleus::createCall(paddsb, As<MMX>(x).value, As<MMX>(y).value)));
7240 }
7241
psubsb(RValue<SByte8> x,RValue<SByte8> y)7242 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
7243 {
7244 Module *module = Nucleus::getModule();
7245 llvm::Function *psubsb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubs_b);
7246
7247 return As<SByte8>(RValue<MMX>(Nucleus::createCall(psubsb, As<MMX>(x).value, As<MMX>(y).value)));
7248 }
7249
paddusb(RValue<Byte8> x,RValue<Byte8> y)7250 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
7251 {
7252 Module *module = Nucleus::getModule();
7253 llvm::Function *paddusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_paddus_b);
7254
7255 return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddusb, As<MMX>(x).value, As<MMX>(y).value)));
7256 }
7257
psubusb(RValue<Byte8> x,RValue<Byte8> y)7258 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
7259 {
7260 Module *module = Nucleus::getModule();
7261 llvm::Function *psubusb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psubus_b);
7262
7263 return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubusb, As<MMX>(x).value, As<MMX>(y).value)));
7264 }
7265
paddw(RValue<Short4> x,RValue<Short4> y)7266 RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
7267 {
7268 Module *module = Nucleus::getModule();
7269 llvm::Function *paddw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_w);
7270
7271 return As<Short4>(RValue<MMX>(Nucleus::createCall(paddw, As<MMX>(x).value, As<MMX>(y).value)));
7272 }
7273
psubw(RValue<Short4> x,RValue<Short4> y)7274 RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
7275 {
7276 Module *module = Nucleus::getModule();
7277 llvm::Function *psubw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_w);
7278
7279 return As<Short4>(RValue<MMX>(Nucleus::createCall(psubw, As<MMX>(x).value, As<MMX>(y).value)));
7280 }
7281
pmullw(RValue<Short4> x,RValue<Short4> y)7282 RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
7283 {
7284 Module *module = Nucleus::getModule();
7285 llvm::Function *pmullw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmull_w);
7286
7287 return As<Short4>(RValue<MMX>(Nucleus::createCall(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
7288 }
7289
pand(RValue<Short4> x,RValue<Short4> y)7290 RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
7291 {
7292 Module *module = Nucleus::getModule();
7293 llvm::Function *pand = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pand);
7294
7295 return As<Short4>(RValue<MMX>(Nucleus::createCall(pand, As<MMX>(x).value, As<MMX>(y).value)));
7296 }
7297
por(RValue<Short4> x,RValue<Short4> y)7298 RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
7299 {
7300 Module *module = Nucleus::getModule();
7301 llvm::Function *por = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_por);
7302
7303 return As<Short4>(RValue<MMX>(Nucleus::createCall(por, As<MMX>(x).value, As<MMX>(y).value)));
7304 }
7305
pxor(RValue<Short4> x,RValue<Short4> y)7306 RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
7307 {
7308 Module *module = Nucleus::getModule();
7309 llvm::Function *pxor = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pxor);
7310
7311 return As<Short4>(RValue<MMX>(Nucleus::createCall(pxor, As<MMX>(x).value, As<MMX>(y).value)));
7312 }
7313
pshufw(RValue<Short4> x,unsigned char y)7314 RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
7315 {
7316 Module *module = Nucleus::getModule();
7317 llvm::Function *pshufw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_pshuf_w);
7318
7319 return As<Short4>(RValue<MMX>(Nucleus::createCall(pshufw, As<MMX>(x).value, Nucleus::createConstantByte(y))));
7320 }
7321
punpcklwd(RValue<Short4> x,RValue<Short4> y)7322 RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
7323 {
7324 Module *module = Nucleus::getModule();
7325 llvm::Function *punpcklwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklwd);
7326
7327 return As<Int2>(RValue<MMX>(Nucleus::createCall(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
7328 }
7329
punpckhwd(RValue<Short4> x,RValue<Short4> y)7330 RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
7331 {
7332 Module *module = Nucleus::getModule();
7333 llvm::Function *punpckhwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhwd);
7334
7335 return As<Int2>(RValue<MMX>(Nucleus::createCall(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
7336 }
7337
pinsrw(RValue<Short4> x,RValue<Int> y,unsigned int i)7338 RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
7339 {
7340 Module *module = Nucleus::getModule();
7341 llvm::Function *pinsrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pinsr_w);
7342
7343 return As<Short4>(RValue<MMX>(Nucleus::createCall(pinsrw, As<MMX>(x).value, y.value, Nucleus::createConstantInt(i))));
7344 }
7345
pextrw(RValue<Short4> x,unsigned int i)7346 RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
7347 {
7348 Module *module = Nucleus::getModule();
7349 llvm::Function *pextrw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pextr_w);
7350
7351 return RValue<Int>(Nucleus::createCall(pextrw, As<MMX>(x).value, Nucleus::createConstantInt(i)));
7352 }
7353
punpckldq(RValue<Int2> x,RValue<Int2> y)7354 RValue<Long1> punpckldq(RValue<Int2> x, RValue<Int2> y)
7355 {
7356 Module *module = Nucleus::getModule();
7357 llvm::Function *punpckldq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckldq);
7358
7359 return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
7360 }
7361
punpckhdq(RValue<Int2> x,RValue<Int2> y)7362 RValue<Long1> punpckhdq(RValue<Int2> x, RValue<Int2> y)
7363 {
7364 Module *module = Nucleus::getModule();
7365 llvm::Function *punpckhdq = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhdq);
7366
7367 return As<Long1>(RValue<MMX>(Nucleus::createCall(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
7368 }
7369
punpcklbw(RValue<Byte8> x,RValue<Byte8> y)7370 RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
7371 {
7372 Module *module = Nucleus::getModule();
7373 llvm::Function *punpcklbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpcklbw);
7374
7375 return As<Short4>(RValue<MMX>(Nucleus::createCall(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
7376 }
7377
punpckhbw(RValue<Byte8> x,RValue<Byte8> y)7378 RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
7379 {
7380 Module *module = Nucleus::getModule();
7381 llvm::Function *punpckhbw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_punpckhbw);
7382
7383 return As<Short4>(RValue<MMX>(Nucleus::createCall(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
7384 }
7385
paddb(RValue<Byte8> x,RValue<Byte8> y)7386 RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
7387 {
7388 Module *module = Nucleus::getModule();
7389 llvm::Function *paddb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_b);
7390
7391 return As<Byte8>(RValue<MMX>(Nucleus::createCall(paddb, As<MMX>(x).value, As<MMX>(y).value)));
7392 }
7393
psubb(RValue<Byte8> x,RValue<Byte8> y)7394 RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
7395 {
7396 Module *module = Nucleus::getModule();
7397 llvm::Function *psubb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_b);
7398
7399 return As<Byte8>(RValue<MMX>(Nucleus::createCall(psubb, As<MMX>(x).value, As<MMX>(y).value)));
7400 }
7401
paddd(RValue<Int2> x,RValue<Int2> y)7402 RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
7403 {
7404 Module *module = Nucleus::getModule();
7405 llvm::Function *paddd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_padd_d);
7406
7407 return As<Int2>(RValue<MMX>(Nucleus::createCall(paddd, As<MMX>(x).value, As<MMX>(y).value)));
7408 }
7409
psubd(RValue<Int2> x,RValue<Int2> y)7410 RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
7411 {
7412 Module *module = Nucleus::getModule();
7413 llvm::Function *psubd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psub_d);
7414
7415 return As<Int2>(RValue<MMX>(Nucleus::createCall(psubd, As<MMX>(x).value, As<MMX>(y).value)));
7416 }
7417
pavgw(RValue<UShort4> x,RValue<UShort4> y)7418 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
7419 {
7420 Module *module = Nucleus::getModule();
7421 llvm::Function *pavgw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pavg_w);
7422
7423 return As<UShort4>(RValue<MMX>(Nucleus::createCall(pavgw, As<MMX>(x).value, As<MMX>(y).value)));
7424 }
7425
pmaxsw(RValue<Short4> x,RValue<Short4> y)7426 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
7427 {
7428 Module *module = Nucleus::getModule();
7429 llvm::Function *pmaxsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmaxs_w);
7430
7431 return As<Short4>(RValue<MMX>(Nucleus::createCall(pmaxsw, As<MMX>(x).value, As<MMX>(y).value)));
7432 }
7433
pminsw(RValue<Short4> x,RValue<Short4> y)7434 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
7435 {
7436 Module *module = Nucleus::getModule();
7437 llvm::Function *pminsw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmins_w);
7438
7439 return As<Short4>(RValue<MMX>(Nucleus::createCall(pminsw, As<MMX>(x).value, As<MMX>(y).value)));
7440 }
7441
pcmpgtw(RValue<Short4> x,RValue<Short4> y)7442 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
7443 {
7444 Module *module = Nucleus::getModule();
7445 llvm::Function *pcmpgtw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_w);
7446
7447 return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value)));
7448 }
7449
pcmpeqw(RValue<Short4> x,RValue<Short4> y)7450 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
7451 {
7452 Module *module = Nucleus::getModule();
7453 llvm::Function *pcmpeqw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_w);
7454
7455 return As<Short4>(RValue<MMX>(Nucleus::createCall(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value)));
7456 }
7457
pcmpgtb(RValue<SByte8> x,RValue<SByte8> y)7458 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
7459 {
7460 Module *module = Nucleus::getModule();
7461 llvm::Function *pcmpgtb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpgt_b);
7462
7463 return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value)));
7464 }
7465
pcmpeqb(RValue<Byte8> x,RValue<Byte8> y)7466 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
7467 {
7468 Module *module = Nucleus::getModule();
7469 llvm::Function *pcmpeqb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pcmpeq_b);
7470
7471 return As<Byte8>(RValue<MMX>(Nucleus::createCall(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value)));
7472 }
7473
packssdw(RValue<Int2> x,RValue<Int2> y)7474 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
7475 {
7476 Module *module = Nucleus::getModule();
7477 llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packssdw);
7478
7479 return As<Short4>(RValue<MMX>(Nucleus::createCall(packssdw, As<MMX>(x).value, As<MMX>(y).value)));
7480 }
7481
packssdw(RValue<Int4> x,RValue<Int4> y)7482 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
7483 {
7484 if(CPUID::supportsSSE2())
7485 {
7486 Module *module = Nucleus::getModule();
7487 llvm::Function *packssdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_packssdw_128);
7488
7489 return RValue<Short8>(Nucleus::createCall(packssdw, x.value, y.value));
7490 }
7491 else
7492 {
7493 Int2 loX = Int2(x);
7494 Int2 hiX = Int2(Swizzle(x, 0xEE));
7495
7496 Int2 loY = Int2(y);
7497 Int2 hiY = Int2(Swizzle(y, 0xEE));
7498
7499 Short4 lo = x86::packssdw(loX, hiX);
7500 Short4 hi = x86::packssdw(loY, hiY);
7501
7502 return Short8(lo, hi);
7503 }
7504 }
7505
packsswb(RValue<Short4> x,RValue<Short4> y)7506 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
7507 {
7508 Module *module = Nucleus::getModule();
7509 llvm::Function *packsswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packsswb);
7510
7511 return As<SByte8>(RValue<MMX>(Nucleus::createCall(packsswb, As<MMX>(x).value, As<MMX>(y).value)));
7512 }
7513
packuswb(RValue<UShort4> x,RValue<UShort4> y)7514 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
7515 {
7516 Module *module = Nucleus::getModule();
7517 llvm::Function *packuswb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_packuswb);
7518
7519 return As<Byte8>(RValue<MMX>(Nucleus::createCall(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
7520 }
7521
packusdw(RValue<UInt4> x,RValue<UInt4> y)7522 RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
7523 {
7524 if(CPUID::supportsSSE4_1())
7525 {
7526 Module *module = Nucleus::getModule();
7527 llvm::Function *packusdw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_packusdw);
7528
7529 return RValue<UShort8>(Nucleus::createCall(packusdw, x.value, y.value));
7530 }
7531 else
7532 {
7533 // FIXME: Not an exact replacement!
7534 return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
7535 }
7536 }
7537
psrlw(RValue<UShort4> x,unsigned char y)7538 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
7539 {
7540 Module *module = Nucleus::getModule();
7541 llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_w);
7542
7543 return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7544 }
7545
psrlw(RValue<UShort8> x,unsigned char y)7546 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
7547 {
7548 Module *module = Nucleus::getModule();
7549 llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_w);
7550
7551 return RValue<UShort8>(Nucleus::createCall(psrlw, x.value, Nucleus::createConstantInt(y)));
7552 }
7553
psraw(RValue<Short4> x,unsigned char y)7554 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
7555 {
7556 Module *module = Nucleus::getModule();
7557 llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_w);
7558
7559 return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7560 }
7561
psraw(RValue<Short8> x,unsigned char y)7562 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
7563 {
7564 Module *module = Nucleus::getModule();
7565 llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_w);
7566
7567 return RValue<Short8>(Nucleus::createCall(psraw, x.value, Nucleus::createConstantInt(y)));
7568 }
7569
psllw(RValue<Short4> x,unsigned char y)7570 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
7571 {
7572 Module *module = Nucleus::getModule();
7573 llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_w);
7574
7575 return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7576 }
7577
psllw(RValue<Short8> x,unsigned char y)7578 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
7579 {
7580 Module *module = Nucleus::getModule();
7581 llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_w);
7582
7583 return RValue<Short8>(Nucleus::createCall(psllw, x.value, Nucleus::createConstantInt(y)));
7584 }
7585
pslld(RValue<Int2> x,unsigned char y)7586 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
7587 {
7588 Module *module = Nucleus::getModule();
7589 llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pslli_d);
7590
7591 return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7592 }
7593
pslld(RValue<Int4> x,unsigned char y)7594 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
7595 {
7596 if(CPUID::supportsSSE2())
7597 {
7598 Module *module = Nucleus::getModule();
7599 llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pslli_d);
7600
7601 return RValue<Int4>(Nucleus::createCall(pslld, x.value, Nucleus::createConstantInt(y)));
7602 }
7603 else
7604 {
7605 Int2 lo = Int2(x);
7606 Int2 hi = Int2(Swizzle(x, 0xEE));
7607
7608 lo = x86::pslld(lo, y);
7609 hi = x86::pslld(hi, y);
7610
7611 return Int4(lo, hi);
7612 }
7613 }
7614
psrad(RValue<Int2> x,unsigned char y)7615 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
7616 {
7617 Module *module = Nucleus::getModule();
7618 llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrai_d);
7619
7620 return As<Int2>(RValue<MMX>(Nucleus::createCall(psrad, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7621 }
7622
psrad(RValue<Int4> x,unsigned char y)7623 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
7624 {
7625 if(CPUID::supportsSSE2())
7626 {
7627 Module *module = Nucleus::getModule();
7628 llvm::Function *psrad = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrai_d);
7629
7630 return RValue<Int4>(Nucleus::createCall(psrad, x.value, Nucleus::createConstantInt(y)));
7631 }
7632 else
7633 {
7634 Int2 lo = Int2(x);
7635 Int2 hi = Int2(Swizzle(x, 0xEE));
7636
7637 lo = x86::psrad(lo, y);
7638 hi = x86::psrad(hi, y);
7639
7640 return Int4(lo, hi);
7641 }
7642 }
7643
psrld(RValue<UInt2> x,unsigned char y)7644 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7645 {
7646 Module *module = Nucleus::getModule();
7647 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrli_d);
7648
7649 return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, Nucleus::createConstantInt(y))));
7650 }
7651
psrld(RValue<UInt4> x,unsigned char y)7652 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7653 {
7654 if(CPUID::supportsSSE2())
7655 {
7656 Module *module = Nucleus::getModule();
7657 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_psrli_d);
7658
7659 return RValue<UInt4>(Nucleus::createCall(psrld, x.value, Nucleus::createConstantInt(y)));
7660 }
7661 else
7662 {
7663 UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
7664 UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
7665
7666 lo = x86::psrld(lo, y);
7667 hi = x86::psrld(hi, y);
7668
7669 return UInt4(lo, hi);
7670 }
7671 }
7672
psrlw(RValue<UShort4> x,RValue<Long1> y)7673 RValue<UShort4> psrlw(RValue<UShort4> x, RValue<Long1> y)
7674 {
7675 Module *module = Nucleus::getModule();
7676 llvm::Function *psrlw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_w);
7677
7678 return As<UShort4>(RValue<MMX>(Nucleus::createCall(psrlw, As<MMX>(x).value, As<MMX>(y).value)));
7679 }
7680
psraw(RValue<Short4> x,RValue<Long1> y)7681 RValue<Short4> psraw(RValue<Short4> x, RValue<Long1> y)
7682 {
7683 Module *module = Nucleus::getModule();
7684 llvm::Function *psraw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_w);
7685
7686 return As<Short4>(RValue<MMX>(Nucleus::createCall(psraw, As<MMX>(x).value, As<MMX>(y).value)));
7687 }
7688
psllw(RValue<Short4> x,RValue<Long1> y)7689 RValue<Short4> psllw(RValue<Short4> x, RValue<Long1> y)
7690 {
7691 Module *module = Nucleus::getModule();
7692 llvm::Function *psllw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_w);
7693
7694 return As<Short4>(RValue<MMX>(Nucleus::createCall(psllw, As<MMX>(x).value, As<MMX>(y).value)));
7695 }
7696
pslld(RValue<Int2> x,RValue<Long1> y)7697 RValue<Int2> pslld(RValue<Int2> x, RValue<Long1> y)
7698 {
7699 Module *module = Nucleus::getModule();
7700 llvm::Function *pslld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psll_d);
7701
7702 return As<Int2>(RValue<MMX>(Nucleus::createCall(pslld, As<MMX>(x).value, As<MMX>(y).value)));
7703 }
7704
psrld(RValue<UInt2> x,RValue<Long1> y)7705 RValue<UInt2> psrld(RValue<UInt2> x, RValue<Long1> y)
7706 {
7707 Module *module = Nucleus::getModule();
7708 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psrl_d);
7709
7710 return As<UInt2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value)));
7711 }
7712
psrad(RValue<Int2> x,RValue<Long1> y)7713 RValue<Int2> psrad(RValue<Int2> x, RValue<Long1> y)
7714 {
7715 Module *module = Nucleus::getModule();
7716 llvm::Function *psrld = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_psra_d);
7717
7718 return As<Int2>(RValue<MMX>(Nucleus::createCall(psrld, As<MMX>(x).value, As<MMX>(y).value)));
7719 }
7720
pmaxsd(RValue<Int4> x,RValue<Int4> y)7721 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7722 {
7723 Module *module = Nucleus::getModule();
7724 llvm::Function *pmaxsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxsd);
7725
7726 return RValue<Int4>(Nucleus::createCall(pmaxsd, x.value, y.value));
7727 }
7728
pminsd(RValue<Int4> x,RValue<Int4> y)7729 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7730 {
7731 Module *module = Nucleus::getModule();
7732 llvm::Function *pminsd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminsd);
7733
7734 return RValue<Int4>(Nucleus::createCall(pminsd, x.value, y.value));
7735 }
7736
pmaxud(RValue<UInt4> x,RValue<UInt4> y)7737 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7738 {
7739 Module *module = Nucleus::getModule();
7740 llvm::Function *pmaxud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmaxud);
7741
7742 return RValue<UInt4>(Nucleus::createCall(pmaxud, x.value, y.value));
7743 }
7744
pminud(RValue<UInt4> x,RValue<UInt4> y)7745 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7746 {
7747 Module *module = Nucleus::getModule();
7748 llvm::Function *pminud = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pminud);
7749
7750 return RValue<UInt4>(Nucleus::createCall(pminud, x.value, y.value));
7751 }
7752
pmulhw(RValue<Short4> x,RValue<Short4> y)7753 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7754 {
7755 Module *module = Nucleus::getModule();
7756 llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulh_w);
7757
7758 return As<Short4>(RValue<MMX>(Nucleus::createCall(pmulhw, As<MMX>(x).value, As<MMX>(y).value)));
7759 }
7760
pmulhuw(RValue<UShort4> x,RValue<UShort4> y)7761 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7762 {
7763 Module *module = Nucleus::getModule();
7764 llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmulhu_w);
7765
7766 return As<UShort4>(RValue<MMX>(Nucleus::createCall(pmulhuw, As<MMX>(x).value, As<MMX>(y).value)));
7767 }
7768
pmaddwd(RValue<Short4> x,RValue<Short4> y)7769 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7770 {
7771 Module *module = Nucleus::getModule();
7772 llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmadd_wd);
7773
7774 return As<Int2>(RValue<MMX>(Nucleus::createCall(pmaddwd, As<MMX>(x).value, As<MMX>(y).value)));
7775 }
7776
pmulhw(RValue<Short8> x,RValue<Short8> y)7777 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7778 {
7779 Module *module = Nucleus::getModule();
7780 llvm::Function *pmulhw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulh_w);
7781
7782 return RValue<Short8>(Nucleus::createCall(pmulhw, x.value, y.value));
7783 }
7784
pmulhuw(RValue<UShort8> x,RValue<UShort8> y)7785 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7786 {
7787 Module *module = Nucleus::getModule();
7788 llvm::Function *pmulhuw = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmulhu_w);
7789
7790 return RValue<UShort8>(Nucleus::createCall(pmulhuw, x.value, y.value));
7791 }
7792
pmaddwd(RValue<Short8> x,RValue<Short8> y)7793 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7794 {
7795 Module *module = Nucleus::getModule();
7796 llvm::Function *pmaddwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse2_pmadd_wd);
7797
7798 return RValue<Int4>(Nucleus::createCall(pmaddwd, x.value, y.value));
7799 }
7800
movmskps(RValue<Float4> x)7801 RValue<Int> movmskps(RValue<Float4> x)
7802 {
7803 Module *module = Nucleus::getModule();
7804 llvm::Function *movmskps = Intrinsic::getDeclaration(module, Intrinsic::x86_sse_movmsk_ps);
7805
7806 return RValue<Int>(Nucleus::createCall(movmskps, x.value));
7807 }
7808
pmovmskb(RValue<Byte8> x)7809 RValue<Int> pmovmskb(RValue<Byte8> x)
7810 {
7811 Module *module = Nucleus::getModule();
7812 llvm::Function *pmovmskb = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_pmovmskb);
7813
7814 return RValue<Int>(Nucleus::createCall(pmovmskb, As<MMX>(x).value));
7815 }
7816
7817 //RValue<Int2> movd(RValue<Pointer<Int>> x)
7818 //{
7819 // Value *element = Nucleus::createLoad(x.value);
7820
7821 //// Value *int2 = UndefValue::get(Int2::getType());
7822 //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
7823
7824 // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
7825
7826 // return RValue<Int2>(int2);
7827 //}
7828
7829 //RValue<Int2> movdq2q(RValue<Int4> x)
7830 //{
7831 // Value *long2 = Nucleus::createBitCast(x.value, Long2::getType());
7832 // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
7833
7834 // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
7835 //}
7836
pmovzxbd(RValue<Int4> x)7837 RValue<Int4> pmovzxbd(RValue<Int4> x)
7838 {
7839 Module *module = Nucleus::getModule();
7840 llvm::Function *pmovzxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxbd);
7841
7842 return RValue<Int4>(Nucleus::createCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType())));
7843 }
7844
pmovsxbd(RValue<Int4> x)7845 RValue<Int4> pmovsxbd(RValue<Int4> x)
7846 {
7847 Module *module = Nucleus::getModule();
7848 llvm::Function *pmovsxbd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxbd);
7849
7850 return RValue<Int4>(Nucleus::createCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType())));
7851 }
7852
pmovzxwd(RValue<Int4> x)7853 RValue<Int4> pmovzxwd(RValue<Int4> x)
7854 {
7855 Module *module = Nucleus::getModule();
7856 llvm::Function *pmovzxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovzxwd);
7857
7858 return RValue<Int4>(Nucleus::createCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType())));
7859 }
7860
pmovsxwd(RValue<Int4> x)7861 RValue<Int4> pmovsxwd(RValue<Int4> x)
7862 {
7863 Module *module = Nucleus::getModule();
7864 llvm::Function *pmovsxwd = Intrinsic::getDeclaration(module, Intrinsic::x86_sse41_pmovsxwd);
7865
7866 return RValue<Int4>(Nucleus::createCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType())));
7867 }
7868
emms()7869 void emms()
7870 {
7871 Module *module = Nucleus::getModule();
7872 llvm::Function *emms = Intrinsic::getDeclaration(module, Intrinsic::x86_mmx_emms);
7873
7874 Nucleus::createCall(emms);
7875 }
7876 }
7877 }
7878