/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
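// Note: the '__' shorthand above routes the mnemonics that follow through the X86_64 assembler
// of the surrounding 'codegen'; it is redefined to use a plain 'assembler' pointer further down.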

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
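      // bswapl reverses all four bytes, which leaves the byte-reversed short in the upper half
      // of the register; the arithmetic shift moves it back down and sign-extends it, as a Java
      // short result requires.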
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary. This will avoid a
    // temporary.
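    // The masks below clear only the IEEE-754 sign bit, leaving exponent and mantissa (and thus
    // any NaN payload) untouched.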
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
    // Once assembler support is available, in-memory operations look like this:
    // if (is64bit) {
    //   DCHECK(output.IsDoubleStackSlot());
    //   // No 64b and with literal.
    //   __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
    //   __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
    // } else {
    //   DCHECK(output.IsStackSlot());
    //   // Can use and with a literal directly.
    //   __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
    // }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

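  // Branchless abs: mask = x >> (width - 1) is 0 for non-negative x and all ones for negative x,
  // so (x + mask) ^ mask leaves non-negative values unchanged and negates negative ones. The most
  // negative value maps to itself, matching Math.abs semantics.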
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
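  // Reaching here means the comparison was equal: either both inputs are the same value (the
  // or/and below is then a no-op) or they are +0.0 and -0.0. OR-ing the bit patterns propagates a
  // set sign bit, so min(+0.0, -0.0) yields -0.0; AND-ing clears it unless both operands are
  // negative zero, so max(+0.0, -0.0) yields +0.0.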
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

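  // out already holds op1; the conditional move overwrites it with op2 exactly when op2 is the
  // better candidate: op1 > op2 for min, op1 < op2 for max.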
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

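// round_mode below is the ROUNDSD immediate: bits 1:0 select the rounding mode, so 0 rounds to
// nearest (even), 1 rounds toward negative infinity (floor) and 2 rounds toward positive
// infinity (ceil).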
static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it to an integer (immediate 1 = round toward negative infinity).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor it to an integer (immediate 1 = round toward negative infinity).
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0.
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it will be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
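    // counter now holds string_length - start_index, i.e. the number of chars left to scan from
    // the adjusted starting position.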
  }

  // Everything is set up for repne scasw:
  // * Comparison address in RDI.
  // * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched.  Compute the index of the result.
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  Label done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
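        // A 64-bit store only accepts a sign-extended 32-bit immediate, hence the check above.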
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

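  // is_volatile is unused here: plain x86-64 loads already provide the acquire ordering a
  // volatile read needs, so the same load sequence works for both cases.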
1252 switch (type) {
1253 case Primitive::kPrimInt:
1254 case Primitive::kPrimNot:
1255 __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1256 break;
1257
1258 case Primitive::kPrimLong:
1259 __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1260 break;
1261
1262 default:
1263 LOG(FATAL) << "Unsupported op size " << type;
1264 UNREACHABLE();
1265 }
1266 }
1267
CreateIntIntIntToIntLocations(ArenaAllocator * arena,HInvoke * invoke)1268 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
1269 LocationSummary* locations = new (arena) LocationSummary(invoke,
1270 LocationSummary::kNoCall,
1271 kIntrinsified);
1272 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1273 locations->SetInAt(1, Location::RequiresRegister());
1274 locations->SetInAt(2, Location::RequiresRegister());
1275 locations->SetOut(Location::RequiresRegister());
1276 }
1277
VisitUnsafeGet(HInvoke * invoke)1278 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1279 CreateIntIntIntToIntLocations(arena_, invoke);
1280 }
VisitUnsafeGetVolatile(HInvoke * invoke)1281 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1282 CreateIntIntIntToIntLocations(arena_, invoke);
1283 }
VisitUnsafeGetLong(HInvoke * invoke)1284 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1285 CreateIntIntIntToIntLocations(arena_, invoke);
1286 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1287 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1288 CreateIntIntIntToIntLocations(arena_, invoke);
1289 }
VisitUnsafeGetObject(HInvoke * invoke)1290 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1291 CreateIntIntIntToIntLocations(arena_, invoke);
1292 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1293 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1294 CreateIntIntIntToIntLocations(arena_, invoke);
1295 }
1296
1297
VisitUnsafeGet(HInvoke * invoke)1298 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
1299 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
1300 }
VisitUnsafeGetVolatile(HInvoke * invoke)1301 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1302 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
1303 }
VisitUnsafeGetLong(HInvoke * invoke)1304 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1305 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
1306 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1307 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1308 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
1309 }
VisitUnsafeGetObject(HInvoke * invoke)1310 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1311 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
1312 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1313 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1314 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
1315 }
1316
1317
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * arena,Primitive::Type type,HInvoke * invoke)1318 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1319 Primitive::Type type,
1320 HInvoke* invoke) {
1321 LocationSummary* locations = new (arena) LocationSummary(invoke,
1322 LocationSummary::kNoCall,
1323 kIntrinsified);
1324 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1325 locations->SetInAt(1, Location::RequiresRegister());
1326 locations->SetInAt(2, Location::RequiresRegister());
1327 locations->SetInAt(3, Location::RequiresRegister());
1328 if (type == Primitive::kPrimNot) {
1329 // Need temp registers for card-marking.
1330 locations->AddTemp(Location::RequiresRegister());
1331 locations->AddTemp(Location::RequiresRegister());
1332 }
1333 }

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't need to do anything extra for ordered puts: the AnyStore barrier they require
// is already provided by the x86 memory model.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}
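
// Illustrative sketch of what GenUnsafePut emits for a volatile int put (using the base and
// offset registers above; not a verbatim listing):
//
//   movl [base + offset], value   // the plain store; ordered puts need nothing further
//   mfence                        // volatile variants only: the required StoreLoad barrier
//   <card mark>                   // reference puts only: dirty the GC card for `base`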

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}
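
// The expected value is pinned to EAX/RAX because the cmpxchg instruction uses that register
// implicitly: it compares EAX/RAX against the memory operand and, on failure, loads the
// current memory value back into EAX/RAX. Fixing the location here lets GenCAS emit a single
// lock-prefixed cmpxchg with no extra register shuffling.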

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object, assuming the new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // Locked cmpxchg has full barrier semantics, so no additional scheduling barriers
  // are needed here.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}
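
// Illustrative sketch of the sequence above for the int/object case (expected value in EAX):
//
//   lock cmpxchgl [base + offset], value   // if ([addr] == eax) { [addr] = value; ZF = 1; }
//                                          // else               { eax = [addr];   ZF = 0; }
//   setz out                               // out = ZF (1 on success, 0 on failure)
//   movzxb out, out                        // widen the byte result to a full register
//
// The lock prefix both makes the read-modify-write atomic and acts as a full memory barrier.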

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}
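
// One SwapBits call is a single round of the classic bit-reversal recurrence; in C it is
//
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
//
// with temp holding the left-shifted half while reg is reworked in place.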

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse the bits of a number x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of
   * swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}
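
// SwapBits64 performs the same round as SwapBits, but on 64-bit values:
//
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift);
//
// The mask is materialized in temp_mask first because 64-bit ALU instructions such as andq
// only accept 32-bit sign-extended immediates; only movq can carry a full 64-bit immediate.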

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse the bits of a long number x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of
   * swapping bits.
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                       \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art