/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

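// Try to build an intrinsified LocationSummary for `invoke`. Returns true if the intrinsic was
// recognized and locations were created, so the code generator can expand it inline instead of
// emitting a regular call.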
bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

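// Generic intrinsic slow path: falls back to performing the original invoke with the standard
// x86-64 calling convention when the fast-path checks fail.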
using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = DataType::Size(DataType::Type::kReference);

    CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
    CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
    CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();

    __ Bind(GetEntryLabel());
    NearLabel loop;
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    // TODO: Inline the mark bit check before calling the runtime?
    // TMP = ReadBarrier::Mark(TMP);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(CpuRegister(TMP));
    __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
    __ addl(src_curr_addr, Immediate(element_size));
    __ addl(dst_curr_addr, Immediate(element_size));
    __ cmpl(src_curr_addr, src_stop_addr);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
};

#undef __

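// For the static helpers below, `__` expands to the assembler that is passed in explicitly.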
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case DataType::Type::kInt16:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case DataType::Type::kInt32:
      __ bswapl(out);
      break;
    case DataType::Type::kInt64:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  CreateFPToFPLocations(allocator, invoke);
}

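// `round_mode` is the SSE4.1 rounding-control immediate: 0 rounds to nearest (even),
// 1 rounds toward negative infinity (floor) and 2 rounds toward positive infinity (ceil).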
static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
  __ roundsd(out, in, Immediate(round_mode));
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    return;
  }

  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
  __ j(kBelow, &skip_incr);
  __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load32BitValue(out, kPrimIntMax);
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(!locations->WillCall());

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  NearLabel skip_incr, done;
  X86_64Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5)
  //    result = result + 1.0;
  __ movsd(t2, in);
  __ roundsd(t1, in, Immediate(1));
  __ subsd(t2, t1);
  __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
  __ j(kBelow, &skip_incr);
  __ addsd(t1, codegen_->LiteralDoubleAddress(1.0f));
  __ Bind(&skip_incr);

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  codegen_->Load64BitValue(out, kPrimLongMax);
  __ cvtsi2sd(t2, out, /* is64bit= */ true);
  __ comisd(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags, implicit zero extension to 64-bit
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttsd2si(out, t1, /* is64bit= */ true);
  __ Bind(&done);
}

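// The Math intrinsics below are implemented as calls to their quick entrypoints, so they use the
// runtime calling convention and return their result in XMM0.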
static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());

  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickPow);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

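// Emit the range checks for one side of an arraycopy: `pos` must lie within `input` and
// `pos + length` must not exceed the array length; otherwise branch to `slow_path`. When
// `length_is_input_length` is true the copy covers the whole array, so only `pos == 0` can
// succeed. Constant positions are already known to be non-negative at this point.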
static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = DataType::Size(DataType::Type::kUint16);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}


void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// Compute base source address, base destination address, and end
// source address for the System.arraycopy intrinsic in `src_base`,
// `dst_base` and `src_end` respectively.
static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
                                        DataType::Type type,
                                        const CpuRegister& src,
                                        const Location& src_pos,
                                        const CpuRegister& dst,
                                        const Location& dst_pos,
                                        const Location& copy_length,
                                        const CpuRegister& src_base,
                                        const CpuRegister& dst_base,
                                        const CpuRegister& src_end) {
  // This routine is only used by the SystemArrayCopy intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, element_size * constant + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (dst_pos.IsConstant()) {
    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dst_base, Address(dst, element_size * constant + data_offset));
  } else {
    __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
  }

  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_end, Address(src_base, element_size * constant));
  } else {
    __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  Location temp1_loc = locations->GetTemp(0);
  CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
  Location temp2_loc = locations->GetTemp(1);
  CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
  Location temp3_loc = locations->GetTemp(2);
  CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
  Location TMP_loc = Location::RegisterLocation(TMP);

  SlowPathCode* intrinsic_slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to slow path if we need to do
  // forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    bool did_unpoison = false;
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
      // Register `temp1` is not trashed by the read barrier emitted
      // by GenerateFieldLoadWithBakerReadBarrier below, as that
      // method produces a call to a ReadBarrierMarkRegX entry point,
      // which saves all potentially live registers, including
      // temporaries such as `temp1`.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
      // If heap poisoning is enabled, `temp1` and `temp2` have been
      // unpoisoned by the previous calls to
      // GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      // /* HeapReference<Class> */ temp2 = src->klass_
      __ movl(temp2, Address(src, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
          !optimizations.GetSourceIsNonPrimitiveArray()) {
        // One or two of the references need to be unpoisoned. Unpoison them
        // both to make the identity check valid.
        __ MaybeUnpoisonHeapReference(temp1);
        __ MaybeUnpoisonHeapReference(temp2);
        did_unpoison = true;
      }
    }

    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // Bail out if the destination is not a non primitive array.
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp1->component_type_
        __ movl(CpuRegister(TMP), Address(temp1, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      // Bail out if the source is not a non primitive array.
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // For the same reason given earlier, `temp1` is not trashed by the
        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, TMP_loc, temp2, component_offset, /* needs_null_check= */ false);
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `TMP` has been unpoisoned by
        // the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ TMP = temp2->component_type_
        __ movl(CpuRegister(TMP), Address(temp2, component_offset));
        __ testl(CpuRegister(TMP), CpuRegister(TMP));
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      }
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    __ cmpl(temp1, temp2);

    if (optimizations.GetDestinationIsTypedObjectArray()) {
      NearLabel do_copy;
      __ j(kEqual, &do_copy);
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      } else {
        if (!did_unpoison) {
          __ MaybeUnpoisonHeapReference(temp1);
        }
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // No need to unpoison the following heap reference load, as
        // we're comparing against null.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
      }
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      __ Bind(&do_copy);
    } else {
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
      // No need to unpoison `TMP` now, as we're comparing against null.
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    }
    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const DataType::Type type = DataType::Type::kReference;
  const int32_t element_size = DataType::Size(type);

  // Compute base source address, base destination address, and end
  // source address in `temp1`, `temp2` and `temp3` respectively.
  GenSystemArrayCopyAddresses(
      GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       do {
    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
    //       } while (src_ptr != end_ptr)
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr)
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(temp1, 0));
    __ movl(Address(temp2, 0), CpuRegister(TMP));
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.

    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ movl(CpuRegister(TMP), Address(temp1, 0));
    __ movl(Address(temp2, 0), CpuRegister(TMP));
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}

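// String.compareTo is delegated to the kQuickStringCompareTo entrypoint; a null argument is
// handled by the intrinsic slow path (the original call).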
void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (allocator_) LocationSummary(
      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
  locations->AddTemp(Location::RegisterLocation(RCX));
  locations->AddTemp(Location::RegisterLocation(RDI));

  // Set output, RSI needed for repe_cmpsq instruction anyways.
  locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  StringEqualsOptimizations optimizations(invoke);
  if (!optimizations.GetArgumentNotNull()) {
    // Check if input is null, return false if it is.
    __ testl(arg, arg);
    __ j(kEqual, &return_false);
  }

  if (!optimizations.GetArgumentIsString()) {
    // Instanceof check for the argument by comparing class fields.
    // All string objects must have the same type since String cannot be subclassed.
    // Receiver must be a string object, so its class field is equal to all strings' class fields.
    // If the argument is a string object, its class field must be equal to receiver's class field.
    //
    // As the String class is expected to be non-movable, we can read the class
    // field from String.equals' arguments without read barriers.
    AssertNonMovableStringClass();
    // Also, because we use the loaded class references only to compare them, we
    // don't need to unpoison them.
    // /* HeapReference<Class> */ rcx = str->klass_
    __ movl(rcx, Address(str, class_offset));
    // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
    __ cmpl(rcx, Address(arg, class_offset));
    __ j(kNotEqual, &return_false);
  }

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length and compression flag of receiver string.
  __ movl(rcx, Address(str, count_offset));
  // Check if lengths and compression flags are equal, return false if they're not.
1240 // Two identical strings will always have same compression style since
1241 // compression style is decided on alloc.
1242 __ cmpl(rcx, Address(arg, count_offset));
1243 __ j(kNotEqual, &return_false);
1244 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1245 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1246 "Expecting 0=compressed, 1=uncompressed");
1247 __ jrcxz(&return_true);
1248
1249 if (mirror::kUseStringCompression) {
1250 NearLabel string_uncompressed;
1251 // Extract length and differentiate between both compressed or both uncompressed.
1252 // Different compression style is cut above.
1253 __ shrl(rcx, Immediate(1));
1254 __ j(kCarrySet, &string_uncompressed);
1255 // Divide string length by 2, rounding up, and continue as if uncompressed.
1256 // Merge clearing the compression flag with +1 for rounding.
1257 __ addl(rcx, Immediate(1));
1258 __ shrl(rcx, Immediate(1));
1259 __ Bind(&string_uncompressed);
1260 }
1261 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1262 __ leal(rsi, Address(str, value_offset));
1263 __ leal(rdi, Address(arg, value_offset));
1264
1265 // Divide string length by 4 and adjust for lengths not divisible by 4.
1266 __ addl(rcx, Immediate(3));
1267 __ shrl(rcx, Immediate(2));
1268
1269 // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1270 // or 8 characters (compressed) at a time.
1271 DCHECK_ALIGNED(value_offset, 8);
1272 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
1273
1274 // Loop to compare strings four characters at a time starting at the beginning of the string.
1275 __ repe_cmpsq();
1276 // If strings are not equal, zero flag will be cleared.
1277 __ j(kNotEqual, &return_false);
1278
1279 // Return true and exit the function.
1280 // If the loop did not branch to return_false, the strings are equal.
1281 __ Bind(&return_true);
1282 __ movl(rsi, Immediate(1));
1283 __ jmp(&end);
1284
1285 // Return false and exit the function.
1286 __ Bind(&return_false);
1287 __ xorl(rsi, rsi);
1288 __ Bind(&end);
1289 }
1290
1291 static void CreateStringIndexOfLocations(HInvoke* invoke,
1292 ArenaAllocator* allocator,
1293 bool start_at_zero) {
1294 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1295 LocationSummary::kCallOnSlowPath,
1296 kIntrinsified);
1297 // The data needs to be in RDI for scasw, so request that the string be placed there anyway.
1298 locations->SetInAt(0, Location::RegisterLocation(RDI));
1299 // If we look for a constant char, we'll still have to copy it into RAX, so just request the
1300 // allocator to do that anyway. We can still handle the constant case by checking the
1301 // parameter of the instruction explicitly.
1302 // Note: This works because we don't clobber RAX anywhere.
1303 locations->SetInAt(1, Location::RegisterLocation(RAX));
1304 if (!start_at_zero) {
1305 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1306 }
1307 // As we clobber RDI during execution anyway, also use it as the output.
1308 locations->SetOut(Location::SameAsFirstInput());
1309
1310 // repne scasw uses RCX as the counter.
1311 locations->AddTemp(Location::RegisterLocation(RCX));
1312 // Need another temporary to be able to compute the result.
1313 locations->AddTemp(Location::RequiresRegister());
1314 }
1315
1316 static void GenerateStringIndexOf(HInvoke* invoke,
1317 X86_64Assembler* assembler,
1318 CodeGeneratorX86_64* codegen,
1319 bool start_at_zero) {
1320 LocationSummary* locations = invoke->GetLocations();
1321
1322 // Note that the null check must have been done earlier.
1323 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1324
1325 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1326 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1327 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1328 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1329 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1330
1331 // Check our assumptions for registers.
1332 DCHECK_EQ(string_obj.AsRegister(), RDI);
1333 DCHECK_EQ(search_value.AsRegister(), RAX);
1334 DCHECK_EQ(counter.AsRegister(), RCX);
1335 DCHECK_EQ(out.AsRegister(), RDI);
1336
1337 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1338 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1339 SlowPathCode* slow_path = nullptr;
1340 HInstruction* code_point = invoke->InputAt(1);
1341 if (code_point->IsIntConstant()) {
1342 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1343 std::numeric_limits<uint16_t>::max()) {
1344 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1345 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1346 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1347 codegen->AddSlowPath(slow_path);
1348 __ jmp(slow_path->GetEntryLabel());
1349 __ Bind(slow_path->GetExitLabel());
1350 return;
1351 }
1352 } else if (code_point->GetType() != DataType::Type::kUint16) {
1353 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1354 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1355 codegen->AddSlowPath(slow_path);
1356 __ j(kAbove, slow_path->GetEntryLabel());
1357 }
1358
1359 // From here down, we know that we are looking for a char that fits in
1360 // 16 bits (uncompressed) or 8 bits (compressed).
1361 // Location of reference to data array within the String object.
1362 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1363 // Location of count within the String object.
1364 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1365
1366 // Load the count field of the string containing the length and compression flag.
1367 __ movl(string_length, Address(string_obj, count_offset));
1368
1369 // Do a zero-length check. Even with string compression `count == 0` means empty.
1370 // TODO: Support jecxz.
1371 NearLabel not_found_label;
1372 __ testl(string_length, string_length);
1373 __ j(kEqual, &not_found_label);
1374
1375 if (mirror::kUseStringCompression) {
1376 // Use TMP to keep string_length_flagged.
1377 __ movl(CpuRegister(TMP), string_length);
1378 // Shift out the low bit used as the compression flag.
1379 __ shrl(string_length, Immediate(1));
1380 }
1381
1382 if (start_at_zero) {
1383 // Number of chars to scan is the same as the string length.
1384 __ movl(counter, string_length);
1385 // Move to the start of the string.
1386 __ addq(string_obj, Immediate(value_offset));
1387 } else {
1388 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1389
1390 // Do a start_index check.
1391 __ cmpl(start_index, string_length);
1392 __ j(kGreaterEqual, &not_found_label);
1393
1394 // Ensure we have a start index >= 0.
1395 __ xorl(counter, counter);
1396 __ cmpl(start_index, Immediate(0));
1397 __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough.
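// At this point counter holds max(start_index, 0).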
1398
1399 if (mirror::kUseStringCompression) {
1400 NearLabel modify_counter, offset_uncompressed_label;
1401 __ testl(CpuRegister(TMP), Immediate(1));
1402 __ j(kNotZero, &offset_uncompressed_label);
1403 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1404 __ jmp(&modify_counter);
1405 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1406 __ Bind(&offset_uncompressed_label);
1407 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1408 __ Bind(&modify_counter);
1409 } else {
1410 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1411 }
1412 // Now update RCX (the work counter): it will be string.length - start_index.
1413 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1414 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
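// counter = string_length + (-start_index), i.e. the number of characters left to scan from the
// clamped start index.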
1415 }
1416
1417 if (mirror::kUseStringCompression) {
1418 NearLabel uncompressed_string_comparison;
1419 NearLabel comparison_done;
1420 __ testl(CpuRegister(TMP), Immediate(1));
1421 __ j(kNotZero, &uncompressed_string_comparison);
1422 // Check if RAX (search_value) is ASCII.
1423 __ cmpl(search_value, Immediate(127));
1424 __ j(kGreater, &not_found_label);
1425 // Comparing byte-per-byte.
1426 __ repne_scasb();
1427 __ jmp(&comparison_done);
1428 // Everything is set up for repne scasw:
1429 // * Comparison address in RDI.
1430 // * Counter in ECX.
1431 __ Bind(&uncompressed_string_comparison);
1432 __ repne_scasw();
1433 __ Bind(&comparison_done);
1434 } else {
1435 __ repne_scasw();
1436 }
1437 // Did we find a match?
1438 __ j(kNotEqual, &not_found_label);
1439
1440 // Yes, we matched. Compute the index of the result.
1441 __ subl(string_length, counter);
1442 __ leal(out, Address(string_length, -1));
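// REPNE SCASW/SCASB decrements RCX once per element compared, including the matching one, so
// after a hit RCX holds the number of elements left; the matched index is therefore
// string_length - RCX - 1, which is what the two instructions above compute.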
1443
1444 NearLabel done;
1445 __ jmp(&done);
1446
1447 // Failed to match; return -1.
1448 __ Bind(&not_found_label);
1449 __ movl(out, Immediate(-1));
1450
1451 // And join up at the end.
1452 __ Bind(&done);
1453 if (slow_path != nullptr) {
1454 __ Bind(slow_path->GetExitLabel());
1455 }
1456 }
1457
1458 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1459 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1460 }
1461
1462 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1463 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1464 }
1465
1466 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1467 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1468 }
1469
1470 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1471 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1472 }
1473
1474 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1475 LocationSummary* locations = new (allocator_) LocationSummary(
1476 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1477 InvokeRuntimeCallingConvention calling_convention;
1478 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1479 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1480 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1481 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1482 locations->SetOut(Location::RegisterLocation(RAX));
1483 }
1484
1485 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1486 X86_64Assembler* assembler = GetAssembler();
1487 LocationSummary* locations = invoke->GetLocations();
1488
1489 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1490 __ testl(byte_array, byte_array);
1491 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1492 codegen_->AddSlowPath(slow_path);
1493 __ j(kEqual, slow_path->GetEntryLabel());
1494
1495 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1496 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1497 __ Bind(slow_path->GetExitLabel());
1498 }
1499
1500 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1501 LocationSummary* locations =
1502 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1503 InvokeRuntimeCallingConvention calling_convention;
1504 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1505 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1506 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1507 locations->SetOut(Location::RegisterLocation(RAX));
1508 }
1509
1510 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1511 // No need to emit code checking whether `locations->InAt(2)` is a null
1512 // pointer, as callers of the native method
1513 //
1514 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1515 //
1516 // all include a null check on `data` before calling that method.
1517 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1518 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1519 }
1520
1521 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1522 LocationSummary* locations = new (allocator_) LocationSummary(
1523 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1524 InvokeRuntimeCallingConvention calling_convention;
1525 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1526 locations->SetOut(Location::RegisterLocation(RAX));
1527 }
1528
1529 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1530 X86_64Assembler* assembler = GetAssembler();
1531 LocationSummary* locations = invoke->GetLocations();
1532
1533 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1534 __ testl(string_to_copy, string_to_copy);
1535 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1536 codegen_->AddSlowPath(slow_path);
1537 __ j(kEqual, slow_path->GetEntryLabel());
1538
1539 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1540 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1541 __ Bind(slow_path->GetExitLabel());
1542 }
1543
1544 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1545 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1546 LocationSummary* locations =
1547 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1548 locations->SetInAt(0, Location::RequiresRegister());
1549 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1550 locations->SetInAt(2, Location::RequiresRegister());
1551 locations->SetInAt(3, Location::RequiresRegister());
1552 locations->SetInAt(4, Location::RequiresRegister());
1553
1554 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1555 locations->AddTemp(Location::RegisterLocation(RSI));
1556 locations->AddTemp(Location::RegisterLocation(RDI));
1557 locations->AddTemp(Location::RegisterLocation(RCX));
1558 }
1559
1560 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1561 X86_64Assembler* assembler = GetAssembler();
1562 LocationSummary* locations = invoke->GetLocations();
1563
1564 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1565 // Location of data in char array buffer.
1566 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1567 // Location of char array data in string.
1568 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1569
1570 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1571 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1572 Location srcBegin = locations->InAt(1);
1573 int srcBegin_value =
1574 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1575 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1576 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1577 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1578
1579 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1580 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1581 DCHECK_EQ(char_size, 2u);
1582
1583 NearLabel done;
1584 // Compute the number of chars (words) to move.
1585 __ movl(CpuRegister(RCX), srcEnd);
1586 if (srcBegin.IsConstant()) {
1587 __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1588 } else {
1589 DCHECK(srcBegin.IsRegister());
1590 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1591 }
1592 if (mirror::kUseStringCompression) {
1593 NearLabel copy_uncompressed, copy_loop;
1594 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1595 DCHECK_EQ(c_char_size, 1u);
1596 // Location of count in string.
1597 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1598
1599 __ testl(Address(obj, count_offset), Immediate(1));
1600 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1601 "Expecting 0=compressed, 1=uncompressed");
1602 __ j(kNotZero, &copy_uncompressed);
1603 // Compute the address of the source string by adding the number of chars from
1604 // the source beginning to the value offset of a string.
1605 __ leaq(CpuRegister(RSI),
1606 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1607 // Compute the address of the destination char[] buffer; the copy loop follows.
1608 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1609
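// Byte-to-char copy loop for a compressed source: RSI walks the one-byte-per-character value
// array, RDI walks the two-byte-per-character destination, and each iteration widens one byte
// into a word via TMP.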
1610 __ Bind(&copy_loop);
1611 __ jrcxz(&done);
1612 // Use TMP as temporary (convert byte from RSI to word).
1613 // TODO: Selecting RAX as the temporary and using LODSB/STOSW.
1614 __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1615 __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1616 __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1617 __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1618 // TODO: Add support for LOOP to X86_64Assembler.
1619 __ subl(CpuRegister(RCX), Immediate(1));
1620 __ jmp(&copy_loop);
1621
1622 __ Bind(&copy_uncompressed);
1623 }
1624
1625 __ leaq(CpuRegister(RSI),
1626 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1627 // Compute the address of the destination buffer.
1628 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1629 // Do the move.
1630 __ rep_movsw();
1631
1632 __ Bind(&done);
1633 }
1634
1635 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1636 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1637 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1638 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1639 // to avoid a SIGBUS.
1640 switch (size) {
1641 case DataType::Type::kInt8:
1642 __ movsxb(out, Address(address, 0));
1643 break;
1644 case DataType::Type::kInt16:
1645 __ movsxw(out, Address(address, 0));
1646 break;
1647 case DataType::Type::kInt32:
1648 __ movl(out, Address(address, 0));
1649 break;
1650 case DataType::Type::kInt64:
1651 __ movq(out, Address(address, 0));
1652 break;
1653 default:
1654 LOG(FATAL) << "Type not recognized for peek: " << size;
1655 UNREACHABLE();
1656 }
1657 }
1658
1659 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1660 CreateIntToIntLocations(allocator_, invoke);
1661 }
1662
1663 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1664 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1665 }
1666
1667 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1668 CreateIntToIntLocations(allocator_, invoke);
1669 }
1670
1671 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1672 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1673 }
1674
1675 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1676 CreateIntToIntLocations(allocator_, invoke);
1677 }
1678
1679 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1680 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1681 }
1682
1683 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1684 CreateIntToIntLocations(allocator_, invoke);
1685 }
1686
1687 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1688 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1689 }
1690
1691 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1692 LocationSummary* locations =
1693 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1694 locations->SetInAt(0, Location::RequiresRegister());
1695 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1696 }
1697
1698 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1699 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1700 Location value = locations->InAt(1);
1701 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1702 // to avoid a SIGBUS.
1703 switch (size) {
1704 case DataType::Type::kInt8:
1705 if (value.IsConstant()) {
1706 __ movb(Address(address, 0),
1707 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1708 } else {
1709 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1710 }
1711 break;
1712 case DataType::Type::kInt16:
1713 if (value.IsConstant()) {
1714 __ movw(Address(address, 0),
1715 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1716 } else {
1717 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1718 }
1719 break;
1720 case DataType::Type::kInt32:
1721 if (value.IsConstant()) {
1722 __ movl(Address(address, 0),
1723 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1724 } else {
1725 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1726 }
1727 break;
1728 case DataType::Type::kInt64:
1729 if (value.IsConstant()) {
1730 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1731 DCHECK(IsInt<32>(v));
1732 int32_t v_32 = v;
1733 __ movq(Address(address, 0), Immediate(v_32));
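// A 64-bit MOV with an immediate operand sign-extends the 32-bit immediate, hence the
// IsInt<32> check above.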
1734 } else {
1735 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1736 }
1737 break;
1738 default:
1739 LOG(FATAL) << "Type not recognized for poke: " << size;
1740 UNREACHABLE();
1741 }
1742 }
1743
1744 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1745 CreateIntIntToVoidLocations(allocator_, invoke);
1746 }
1747
1748 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1749 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1750 }
1751
1752 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1753 CreateIntIntToVoidLocations(allocator_, invoke);
1754 }
1755
1756 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1757 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1758 }
1759
1760 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1761 CreateIntIntToVoidLocations(allocator_, invoke);
1762 }
1763
1764 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1765 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1766 }
1767
1768 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1769 CreateIntIntToVoidLocations(allocator_, invoke);
1770 }
1771
1772 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1773 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1774 }
1775
1776 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1777 LocationSummary* locations =
1778 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1779 locations->SetOut(Location::RequiresRegister());
1780 }
1781
1782 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1783 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1784 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1785 /* no_rip= */ true));
1786 }
1787
1788 static void GenUnsafeGet(HInvoke* invoke,
1789 DataType::Type type,
1790 bool is_volatile ATTRIBUTE_UNUSED,
1791 CodeGeneratorX86_64* codegen) {
1792 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1793 LocationSummary* locations = invoke->GetLocations();
1794 Location base_loc = locations->InAt(1);
1795 CpuRegister base = base_loc.AsRegister<CpuRegister>();
1796 Location offset_loc = locations->InAt(2);
1797 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1798 Location output_loc = locations->Out();
1799 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1800
1801 switch (type) {
1802 case DataType::Type::kInt32:
1803 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1804 break;
1805
1806 case DataType::Type::kReference: {
1807 if (kEmitCompilerReadBarrier) {
1808 if (kUseBakerReadBarrier) {
1809 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1810 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1811 invoke, output_loc, base, src, /* needs_null_check= */ false);
1812 } else {
1813 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1814 codegen->GenerateReadBarrierSlow(
1815 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1816 }
1817 } else {
1818 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1819 __ MaybeUnpoisonHeapReference(output);
1820 }
1821 break;
1822 }
1823
1824 case DataType::Type::kInt64:
1825 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1826 break;
1827
1828 default:
1829 LOG(FATAL) << "Unsupported op size " << type;
1830 UNREACHABLE();
1831 }
1832 }
1833
1834 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1835 bool can_call = kEmitCompilerReadBarrier &&
1836 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1837 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1838 LocationSummary* locations =
1839 new (allocator) LocationSummary(invoke,
1840 can_call
1841 ? LocationSummary::kCallOnSlowPath
1842 : LocationSummary::kNoCall,
1843 kIntrinsified);
1844 if (can_call && kUseBakerReadBarrier) {
1845 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1846 }
1847 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1848 locations->SetInAt(1, Location::RequiresRegister());
1849 locations->SetInAt(2, Location::RequiresRegister());
1850 locations->SetOut(Location::RequiresRegister(),
1851 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1852 }
1853
1854 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1855 CreateIntIntIntToIntLocations(allocator_, invoke);
1856 }
1857 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1858 CreateIntIntIntToIntLocations(allocator_, invoke);
1859 }
1860 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1861 CreateIntIntIntToIntLocations(allocator_, invoke);
1862 }
1863 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1864 CreateIntIntIntToIntLocations(allocator_, invoke);
1865 }
1866 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1867 CreateIntIntIntToIntLocations(allocator_, invoke);
1868 }
1869 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1870 CreateIntIntIntToIntLocations(allocator_, invoke);
1871 }
1872
1873
1874 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
1875 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1876 }
1877 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1878 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1879 }
1880 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1881 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1882 }
1883 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1884 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1885 }
1886 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1887 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1888 }
1889 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1890 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1891 }
1892
1893
1894 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1895 DataType::Type type,
1896 HInvoke* invoke) {
1897 LocationSummary* locations =
1898 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1899 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1900 locations->SetInAt(1, Location::RequiresRegister());
1901 locations->SetInAt(2, Location::RequiresRegister());
1902 locations->SetInAt(3, Location::RequiresRegister());
1903 if (type == DataType::Type::kReference) {
1904 // Need temp registers for card-marking.
1905 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1906 locations->AddTemp(Location::RequiresRegister());
1907 }
1908 }
1909
1910 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
1911 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
1912 }
1913 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
1914 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
1915 }
1916 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
1917 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
1918 }
1919 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
1920 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
1921 }
1922 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1923 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
1924 }
1925 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1926 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
1927 }
1928 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
1929 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
1930 }
1931 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1932 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
1933 }
1934 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1935 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
1936 }
1937
1938 // We need no special handling for the "ordered" variants: they require an AnyStore barrier,
1939 // which the x86 memory model already provides.
1940 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
1941 CodeGeneratorX86_64* codegen) {
1942 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1943 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
1944 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
1945 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
1946
1947 if (type == DataType::Type::kInt64) {
1948 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
1949 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1950 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
1951 __ movl(temp, value);
1952 __ PoisonHeapReference(temp);
1953 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1954 } else {
1955 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
1956 }
1957
1958 if (is_volatile) {
1959 codegen->MemoryFence();
1960 }
1961
1962 if (type == DataType::Type::kReference) {
1963 bool value_can_be_null = true; // TODO: Worth finding out this information?
1964 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
1965 locations->GetTemp(1).AsRegister<CpuRegister>(),
1966 base,
1967 value,
1968 value_can_be_null);
1969 }
1970 }
1971
1972 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
1973 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1974 }
1975 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
1976 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1977 }
1978 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
1979 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1980 }
1981 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
1982 GenUnsafePut(
1983 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1984 }
1985 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1986 GenUnsafePut(
1987 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1988 }
1989 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1990 GenUnsafePut(
1991 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1992 }
1993 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
1994 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1995 }
1996 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1997 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1998 }
1999 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2000 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
2001 }
2002
2003 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2004 DataType::Type type,
2005 HInvoke* invoke) {
2006 bool can_call = kEmitCompilerReadBarrier &&
2007 kUseBakerReadBarrier &&
2008 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
2009 LocationSummary* locations =
2010 new (allocator) LocationSummary(invoke,
2011 can_call
2012 ? LocationSummary::kCallOnSlowPath
2013 : LocationSummary::kNoCall,
2014 kIntrinsified);
2015 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2016 locations->SetInAt(1, Location::RequiresRegister());
2017 locations->SetInAt(2, Location::RequiresRegister());
2018 // expected value must be in EAX/RAX.
2019 locations->SetInAt(3, Location::RegisterLocation(RAX));
2020 locations->SetInAt(4, Location::RequiresRegister());
2021
2022 locations->SetOut(Location::RequiresRegister());
2023 if (type == DataType::Type::kReference) {
2024 // Need temporary registers for card-marking, and possibly for
2025 // (Baker) read barrier.
2026 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2027 locations->AddTemp(Location::RequiresRegister());
2028 }
2029 }
2030
2031 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2032 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
2033 }
2034
2035 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2036 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
2037 }
2038
2039 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2040 // The only read barrier implementation supporting the
2041 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2042 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2043 return;
2044 }
2045
2046 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
2047 }
2048
2049 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2050 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2051 LocationSummary* locations = invoke->GetLocations();
2052
2053 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2054 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2055 CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
2056 // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
2057 DCHECK_EQ(expected.AsRegister(), RAX);
2058 CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
2059 Location out_loc = locations->Out();
2060 CpuRegister out = out_loc.AsRegister<CpuRegister>();
2061
2062 if (type == DataType::Type::kReference) {
2063 // The only read barrier implementation supporting the
2064 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2065 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2066
2067 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2068 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2069
2070 // Mark card for object assuming new value is stored.
2071 bool value_can_be_null = true; // TODO: Worth finding out this information?
2072 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2073
2074 // The address of the field within the holding object.
2075 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2076
2077 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2078 // Need to make sure the reference stored in the field is a to-space
2079 // one before attempting the CAS or the CAS could fail incorrectly.
2080 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2081 invoke,
2082 out_loc, // Unused, used only as a "temporary" within the read barrier.
2083 base,
2084 field_addr,
2085 /* needs_null_check= */ false,
2086 /* always_update_field= */ true,
2087 &temp1,
2088 &temp2);
2089 }
2090
2091 bool base_equals_value = (base.AsRegister() == value.AsRegister());
2092 Register value_reg = value.AsRegister();
2093 if (kPoisonHeapReferences) {
2094 if (base_equals_value) {
2095 // If `base` and `value` are the same register location, move
2096 // `value_reg` to a temporary register. This way, poisoning
2097 // `value_reg` won't invalidate `base`.
2098 value_reg = temp1.AsRegister();
2099 __ movl(CpuRegister(value_reg), base);
2100 }
2101
2102 // Check that the register allocator did not assign the location
2103 // of `expected` (RAX) to `value` nor to `base`, so that heap
2104 // poisoning (when enabled) works as intended below.
2105 // - If `value` were equal to `expected`, both references would
2106 // be poisoned twice, meaning they would not be poisoned at
2107 // all, as heap poisoning uses address negation.
2108 // - If `base` were equal to `expected`, poisoning `expected`
2109 // would invalidate `base`.
2110 DCHECK_NE(value_reg, expected.AsRegister());
2111 DCHECK_NE(base.AsRegister(), expected.AsRegister());
2112
2113 __ PoisonHeapReference(expected);
2114 __ PoisonHeapReference(CpuRegister(value_reg));
2115 }
2116
2117 __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2118
2119 // LOCK CMPXCHG has full barrier semantics, and we don't need
2120 // scheduling barriers at this time.
2121
2122 // Convert ZF into the Boolean result.
2123 __ setcc(kZero, out);
2124 __ movzxb(out, out);
2125
2126 // If heap poisoning is enabled, we need to unpoison the values
2127 // that were poisoned earlier.
2128 if (kPoisonHeapReferences) {
2129 if (base_equals_value) {
2130 // `value_reg` has been moved to a temporary register, no need
2131 // to unpoison it.
2132 } else {
2133 // Ensure `value` is different from `out`, so that unpoisoning
2134 // the former does not invalidate the latter.
2135 DCHECK_NE(value_reg, out.AsRegister());
2136 __ UnpoisonHeapReference(CpuRegister(value_reg));
2137 }
2138 // Ensure `expected` is different from `out`, so that unpoisoning
2139 // the former does not invalidate the latter.
2140 DCHECK_NE(expected.AsRegister(), out.AsRegister());
2141 __ UnpoisonHeapReference(expected);
2142 }
2143 } else {
2144 if (type == DataType::Type::kInt32) {
2145 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
2146 } else if (type == DataType::Type::kInt64) {
2147 __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
2148 } else {
2149 LOG(FATAL) << "Unexpected CAS type " << type;
2150 }
2151
2152 // LOCK CMPXCHG has full barrier semantics, and we don't need
2153 // scheduling barriers at this time.
2154
2155 // Convert ZF into the Boolean result.
2156 __ setcc(kZero, out);
2157 __ movzxb(out, out);
2158 }
2159 }
2160
2161 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2162 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2163 }
2164
2165 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2166 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2167 }
2168
2169 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2170 // The only read barrier implementation supporting the
2171 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2172 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2173
2174 GenCAS(DataType::Type::kReference, invoke, codegen_);
2175 }
2176
2177 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2178 LocationSummary* locations =
2179 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2180 locations->SetInAt(0, Location::RequiresRegister());
2181 locations->SetOut(Location::SameAsFirstInput());
2182 locations->AddTemp(Location::RequiresRegister());
2183 }
2184
2185 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2186 X86_64Assembler* assembler) {
2187 Immediate imm_shift(shift);
2188 Immediate imm_mask(mask);
2189 __ movl(temp, reg);
2190 __ shrl(reg, imm_shift);
2191 __ andl(temp, imm_mask);
2192 __ andl(reg, imm_mask);
2193 __ shll(temp, imm_shift);
2194 __ orl(reg, temp);
2195 }
2196
2197 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2198 X86_64Assembler* assembler = GetAssembler();
2199 LocationSummary* locations = invoke->GetLocations();
2200
2201 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2202 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2203
2204 /*
2205 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2206 * swapping bits to reverse bits in a number x. Using bswap to save instructions
2207 * compared to generic luni implementation which has 5 rounds of swapping bits.
2208 * x = bswap x
2209 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2210 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2211 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2212 */
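// Example: reverse(0x00000001): bswapl gives 0x01000000; the three swap rounds then move the
// single set bit to 0x02000000, 0x08000000 and finally 0x80000000.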
2213 __ bswapl(reg);
2214 SwapBits(reg, temp, 1, 0x55555555, assembler);
2215 SwapBits(reg, temp, 2, 0x33333333, assembler);
2216 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2217 }
2218
2219 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2220 LocationSummary* locations =
2221 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2222 locations->SetInAt(0, Location::RequiresRegister());
2223 locations->SetOut(Location::SameAsFirstInput());
2224 locations->AddTemp(Location::RequiresRegister());
2225 locations->AddTemp(Location::RequiresRegister());
2226 }
2227
2228 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2229 int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2230 Immediate imm_shift(shift);
2231 __ movq(temp_mask, Immediate(mask));
2232 __ movq(temp, reg);
2233 __ shrq(reg, imm_shift);
2234 __ andq(temp, temp_mask);
2235 __ andq(reg, temp_mask);
2236 __ shlq(temp, imm_shift);
2237 __ orq(reg, temp);
2238 }
2239
2240 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2241 X86_64Assembler* assembler = GetAssembler();
2242 LocationSummary* locations = invoke->GetLocations();
2243
2244 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2245 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
2246 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
2247
2248 /*
2249 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2250 * swapping bits to reverse bits in a long number x. Using bswap to save instructions
2251 * compared to generic luni implementation which has 5 rounds of swapping bits.
2252 * x = bswap x
2253 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
2254 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
2255 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
2256 */
2257 __ bswapq(reg);
2258 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
2259 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
2260 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
2261 }
2262
2263 static void CreateBitCountLocations(
2264 ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
2265 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2266 // Do nothing if there is no popcnt support. This results in generating
2267 // a call for the intrinsic rather than direct code.
2268 return;
2269 }
2270 LocationSummary* locations =
2271 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2272 locations->SetInAt(0, Location::Any());
2273 locations->SetOut(Location::RequiresRegister());
2274 }
2275
2276 static void GenBitCount(X86_64Assembler* assembler,
2277 CodeGeneratorX86_64* codegen,
2278 HInvoke* invoke,
2279 bool is_long) {
2280 LocationSummary* locations = invoke->GetLocations();
2281 Location src = locations->InAt(0);
2282 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2283
2284 if (invoke->InputAt(0)->IsConstant()) {
2285 // Evaluate this at compile time.
2286 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2287 int32_t result = is_long
2288 ? POPCOUNT(static_cast<uint64_t>(value))
2289 : POPCOUNT(static_cast<uint32_t>(value));
2290 codegen->Load32BitValue(out, result);
2291 return;
2292 }
2293
2294 if (src.IsRegister()) {
2295 if (is_long) {
2296 __ popcntq(out, src.AsRegister<CpuRegister>());
2297 } else {
2298 __ popcntl(out, src.AsRegister<CpuRegister>());
2299 }
2300 } else if (is_long) {
2301 DCHECK(src.IsDoubleStackSlot());
2302 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2303 } else {
2304 DCHECK(src.IsStackSlot());
2305 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2306 }
2307 }
2308
2309 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2310 CreateBitCountLocations(allocator_, codegen_, invoke);
2311 }
2312
2313 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
2314 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2315 }
2316
2317 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
2318 CreateBitCountLocations(allocator_, codegen_, invoke);
2319 }
2320
2321 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
2322 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2323 }
2324
2325 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
2326 LocationSummary* locations =
2327 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2328 locations->SetInAt(0, Location::Any());
2329 locations->SetOut(Location::RequiresRegister());
2330 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL
2331 : Location::RequiresRegister()); // any will do
2332 }
2333
2334 static void GenOneBit(X86_64Assembler* assembler,
2335 CodeGeneratorX86_64* codegen,
2336 HInvoke* invoke,
2337 bool is_high, bool is_long) {
2338 LocationSummary* locations = invoke->GetLocations();
2339 Location src = locations->InAt(0);
2340 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2341
2342 if (invoke->InputAt(0)->IsConstant()) {
2343 // Evaluate this at compile time.
2344 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2345 if (value == 0) {
2346 __ xorl(out, out); // Clears upper bits too.
2347 return;
2348 }
2349 // Nonzero value.
2350 if (is_high) {
2351 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
2352 : 31 - CLZ(static_cast<uint32_t>(value));
2353 } else {
2354 value = is_long ? CTZ(static_cast<uint64_t>(value))
2355 : CTZ(static_cast<uint32_t>(value));
2356 }
2357 if (is_long) {
2358 codegen->Load64BitValue(out, 1ULL << value);
2359 } else {
2360 codegen->Load32BitValue(out, 1 << value);
2361 }
2362 return;
2363 }
2364
2365 // Handle the non-constant cases.
2366 if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
2367 src.IsRegister()) {
2368 __ blsi(out, src.AsRegister<CpuRegister>());
2369 } else {
2370 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
2371 if (is_high) {
2372 // Use architectural support: basically 1 << bsr.
2373 if (src.IsRegister()) {
2374 if (is_long) {
2375 __ bsrq(tmp, src.AsRegister<CpuRegister>());
2376 } else {
2377 __ bsrl(tmp, src.AsRegister<CpuRegister>());
2378 }
2379 } else if (is_long) {
2380 DCHECK(src.IsDoubleStackSlot());
2381 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2382 } else {
2383 DCHECK(src.IsStackSlot());
2384 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2385 }
2386 // BSR sets ZF if the input was zero.
2387 NearLabel is_zero, done;
2388 __ j(kEqual, &is_zero);
2389 __ movl(out, Immediate(1)); // Clears upper bits too.
2390 if (is_long) {
2391 __ shlq(out, tmp);
2392 } else {
2393 __ shll(out, tmp);
2394 }
2395 __ jmp(&done);
2396 __ Bind(&is_zero);
2397 __ xorl(out, out); // Clears upper bits too.
2398 __ Bind(&done);
2399 } else {
2400 // Copy input into temporary.
2401 if (src.IsRegister()) {
2402 if (is_long) {
2403 __ movq(tmp, src.AsRegister<CpuRegister>());
2404 } else {
2405 __ movl(tmp, src.AsRegister<CpuRegister>());
2406 }
2407 } else if (is_long) {
2408 DCHECK(src.IsDoubleStackSlot());
2409 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2410 } else {
2411 DCHECK(src.IsStackSlot());
2412 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
2413 }
2414 // Do the bit twiddling: basically tmp & -tmp;
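// Example: tmp = 0b01101000; in two's complement -tmp = ...10011000, so tmp & -tmp = 0b00001000,
// isolating the lowest set bit.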
2415 if (is_long) {
2416 __ movq(out, tmp);
2417 __ negq(tmp);
2418 __ andq(out, tmp);
2419 } else {
2420 __ movl(out, tmp);
2421 __ negl(tmp);
2422 __ andl(out, tmp);
2423 }
2424 }
2425 }
2426 }
2427
2428 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2429 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
2430 }
2431
2432 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
2433 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
2434 }
2435
2436 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2437 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
2438 }
2439
2440 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
2441 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
2442 }
2443
2444 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2445 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
2446 }
2447
2448 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
2449 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
2450 }
2451
2452 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2453 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
2454 }
2455
2456 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
2457 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
2458 }
2459
2460 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2461 LocationSummary* locations =
2462 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2463 locations->SetInAt(0, Location::Any());
2464 locations->SetOut(Location::RequiresRegister());
2465 }
2466
2467 static void GenLeadingZeros(X86_64Assembler* assembler,
2468 CodeGeneratorX86_64* codegen,
2469 HInvoke* invoke, bool is_long) {
2470 LocationSummary* locations = invoke->GetLocations();
2471 Location src = locations->InAt(0);
2472 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2473
2474 int zero_value_result = is_long ? 64 : 32;
2475 if (invoke->InputAt(0)->IsConstant()) {
2476 // Evaluate this at compile time.
2477 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2478 if (value == 0) {
2479 value = zero_value_result;
2480 } else {
2481 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2482 }
2483 codegen->Load32BitValue(out, value);
2484 return;
2485 }
2486
2487 // Handle the non-constant cases.
2488 if (src.IsRegister()) {
2489 if (is_long) {
2490 __ bsrq(out, src.AsRegister<CpuRegister>());
2491 } else {
2492 __ bsrl(out, src.AsRegister<CpuRegister>());
2493 }
2494 } else if (is_long) {
2495 DCHECK(src.IsDoubleStackSlot());
2496 __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2497 } else {
2498 DCHECK(src.IsStackSlot());
2499 __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
2500 }
2501
2502 // BSR sets ZF if the input was zero, and the output is undefined.
2503 NearLabel is_zero, done;
2504 __ j(kEqual, &is_zero);
2505
2506 // Correct the result from BSR to get the CLZ result.
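  // For a nonzero input, CLZ(x) == (width - 1) - BSR(x); since BSR(x) is in [0, width - 1],
  // the subtraction can be done as an XOR with width - 1 (zero_value_result - 1 below).
  // E.g. for 32-bit x = 0x00f00000, BSR yields 23 and 23 ^ 31 == 8 leading zeros.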
  __ xorl(out, Immediate(zero_value_result - 1));
  __ jmp(&done);

  // Fix the zero case with the expected result.
  __ Bind(&is_zero);
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86_64Assembler* assembler,
                             CodeGeneratorX86_64* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  int zero_value_result = is_long ? 64 : 32;
  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = zero_value_result;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (src.IsRegister()) {
    if (is_long) {
      __ bsfq(out, src.AsRegister<CpuRegister>());
    } else {
      __ bsfl(out, src.AsRegister<CpuRegister>());
    }
  } else if (is_long) {
    DCHECK(src.IsDoubleStackSlot());
    __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  } else {
    DCHECK(src.IsStackSlot());
    __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
  }

  // BSF sets ZF if the input was zero, and the output is undefined.
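  // Unlike BSR, BSF already produces the trailing-zero count for a nonzero input (the index of
  // the lowest set bit), e.g. BSF(0b0110'1000) == 3; only the zero case needs patching below.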
  NearLabel done;
  __ j(kNotEqual, &done);

  // Fix the zero case with the expected result.
  __ movl(out, Immediate(zero_value_result));

  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}

void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(RAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info =
      IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
  LocationSummary* locations = invoke->GetLocations();
  X86_64Assembler* assembler = GetAssembler();

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  InvokeRuntimeCallingConvention calling_convention;
  CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
  auto allocate_instance = [&]() {
    codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
  };
  if (invoke->InputAt(0)->IsIntConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (static_cast<uint32_t>(value - info.low) < info.length) {
      // Just embed the j.l.Integer in the code.
      DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
      codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
    } else {
      DCHECK(locations->CanCall());
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      allocate_instance();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    DCHECK(locations->CanCall());
    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
    // Check bounds of our cache.
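    // Unsigned range-check trick: LEA computes `in - info.low` in one instruction, and a single
    // unsigned compare against info.length covers both bounds, because any input below info.low
    // wraps around to a large unsigned value and also takes the kAboveEqual (allocate) branch.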
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.length));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    DCHECK_NE(out.AsRegister(), argument.AsRegister());
    codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
    static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
                  "Check heap reference size.");
    __ movl(out, Address(argument, out, TIMES_4, 0));
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    allocate_instance();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location obj = locations->InAt(0);
  Location out = locations->Out();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  if (kEmitCompilerReadBarrier) {
    // Check self->GetWeakRefAccessEnabled().
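    // When weak reference access is disabled (e.g. while a concurrent collector is processing
    // references), the referent must not be read here; defer to the runtime via the slow path.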
    ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
    __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true), Immediate(0));
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  // Load the java.lang.ref.Reference class, use the output register as a temporary.
  codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);

  // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
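  // The two boolean fields live in adjacent bytes (verified by the DCHECKs below), so a single
  // 16-bit compare against zero tests both at once; if either is set, take the slow path.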
  MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
  DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
  DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
            IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
  __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
          Immediate(0));
  __ j(kNotEqual, slow_path->GetEntryLabel());

  // Load the value from the field.
  uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                    out,
                                                    obj.AsRegister<CpuRegister>(),
                                                    referent_offset,
                                                    /*needs_null_check=*/ true);
    // Note that the fence is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
  } else {
    __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
    codegen_->MaybeRecordImplicitNullCheck(invoke);
    // Note that the fence is a no-op, thanks to the x86-64 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.
    codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
  }
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
  IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  __ movl(out, Address(obj, referent_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  // Note that the fence is a no-op, thanks to the x86-64 memory model.
  codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);  // `referent` is volatile.

  __ cmpl(out, other);

  if (kEmitCompilerReadBarrier) {
    DCHECK(kUseBakerReadBarrier);

    NearLabel calculate_result;
    __ j(kEqual, &calculate_result);  // ZF set if taken.

    // Check if the loaded reference is null in a way that leaves ZF clear for null.
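    // Comparing against 1 means only a null (zero) reference is unsigned-below; that compare
    // leaves ZF clear, so the final setcc(kEqual) correctly yields false for a null referent.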
    __ cmpl(out, Immediate(1));
    __ j(kBelow, &calculate_result);  // ZF clear if taken.

    // For correct memory visibility we would need a barrier before loading the lock word, but
    // the barrier already emitted above for the volatile `referent` load is sufficient.

    // Load the lock word and check if it is a forwarding address.
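    // The lock word state sits in the top two bits and the forwarding-address state is 3 (see
    // the static_asserts below), so any lock word unsigned >= 0xc0000000 is a forwarding
    // address; shifting left by kForwardingAddressShift drops the state bits and rescales the
    // stored value back to the to-space address, which is then compared with `other`.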
    static_assert(LockWord::kStateShift == 30u);
    static_assert(LockWord::kStateForwardingAddress == 3u);
    __ movl(out, Address(out, monitor_offset));
    __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
    __ j(kBelow, &calculate_result);  // ZF clear if taken.

    // Extract the forwarding address and compare with `other`.
    __ shll(out, Immediate(LockWord::kForwardingAddressShift));
    __ cmpl(out, other);

    __ Bind(&calculate_result);
  }

  // Convert ZF into the Boolean result.
  __ setcc(kEqual, out);
  __ movzxb(out, out);
}

void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  Address address = Address::Absolute(
      Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
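  // This implements the return-and-clear behaviour of Thread.interrupted(): read the per-thread
  // flag through the GS-based Thread pointer, and only when it is set, store zero back and issue
  // a fence to keep the clear ordered with later memory accesses.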
  NearLabel done;
  __ gs()->movl(out, address);
  __ testl(out, out);
  __ j(kEqual, &done);
  __ gs()->movl(address, Immediate(0));
  codegen_->MemoryFence();
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(RAX));
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Intel uses edx:eax as the dividend.
  locations->AddTemp(Location::RegisterLocation(RDX));
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location out = locations->Out();
  Location first = locations->InAt(0);
  Location second = locations->InAt(1);
  CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister second_reg = second.AsRegister<CpuRegister>();

  DCHECK_EQ(RAX, first.AsRegister<Register>());
  DCHECK_EQ(RAX, out.AsRegister<Register>());
  DCHECK_EQ(RDX, rdx.AsRegister());

  // Check if the divisor is zero; if so, bail out to the managed implementation to handle it.
  __ testl(second_reg, second_reg);
  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

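  // DIV with a 32-bit operand divides the 64-bit value in edx:eax, so edx is zeroed first to
  // make the dividend the plain unsigned 32-bit value in eax; the quotient ends up in eax (the
  // output) and the remainder in edx (the temp).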
  __ xorl(rdx, rdx);
  __ divl(second_reg);

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(RAX));
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RegisterLocation(RDX));
  locations->AddTemp(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();

  DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
  DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);

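  // The one-operand IMUL multiplies rax by the operand and leaves the 128-bit signed product in
  // rdx:rax; Math.multiplyHigh needs the high 64 bits, hence the RDX output, while RAX holds the
  // first operand and is clobbered. Roughly ((__int128)x * y) >> 64 in C terms (illustrative).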
  __ imulq(y);
}

UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less)
UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals)
UNIMPLEMENTED_INTRINSIC(X86_64, LongDivideUnsigned)

UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)

UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvokeExact)
UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvoke)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndExchange)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndExchangeAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndExchangeRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleCompareAndSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndAdd)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndAddAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndAddRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseAnd)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseAndAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseAndRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseOr)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseOrAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseOrRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseXor)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseXorAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndBitwiseXorRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetOpaque)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetVolatile)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetOpaque)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetVolatile)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetPlain)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetRelease)

UNREACHABLE_INTRINSICS(X86_64)

#undef __

}  // namespace x86_64
}  // namespace art