1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21 
22 namespace art {
23 namespace x86_64 {
24 
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
27 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30   HInstruction* input = instruction->InputAt(0);
31   bool is_zero = IsZeroBitPattern(input);
32   switch (instruction->GetPackedType()) {
33     case DataType::Type::kBool:
34     case DataType::Type::kUint8:
35     case DataType::Type::kInt8:
36     case DataType::Type::kUint16:
37     case DataType::Type::kInt16:
38     case DataType::Type::kInt32:
39     case DataType::Type::kInt64:
40       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
41                                     : Location::RequiresRegister());
42       locations->SetOut(Location::RequiresFpuRegister());
43       break;
44     case DataType::Type::kFloat32:
45     case DataType::Type::kFloat64:
46       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
47                                     : Location::RequiresFpuRegister());
48       locations->SetOut(is_zero ? Location::RequiresFpuRegister()
49                                 : Location::SameAsFirstInput());
50       break;
51     default:
52       LOG(FATAL) << "Unsupported SIMD type";
53       UNREACHABLE();
54   }
55 }
56 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)57 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
58   LocationSummary* locations = instruction->GetLocations();
59   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
60 
61   // Shorthand for any type of zero.
62   if (IsZeroBitPattern(instruction->InputAt(0))) {
63     __ xorps(dst, dst);
64     return;
65   }
66 
67   switch (instruction->GetPackedType()) {
68     case DataType::Type::kBool:
69     case DataType::Type::kUint8:
70     case DataType::Type::kInt8:
71       DCHECK_EQ(16u, instruction->GetVectorLength());
72       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
73       __ punpcklbw(dst, dst);
74       __ punpcklwd(dst, dst);
75       __ pshufd(dst, dst, Immediate(0));
76       break;
77     case DataType::Type::kUint16:
78     case DataType::Type::kInt16:
79       DCHECK_EQ(8u, instruction->GetVectorLength());
80       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
81       __ punpcklwd(dst, dst);
82       __ pshufd(dst, dst, Immediate(0));
83       break;
84     case DataType::Type::kInt32:
85       DCHECK_EQ(4u, instruction->GetVectorLength());
86       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
87       __ pshufd(dst, dst, Immediate(0));
88       break;
89     case DataType::Type::kInt64:
90       DCHECK_EQ(2u, instruction->GetVectorLength());
91       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
92       __ punpcklqdq(dst, dst);
93       break;
94     case DataType::Type::kFloat32:
95       DCHECK_EQ(4u, instruction->GetVectorLength());
96       DCHECK(locations->InAt(0).Equals(locations->Out()));
97       __ shufps(dst, dst, Immediate(0));
98       break;
99     case DataType::Type::kFloat64:
100       DCHECK_EQ(2u, instruction->GetVectorLength());
101       DCHECK(locations->InAt(0).Equals(locations->Out()));
102       __ shufpd(dst, dst, Immediate(0));
103       break;
104     default:
105       LOG(FATAL) << "Unsupported SIMD type";
106       UNREACHABLE();
107   }
108 }
109 
VisitVecExtractScalar(HVecExtractScalar * instruction)110 void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
111   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
112   switch (instruction->GetPackedType()) {
113     case DataType::Type::kBool:
114     case DataType::Type::kUint8:
115     case DataType::Type::kInt8:
116     case DataType::Type::kUint16:
117     case DataType::Type::kInt16:
118     case DataType::Type::kInt32:
119     case DataType::Type::kInt64:
120       locations->SetInAt(0, Location::RequiresFpuRegister());
121       locations->SetOut(Location::RequiresRegister());
122       break;
123     case DataType::Type::kFloat32:
124     case DataType::Type::kFloat64:
125       locations->SetInAt(0, Location::RequiresFpuRegister());
126       locations->SetOut(Location::SameAsFirstInput());
127       break;
128     default:
129       LOG(FATAL) << "Unsupported SIMD type";
130       UNREACHABLE();
131   }
132 }
133 
VisitVecExtractScalar(HVecExtractScalar * instruction)134 void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
135   LocationSummary* locations = instruction->GetLocations();
136   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
137   switch (instruction->GetPackedType()) {
138     case DataType::Type::kBool:
139     case DataType::Type::kUint8:
140     case DataType::Type::kInt8:
141     case DataType::Type::kUint16:
142     case DataType::Type::kInt16:  // TODO: up to here, and?
143       LOG(FATAL) << "Unsupported SIMD type";
144       UNREACHABLE();
145     case DataType::Type::kInt32:
146       DCHECK_EQ(4u, instruction->GetVectorLength());
147       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
148       break;
149     case DataType::Type::kInt64:
150       DCHECK_EQ(2u, instruction->GetVectorLength());
151       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
152       break;
153     case DataType::Type::kFloat32:
154     case DataType::Type::kFloat64:
155       DCHECK_LE(2u, instruction->GetVectorLength());
156       DCHECK_LE(instruction->GetVectorLength(), 4u);
157       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
158       break;
159     default:
160       LOG(FATAL) << "Unsupported SIMD type";
161       UNREACHABLE();
162   }
163 }
164 
165 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)166 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
167   LocationSummary* locations = new (allocator) LocationSummary(instruction);
168   switch (instruction->GetPackedType()) {
169     case DataType::Type::kBool:
170     case DataType::Type::kUint8:
171     case DataType::Type::kInt8:
172     case DataType::Type::kUint16:
173     case DataType::Type::kInt16:
174     case DataType::Type::kInt32:
175     case DataType::Type::kInt64:
176     case DataType::Type::kFloat32:
177     case DataType::Type::kFloat64:
178       locations->SetInAt(0, Location::RequiresFpuRegister());
179       locations->SetOut(Location::RequiresFpuRegister());
180       break;
181     default:
182       LOG(FATAL) << "Unsupported SIMD type";
183       UNREACHABLE();
184   }
185 }
186 
VisitVecReduce(HVecReduce * instruction)187 void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
188   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
189   // Long reduction or min/max require a temporary.
190   if (instruction->GetPackedType() == DataType::Type::kInt64 ||
191       instruction->GetKind() == HVecReduce::kMin ||
192       instruction->GetKind() == HVecReduce::kMax) {
193     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
194   }
195 }
196 
VisitVecReduce(HVecReduce * instruction)197 void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
198   LocationSummary* locations = instruction->GetLocations();
199   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
200   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
201   switch (instruction->GetPackedType()) {
202     case DataType::Type::kInt32:
203       DCHECK_EQ(4u, instruction->GetVectorLength());
204       switch (instruction->GetKind()) {
205         case HVecReduce::kSum:
206           __ movaps(dst, src);
207           __ phaddd(dst, dst);
208           __ phaddd(dst, dst);
209           break;
210         case HVecReduce::kMin: {
211           XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
212           __ movaps(tmp, src);
213           __ movaps(dst, src);
214           __ psrldq(tmp, Immediate(8));
215           __ pminsd(dst, tmp);
216           __ psrldq(tmp, Immediate(4));
217           __ pminsd(dst, tmp);
218           break;
219         }
220         case HVecReduce::kMax: {
221           XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
222           __ movaps(tmp, src);
223           __ movaps(dst, src);
224           __ psrldq(tmp, Immediate(8));
225           __ pmaxsd(dst, tmp);
226           __ psrldq(tmp, Immediate(4));
227           __ pmaxsd(dst, tmp);
228           break;
229         }
230       }
231       break;
232     case DataType::Type::kInt64: {
233       DCHECK_EQ(2u, instruction->GetVectorLength());
234       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
235       switch (instruction->GetKind()) {
236         case HVecReduce::kSum:
237           __ movaps(tmp, src);
238           __ movaps(dst, src);
239           __ punpckhqdq(tmp, tmp);
240           __ paddq(dst, tmp);
241           break;
242         case HVecReduce::kMin:
243         case HVecReduce::kMax:
244           LOG(FATAL) << "Unsupported SIMD type";
245       }
246       break;
247     }
248     default:
249       LOG(FATAL) << "Unsupported SIMD type";
250       UNREACHABLE();
251   }
252 }
253 
VisitVecCnv(HVecCnv * instruction)254 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
255   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
256 }
257 
VisitVecCnv(HVecCnv * instruction)258 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
259   LocationSummary* locations = instruction->GetLocations();
260   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
261   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
262   DataType::Type from = instruction->GetInputType();
263   DataType::Type to = instruction->GetResultType();
264   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
265     DCHECK_EQ(4u, instruction->GetVectorLength());
266     __ cvtdq2ps(dst, src);
267   } else {
268     LOG(FATAL) << "Unsupported SIMD type";
269   }
270 }
271 
VisitVecNeg(HVecNeg * instruction)272 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
273   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
274 }
275 
VisitVecNeg(HVecNeg * instruction)276 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
277   LocationSummary* locations = instruction->GetLocations();
278   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
279   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
280   switch (instruction->GetPackedType()) {
281     case DataType::Type::kUint8:
282     case DataType::Type::kInt8:
283       DCHECK_EQ(16u, instruction->GetVectorLength());
284       __ pxor(dst, dst);
285       __ psubb(dst, src);
286       break;
287     case DataType::Type::kUint16:
288     case DataType::Type::kInt16:
289       DCHECK_EQ(8u, instruction->GetVectorLength());
290       __ pxor(dst, dst);
291       __ psubw(dst, src);
292       break;
293     case DataType::Type::kInt32:
294       DCHECK_EQ(4u, instruction->GetVectorLength());
295       __ pxor(dst, dst);
296       __ psubd(dst, src);
297       break;
298     case DataType::Type::kInt64:
299       DCHECK_EQ(2u, instruction->GetVectorLength());
300       __ pxor(dst, dst);
301       __ psubq(dst, src);
302       break;
303     case DataType::Type::kFloat32:
304       DCHECK_EQ(4u, instruction->GetVectorLength());
305       __ xorps(dst, dst);
306       __ subps(dst, src);
307       break;
308     case DataType::Type::kFloat64:
309       DCHECK_EQ(2u, instruction->GetVectorLength());
310       __ xorpd(dst, dst);
311       __ subpd(dst, src);
312       break;
313     default:
314       LOG(FATAL) << "Unsupported SIMD type";
315       UNREACHABLE();
316   }
317 }
318 
VisitVecAbs(HVecAbs * instruction)319 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
320   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
321   // Integral-abs requires a temporary for the comparison.
322   if (instruction->GetPackedType() == DataType::Type::kInt32) {
323     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
324   }
325 }
326 
VisitVecAbs(HVecAbs * instruction)327 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
328   LocationSummary* locations = instruction->GetLocations();
329   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
330   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
331   switch (instruction->GetPackedType()) {
332     case DataType::Type::kInt32: {
333       DCHECK_EQ(4u, instruction->GetVectorLength());
334       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
335       __ movaps(dst, src);
336       __ pxor(tmp, tmp);
337       __ pcmpgtd(tmp, dst);
338       __ pxor(dst, tmp);
339       __ psubd(dst, tmp);
340       break;
341     }
342     case DataType::Type::kFloat32:
343       DCHECK_EQ(4u, instruction->GetVectorLength());
344       __ pcmpeqb(dst, dst);  // all ones
345       __ psrld(dst, Immediate(1));
346       __ andps(dst, src);
347       break;
348     case DataType::Type::kFloat64:
349       DCHECK_EQ(2u, instruction->GetVectorLength());
350       __ pcmpeqb(dst, dst);  // all ones
351       __ psrlq(dst, Immediate(1));
352       __ andpd(dst, src);
353       break;
354     default:
355       LOG(FATAL) << "Unsupported SIMD type";
356       UNREACHABLE();
357   }
358 }
359 
VisitVecNot(HVecNot * instruction)360 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
361   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
362   // Boolean-not requires a temporary to construct the 16 x one.
363   if (instruction->GetPackedType() == DataType::Type::kBool) {
364     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
365   }
366 }
367 
VisitVecNot(HVecNot * instruction)368 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
369   LocationSummary* locations = instruction->GetLocations();
370   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
371   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
372   switch (instruction->GetPackedType()) {
373     case DataType::Type::kBool: {  // special case boolean-not
374       DCHECK_EQ(16u, instruction->GetVectorLength());
375       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
376       __ pxor(dst, dst);
377       __ pcmpeqb(tmp, tmp);  // all ones
378       __ psubb(dst, tmp);  // 16 x one
379       __ pxor(dst, src);
380       break;
381     }
382     case DataType::Type::kUint8:
383     case DataType::Type::kInt8:
384     case DataType::Type::kUint16:
385     case DataType::Type::kInt16:
386     case DataType::Type::kInt32:
387     case DataType::Type::kInt64:
388       DCHECK_LE(2u, instruction->GetVectorLength());
389       DCHECK_LE(instruction->GetVectorLength(), 16u);
390       __ pcmpeqb(dst, dst);  // all ones
391       __ pxor(dst, src);
392       break;
393     case DataType::Type::kFloat32:
394       DCHECK_EQ(4u, instruction->GetVectorLength());
395       __ pcmpeqb(dst, dst);  // all ones
396       __ xorps(dst, src);
397       break;
398     case DataType::Type::kFloat64:
399       DCHECK_EQ(2u, instruction->GetVectorLength());
400       __ pcmpeqb(dst, dst);  // all ones
401       __ xorpd(dst, src);
402       break;
403     default:
404       LOG(FATAL) << "Unsupported SIMD type";
405       UNREACHABLE();
406   }
407 }
408 
409 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)410 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
411   LocationSummary* locations = new (allocator) LocationSummary(instruction);
412   switch (instruction->GetPackedType()) {
413     case DataType::Type::kBool:
414     case DataType::Type::kUint8:
415     case DataType::Type::kInt8:
416     case DataType::Type::kUint16:
417     case DataType::Type::kInt16:
418     case DataType::Type::kInt32:
419     case DataType::Type::kInt64:
420     case DataType::Type::kFloat32:
421     case DataType::Type::kFloat64:
422       locations->SetInAt(0, Location::RequiresFpuRegister());
423       locations->SetInAt(1, Location::RequiresFpuRegister());
424       locations->SetOut(Location::SameAsFirstInput());
425       break;
426     default:
427       LOG(FATAL) << "Unsupported SIMD type";
428       UNREACHABLE();
429   }
430 }
431 
VisitVecAdd(HVecAdd * instruction)432 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
433   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
434 }
435 
VisitVecAdd(HVecAdd * instruction)436 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
437   LocationSummary* locations = instruction->GetLocations();
438   DCHECK(locations->InAt(0).Equals(locations->Out()));
439   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
440   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
441   switch (instruction->GetPackedType()) {
442     case DataType::Type::kUint8:
443     case DataType::Type::kInt8:
444       DCHECK_EQ(16u, instruction->GetVectorLength());
445       __ paddb(dst, src);
446       break;
447     case DataType::Type::kUint16:
448     case DataType::Type::kInt16:
449       DCHECK_EQ(8u, instruction->GetVectorLength());
450       __ paddw(dst, src);
451       break;
452     case DataType::Type::kInt32:
453       DCHECK_EQ(4u, instruction->GetVectorLength());
454       __ paddd(dst, src);
455       break;
456     case DataType::Type::kInt64:
457       DCHECK_EQ(2u, instruction->GetVectorLength());
458       __ paddq(dst, src);
459       break;
460     case DataType::Type::kFloat32:
461       DCHECK_EQ(4u, instruction->GetVectorLength());
462       __ addps(dst, src);
463       break;
464     case DataType::Type::kFloat64:
465       DCHECK_EQ(2u, instruction->GetVectorLength());
466       __ addpd(dst, src);
467       break;
468     default:
469       LOG(FATAL) << "Unsupported SIMD type";
470       UNREACHABLE();
471   }
472 }
473 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)474 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
475   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
476 }
477 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)478 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
479   LocationSummary* locations = instruction->GetLocations();
480   DCHECK(locations->InAt(0).Equals(locations->Out()));
481   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
482   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
483 
484   DCHECK(instruction->IsRounded());
485 
486   switch (instruction->GetPackedType()) {
487     case DataType::Type::kUint8:
488       DCHECK_EQ(16u, instruction->GetVectorLength());
489      __ pavgb(dst, src);
490      return;
491     case DataType::Type::kUint16:
492       DCHECK_EQ(8u, instruction->GetVectorLength());
493       __ pavgw(dst, src);
494       return;
495     default:
496       LOG(FATAL) << "Unsupported SIMD type";
497       UNREACHABLE();
498   }
499 }
500 
VisitVecSub(HVecSub * instruction)501 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
502   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
503 }
504 
VisitVecSub(HVecSub * instruction)505 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
506   LocationSummary* locations = instruction->GetLocations();
507   DCHECK(locations->InAt(0).Equals(locations->Out()));
508   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
509   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
510   switch (instruction->GetPackedType()) {
511     case DataType::Type::kUint8:
512     case DataType::Type::kInt8:
513       DCHECK_EQ(16u, instruction->GetVectorLength());
514       __ psubb(dst, src);
515       break;
516     case DataType::Type::kUint16:
517     case DataType::Type::kInt16:
518       DCHECK_EQ(8u, instruction->GetVectorLength());
519       __ psubw(dst, src);
520       break;
521     case DataType::Type::kInt32:
522       DCHECK_EQ(4u, instruction->GetVectorLength());
523       __ psubd(dst, src);
524       break;
525     case DataType::Type::kInt64:
526       DCHECK_EQ(2u, instruction->GetVectorLength());
527       __ psubq(dst, src);
528       break;
529     case DataType::Type::kFloat32:
530       DCHECK_EQ(4u, instruction->GetVectorLength());
531       __ subps(dst, src);
532       break;
533     case DataType::Type::kFloat64:
534       DCHECK_EQ(2u, instruction->GetVectorLength());
535       __ subpd(dst, src);
536       break;
537     default:
538       LOG(FATAL) << "Unsupported SIMD type";
539       UNREACHABLE();
540   }
541 }
542 
VisitVecMul(HVecMul * instruction)543 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
544   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
545 }
546 
VisitVecMul(HVecMul * instruction)547 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
548   LocationSummary* locations = instruction->GetLocations();
549   DCHECK(locations->InAt(0).Equals(locations->Out()));
550   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
551   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
552   switch (instruction->GetPackedType()) {
553     case DataType::Type::kUint16:
554     case DataType::Type::kInt16:
555       DCHECK_EQ(8u, instruction->GetVectorLength());
556       __ pmullw(dst, src);
557       break;
558     case DataType::Type::kInt32:
559       DCHECK_EQ(4u, instruction->GetVectorLength());
560       __ pmulld(dst, src);
561       break;
562     case DataType::Type::kFloat32:
563       DCHECK_EQ(4u, instruction->GetVectorLength());
564       __ mulps(dst, src);
565       break;
566     case DataType::Type::kFloat64:
567       DCHECK_EQ(2u, instruction->GetVectorLength());
568       __ mulpd(dst, src);
569       break;
570     default:
571       LOG(FATAL) << "Unsupported SIMD type";
572       UNREACHABLE();
573   }
574 }
575 
VisitVecDiv(HVecDiv * instruction)576 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
577   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
578 }
579 
VisitVecDiv(HVecDiv * instruction)580 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
581   LocationSummary* locations = instruction->GetLocations();
582   DCHECK(locations->InAt(0).Equals(locations->Out()));
583   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
584   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
585   switch (instruction->GetPackedType()) {
586     case DataType::Type::kFloat32:
587       DCHECK_EQ(4u, instruction->GetVectorLength());
588       __ divps(dst, src);
589       break;
590     case DataType::Type::kFloat64:
591       DCHECK_EQ(2u, instruction->GetVectorLength());
592       __ divpd(dst, src);
593       break;
594     default:
595       LOG(FATAL) << "Unsupported SIMD type";
596       UNREACHABLE();
597   }
598 }
599 
VisitVecMin(HVecMin * instruction)600 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
601   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
602 }
603 
VisitVecMin(HVecMin * instruction)604 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
605   LocationSummary* locations = instruction->GetLocations();
606   DCHECK(locations->InAt(0).Equals(locations->Out()));
607   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
608   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
609   switch (instruction->GetPackedType()) {
610     case DataType::Type::kUint8:
611       DCHECK_EQ(16u, instruction->GetVectorLength());
612       __ pminub(dst, src);
613       break;
614     case DataType::Type::kInt8:
615       DCHECK_EQ(16u, instruction->GetVectorLength());
616       __ pminsb(dst, src);
617       break;
618     case DataType::Type::kUint16:
619       DCHECK_EQ(8u, instruction->GetVectorLength());
620       __ pminuw(dst, src);
621       break;
622     case DataType::Type::kInt16:
623       DCHECK_EQ(8u, instruction->GetVectorLength());
624       __ pminsw(dst, src);
625       break;
626     case DataType::Type::kUint32:
627       DCHECK_EQ(4u, instruction->GetVectorLength());
628       __ pminud(dst, src);
629       break;
630     case DataType::Type::kInt32:
631       DCHECK_EQ(4u, instruction->GetVectorLength());
632       __ pminsd(dst, src);
633       break;
634     // Next cases are sloppy wrt 0.0 vs -0.0.
635     case DataType::Type::kFloat32:
636       DCHECK_EQ(4u, instruction->GetVectorLength());
637       __ minps(dst, src);
638       break;
639     case DataType::Type::kFloat64:
640       DCHECK_EQ(2u, instruction->GetVectorLength());
641       __ minpd(dst, src);
642       break;
643     default:
644       LOG(FATAL) << "Unsupported SIMD type";
645       UNREACHABLE();
646   }
647 }
648 
VisitVecMax(HVecMax * instruction)649 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
650   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
651 }
652 
VisitVecMax(HVecMax * instruction)653 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
654   LocationSummary* locations = instruction->GetLocations();
655   DCHECK(locations->InAt(0).Equals(locations->Out()));
656   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
657   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
658   switch (instruction->GetPackedType()) {
659     case DataType::Type::kUint8:
660       DCHECK_EQ(16u, instruction->GetVectorLength());
661       __ pmaxub(dst, src);
662       break;
663     case DataType::Type::kInt8:
664       DCHECK_EQ(16u, instruction->GetVectorLength());
665       __ pmaxsb(dst, src);
666       break;
667     case DataType::Type::kUint16:
668       DCHECK_EQ(8u, instruction->GetVectorLength());
669       __ pmaxuw(dst, src);
670       break;
671     case DataType::Type::kInt16:
672       DCHECK_EQ(8u, instruction->GetVectorLength());
673       __ pmaxsw(dst, src);
674       break;
675     case DataType::Type::kUint32:
676       DCHECK_EQ(4u, instruction->GetVectorLength());
677       __ pmaxud(dst, src);
678       break;
679     case DataType::Type::kInt32:
680       DCHECK_EQ(4u, instruction->GetVectorLength());
681       __ pmaxsd(dst, src);
682       break;
683     // Next cases are sloppy wrt 0.0 vs -0.0.
684     case DataType::Type::kFloat32:
685       DCHECK_EQ(4u, instruction->GetVectorLength());
686       __ maxps(dst, src);
687       break;
688     case DataType::Type::kFloat64:
689       DCHECK_EQ(2u, instruction->GetVectorLength());
690       __ maxpd(dst, src);
691       break;
692     default:
693       LOG(FATAL) << "Unsupported SIMD type";
694       UNREACHABLE();
695   }
696 }
697 
VisitVecAnd(HVecAnd * instruction)698 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
699   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
700 }
701 
VisitVecAnd(HVecAnd * instruction)702 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
703   LocationSummary* locations = instruction->GetLocations();
704   DCHECK(locations->InAt(0).Equals(locations->Out()));
705   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
706   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
707   switch (instruction->GetPackedType()) {
708     case DataType::Type::kBool:
709     case DataType::Type::kUint8:
710     case DataType::Type::kInt8:
711     case DataType::Type::kUint16:
712     case DataType::Type::kInt16:
713     case DataType::Type::kInt32:
714     case DataType::Type::kInt64:
715       DCHECK_LE(2u, instruction->GetVectorLength());
716       DCHECK_LE(instruction->GetVectorLength(), 16u);
717       __ pand(dst, src);
718       break;
719     case DataType::Type::kFloat32:
720       DCHECK_EQ(4u, instruction->GetVectorLength());
721       __ andps(dst, src);
722       break;
723     case DataType::Type::kFloat64:
724       DCHECK_EQ(2u, instruction->GetVectorLength());
725       __ andpd(dst, src);
726       break;
727     default:
728       LOG(FATAL) << "Unsupported SIMD type";
729       UNREACHABLE();
730   }
731 }
732 
VisitVecAndNot(HVecAndNot * instruction)733 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
734   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
735 }
736 
VisitVecAndNot(HVecAndNot * instruction)737 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
738   LocationSummary* locations = instruction->GetLocations();
739   DCHECK(locations->InAt(0).Equals(locations->Out()));
740   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
741   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
742   switch (instruction->GetPackedType()) {
743     case DataType::Type::kBool:
744     case DataType::Type::kUint8:
745     case DataType::Type::kInt8:
746     case DataType::Type::kUint16:
747     case DataType::Type::kInt16:
748     case DataType::Type::kInt32:
749     case DataType::Type::kInt64:
750       DCHECK_LE(2u, instruction->GetVectorLength());
751       DCHECK_LE(instruction->GetVectorLength(), 16u);
752       __ pandn(dst, src);
753       break;
754     case DataType::Type::kFloat32:
755       DCHECK_EQ(4u, instruction->GetVectorLength());
756       __ andnps(dst, src);
757       break;
758     case DataType::Type::kFloat64:
759       DCHECK_EQ(2u, instruction->GetVectorLength());
760       __ andnpd(dst, src);
761       break;
762     default:
763       LOG(FATAL) << "Unsupported SIMD type";
764       UNREACHABLE();
765   }
766 }
767 
VisitVecOr(HVecOr * instruction)768 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
769   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
770 }
771 
VisitVecOr(HVecOr * instruction)772 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
773   LocationSummary* locations = instruction->GetLocations();
774   DCHECK(locations->InAt(0).Equals(locations->Out()));
775   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
776   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
777   switch (instruction->GetPackedType()) {
778     case DataType::Type::kBool:
779     case DataType::Type::kUint8:
780     case DataType::Type::kInt8:
781     case DataType::Type::kUint16:
782     case DataType::Type::kInt16:
783     case DataType::Type::kInt32:
784     case DataType::Type::kInt64:
785       DCHECK_LE(2u, instruction->GetVectorLength());
786       DCHECK_LE(instruction->GetVectorLength(), 16u);
787       __ por(dst, src);
788       break;
789     case DataType::Type::kFloat32:
790       DCHECK_EQ(4u, instruction->GetVectorLength());
791       __ orps(dst, src);
792       break;
793     case DataType::Type::kFloat64:
794       DCHECK_EQ(2u, instruction->GetVectorLength());
795       __ orpd(dst, src);
796       break;
797     default:
798       LOG(FATAL) << "Unsupported SIMD type";
799       UNREACHABLE();
800   }
801 }
802 
VisitVecXor(HVecXor * instruction)803 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
804   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
805 }
806 
VisitVecXor(HVecXor * instruction)807 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
808   LocationSummary* locations = instruction->GetLocations();
809   DCHECK(locations->InAt(0).Equals(locations->Out()));
810   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
811   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
812   switch (instruction->GetPackedType()) {
813     case DataType::Type::kBool:
814     case DataType::Type::kUint8:
815     case DataType::Type::kInt8:
816     case DataType::Type::kUint16:
817     case DataType::Type::kInt16:
818     case DataType::Type::kInt32:
819     case DataType::Type::kInt64:
820       DCHECK_LE(2u, instruction->GetVectorLength());
821       DCHECK_LE(instruction->GetVectorLength(), 16u);
822       __ pxor(dst, src);
823       break;
824     case DataType::Type::kFloat32:
825       DCHECK_EQ(4u, instruction->GetVectorLength());
826       __ xorps(dst, src);
827       break;
828     case DataType::Type::kFloat64:
829       DCHECK_EQ(2u, instruction->GetVectorLength());
830       __ xorpd(dst, src);
831       break;
832     default:
833       LOG(FATAL) << "Unsupported SIMD type";
834       UNREACHABLE();
835   }
836 }
837 
838 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)839 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
840   LocationSummary* locations = new (allocator) LocationSummary(instruction);
841   switch (instruction->GetPackedType()) {
842     case DataType::Type::kUint16:
843     case DataType::Type::kInt16:
844     case DataType::Type::kInt32:
845     case DataType::Type::kInt64:
846       locations->SetInAt(0, Location::RequiresFpuRegister());
847       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
848       locations->SetOut(Location::SameAsFirstInput());
849       break;
850     default:
851       LOG(FATAL) << "Unsupported SIMD type";
852       UNREACHABLE();
853   }
854 }
855 
VisitVecShl(HVecShl * instruction)856 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
857   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
858 }
859 
VisitVecShl(HVecShl * instruction)860 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
861   LocationSummary* locations = instruction->GetLocations();
862   DCHECK(locations->InAt(0).Equals(locations->Out()));
863   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
864   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
865   switch (instruction->GetPackedType()) {
866     case DataType::Type::kUint16:
867     case DataType::Type::kInt16:
868       DCHECK_EQ(8u, instruction->GetVectorLength());
869       __ psllw(dst, Immediate(static_cast<int8_t>(value)));
870       break;
871     case DataType::Type::kInt32:
872       DCHECK_EQ(4u, instruction->GetVectorLength());
873       __ pslld(dst, Immediate(static_cast<int8_t>(value)));
874       break;
875     case DataType::Type::kInt64:
876       DCHECK_EQ(2u, instruction->GetVectorLength());
877       __ psllq(dst, Immediate(static_cast<int8_t>(value)));
878       break;
879     default:
880       LOG(FATAL) << "Unsupported SIMD type";
881       UNREACHABLE();
882   }
883 }
884 
VisitVecShr(HVecShr * instruction)885 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
886   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
887 }
888 
VisitVecShr(HVecShr * instruction)889 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
890   LocationSummary* locations = instruction->GetLocations();
891   DCHECK(locations->InAt(0).Equals(locations->Out()));
892   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
893   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
894   switch (instruction->GetPackedType()) {
895     case DataType::Type::kUint16:
896     case DataType::Type::kInt16:
897       DCHECK_EQ(8u, instruction->GetVectorLength());
898       __ psraw(dst, Immediate(static_cast<int8_t>(value)));
899       break;
900     case DataType::Type::kInt32:
901       DCHECK_EQ(4u, instruction->GetVectorLength());
902       __ psrad(dst, Immediate(static_cast<int8_t>(value)));
903       break;
904     default:
905       LOG(FATAL) << "Unsupported SIMD type";
906       UNREACHABLE();
907   }
908 }
909 
VisitVecUShr(HVecUShr * instruction)910 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
911   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
912 }
913 
VisitVecUShr(HVecUShr * instruction)914 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
915   LocationSummary* locations = instruction->GetLocations();
916   DCHECK(locations->InAt(0).Equals(locations->Out()));
917   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
918   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
919   switch (instruction->GetPackedType()) {
920     case DataType::Type::kUint16:
921     case DataType::Type::kInt16:
922       DCHECK_EQ(8u, instruction->GetVectorLength());
923       __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
924       break;
925     case DataType::Type::kInt32:
926       DCHECK_EQ(4u, instruction->GetVectorLength());
927       __ psrld(dst, Immediate(static_cast<int8_t>(value)));
928       break;
929     case DataType::Type::kInt64:
930       DCHECK_EQ(2u, instruction->GetVectorLength());
931       __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
932       break;
933     default:
934       LOG(FATAL) << "Unsupported SIMD type";
935       UNREACHABLE();
936   }
937 }
938 
VisitVecSetScalars(HVecSetScalars * instruction)939 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
940   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
941 
942   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
943 
944   HInstruction* input = instruction->InputAt(0);
945   bool is_zero = IsZeroBitPattern(input);
946 
947   switch (instruction->GetPackedType()) {
948     case DataType::Type::kBool:
949     case DataType::Type::kUint8:
950     case DataType::Type::kInt8:
951     case DataType::Type::kUint16:
952     case DataType::Type::kInt16:
953     case DataType::Type::kInt32:
954     case DataType::Type::kInt64:
955       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
956                                     : Location::RequiresRegister());
957       locations->SetOut(Location::RequiresFpuRegister());
958       break;
959     case DataType::Type::kFloat32:
960     case DataType::Type::kFloat64:
961       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
962                                     : Location::RequiresFpuRegister());
963       locations->SetOut(Location::RequiresFpuRegister());
964       break;
965     default:
966       LOG(FATAL) << "Unsupported SIMD type";
967       UNREACHABLE();
968   }
969 }
970 
VisitVecSetScalars(HVecSetScalars * instruction)971 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
972   LocationSummary* locations = instruction->GetLocations();
973   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
974 
975   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
976 
977   // Zero out all other elements first.
978   __ xorps(dst, dst);
979 
980   // Shorthand for any type of zero.
981   if (IsZeroBitPattern(instruction->InputAt(0))) {
982     return;
983   }
984 
985   // Set required elements.
986   switch (instruction->GetPackedType()) {
987     case DataType::Type::kBool:
988     case DataType::Type::kUint8:
989     case DataType::Type::kInt8:
990     case DataType::Type::kUint16:
991     case DataType::Type::kInt16:  // TODO: up to here, and?
992       LOG(FATAL) << "Unsupported SIMD type";
993       UNREACHABLE();
994     case DataType::Type::kInt32:
995       DCHECK_EQ(4u, instruction->GetVectorLength());
996       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
997       break;
998     case DataType::Type::kInt64:
999       DCHECK_EQ(2u, instruction->GetVectorLength());
1000       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());  // is 64-bit
1001       break;
1002     case DataType::Type::kFloat32:
1003       DCHECK_EQ(4u, instruction->GetVectorLength());
1004       __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1005       break;
1006     case DataType::Type::kFloat64:
1007       DCHECK_EQ(2u, instruction->GetVectorLength());
1008       __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1009       break;
1010     default:
1011       LOG(FATAL) << "Unsupported SIMD type";
1012       UNREACHABLE();
1013   }
1014 }
1015 
1016 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1017 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1018   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1019   switch (instruction->GetPackedType()) {
1020     case DataType::Type::kUint8:
1021     case DataType::Type::kInt8:
1022     case DataType::Type::kUint16:
1023     case DataType::Type::kInt16:
1024     case DataType::Type::kInt32:
1025     case DataType::Type::kInt64:
1026       locations->SetInAt(0, Location::RequiresFpuRegister());
1027       locations->SetInAt(1, Location::RequiresFpuRegister());
1028       locations->SetInAt(2, Location::RequiresFpuRegister());
1029       locations->SetOut(Location::SameAsFirstInput());
1030       break;
1031     default:
1032       LOG(FATAL) << "Unsupported SIMD type";
1033       UNREACHABLE();
1034   }
1035 }
1036 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1037 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1038   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1039 }
1040 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1041 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1042   // TODO: pmaddwd?
1043   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1044 }
1045 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1046 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1047   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1048 }
1049 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1050 void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1051   // TODO: psadbw for unsigned?
1052   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1053 }
1054 
1055 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1056 static void CreateVecMemLocations(ArenaAllocator* allocator,
1057                                   HVecMemoryOperation* instruction,
1058                                   bool is_load) {
1059   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1060   switch (instruction->GetPackedType()) {
1061     case DataType::Type::kBool:
1062     case DataType::Type::kUint8:
1063     case DataType::Type::kInt8:
1064     case DataType::Type::kUint16:
1065     case DataType::Type::kInt16:
1066     case DataType::Type::kInt32:
1067     case DataType::Type::kInt64:
1068     case DataType::Type::kFloat32:
1069     case DataType::Type::kFloat64:
1070       locations->SetInAt(0, Location::RequiresRegister());
1071       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1072       if (is_load) {
1073         locations->SetOut(Location::RequiresFpuRegister());
1074       } else {
1075         locations->SetInAt(2, Location::RequiresFpuRegister());
1076       }
1077       break;
1078     default:
1079       LOG(FATAL) << "Unsupported SIMD type";
1080       UNREACHABLE();
1081   }
1082 }
1083 
1084 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1085 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1086   Location base = locations->InAt(0);
1087   Location index = locations->InAt(1);
1088   ScaleFactor scale = TIMES_1;
1089   switch (size) {
1090     case 2: scale = TIMES_2; break;
1091     case 4: scale = TIMES_4; break;
1092     case 8: scale = TIMES_8; break;
1093     default: break;
1094   }
1095   // Incorporate the string or array offset in the address computation.
1096   uint32_t offset = is_string_char_at
1097       ? mirror::String::ValueOffset().Uint32Value()
1098       : mirror::Array::DataOffset(size).Uint32Value();
1099   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
1100 }
1101 
VisitVecLoad(HVecLoad * instruction)1102 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
1103   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1104   // String load requires a temporary for the compressed load.
1105   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1106     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1107   }
1108 }
1109 
VisitVecLoad(HVecLoad * instruction)1110 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
1111   LocationSummary* locations = instruction->GetLocations();
1112   size_t size = DataType::Size(instruction->GetPackedType());
1113   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1114   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1115   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1116   switch (instruction->GetPackedType()) {
1117     case DataType::Type::kUint16:
1118       DCHECK_EQ(8u, instruction->GetVectorLength());
1119       // Special handling of compressed/uncompressed string load.
1120       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1121         NearLabel done, not_compressed;
1122         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1123         // Test compression bit.
1124         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1125                       "Expecting 0=compressed, 1=uncompressed");
1126         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1127         __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
1128         __ j(kNotZero, &not_compressed);
1129         // Zero extend 8 compressed bytes into 8 chars.
1130         __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1131         __ pxor(tmp, tmp);
1132         __ punpcklbw(reg, tmp);
1133         __ jmp(&done);
1134         // Load 8 direct uncompressed chars.
1135         __ Bind(&not_compressed);
1136         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
1137         __ Bind(&done);
1138         return;
1139       }
1140       FALLTHROUGH_INTENDED;
1141     case DataType::Type::kBool:
1142     case DataType::Type::kUint8:
1143     case DataType::Type::kInt8:
1144     case DataType::Type::kInt16:
1145     case DataType::Type::kInt32:
1146     case DataType::Type::kInt64:
1147       DCHECK_LE(2u, instruction->GetVectorLength());
1148       DCHECK_LE(instruction->GetVectorLength(), 16u);
1149       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1150       break;
1151     case DataType::Type::kFloat32:
1152       DCHECK_EQ(4u, instruction->GetVectorLength());
1153       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1154       break;
1155     case DataType::Type::kFloat64:
1156       DCHECK_EQ(2u, instruction->GetVectorLength());
1157       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1158       break;
1159     default:
1160       LOG(FATAL) << "Unsupported SIMD type";
1161       UNREACHABLE();
1162   }
1163 }
1164 
VisitVecStore(HVecStore * instruction)1165 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
1166   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1167 }
1168 
VisitVecStore(HVecStore * instruction)1169 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
1170   LocationSummary* locations = instruction->GetLocations();
1171   size_t size = DataType::Size(instruction->GetPackedType());
1172   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1173   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1174   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1175   switch (instruction->GetPackedType()) {
1176     case DataType::Type::kBool:
1177     case DataType::Type::kUint8:
1178     case DataType::Type::kInt8:
1179     case DataType::Type::kUint16:
1180     case DataType::Type::kInt16:
1181     case DataType::Type::kInt32:
1182     case DataType::Type::kInt64:
1183       DCHECK_LE(2u, instruction->GetVectorLength());
1184       DCHECK_LE(instruction->GetVectorLength(), 16u);
1185       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1186       break;
1187     case DataType::Type::kFloat32:
1188       DCHECK_EQ(4u, instruction->GetVectorLength());
1189       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1190       break;
1191     case DataType::Type::kFloat64:
1192       DCHECK_EQ(2u, instruction->GetVectorLength());
1193       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1194       break;
1195     default:
1196       LOG(FATAL) << "Unsupported SIMD type";
1197       UNREACHABLE();
1198   }
1199 }
1200 
1201 #undef __
1202 
1203 }  // namespace x86_64
1204 }  // namespace art
1205