1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21 
22 namespace art {
23 namespace x86_64 {
24 
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
27 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30   HInstruction* input = instruction->InputAt(0);
31   bool is_zero = IsZeroBitPattern(input);
32   switch (instruction->GetPackedType()) {
33     case DataType::Type::kBool:
34     case DataType::Type::kUint8:
35     case DataType::Type::kInt8:
36     case DataType::Type::kUint16:
37     case DataType::Type::kInt16:
38     case DataType::Type::kInt32:
39     case DataType::Type::kInt64:
40       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
41                                     : Location::RequiresRegister());
42       locations->SetOut(Location::RequiresFpuRegister());
43       break;
44     case DataType::Type::kFloat32:
45     case DataType::Type::kFloat64:
46       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
47                                     : Location::RequiresFpuRegister());
48       locations->SetOut(is_zero ? Location::RequiresFpuRegister()
49                                 : Location::SameAsFirstInput());
50       break;
51     default:
52       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
53       UNREACHABLE();
54   }
55 }
56 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)57 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
58   LocationSummary* locations = instruction->GetLocations();
59   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
60 
61   // Shorthand for any type of zero.
62   if (IsZeroBitPattern(instruction->InputAt(0))) {
63     __ xorps(dst, dst);
64     return;
65   }
66 
67   switch (instruction->GetPackedType()) {
68     case DataType::Type::kBool:
69     case DataType::Type::kUint8:
70     case DataType::Type::kInt8:
71       DCHECK_EQ(16u, instruction->GetVectorLength());
72       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
73       __ punpcklbw(dst, dst);
74       __ punpcklwd(dst, dst);
75       __ pshufd(dst, dst, Immediate(0));
76       break;
77     case DataType::Type::kUint16:
78     case DataType::Type::kInt16:
79       DCHECK_EQ(8u, instruction->GetVectorLength());
80       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
81       __ punpcklwd(dst, dst);
82       __ pshufd(dst, dst, Immediate(0));
83       break;
84     case DataType::Type::kInt32:
85       DCHECK_EQ(4u, instruction->GetVectorLength());
86       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
87       __ pshufd(dst, dst, Immediate(0));
88       break;
89     case DataType::Type::kInt64:
90       DCHECK_EQ(2u, instruction->GetVectorLength());
91       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
92       __ punpcklqdq(dst, dst);
93       break;
94     case DataType::Type::kFloat32:
95       DCHECK_EQ(4u, instruction->GetVectorLength());
96       DCHECK(locations->InAt(0).Equals(locations->Out()));
97       __ shufps(dst, dst, Immediate(0));
98       break;
99     case DataType::Type::kFloat64:
100       DCHECK_EQ(2u, instruction->GetVectorLength());
101       DCHECK(locations->InAt(0).Equals(locations->Out()));
102       __ shufpd(dst, dst, Immediate(0));
103       break;
104     default:
105       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
106       UNREACHABLE();
107   }
108 }
109 
VisitVecExtractScalar(HVecExtractScalar * instruction)110 void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
111   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
112   switch (instruction->GetPackedType()) {
113     case DataType::Type::kBool:
114     case DataType::Type::kUint8:
115     case DataType::Type::kInt8:
116     case DataType::Type::kUint16:
117     case DataType::Type::kInt16:
118     case DataType::Type::kInt32:
119     case DataType::Type::kInt64:
120       locations->SetInAt(0, Location::RequiresFpuRegister());
121       locations->SetOut(Location::RequiresRegister());
122       break;
123     case DataType::Type::kFloat32:
124     case DataType::Type::kFloat64:
125       locations->SetInAt(0, Location::RequiresFpuRegister());
126       locations->SetOut(Location::SameAsFirstInput());
127       break;
128     default:
129       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
130       UNREACHABLE();
131   }
132 }
133 
VisitVecExtractScalar(HVecExtractScalar * instruction)134 void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
135   LocationSummary* locations = instruction->GetLocations();
136   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
137   switch (instruction->GetPackedType()) {
138     case DataType::Type::kBool:
139     case DataType::Type::kUint8:
140     case DataType::Type::kInt8:
141     case DataType::Type::kUint16:
142     case DataType::Type::kInt16:  // TODO: up to here, and?
143       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
144       UNREACHABLE();
145     case DataType::Type::kInt32:
146       DCHECK_EQ(4u, instruction->GetVectorLength());
147       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
148       break;
149     case DataType::Type::kInt64:
150       DCHECK_EQ(2u, instruction->GetVectorLength());
151       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
152       break;
153     case DataType::Type::kFloat32:
154     case DataType::Type::kFloat64:
155       DCHECK_LE(2u, instruction->GetVectorLength());
156       DCHECK_LE(instruction->GetVectorLength(), 4u);
157       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
158       break;
159     default:
160       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
161       UNREACHABLE();
162   }
163 }
164 
165 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)166 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
167   LocationSummary* locations = new (allocator) LocationSummary(instruction);
168   switch (instruction->GetPackedType()) {
169     case DataType::Type::kBool:
170     case DataType::Type::kUint8:
171     case DataType::Type::kInt8:
172     case DataType::Type::kUint16:
173     case DataType::Type::kInt16:
174     case DataType::Type::kInt32:
175     case DataType::Type::kInt64:
176     case DataType::Type::kFloat32:
177     case DataType::Type::kFloat64:
178       locations->SetInAt(0, Location::RequiresFpuRegister());
179       locations->SetOut(Location::RequiresFpuRegister());
180       break;
181     default:
182       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
183       UNREACHABLE();
184   }
185 }
186 
VisitVecReduce(HVecReduce * instruction)187 void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
188   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
189   // Long reduction or min/max require a temporary.
190   if (instruction->GetPackedType() == DataType::Type::kInt64 ||
191       instruction->GetReductionKind() == HVecReduce::kMin ||
192       instruction->GetReductionKind() == HVecReduce::kMax) {
193     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
194   }
195 }
196 
VisitVecReduce(HVecReduce * instruction)197 void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
198   LocationSummary* locations = instruction->GetLocations();
199   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
200   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
201   switch (instruction->GetPackedType()) {
202     case DataType::Type::kInt32:
203       DCHECK_EQ(4u, instruction->GetVectorLength());
204       switch (instruction->GetReductionKind()) {
205         case HVecReduce::kSum:
206           __ movaps(dst, src);
207           __ phaddd(dst, dst);
208           __ phaddd(dst, dst);
209           break;
210         case HVecReduce::kMin:
211         case HVecReduce::kMax:
212           // Historical note: We've had a broken implementation here. b/117863065
213           // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
214           LOG(FATAL) << "Unsupported reduction type.";
215       }
216       break;
217     case DataType::Type::kInt64: {
218       DCHECK_EQ(2u, instruction->GetVectorLength());
219       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
220       switch (instruction->GetReductionKind()) {
221         case HVecReduce::kSum:
222           __ movaps(tmp, src);
223           __ movaps(dst, src);
224           __ punpckhqdq(tmp, tmp);
225           __ paddq(dst, tmp);
226           break;
227         case HVecReduce::kMin:
228         case HVecReduce::kMax:
229           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
230       }
231       break;
232     }
233     default:
234       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
235       UNREACHABLE();
236   }
237 }
238 
VisitVecCnv(HVecCnv * instruction)239 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
240   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
241 }
242 
VisitVecCnv(HVecCnv * instruction)243 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
244   LocationSummary* locations = instruction->GetLocations();
245   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
246   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
247   DataType::Type from = instruction->GetInputType();
248   DataType::Type to = instruction->GetResultType();
249   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
250     DCHECK_EQ(4u, instruction->GetVectorLength());
251     __ cvtdq2ps(dst, src);
252   } else {
253     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
254   }
255 }
256 
VisitVecNeg(HVecNeg * instruction)257 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
258   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
259 }
260 
VisitVecNeg(HVecNeg * instruction)261 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
262   LocationSummary* locations = instruction->GetLocations();
263   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
264   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
265   switch (instruction->GetPackedType()) {
266     case DataType::Type::kUint8:
267     case DataType::Type::kInt8:
268       DCHECK_EQ(16u, instruction->GetVectorLength());
269       __ pxor(dst, dst);
270       __ psubb(dst, src);
271       break;
272     case DataType::Type::kUint16:
273     case DataType::Type::kInt16:
274       DCHECK_EQ(8u, instruction->GetVectorLength());
275       __ pxor(dst, dst);
276       __ psubw(dst, src);
277       break;
278     case DataType::Type::kInt32:
279       DCHECK_EQ(4u, instruction->GetVectorLength());
280       __ pxor(dst, dst);
281       __ psubd(dst, src);
282       break;
283     case DataType::Type::kInt64:
284       DCHECK_EQ(2u, instruction->GetVectorLength());
285       __ pxor(dst, dst);
286       __ psubq(dst, src);
287       break;
288     case DataType::Type::kFloat32:
289       DCHECK_EQ(4u, instruction->GetVectorLength());
290       __ xorps(dst, dst);
291       __ subps(dst, src);
292       break;
293     case DataType::Type::kFloat64:
294       DCHECK_EQ(2u, instruction->GetVectorLength());
295       __ xorpd(dst, dst);
296       __ subpd(dst, src);
297       break;
298     default:
299       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
300       UNREACHABLE();
301   }
302 }
303 
VisitVecAbs(HVecAbs * instruction)304 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
305   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
306   // Integral-abs requires a temporary for the comparison.
307   if (instruction->GetPackedType() == DataType::Type::kInt32) {
308     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
309   }
310 }
311 
VisitVecAbs(HVecAbs * instruction)312 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
313   LocationSummary* locations = instruction->GetLocations();
314   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
315   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
316   switch (instruction->GetPackedType()) {
317     case DataType::Type::kInt32: {
318       DCHECK_EQ(4u, instruction->GetVectorLength());
319       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
320       __ movaps(dst, src);
321       __ pxor(tmp, tmp);
322       __ pcmpgtd(tmp, dst);
323       __ pxor(dst, tmp);
324       __ psubd(dst, tmp);
325       break;
326     }
327     case DataType::Type::kFloat32:
328       DCHECK_EQ(4u, instruction->GetVectorLength());
329       __ pcmpeqb(dst, dst);  // all ones
330       __ psrld(dst, Immediate(1));
331       __ andps(dst, src);
332       break;
333     case DataType::Type::kFloat64:
334       DCHECK_EQ(2u, instruction->GetVectorLength());
335       __ pcmpeqb(dst, dst);  // all ones
336       __ psrlq(dst, Immediate(1));
337       __ andpd(dst, src);
338       break;
339     default:
340       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
341       UNREACHABLE();
342   }
343 }
344 
VisitVecNot(HVecNot * instruction)345 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
346   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
347   // Boolean-not requires a temporary to construct the 16 x one.
348   if (instruction->GetPackedType() == DataType::Type::kBool) {
349     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
350   }
351 }
352 
VisitVecNot(HVecNot * instruction)353 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
354   LocationSummary* locations = instruction->GetLocations();
355   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
356   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
357   switch (instruction->GetPackedType()) {
358     case DataType::Type::kBool: {  // special case boolean-not
359       DCHECK_EQ(16u, instruction->GetVectorLength());
360       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
361       __ pxor(dst, dst);
362       __ pcmpeqb(tmp, tmp);  // all ones
363       __ psubb(dst, tmp);  // 16 x one
364       __ pxor(dst, src);
365       break;
366     }
367     case DataType::Type::kUint8:
368     case DataType::Type::kInt8:
369     case DataType::Type::kUint16:
370     case DataType::Type::kInt16:
371     case DataType::Type::kInt32:
372     case DataType::Type::kInt64:
373       DCHECK_LE(2u, instruction->GetVectorLength());
374       DCHECK_LE(instruction->GetVectorLength(), 16u);
375       __ pcmpeqb(dst, dst);  // all ones
376       __ pxor(dst, src);
377       break;
378     case DataType::Type::kFloat32:
379       DCHECK_EQ(4u, instruction->GetVectorLength());
380       __ pcmpeqb(dst, dst);  // all ones
381       __ xorps(dst, src);
382       break;
383     case DataType::Type::kFloat64:
384       DCHECK_EQ(2u, instruction->GetVectorLength());
385       __ pcmpeqb(dst, dst);  // all ones
386       __ xorpd(dst, src);
387       break;
388     default:
389       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
390       UNREACHABLE();
391   }
392 }
393 
394 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)395 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
396   LocationSummary* locations = new (allocator) LocationSummary(instruction);
397   switch (instruction->GetPackedType()) {
398     case DataType::Type::kBool:
399     case DataType::Type::kUint8:
400     case DataType::Type::kInt8:
401     case DataType::Type::kUint16:
402     case DataType::Type::kInt16:
403     case DataType::Type::kInt32:
404     case DataType::Type::kInt64:
405     case DataType::Type::kFloat32:
406     case DataType::Type::kFloat64:
407       locations->SetInAt(0, Location::RequiresFpuRegister());
408       locations->SetInAt(1, Location::RequiresFpuRegister());
409       locations->SetOut(Location::SameAsFirstInput());
410       break;
411     default:
412       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
413       UNREACHABLE();
414   }
415 }
416 
VisitVecAdd(HVecAdd * instruction)417 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
418   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
419 }
420 
VisitVecAdd(HVecAdd * instruction)421 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
422   LocationSummary* locations = instruction->GetLocations();
423   DCHECK(locations->InAt(0).Equals(locations->Out()));
424   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
425   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
426   switch (instruction->GetPackedType()) {
427     case DataType::Type::kUint8:
428     case DataType::Type::kInt8:
429       DCHECK_EQ(16u, instruction->GetVectorLength());
430       __ paddb(dst, src);
431       break;
432     case DataType::Type::kUint16:
433     case DataType::Type::kInt16:
434       DCHECK_EQ(8u, instruction->GetVectorLength());
435       __ paddw(dst, src);
436       break;
437     case DataType::Type::kInt32:
438       DCHECK_EQ(4u, instruction->GetVectorLength());
439       __ paddd(dst, src);
440       break;
441     case DataType::Type::kInt64:
442       DCHECK_EQ(2u, instruction->GetVectorLength());
443       __ paddq(dst, src);
444       break;
445     case DataType::Type::kFloat32:
446       DCHECK_EQ(4u, instruction->GetVectorLength());
447       __ addps(dst, src);
448       break;
449     case DataType::Type::kFloat64:
450       DCHECK_EQ(2u, instruction->GetVectorLength());
451       __ addpd(dst, src);
452       break;
453     default:
454       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
455       UNREACHABLE();
456   }
457 }
458 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)459 void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
460   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
461 }
462 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)463 void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
464   LocationSummary* locations = instruction->GetLocations();
465   DCHECK(locations->InAt(0).Equals(locations->Out()));
466   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
467   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
468   switch (instruction->GetPackedType()) {
469     case DataType::Type::kUint8:
470       DCHECK_EQ(16u, instruction->GetVectorLength());
471       __ paddusb(dst, src);
472       break;
473     case DataType::Type::kInt8:
474       DCHECK_EQ(16u, instruction->GetVectorLength());
475       __ paddsb(dst, src);
476       break;
477     case DataType::Type::kUint16:
478       DCHECK_EQ(8u, instruction->GetVectorLength());
479       __ paddusw(dst, src);
480       break;
481     case DataType::Type::kInt16:
482       DCHECK_EQ(8u, instruction->GetVectorLength());
483       __ paddsw(dst, src);
484       break;
485     default:
486       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
487       UNREACHABLE();
488   }
489 }
490 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)491 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
492   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
493 }
494 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)495 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
496   LocationSummary* locations = instruction->GetLocations();
497   DCHECK(locations->InAt(0).Equals(locations->Out()));
498   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
499   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
500 
501   DCHECK(instruction->IsRounded());
502 
503   switch (instruction->GetPackedType()) {
504     case DataType::Type::kUint8:
505       DCHECK_EQ(16u, instruction->GetVectorLength());
506       __ pavgb(dst, src);
507       break;
508     case DataType::Type::kUint16:
509       DCHECK_EQ(8u, instruction->GetVectorLength());
510       __ pavgw(dst, src);
511       break;
512     default:
513       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
514       UNREACHABLE();
515   }
516 }
517 
VisitVecSub(HVecSub * instruction)518 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
519   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
520 }
521 
VisitVecSub(HVecSub * instruction)522 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
523   LocationSummary* locations = instruction->GetLocations();
524   DCHECK(locations->InAt(0).Equals(locations->Out()));
525   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
526   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
527   switch (instruction->GetPackedType()) {
528     case DataType::Type::kUint8:
529     case DataType::Type::kInt8:
530       DCHECK_EQ(16u, instruction->GetVectorLength());
531       __ psubb(dst, src);
532       break;
533     case DataType::Type::kUint16:
534     case DataType::Type::kInt16:
535       DCHECK_EQ(8u, instruction->GetVectorLength());
536       __ psubw(dst, src);
537       break;
538     case DataType::Type::kInt32:
539       DCHECK_EQ(4u, instruction->GetVectorLength());
540       __ psubd(dst, src);
541       break;
542     case DataType::Type::kInt64:
543       DCHECK_EQ(2u, instruction->GetVectorLength());
544       __ psubq(dst, src);
545       break;
546     case DataType::Type::kFloat32:
547       DCHECK_EQ(4u, instruction->GetVectorLength());
548       __ subps(dst, src);
549       break;
550     case DataType::Type::kFloat64:
551       DCHECK_EQ(2u, instruction->GetVectorLength());
552       __ subpd(dst, src);
553       break;
554     default:
555       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
556       UNREACHABLE();
557   }
558 }
559 
VisitVecSaturationSub(HVecSaturationSub * instruction)560 void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
561   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
562 }
563 
VisitVecSaturationSub(HVecSaturationSub * instruction)564 void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
565   LocationSummary* locations = instruction->GetLocations();
566   DCHECK(locations->InAt(0).Equals(locations->Out()));
567   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
568   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
569   switch (instruction->GetPackedType()) {
570     case DataType::Type::kUint8:
571       DCHECK_EQ(16u, instruction->GetVectorLength());
572       __ psubusb(dst, src);
573       break;
574     case DataType::Type::kInt8:
575       DCHECK_EQ(16u, instruction->GetVectorLength());
576       __ psubsb(dst, src);
577       break;
578     case DataType::Type::kUint16:
579       DCHECK_EQ(8u, instruction->GetVectorLength());
580       __ psubusw(dst, src);
581       break;
582     case DataType::Type::kInt16:
583       DCHECK_EQ(8u, instruction->GetVectorLength());
584       __ psubsw(dst, src);
585       break;
586     default:
587       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
588       UNREACHABLE();
589   }
590 }
591 
VisitVecMul(HVecMul * instruction)592 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
593   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
594 }
595 
VisitVecMul(HVecMul * instruction)596 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
597   LocationSummary* locations = instruction->GetLocations();
598   DCHECK(locations->InAt(0).Equals(locations->Out()));
599   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
600   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
601   switch (instruction->GetPackedType()) {
602     case DataType::Type::kUint16:
603     case DataType::Type::kInt16:
604       DCHECK_EQ(8u, instruction->GetVectorLength());
605       __ pmullw(dst, src);
606       break;
607     case DataType::Type::kInt32:
608       DCHECK_EQ(4u, instruction->GetVectorLength());
609       __ pmulld(dst, src);
610       break;
611     case DataType::Type::kFloat32:
612       DCHECK_EQ(4u, instruction->GetVectorLength());
613       __ mulps(dst, src);
614       break;
615     case DataType::Type::kFloat64:
616       DCHECK_EQ(2u, instruction->GetVectorLength());
617       __ mulpd(dst, src);
618       break;
619     default:
620       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
621       UNREACHABLE();
622   }
623 }
624 
VisitVecDiv(HVecDiv * instruction)625 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
626   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
627 }
628 
VisitVecDiv(HVecDiv * instruction)629 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
630   LocationSummary* locations = instruction->GetLocations();
631   DCHECK(locations->InAt(0).Equals(locations->Out()));
632   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
633   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
634   switch (instruction->GetPackedType()) {
635     case DataType::Type::kFloat32:
636       DCHECK_EQ(4u, instruction->GetVectorLength());
637       __ divps(dst, src);
638       break;
639     case DataType::Type::kFloat64:
640       DCHECK_EQ(2u, instruction->GetVectorLength());
641       __ divpd(dst, src);
642       break;
643     default:
644       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
645       UNREACHABLE();
646   }
647 }
648 
VisitVecMin(HVecMin * instruction)649 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
650   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
651 }
652 
VisitVecMin(HVecMin * instruction)653 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
654   LocationSummary* locations = instruction->GetLocations();
655   DCHECK(locations->InAt(0).Equals(locations->Out()));
656   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
657   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
658   switch (instruction->GetPackedType()) {
659     case DataType::Type::kUint8:
660       DCHECK_EQ(16u, instruction->GetVectorLength());
661       __ pminub(dst, src);
662       break;
663     case DataType::Type::kInt8:
664       DCHECK_EQ(16u, instruction->GetVectorLength());
665       __ pminsb(dst, src);
666       break;
667     case DataType::Type::kUint16:
668       DCHECK_EQ(8u, instruction->GetVectorLength());
669       __ pminuw(dst, src);
670       break;
671     case DataType::Type::kInt16:
672       DCHECK_EQ(8u, instruction->GetVectorLength());
673       __ pminsw(dst, src);
674       break;
675     case DataType::Type::kUint32:
676       DCHECK_EQ(4u, instruction->GetVectorLength());
677       __ pminud(dst, src);
678       break;
679     case DataType::Type::kInt32:
680       DCHECK_EQ(4u, instruction->GetVectorLength());
681       __ pminsd(dst, src);
682       break;
683     // Next cases are sloppy wrt 0.0 vs -0.0.
684     case DataType::Type::kFloat32:
685       DCHECK_EQ(4u, instruction->GetVectorLength());
686       __ minps(dst, src);
687       break;
688     case DataType::Type::kFloat64:
689       DCHECK_EQ(2u, instruction->GetVectorLength());
690       __ minpd(dst, src);
691       break;
692     default:
693       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
694       UNREACHABLE();
695   }
696 }
697 
VisitVecMax(HVecMax * instruction)698 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
699   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
700 }
701 
VisitVecMax(HVecMax * instruction)702 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
703   LocationSummary* locations = instruction->GetLocations();
704   DCHECK(locations->InAt(0).Equals(locations->Out()));
705   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
706   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
707   switch (instruction->GetPackedType()) {
708     case DataType::Type::kUint8:
709       DCHECK_EQ(16u, instruction->GetVectorLength());
710       __ pmaxub(dst, src);
711       break;
712     case DataType::Type::kInt8:
713       DCHECK_EQ(16u, instruction->GetVectorLength());
714       __ pmaxsb(dst, src);
715       break;
716     case DataType::Type::kUint16:
717       DCHECK_EQ(8u, instruction->GetVectorLength());
718       __ pmaxuw(dst, src);
719       break;
720     case DataType::Type::kInt16:
721       DCHECK_EQ(8u, instruction->GetVectorLength());
722       __ pmaxsw(dst, src);
723       break;
724     case DataType::Type::kUint32:
725       DCHECK_EQ(4u, instruction->GetVectorLength());
726       __ pmaxud(dst, src);
727       break;
728     case DataType::Type::kInt32:
729       DCHECK_EQ(4u, instruction->GetVectorLength());
730       __ pmaxsd(dst, src);
731       break;
732     // Next cases are sloppy wrt 0.0 vs -0.0.
733     case DataType::Type::kFloat32:
734       DCHECK_EQ(4u, instruction->GetVectorLength());
735       __ maxps(dst, src);
736       break;
737     case DataType::Type::kFloat64:
738       DCHECK_EQ(2u, instruction->GetVectorLength());
739       __ maxpd(dst, src);
740       break;
741     default:
742       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
743       UNREACHABLE();
744   }
745 }
746 
VisitVecAnd(HVecAnd * instruction)747 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
748   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
749 }
750 
VisitVecAnd(HVecAnd * instruction)751 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
752   LocationSummary* locations = instruction->GetLocations();
753   DCHECK(locations->InAt(0).Equals(locations->Out()));
754   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
755   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
756   switch (instruction->GetPackedType()) {
757     case DataType::Type::kBool:
758     case DataType::Type::kUint8:
759     case DataType::Type::kInt8:
760     case DataType::Type::kUint16:
761     case DataType::Type::kInt16:
762     case DataType::Type::kInt32:
763     case DataType::Type::kInt64:
764       DCHECK_LE(2u, instruction->GetVectorLength());
765       DCHECK_LE(instruction->GetVectorLength(), 16u);
766       __ pand(dst, src);
767       break;
768     case DataType::Type::kFloat32:
769       DCHECK_EQ(4u, instruction->GetVectorLength());
770       __ andps(dst, src);
771       break;
772     case DataType::Type::kFloat64:
773       DCHECK_EQ(2u, instruction->GetVectorLength());
774       __ andpd(dst, src);
775       break;
776     default:
777       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
778       UNREACHABLE();
779   }
780 }
781 
VisitVecAndNot(HVecAndNot * instruction)782 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
783   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
784 }
785 
VisitVecAndNot(HVecAndNot * instruction)786 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
787   LocationSummary* locations = instruction->GetLocations();
788   DCHECK(locations->InAt(0).Equals(locations->Out()));
789   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
790   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
791   switch (instruction->GetPackedType()) {
792     case DataType::Type::kBool:
793     case DataType::Type::kUint8:
794     case DataType::Type::kInt8:
795     case DataType::Type::kUint16:
796     case DataType::Type::kInt16:
797     case DataType::Type::kInt32:
798     case DataType::Type::kInt64:
799       DCHECK_LE(2u, instruction->GetVectorLength());
800       DCHECK_LE(instruction->GetVectorLength(), 16u);
801       __ pandn(dst, src);
802       break;
803     case DataType::Type::kFloat32:
804       DCHECK_EQ(4u, instruction->GetVectorLength());
805       __ andnps(dst, src);
806       break;
807     case DataType::Type::kFloat64:
808       DCHECK_EQ(2u, instruction->GetVectorLength());
809       __ andnpd(dst, src);
810       break;
811     default:
812       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
813       UNREACHABLE();
814   }
815 }
816 
VisitVecOr(HVecOr * instruction)817 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
818   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
819 }
820 
VisitVecOr(HVecOr * instruction)821 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
822   LocationSummary* locations = instruction->GetLocations();
823   DCHECK(locations->InAt(0).Equals(locations->Out()));
824   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
825   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
826   switch (instruction->GetPackedType()) {
827     case DataType::Type::kBool:
828     case DataType::Type::kUint8:
829     case DataType::Type::kInt8:
830     case DataType::Type::kUint16:
831     case DataType::Type::kInt16:
832     case DataType::Type::kInt32:
833     case DataType::Type::kInt64:
834       DCHECK_LE(2u, instruction->GetVectorLength());
835       DCHECK_LE(instruction->GetVectorLength(), 16u);
836       __ por(dst, src);
837       break;
838     case DataType::Type::kFloat32:
839       DCHECK_EQ(4u, instruction->GetVectorLength());
840       __ orps(dst, src);
841       break;
842     case DataType::Type::kFloat64:
843       DCHECK_EQ(2u, instruction->GetVectorLength());
844       __ orpd(dst, src);
845       break;
846     default:
847       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
848       UNREACHABLE();
849   }
850 }
851 
VisitVecXor(HVecXor * instruction)852 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
853   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
854 }
855 
VisitVecXor(HVecXor * instruction)856 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
857   LocationSummary* locations = instruction->GetLocations();
858   DCHECK(locations->InAt(0).Equals(locations->Out()));
859   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
860   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
861   switch (instruction->GetPackedType()) {
862     case DataType::Type::kBool:
863     case DataType::Type::kUint8:
864     case DataType::Type::kInt8:
865     case DataType::Type::kUint16:
866     case DataType::Type::kInt16:
867     case DataType::Type::kInt32:
868     case DataType::Type::kInt64:
869       DCHECK_LE(2u, instruction->GetVectorLength());
870       DCHECK_LE(instruction->GetVectorLength(), 16u);
871       __ pxor(dst, src);
872       break;
873     case DataType::Type::kFloat32:
874       DCHECK_EQ(4u, instruction->GetVectorLength());
875       __ xorps(dst, src);
876       break;
877     case DataType::Type::kFloat64:
878       DCHECK_EQ(2u, instruction->GetVectorLength());
879       __ xorpd(dst, src);
880       break;
881     default:
882       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
883       UNREACHABLE();
884   }
885 }
886 
887 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)888 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
889   LocationSummary* locations = new (allocator) LocationSummary(instruction);
890   switch (instruction->GetPackedType()) {
891     case DataType::Type::kUint16:
892     case DataType::Type::kInt16:
893     case DataType::Type::kInt32:
894     case DataType::Type::kInt64:
895       locations->SetInAt(0, Location::RequiresFpuRegister());
896       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
897       locations->SetOut(Location::SameAsFirstInput());
898       break;
899     default:
900       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
901       UNREACHABLE();
902   }
903 }
904 
VisitVecShl(HVecShl * instruction)905 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
906   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
907 }
908 
VisitVecShl(HVecShl * instruction)909 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
910   LocationSummary* locations = instruction->GetLocations();
911   DCHECK(locations->InAt(0).Equals(locations->Out()));
912   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
913   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
914   switch (instruction->GetPackedType()) {
915     case DataType::Type::kUint16:
916     case DataType::Type::kInt16:
917       DCHECK_EQ(8u, instruction->GetVectorLength());
918       __ psllw(dst, Immediate(static_cast<int8_t>(value)));
919       break;
920     case DataType::Type::kInt32:
921       DCHECK_EQ(4u, instruction->GetVectorLength());
922       __ pslld(dst, Immediate(static_cast<int8_t>(value)));
923       break;
924     case DataType::Type::kInt64:
925       DCHECK_EQ(2u, instruction->GetVectorLength());
926       __ psllq(dst, Immediate(static_cast<int8_t>(value)));
927       break;
928     default:
929       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
930       UNREACHABLE();
931   }
932 }
933 
VisitVecShr(HVecShr * instruction)934 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
935   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
936 }
937 
VisitVecShr(HVecShr * instruction)938 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
939   LocationSummary* locations = instruction->GetLocations();
940   DCHECK(locations->InAt(0).Equals(locations->Out()));
941   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
942   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
943   switch (instruction->GetPackedType()) {
944     case DataType::Type::kUint16:
945     case DataType::Type::kInt16:
946       DCHECK_EQ(8u, instruction->GetVectorLength());
947       __ psraw(dst, Immediate(static_cast<int8_t>(value)));
948       break;
949     case DataType::Type::kInt32:
950       DCHECK_EQ(4u, instruction->GetVectorLength());
951       __ psrad(dst, Immediate(static_cast<int8_t>(value)));
952       break;
953     default:
954       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
955       UNREACHABLE();
956   }
957 }
958 
VisitVecUShr(HVecUShr * instruction)959 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
960   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
961 }
962 
VisitVecUShr(HVecUShr * instruction)963 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
964   LocationSummary* locations = instruction->GetLocations();
965   DCHECK(locations->InAt(0).Equals(locations->Out()));
966   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
967   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
968   switch (instruction->GetPackedType()) {
969     case DataType::Type::kUint16:
970     case DataType::Type::kInt16:
971       DCHECK_EQ(8u, instruction->GetVectorLength());
972       __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
973       break;
974     case DataType::Type::kInt32:
975       DCHECK_EQ(4u, instruction->GetVectorLength());
976       __ psrld(dst, Immediate(static_cast<int8_t>(value)));
977       break;
978     case DataType::Type::kInt64:
979       DCHECK_EQ(2u, instruction->GetVectorLength());
980       __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
981       break;
982     default:
983       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
984       UNREACHABLE();
985   }
986 }
987 
VisitVecSetScalars(HVecSetScalars * instruction)988 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
989   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
990 
991   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
992 
993   HInstruction* input = instruction->InputAt(0);
994   bool is_zero = IsZeroBitPattern(input);
995 
996   switch (instruction->GetPackedType()) {
997     case DataType::Type::kBool:
998     case DataType::Type::kUint8:
999     case DataType::Type::kInt8:
1000     case DataType::Type::kUint16:
1001     case DataType::Type::kInt16:
1002     case DataType::Type::kInt32:
1003     case DataType::Type::kInt64:
1004       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1005                                     : Location::RequiresRegister());
1006       locations->SetOut(Location::RequiresFpuRegister());
1007       break;
1008     case DataType::Type::kFloat32:
1009     case DataType::Type::kFloat64:
1010       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1011                                     : Location::RequiresFpuRegister());
1012       locations->SetOut(Location::RequiresFpuRegister());
1013       break;
1014     default:
1015       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1016       UNREACHABLE();
1017   }
1018 }
1019 
VisitVecSetScalars(HVecSetScalars * instruction)1020 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
1021   LocationSummary* locations = instruction->GetLocations();
1022   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1023 
1024   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1025 
1026   // Zero out all other elements first.
1027   __ xorps(dst, dst);
1028 
1029   // Shorthand for any type of zero.
1030   if (IsZeroBitPattern(instruction->InputAt(0))) {
1031     return;
1032   }
1033 
1034   // Set required elements.
1035   switch (instruction->GetPackedType()) {
1036     case DataType::Type::kBool:
1037     case DataType::Type::kUint8:
1038     case DataType::Type::kInt8:
1039     case DataType::Type::kUint16:
1040     case DataType::Type::kInt16:  // TODO: up to here, and?
1041       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1042       UNREACHABLE();
1043     case DataType::Type::kInt32:
1044       DCHECK_EQ(4u, instruction->GetVectorLength());
1045       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
1046       break;
1047     case DataType::Type::kInt64:
1048       DCHECK_EQ(2u, instruction->GetVectorLength());
1049       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());  // is 64-bit
1050       break;
1051     case DataType::Type::kFloat32:
1052       DCHECK_EQ(4u, instruction->GetVectorLength());
1053       __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1054       break;
1055     case DataType::Type::kFloat64:
1056       DCHECK_EQ(2u, instruction->GetVectorLength());
1057       __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1058       break;
1059     default:
1060       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1061       UNREACHABLE();
1062   }
1063 }
1064 
1065 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1066 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1067   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1068   switch (instruction->GetPackedType()) {
1069     case DataType::Type::kUint8:
1070     case DataType::Type::kInt8:
1071     case DataType::Type::kUint16:
1072     case DataType::Type::kInt16:
1073     case DataType::Type::kInt32:
1074     case DataType::Type::kInt64:
1075       locations->SetInAt(0, Location::RequiresFpuRegister());
1076       locations->SetInAt(1, Location::RequiresFpuRegister());
1077       locations->SetInAt(2, Location::RequiresFpuRegister());
1078       locations->SetOut(Location::SameAsFirstInput());
1079       break;
1080     default:
1081       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1082       UNREACHABLE();
1083   }
1084 }
1085 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1086 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1087   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1088 }
1089 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1090 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1091   // TODO: pmaddwd?
1092   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1093 }
1094 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1095 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1096   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1097 }
1098 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1099 void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1100   // TODO: psadbw for unsigned?
1101   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1102 }
1103 
VisitVecDotProd(HVecDotProd * instruction)1104 void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
1105   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1106 }
1107 
VisitVecDotProd(HVecDotProd * instruction)1108 void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
1109   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1110 }
1111 
1112 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1113 static void CreateVecMemLocations(ArenaAllocator* allocator,
1114                                   HVecMemoryOperation* instruction,
1115                                   bool is_load) {
1116   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1117   switch (instruction->GetPackedType()) {
1118     case DataType::Type::kBool:
1119     case DataType::Type::kUint8:
1120     case DataType::Type::kInt8:
1121     case DataType::Type::kUint16:
1122     case DataType::Type::kInt16:
1123     case DataType::Type::kInt32:
1124     case DataType::Type::kInt64:
1125     case DataType::Type::kFloat32:
1126     case DataType::Type::kFloat64:
1127       locations->SetInAt(0, Location::RequiresRegister());
1128       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1129       if (is_load) {
1130         locations->SetOut(Location::RequiresFpuRegister());
1131       } else {
1132         locations->SetInAt(2, Location::RequiresFpuRegister());
1133       }
1134       break;
1135     default:
1136       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1137       UNREACHABLE();
1138   }
1139 }
1140 
1141 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1142 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1143   Location base = locations->InAt(0);
1144   Location index = locations->InAt(1);
1145   ScaleFactor scale = TIMES_1;
1146   switch (size) {
1147     case 2: scale = TIMES_2; break;
1148     case 4: scale = TIMES_4; break;
1149     case 8: scale = TIMES_8; break;
1150     default: break;
1151   }
1152   // Incorporate the string or array offset in the address computation.
1153   uint32_t offset = is_string_char_at
1154       ? mirror::String::ValueOffset().Uint32Value()
1155       : mirror::Array::DataOffset(size).Uint32Value();
1156   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
1157 }
1158 
VisitVecLoad(HVecLoad * instruction)1159 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
1160   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1161   // String load requires a temporary for the compressed load.
1162   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1163     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1164   }
1165 }
1166 
VisitVecLoad(HVecLoad * instruction)1167 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
1168   LocationSummary* locations = instruction->GetLocations();
1169   size_t size = DataType::Size(instruction->GetPackedType());
1170   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1171   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1172   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1173   switch (instruction->GetPackedType()) {
1174     case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
1175     case DataType::Type::kUint16:
1176       DCHECK_EQ(8u, instruction->GetVectorLength());
1177       // Special handling of compressed/uncompressed string load.
1178       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1179         NearLabel done, not_compressed;
1180         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1181         // Test compression bit.
1182         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1183                       "Expecting 0=compressed, 1=uncompressed");
1184         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1185         __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
1186         __ j(kNotZero, &not_compressed);
1187         // Zero extend 8 compressed bytes into 8 chars.
1188         __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1189         __ pxor(tmp, tmp);
1190         __ punpcklbw(reg, tmp);
1191         __ jmp(&done);
1192         // Load 8 direct uncompressed chars.
1193         __ Bind(&not_compressed);
1194         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
1195         __ Bind(&done);
1196         return;
1197       }
1198       FALLTHROUGH_INTENDED;
1199     case DataType::Type::kBool:
1200     case DataType::Type::kUint8:
1201     case DataType::Type::kInt8:
1202     case DataType::Type::kInt32:
1203     case DataType::Type::kInt64:
1204       DCHECK_LE(2u, instruction->GetVectorLength());
1205       DCHECK_LE(instruction->GetVectorLength(), 16u);
1206       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1207       break;
1208     case DataType::Type::kFloat32:
1209       DCHECK_EQ(4u, instruction->GetVectorLength());
1210       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1211       break;
1212     case DataType::Type::kFloat64:
1213       DCHECK_EQ(2u, instruction->GetVectorLength());
1214       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1215       break;
1216     default:
1217       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1218       UNREACHABLE();
1219   }
1220 }
1221 
VisitVecStore(HVecStore * instruction)1222 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
1223   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1224 }
1225 
VisitVecStore(HVecStore * instruction)1226 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
1227   LocationSummary* locations = instruction->GetLocations();
1228   size_t size = DataType::Size(instruction->GetPackedType());
1229   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1230   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1231   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1232   switch (instruction->GetPackedType()) {
1233     case DataType::Type::kBool:
1234     case DataType::Type::kUint8:
1235     case DataType::Type::kInt8:
1236     case DataType::Type::kUint16:
1237     case DataType::Type::kInt16:
1238     case DataType::Type::kInt32:
1239     case DataType::Type::kInt64:
1240       DCHECK_LE(2u, instruction->GetVectorLength());
1241       DCHECK_LE(instruction->GetVectorLength(), 16u);
1242       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1243       break;
1244     case DataType::Type::kFloat32:
1245       DCHECK_EQ(4u, instruction->GetVectorLength());
1246       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1247       break;
1248     case DataType::Type::kFloat64:
1249       DCHECK_EQ(2u, instruction->GetVectorLength());
1250       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1251       break;
1252     default:
1253       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1254       UNREACHABLE();
1255   }
1256 }
1257 
1258 #undef __
1259 
1260 }  // namespace x86_64
1261 }  // namespace art
1262