1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86.h"
18 #include "mirror/array-inl.h"
19 
20 namespace art {
21 namespace x86 {
22 
// `__` is shorthand for "emit through the assembler returned by GetAssembler(), downcast to
// X86Assembler". The trailing NOLINT suppresses clang-tidy's misc-macro-parentheses check,
// whose warning and suggested fix are wrong for this macro.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
25 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)26 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
27   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
28   switch (instruction->GetPackedType()) {
29     case Primitive::kPrimLong:
30       // Long needs extra temporary to load the register pair.
31       locations->AddTemp(Location::RequiresFpuRegister());
32       FALLTHROUGH_INTENDED;
33     case Primitive::kPrimBoolean:
34     case Primitive::kPrimByte:
35     case Primitive::kPrimChar:
36     case Primitive::kPrimShort:
37     case Primitive::kPrimInt:
38       locations->SetInAt(0, Location::RequiresRegister());
39       locations->SetOut(Location::RequiresFpuRegister());
40       break;
41     case Primitive::kPrimFloat:
42     case Primitive::kPrimDouble:
43       locations->SetInAt(0, Location::RequiresFpuRegister());
44       locations->SetOut(Location::SameAsFirstInput());
45       break;
46     default:
47       LOG(FATAL) << "Unsupported SIMD type";
48       UNREACHABLE();
49   }
50 }
51 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)52 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
53   LocationSummary* locations = instruction->GetLocations();
54   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
55   switch (instruction->GetPackedType()) {
56     case Primitive::kPrimBoolean:
57     case Primitive::kPrimByte:
58       DCHECK_EQ(16u, instruction->GetVectorLength());
59       __ movd(reg, locations->InAt(0).AsRegister<Register>());
60       __ punpcklbw(reg, reg);
61       __ punpcklwd(reg, reg);
62       __ pshufd(reg, reg, Immediate(0));
63       break;
64     case Primitive::kPrimChar:
65     case Primitive::kPrimShort:
66       DCHECK_EQ(8u, instruction->GetVectorLength());
67       __ movd(reg, locations->InAt(0).AsRegister<Register>());
68       __ punpcklwd(reg, reg);
69       __ pshufd(reg, reg, Immediate(0));
70       break;
71     case Primitive::kPrimInt:
72       DCHECK_EQ(4u, instruction->GetVectorLength());
73       __ movd(reg, locations->InAt(0).AsRegister<Register>());
74       __ pshufd(reg, reg, Immediate(0));
75       break;
76     case Primitive::kPrimLong: {
77       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
78       DCHECK_EQ(2u, instruction->GetVectorLength());
79       __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
80       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
81       __ punpckldq(reg, tmp);
82       __ punpcklqdq(reg, reg);
83       break;
84     }
85     case Primitive::kPrimFloat:
86       DCHECK(locations->InAt(0).Equals(locations->Out()));
87       DCHECK_EQ(4u, instruction->GetVectorLength());
88       __ shufps(reg, reg, Immediate(0));
89       break;
90     case Primitive::kPrimDouble:
91       DCHECK(locations->InAt(0).Equals(locations->Out()));
92       DCHECK_EQ(2u, instruction->GetVectorLength());
93       __ shufpd(reg, reg, Immediate(0));
94       break;
95     default:
96       LOG(FATAL) << "Unsupported SIMD type";
97       UNREACHABLE();
98   }
99 }
100 
VisitVecSetScalars(HVecSetScalars * instruction)101 void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
102   LOG(FATAL) << "No SIMD for " << instruction->GetId();
103 }
104 
VisitVecSetScalars(HVecSetScalars * instruction)105 void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
106   LOG(FATAL) << "No SIMD for " << instruction->GetId();
107 }
108 
VisitVecSumReduce(HVecSumReduce * instruction)109 void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
110   LOG(FATAL) << "No SIMD for " << instruction->GetId();
111 }
112 
VisitVecSumReduce(HVecSumReduce * instruction)113 void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
114   LOG(FATAL) << "No SIMD for " << instruction->GetId();
115 }
116 
117 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * arena,HVecUnaryOperation * instruction)118 static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
119   LocationSummary* locations = new (arena) LocationSummary(instruction);
120   switch (instruction->GetPackedType()) {
121     case Primitive::kPrimBoolean:
122     case Primitive::kPrimByte:
123     case Primitive::kPrimChar:
124     case Primitive::kPrimShort:
125     case Primitive::kPrimInt:
126     case Primitive::kPrimLong:
127     case Primitive::kPrimFloat:
128     case Primitive::kPrimDouble:
129       locations->SetInAt(0, Location::RequiresFpuRegister());
130       locations->SetOut(Location::RequiresFpuRegister());
131       break;
132     default:
133       LOG(FATAL) << "Unsupported SIMD type";
134       UNREACHABLE();
135   }
136 }
137 
VisitVecCnv(HVecCnv * instruction)138 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
139   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
140 }
141 
VisitVecCnv(HVecCnv * instruction)142 void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
143   LocationSummary* locations = instruction->GetLocations();
144   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
145   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
146   Primitive::Type from = instruction->GetInputType();
147   Primitive::Type to = instruction->GetResultType();
148   if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
149     DCHECK_EQ(4u, instruction->GetVectorLength());
150     __ cvtdq2ps(dst, src);
151   } else {
152     LOG(FATAL) << "Unsupported SIMD type";
153   }
154 }
155 
VisitVecNeg(HVecNeg * instruction)156 void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
157   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
158 }
159 
VisitVecNeg(HVecNeg * instruction)160 void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
161   LocationSummary* locations = instruction->GetLocations();
162   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
163   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
164   switch (instruction->GetPackedType()) {
165     case Primitive::kPrimByte:
166       DCHECK_EQ(16u, instruction->GetVectorLength());
167       __ pxor(dst, dst);
168       __ psubb(dst, src);
169       break;
170     case Primitive::kPrimChar:
171     case Primitive::kPrimShort:
172       DCHECK_EQ(8u, instruction->GetVectorLength());
173       __ pxor(dst, dst);
174       __ psubw(dst, src);
175       break;
176     case Primitive::kPrimInt:
177       DCHECK_EQ(4u, instruction->GetVectorLength());
178       __ pxor(dst, dst);
179       __ psubd(dst, src);
180       break;
181     case Primitive::kPrimLong:
182       DCHECK_EQ(2u, instruction->GetVectorLength());
183       __ pxor(dst, dst);
184       __ psubq(dst, src);
185       break;
186     case Primitive::kPrimFloat:
187       DCHECK_EQ(4u, instruction->GetVectorLength());
188       __ xorps(dst, dst);
189       __ subps(dst, src);
190       break;
191     case Primitive::kPrimDouble:
192       DCHECK_EQ(2u, instruction->GetVectorLength());
193       __ xorpd(dst, dst);
194       __ subpd(dst, src);
195       break;
196     default:
197       LOG(FATAL) << "Unsupported SIMD type";
198       UNREACHABLE();
199   }
200 }
201 
VisitVecAbs(HVecAbs * instruction)202 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
203   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
204   if (instruction->GetPackedType() == Primitive::kPrimInt) {
205     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
206   }
207 }
208 
VisitVecAbs(HVecAbs * instruction)209 void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
210   LocationSummary* locations = instruction->GetLocations();
211   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
212   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
213   switch (instruction->GetPackedType()) {
214     case Primitive::kPrimInt: {
215       DCHECK_EQ(4u, instruction->GetVectorLength());
216       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
217       __ movaps(dst, src);
218       __ pxor(tmp, tmp);
219       __ pcmpgtd(tmp, dst);
220       __ pxor(dst, tmp);
221       __ psubd(dst, tmp);
222       break;
223     }
224     case Primitive::kPrimFloat:
225       DCHECK_EQ(4u, instruction->GetVectorLength());
226       __ pcmpeqb(dst, dst);  // all ones
227       __ psrld(dst, Immediate(1));
228       __ andps(dst, src);
229       break;
230     case Primitive::kPrimDouble:
231       DCHECK_EQ(2u, instruction->GetVectorLength());
232       __ pcmpeqb(dst, dst);  // all ones
233       __ psrlq(dst, Immediate(1));
234       __ andpd(dst, src);
235       break;
236     default:
237       LOG(FATAL) << "Unsupported SIMD type";
238       UNREACHABLE();
239   }
240 }
241 
VisitVecNot(HVecNot * instruction)242 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
243   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
244   // Boolean-not requires a temporary to construct the 16 x one.
245   if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
246     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
247   }
248 }
249 
VisitVecNot(HVecNot * instruction)250 void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
251   LocationSummary* locations = instruction->GetLocations();
252   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
253   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
254   switch (instruction->GetPackedType()) {
255     case Primitive::kPrimBoolean: {  // special case boolean-not
256       DCHECK_EQ(16u, instruction->GetVectorLength());
257       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
258       __ pxor(dst, dst);
259       __ pcmpeqb(tmp, tmp);  // all ones
260       __ psubb(dst, tmp);  // 16 x one
261       __ pxor(dst, src);
262       break;
263     }
264     case Primitive::kPrimByte:
265     case Primitive::kPrimChar:
266     case Primitive::kPrimShort:
267     case Primitive::kPrimInt:
268     case Primitive::kPrimLong:
269       DCHECK_LE(2u, instruction->GetVectorLength());
270       DCHECK_LE(instruction->GetVectorLength(), 16u);
271       __ pcmpeqb(dst, dst);  // all ones
272       __ pxor(dst, src);
273       break;
274     case Primitive::kPrimFloat:
275       DCHECK_EQ(4u, instruction->GetVectorLength());
276       __ pcmpeqb(dst, dst);  // all ones
277       __ xorps(dst, src);
278       break;
279     case Primitive::kPrimDouble:
280       DCHECK_EQ(2u, instruction->GetVectorLength());
281       __ pcmpeqb(dst, dst);  // all ones
282       __ xorpd(dst, src);
283       break;
284     default:
285       LOG(FATAL) << "Unsupported SIMD type";
286       UNREACHABLE();
287   }
288 }
289 
290 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * arena,HVecBinaryOperation * instruction)291 static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
292   LocationSummary* locations = new (arena) LocationSummary(instruction);
293   switch (instruction->GetPackedType()) {
294     case Primitive::kPrimBoolean:
295     case Primitive::kPrimByte:
296     case Primitive::kPrimChar:
297     case Primitive::kPrimShort:
298     case Primitive::kPrimInt:
299     case Primitive::kPrimLong:
300     case Primitive::kPrimFloat:
301     case Primitive::kPrimDouble:
302       locations->SetInAt(0, Location::RequiresFpuRegister());
303       locations->SetInAt(1, Location::RequiresFpuRegister());
304       locations->SetOut(Location::SameAsFirstInput());
305       break;
306     default:
307       LOG(FATAL) << "Unsupported SIMD type";
308       UNREACHABLE();
309   }
310 }
311 
VisitVecAdd(HVecAdd * instruction)312 void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
313   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
314 }
315 
VisitVecAdd(HVecAdd * instruction)316 void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
317   LocationSummary* locations = instruction->GetLocations();
318   DCHECK(locations->InAt(0).Equals(locations->Out()));
319   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
320   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
321   switch (instruction->GetPackedType()) {
322     case Primitive::kPrimByte:
323       DCHECK_EQ(16u, instruction->GetVectorLength());
324       __ paddb(dst, src);
325       break;
326     case Primitive::kPrimChar:
327     case Primitive::kPrimShort:
328       DCHECK_EQ(8u, instruction->GetVectorLength());
329       __ paddw(dst, src);
330       break;
331     case Primitive::kPrimInt:
332       DCHECK_EQ(4u, instruction->GetVectorLength());
333       __ paddd(dst, src);
334       break;
335     case Primitive::kPrimLong:
336       DCHECK_EQ(2u, instruction->GetVectorLength());
337       __ paddq(dst, src);
338       break;
339     case Primitive::kPrimFloat:
340       DCHECK_EQ(4u, instruction->GetVectorLength());
341       __ addps(dst, src);
342       break;
343     case Primitive::kPrimDouble:
344       DCHECK_EQ(2u, instruction->GetVectorLength());
345       __ addpd(dst, src);
346       break;
347     default:
348       LOG(FATAL) << "Unsupported SIMD type";
349       UNREACHABLE();
350   }
351 }
352 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)353 void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
354   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
355 }
356 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)357 void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
358   LocationSummary* locations = instruction->GetLocations();
359   DCHECK(locations->InAt(0).Equals(locations->Out()));
360   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
361   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
362 
363   DCHECK(instruction->IsRounded());
364   DCHECK(instruction->IsUnsigned());
365 
366   switch (instruction->GetPackedType()) {
367     case Primitive::kPrimByte:
368       DCHECK_EQ(16u, instruction->GetVectorLength());
369      __ pavgb(dst, src);
370      return;
371     case Primitive::kPrimChar:
372     case Primitive::kPrimShort:
373       DCHECK_EQ(8u, instruction->GetVectorLength());
374       __ pavgw(dst, src);
375       return;
376     default:
377       LOG(FATAL) << "Unsupported SIMD type";
378       UNREACHABLE();
379   }
380 }
381 
VisitVecSub(HVecSub * instruction)382 void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
383   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
384 }
385 
VisitVecSub(HVecSub * instruction)386 void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
387   LocationSummary* locations = instruction->GetLocations();
388   DCHECK(locations->InAt(0).Equals(locations->Out()));
389   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
390   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
391   switch (instruction->GetPackedType()) {
392     case Primitive::kPrimByte:
393       DCHECK_EQ(16u, instruction->GetVectorLength());
394       __ psubb(dst, src);
395       break;
396     case Primitive::kPrimChar:
397     case Primitive::kPrimShort:
398       DCHECK_EQ(8u, instruction->GetVectorLength());
399       __ psubw(dst, src);
400       break;
401     case Primitive::kPrimInt:
402       DCHECK_EQ(4u, instruction->GetVectorLength());
403       __ psubd(dst, src);
404       break;
405     case Primitive::kPrimLong:
406       DCHECK_EQ(2u, instruction->GetVectorLength());
407       __ psubq(dst, src);
408       break;
409     case Primitive::kPrimFloat:
410       DCHECK_EQ(4u, instruction->GetVectorLength());
411       __ subps(dst, src);
412       break;
413     case Primitive::kPrimDouble:
414       DCHECK_EQ(2u, instruction->GetVectorLength());
415       __ subpd(dst, src);
416       break;
417     default:
418       LOG(FATAL) << "Unsupported SIMD type";
419       UNREACHABLE();
420   }
421 }
422 
VisitVecMul(HVecMul * instruction)423 void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
424   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
425 }
426 
VisitVecMul(HVecMul * instruction)427 void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
428   LocationSummary* locations = instruction->GetLocations();
429   DCHECK(locations->InAt(0).Equals(locations->Out()));
430   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
431   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
432   switch (instruction->GetPackedType()) {
433     case Primitive::kPrimChar:
434     case Primitive::kPrimShort:
435       DCHECK_EQ(8u, instruction->GetVectorLength());
436       __ pmullw(dst, src);
437       break;
438     case Primitive::kPrimInt:
439       DCHECK_EQ(4u, instruction->GetVectorLength());
440       __ pmulld(dst, src);
441       break;
442     case Primitive::kPrimFloat:
443       DCHECK_EQ(4u, instruction->GetVectorLength());
444       __ mulps(dst, src);
445       break;
446     case Primitive::kPrimDouble:
447       DCHECK_EQ(2u, instruction->GetVectorLength());
448       __ mulpd(dst, src);
449       break;
450     default:
451       LOG(FATAL) << "Unsupported SIMD type";
452       UNREACHABLE();
453   }
454 }
455 
VisitVecDiv(HVecDiv * instruction)456 void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
457   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
458 }
459 
VisitVecDiv(HVecDiv * instruction)460 void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
461   LocationSummary* locations = instruction->GetLocations();
462   DCHECK(locations->InAt(0).Equals(locations->Out()));
463   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
464   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
465   switch (instruction->GetPackedType()) {
466     case Primitive::kPrimFloat:
467       DCHECK_EQ(4u, instruction->GetVectorLength());
468       __ divps(dst, src);
469       break;
470     case Primitive::kPrimDouble:
471       DCHECK_EQ(2u, instruction->GetVectorLength());
472       __ divpd(dst, src);
473       break;
474     default:
475       LOG(FATAL) << "Unsupported SIMD type";
476       UNREACHABLE();
477   }
478 }
479 
VisitVecMin(HVecMin * instruction)480 void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
481   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
482 }
483 
VisitVecMin(HVecMin * instruction)484 void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
485   LOG(FATAL) << "No SIMD for " << instruction->GetId();
486 }
487 
VisitVecMax(HVecMax * instruction)488 void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
489   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
490 }
491 
VisitVecMax(HVecMax * instruction)492 void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
493   LOG(FATAL) << "No SIMD for " << instruction->GetId();
494 }
495 
VisitVecAnd(HVecAnd * instruction)496 void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
497   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
498 }
499 
VisitVecAnd(HVecAnd * instruction)500 void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
501   LocationSummary* locations = instruction->GetLocations();
502   DCHECK(locations->InAt(0).Equals(locations->Out()));
503   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
504   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
505   switch (instruction->GetPackedType()) {
506     case Primitive::kPrimBoolean:
507     case Primitive::kPrimByte:
508     case Primitive::kPrimChar:
509     case Primitive::kPrimShort:
510     case Primitive::kPrimInt:
511     case Primitive::kPrimLong:
512       DCHECK_LE(2u, instruction->GetVectorLength());
513       DCHECK_LE(instruction->GetVectorLength(), 16u);
514       __ pand(dst, src);
515       break;
516     case Primitive::kPrimFloat:
517       DCHECK_EQ(4u, instruction->GetVectorLength());
518       __ andps(dst, src);
519       break;
520     case Primitive::kPrimDouble:
521       DCHECK_EQ(2u, instruction->GetVectorLength());
522       __ andpd(dst, src);
523       break;
524     default:
525       LOG(FATAL) << "Unsupported SIMD type";
526       UNREACHABLE();
527   }
528 }
529 
VisitVecAndNot(HVecAndNot * instruction)530 void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
531   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
532 }
533 
VisitVecAndNot(HVecAndNot * instruction)534 void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
535   LocationSummary* locations = instruction->GetLocations();
536   DCHECK(locations->InAt(0).Equals(locations->Out()));
537   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
538   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
539   switch (instruction->GetPackedType()) {
540     case Primitive::kPrimBoolean:
541     case Primitive::kPrimByte:
542     case Primitive::kPrimChar:
543     case Primitive::kPrimShort:
544     case Primitive::kPrimInt:
545     case Primitive::kPrimLong:
546       DCHECK_LE(2u, instruction->GetVectorLength());
547       DCHECK_LE(instruction->GetVectorLength(), 16u);
548       __ pandn(dst, src);
549       break;
550     case Primitive::kPrimFloat:
551       DCHECK_EQ(4u, instruction->GetVectorLength());
552       __ andnps(dst, src);
553       break;
554     case Primitive::kPrimDouble:
555       DCHECK_EQ(2u, instruction->GetVectorLength());
556       __ andnpd(dst, src);
557       break;
558     default:
559       LOG(FATAL) << "Unsupported SIMD type";
560       UNREACHABLE();
561   }
562 }
563 
VisitVecOr(HVecOr * instruction)564 void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
565   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
566 }
567 
VisitVecOr(HVecOr * instruction)568 void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
569   LocationSummary* locations = instruction->GetLocations();
570   DCHECK(locations->InAt(0).Equals(locations->Out()));
571   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
572   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
573   switch (instruction->GetPackedType()) {
574     case Primitive::kPrimBoolean:
575     case Primitive::kPrimByte:
576     case Primitive::kPrimChar:
577     case Primitive::kPrimShort:
578     case Primitive::kPrimInt:
579     case Primitive::kPrimLong:
580       DCHECK_LE(2u, instruction->GetVectorLength());
581       DCHECK_LE(instruction->GetVectorLength(), 16u);
582       __ por(dst, src);
583       break;
584     case Primitive::kPrimFloat:
585       DCHECK_EQ(4u, instruction->GetVectorLength());
586       __ orps(dst, src);
587       break;
588     case Primitive::kPrimDouble:
589       DCHECK_EQ(2u, instruction->GetVectorLength());
590       __ orpd(dst, src);
591       break;
592     default:
593       LOG(FATAL) << "Unsupported SIMD type";
594       UNREACHABLE();
595   }
596 }
597 
VisitVecXor(HVecXor * instruction)598 void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
599   CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
600 }
601 
VisitVecXor(HVecXor * instruction)602 void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
603   LocationSummary* locations = instruction->GetLocations();
604   DCHECK(locations->InAt(0).Equals(locations->Out()));
605   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
606   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
607   switch (instruction->GetPackedType()) {
608     case Primitive::kPrimBoolean:
609     case Primitive::kPrimByte:
610     case Primitive::kPrimChar:
611     case Primitive::kPrimShort:
612     case Primitive::kPrimInt:
613     case Primitive::kPrimLong:
614       DCHECK_LE(2u, instruction->GetVectorLength());
615       DCHECK_LE(instruction->GetVectorLength(), 16u);
616       __ pxor(dst, src);
617       break;
618     case Primitive::kPrimFloat:
619       DCHECK_EQ(4u, instruction->GetVectorLength());
620       __ xorps(dst, src);
621       break;
622     case Primitive::kPrimDouble:
623       DCHECK_EQ(2u, instruction->GetVectorLength());
624       __ xorpd(dst, src);
625       break;
626     default:
627       LOG(FATAL) << "Unsupported SIMD type";
628       UNREACHABLE();
629   }
630 }
631 
632 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * arena,HVecBinaryOperation * instruction)633 static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
634   LocationSummary* locations = new (arena) LocationSummary(instruction);
635   switch (instruction->GetPackedType()) {
636     case Primitive::kPrimChar:
637     case Primitive::kPrimShort:
638     case Primitive::kPrimInt:
639     case Primitive::kPrimLong:
640       locations->SetInAt(0, Location::RequiresFpuRegister());
641       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
642       locations->SetOut(Location::SameAsFirstInput());
643       break;
644     default:
645       LOG(FATAL) << "Unsupported SIMD type";
646       UNREACHABLE();
647   }
648 }
649 
VisitVecShl(HVecShl * instruction)650 void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
651   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
652 }
653 
VisitVecShl(HVecShl * instruction)654 void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
655   LocationSummary* locations = instruction->GetLocations();
656   DCHECK(locations->InAt(0).Equals(locations->Out()));
657   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
658   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
659   switch (instruction->GetPackedType()) {
660     case Primitive::kPrimChar:
661     case Primitive::kPrimShort:
662       DCHECK_EQ(8u, instruction->GetVectorLength());
663       __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
664       break;
665     case Primitive::kPrimInt:
666       DCHECK_EQ(4u, instruction->GetVectorLength());
667       __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
668       break;
669     case Primitive::kPrimLong:
670       DCHECK_EQ(2u, instruction->GetVectorLength());
671       __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
672       break;
673     default:
674       LOG(FATAL) << "Unsupported SIMD type";
675       UNREACHABLE();
676   }
677 }
678 
VisitVecShr(HVecShr * instruction)679 void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
680   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
681 }
682 
VisitVecShr(HVecShr * instruction)683 void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
684   LocationSummary* locations = instruction->GetLocations();
685   DCHECK(locations->InAt(0).Equals(locations->Out()));
686   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
687   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
688   switch (instruction->GetPackedType()) {
689     case Primitive::kPrimChar:
690     case Primitive::kPrimShort:
691       DCHECK_EQ(8u, instruction->GetVectorLength());
692       __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
693       break;
694     case Primitive::kPrimInt:
695       DCHECK_EQ(4u, instruction->GetVectorLength());
696       __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
697       break;
698     default:
699       LOG(FATAL) << "Unsupported SIMD type";
700       UNREACHABLE();
701   }
702 }
703 
VisitVecUShr(HVecUShr * instruction)704 void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
705   CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
706 }
707 
VisitVecUShr(HVecUShr * instruction)708 void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
709   LocationSummary* locations = instruction->GetLocations();
710   DCHECK(locations->InAt(0).Equals(locations->Out()));
711   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
712   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
713   switch (instruction->GetPackedType()) {
714     case Primitive::kPrimChar:
715     case Primitive::kPrimShort:
716       DCHECK_EQ(8u, instruction->GetVectorLength());
717       __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
718       break;
719     case Primitive::kPrimInt:
720       DCHECK_EQ(4u, instruction->GetVectorLength());
721       __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
722       break;
723     case Primitive::kPrimLong:
724       DCHECK_EQ(2u, instruction->GetVectorLength());
725       __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
726       break;
727     default:
728       LOG(FATAL) << "Unsupported SIMD type";
729       UNREACHABLE();
730   }
731 }
732 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instr)733 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
734   LOG(FATAL) << "No SIMD for " << instr->GetId();
735 }
736 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instr)737 void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
738   LOG(FATAL) << "No SIMD for " << instr->GetId();
739 }
740 
741 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * arena,HVecMemoryOperation * instruction,bool is_load)742 static void CreateVecMemLocations(ArenaAllocator* arena,
743                                   HVecMemoryOperation* instruction,
744                                   bool is_load) {
745   LocationSummary* locations = new (arena) LocationSummary(instruction);
746   switch (instruction->GetPackedType()) {
747     case Primitive::kPrimBoolean:
748     case Primitive::kPrimByte:
749     case Primitive::kPrimChar:
750     case Primitive::kPrimShort:
751     case Primitive::kPrimInt:
752     case Primitive::kPrimLong:
753     case Primitive::kPrimFloat:
754     case Primitive::kPrimDouble:
755       locations->SetInAt(0, Location::RequiresRegister());
756       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
757       if (is_load) {
758         locations->SetOut(Location::RequiresFpuRegister());
759       } else {
760         locations->SetInAt(2, Location::RequiresFpuRegister());
761       }
762       break;
763     default:
764       LOG(FATAL) << "Unsupported SIMD type";
765       UNREACHABLE();
766   }
767 }
768 
769 // Helper to set up registers and address for vector memory operations.
CreateVecMemRegisters(HVecMemoryOperation * instruction,Location * reg_loc,bool is_load)770 static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
771                                      Location* reg_loc,
772                                      bool is_load) {
773   LocationSummary* locations = instruction->GetLocations();
774   Location base = locations->InAt(0);
775   Location index = locations->InAt(1);
776   *reg_loc = is_load ? locations->Out() : locations->InAt(2);
777   size_t size = Primitive::ComponentSize(instruction->GetPackedType());
778   uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
779   ScaleFactor scale = TIMES_1;
780   switch (size) {
781     case 2: scale = TIMES_2; break;
782     case 4: scale = TIMES_4; break;
783     case 8: scale = TIMES_8; break;
784     default: break;
785   }
786   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
787 }
788 
VisitVecLoad(HVecLoad * instruction)789 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
790   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
791 }
792 
VisitVecLoad(HVecLoad * instruction)793 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
794   Location reg_loc = Location::NoLocation();
795   Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
796   XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
797   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
798   switch (instruction->GetPackedType()) {
799     case Primitive::kPrimBoolean:
800     case Primitive::kPrimByte:
801     case Primitive::kPrimChar:
802     case Primitive::kPrimShort:
803     case Primitive::kPrimInt:
804     case Primitive::kPrimLong:
805       DCHECK_LE(2u, instruction->GetVectorLength());
806       DCHECK_LE(instruction->GetVectorLength(), 16u);
807       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
808       break;
809     case Primitive::kPrimFloat:
810       DCHECK_EQ(4u, instruction->GetVectorLength());
811       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
812       break;
813     case Primitive::kPrimDouble:
814       DCHECK_EQ(2u, instruction->GetVectorLength());
815       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
816       break;
817     default:
818       LOG(FATAL) << "Unsupported SIMD type";
819       UNREACHABLE();
820   }
821 }
822 
VisitVecStore(HVecStore * instruction)823 void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
824   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
825 }
826 
VisitVecStore(HVecStore * instruction)827 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
828   Location reg_loc = Location::NoLocation();
829   Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
830   XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
831   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
832   switch (instruction->GetPackedType()) {
833     case Primitive::kPrimBoolean:
834     case Primitive::kPrimByte:
835     case Primitive::kPrimChar:
836     case Primitive::kPrimShort:
837     case Primitive::kPrimInt:
838     case Primitive::kPrimLong:
839       DCHECK_LE(2u, instruction->GetVectorLength());
840       DCHECK_LE(instruction->GetVectorLength(), 16u);
841       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
842       break;
843     case Primitive::kPrimFloat:
844       DCHECK_EQ(4u, instruction->GetVectorLength());
845       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
846       break;
847     case Primitive::kPrimDouble:
848       DCHECK_EQ(2u, instruction->GetVectorLength());
849       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
850       break;
851     default:
852       LOG(FATAL) << "Unsupported SIMD type";
853       UNREACHABLE();
854   }
855 }
856 
857 #undef __
858 
859 }  // namespace x86
860 }  // namespace art
861