1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_ARM
6 
7 #include <memory>
8 
9 #include "src/arm/assembler-arm-inl.h"
10 #include "src/arm/simulator-arm.h"
11 #include "src/codegen.h"
12 #include "src/isolate.h"
13 #include "src/macro-assembler.h"
14 
15 namespace v8 {
16 namespace internal {
17 
18 #define __ masm.
19 
20 #if defined(V8_HOST_ARCH_ARM)
21 
CreateMemCopyUint8Function(Isolate * isolate,MemCopyUint8Function stub)22 MemCopyUint8Function CreateMemCopyUint8Function(Isolate* isolate,
23                                                 MemCopyUint8Function stub) {
24 #if defined(USE_SIMULATOR)
25   return stub;
26 #else
27   size_t allocated = 0;
28   byte* buffer = AllocatePage(isolate->heap()->GetRandomMmapAddr(), &allocated);
29   if (buffer == nullptr) return stub;
30 
31   MacroAssembler masm(isolate, buffer, static_cast<int>(allocated),
32                       CodeObjectRequired::kNo);
33 
34   Register dest = r0;
35   Register src = r1;
36   Register chars = r2;
37   Register temp1 = r3;
38   Label less_4;
39 
40   if (CpuFeatures::IsSupported(NEON)) {
41     CpuFeatureScope scope(&masm, NEON);
42     Label loop, less_256, less_128, less_64, less_32, _16_or_less, _8_or_less;
43     Label size_less_than_8;
44     __ pld(MemOperand(src, 0));
45 
46     __ cmp(chars, Operand(8));
47     __ b(lt, &size_less_than_8);
48     __ cmp(chars, Operand(32));
49     __ b(lt, &less_32);
50     if (CpuFeatures::dcache_line_size() == 32) {
51       __ pld(MemOperand(src, 32));
52     }
53     __ cmp(chars, Operand(64));
54     __ b(lt, &less_64);
55     __ pld(MemOperand(src, 64));
56     if (CpuFeatures::dcache_line_size() == 32) {
57       __ pld(MemOperand(src, 96));
58     }
59     __ cmp(chars, Operand(128));
60     __ b(lt, &less_128);
61     __ pld(MemOperand(src, 128));
62     if (CpuFeatures::dcache_line_size() == 32) {
63       __ pld(MemOperand(src, 160));
64     }
65     __ pld(MemOperand(src, 192));
66     if (CpuFeatures::dcache_line_size() == 32) {
67       __ pld(MemOperand(src, 224));
68     }
69     __ cmp(chars, Operand(256));
70     __ b(lt, &less_256);
71     __ sub(chars, chars, Operand(256));
72 
73     __ bind(&loop);
74     __ pld(MemOperand(src, 256));
75     __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
76     if (CpuFeatures::dcache_line_size() == 32) {
77       __ pld(MemOperand(src, 256));
78     }
79     __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
80     __ sub(chars, chars, Operand(64), SetCC);
81     __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
82     __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
83     __ b(ge, &loop);
84     __ add(chars, chars, Operand(256));
85 
86     __ bind(&less_256);
87     __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
88     __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
89     __ sub(chars, chars, Operand(128));
90     __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
91     __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
92     __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
93     __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
94     __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
95     __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
96     __ cmp(chars, Operand(64));
97     __ b(lt, &less_64);
98 
99     __ bind(&less_128);
100     __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
101     __ vld1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(src, PostIndex));
102     __ sub(chars, chars, Operand(64));
103     __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
104     __ vst1(Neon8, NeonListOperand(d4, 4), NeonMemOperand(dest, PostIndex));
105 
106     __ bind(&less_64);
107     __ cmp(chars, Operand(32));
108     __ b(lt, &less_32);
109     __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(src, PostIndex));
110     __ vst1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(dest, PostIndex));
111     __ sub(chars, chars, Operand(32));
112 
113     __ bind(&less_32);
114     __ cmp(chars, Operand(16));
115     __ b(le, &_16_or_less);
116     __ vld1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(src, PostIndex));
117     __ vst1(Neon8, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex));
118     __ sub(chars, chars, Operand(16));
119 
120     __ bind(&_16_or_less);
121     __ cmp(chars, Operand(8));
122     __ b(le, &_8_or_less);
123     __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex));
124     __ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest, PostIndex));
125     __ sub(chars, chars, Operand(8));
126 
127     // Do a last copy which may overlap with the previous copy (up to 8 bytes).
128     __ bind(&_8_or_less);
129     __ rsb(chars, chars, Operand(8));
130     __ sub(src, src, Operand(chars));
131     __ sub(dest, dest, Operand(chars));
132     __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src));
133     __ vst1(Neon8, NeonListOperand(d0), NeonMemOperand(dest));
134 
135     __ Ret();
136 
137     __ bind(&size_less_than_8);
138 
139     __ bic(temp1, chars, Operand(0x3), SetCC);
140     __ b(&less_4, eq);
141     __ ldr(temp1, MemOperand(src, 4, PostIndex));
142     __ str(temp1, MemOperand(dest, 4, PostIndex));
143   } else {
144     UseScratchRegisterScope temps(&masm);
145     Register temp2 = temps.Acquire();
146     Label loop;
147 
148     __ bic(temp2, chars, Operand(0x3), SetCC);
149     __ b(&less_4, eq);
150     __ add(temp2, dest, temp2);
151 
152     __ bind(&loop);
153     __ ldr(temp1, MemOperand(src, 4, PostIndex));
154     __ str(temp1, MemOperand(dest, 4, PostIndex));
155     __ cmp(dest, temp2);
156     __ b(&loop, ne);
157   }
158 
159   __ bind(&less_4);
160   __ mov(chars, Operand(chars, LSL, 31), SetCC);
161   // bit0 => Z (ne), bit1 => C (cs)
162   __ ldrh(temp1, MemOperand(src, 2, PostIndex), cs);
163   __ strh(temp1, MemOperand(dest, 2, PostIndex), cs);
164   __ ldrb(temp1, MemOperand(src), ne);
165   __ strb(temp1, MemOperand(dest), ne);
166   __ Ret();
167 
168   CodeDesc desc;
169   masm.GetCode(isolate, &desc);
170   DCHECK(!RelocInfo::RequiresRelocationAfterCodegen(desc));
171 
172   Assembler::FlushICache(buffer, allocated);
173   CHECK(SetPermissions(buffer, allocated, PageAllocator::kReadExecute));
174   return FUNCTION_CAST<MemCopyUint8Function>(buffer);
175 #endif
176 }
177 
178 
179 // Convert 8 to 16. The number of character to copy must be at least 8.
CreateMemCopyUint16Uint8Function(Isolate * isolate,MemCopyUint16Uint8Function stub)180 MemCopyUint16Uint8Function CreateMemCopyUint16Uint8Function(
181     Isolate* isolate, MemCopyUint16Uint8Function stub) {
182 #if defined(USE_SIMULATOR)
183   return stub;
184 #else
185   size_t allocated = 0;
186   byte* buffer = AllocatePage(isolate->heap()->GetRandomMmapAddr(), &allocated);
187   if (buffer == nullptr) return stub;
188 
189   MacroAssembler masm(isolate, buffer, static_cast<int>(allocated),
190                       CodeObjectRequired::kNo);
191 
192   Register dest = r0;
193   Register src = r1;
194   Register chars = r2;
195   if (CpuFeatures::IsSupported(NEON)) {
196     CpuFeatureScope scope(&masm, NEON);
197     Register temp = r3;
198     Label loop;
199 
200     __ bic(temp, chars, Operand(0x7));
201     __ sub(chars, chars, Operand(temp));
202     __ add(temp, dest, Operand(temp, LSL, 1));
203 
204     __ bind(&loop);
205     __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src, PostIndex));
206     __ vmovl(NeonU8, q0, d0);
207     __ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest, PostIndex));
208     __ cmp(dest, temp);
209     __ b(&loop, ne);
210 
211     // Do a last copy which will overlap with the previous copy (1 to 8 bytes).
212     __ rsb(chars, chars, Operand(8));
213     __ sub(src, src, Operand(chars));
214     __ sub(dest, dest, Operand(chars, LSL, 1));
215     __ vld1(Neon8, NeonListOperand(d0), NeonMemOperand(src));
216     __ vmovl(NeonU8, q0, d0);
217     __ vst1(Neon16, NeonListOperand(d0, 2), NeonMemOperand(dest));
218     __ Ret();
219   } else {
220     UseScratchRegisterScope temps(&masm);
221 
222     Register temp1 = r3;
223     Register temp2 = temps.Acquire();
224     Register temp3 = lr;
225     Register temp4 = r4;
226     Label loop;
227     Label not_two;
228 
229     __ Push(lr, r4);
230     __ bic(temp2, chars, Operand(0x3));
231     __ add(temp2, dest, Operand(temp2, LSL, 1));
232 
233     __ bind(&loop);
234     __ ldr(temp1, MemOperand(src, 4, PostIndex));
235     __ uxtb16(temp3, temp1);
236     __ uxtb16(temp4, temp1, 8);
237     __ pkhbt(temp1, temp3, Operand(temp4, LSL, 16));
238     __ str(temp1, MemOperand(dest));
239     __ pkhtb(temp1, temp4, Operand(temp3, ASR, 16));
240     __ str(temp1, MemOperand(dest, 4));
241     __ add(dest, dest, Operand(8));
242     __ cmp(dest, temp2);
243     __ b(&loop, ne);
244 
245     __ mov(chars, Operand(chars, LSL, 31), SetCC);  // bit0 => ne, bit1 => cs
246     __ b(&not_two, cc);
247     __ ldrh(temp1, MemOperand(src, 2, PostIndex));
248     __ uxtb(temp3, temp1, 8);
249     __ mov(temp3, Operand(temp3, LSL, 16));
250     __ uxtab(temp3, temp3, temp1);
251     __ str(temp3, MemOperand(dest, 4, PostIndex));
252     __ bind(&not_two);
253     __ ldrb(temp1, MemOperand(src), ne);
254     __ strh(temp1, MemOperand(dest), ne);
255     __ Pop(pc, r4);
256   }
257 
258   CodeDesc desc;
259   masm.GetCode(isolate, &desc);
260 
261   Assembler::FlushICache(buffer, allocated);
262   CHECK(SetPermissions(buffer, allocated, PageAllocator::kReadExecute));
263   return FUNCTION_CAST<MemCopyUint16Uint8Function>(buffer);
264 #endif
265 }
266 #endif
267 
CreateSqrtFunction(Isolate * isolate)268 UnaryMathFunctionWithIsolate CreateSqrtFunction(Isolate* isolate) {
269 #if defined(USE_SIMULATOR)
270   return nullptr;
271 #else
272   size_t allocated = 0;
273   byte* buffer = AllocatePage(isolate->heap()->GetRandomMmapAddr(), &allocated);
274   if (buffer == nullptr) return nullptr;
275 
276   MacroAssembler masm(isolate, buffer, static_cast<int>(allocated),
277                       CodeObjectRequired::kNo);
278 
279   __ MovFromFloatParameter(d0);
280   __ vsqrt(d0, d0);
281   __ MovToFloatResult(d0);
282   __ Ret();
283 
284   CodeDesc desc;
285   masm.GetCode(isolate, &desc);
286   DCHECK(!RelocInfo::RequiresRelocationAfterCodegen(desc));
287 
288   Assembler::FlushICache(buffer, allocated);
289   CHECK(SetPermissions(buffer, allocated, PageAllocator::kReadExecute));
290   return FUNCTION_CAST<UnaryMathFunctionWithIsolate>(buffer);
291 #endif
292 }
293 
294 #undef __
295 
296 }  // namespace internal
297 }  // namespace v8
298 
299 #endif  // V8_TARGET_ARCH_ARM
300