1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <iostream>
17
18 #include "parallel_move_resolver.h"
19 #include "nodes.h"
20
21 namespace art {
22
BuildInitialMoveList(HParallelMove * parallel_move)23 void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
24 // Perform a linear sweep of the moves to add them to the initial list of
25 // moves to perform, ignoring any move that is redundant (the source is
26 // the same as the destination, the destination is ignored and
27 // unallocated, or the move was already eliminated).
28 for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
29 MoveOperands* move = parallel_move->MoveOperandsAt(i);
30 if (!move->IsRedundant()) {
31 moves_.Add(move);
32 }
33 }
34 }
35
EmitNativeCode(HParallelMove * parallel_move)36 void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
37 DCHECK(moves_.IsEmpty());
38 // Build up a worklist of moves.
39 BuildInitialMoveList(parallel_move);
40
41 for (size_t i = 0; i < moves_.Size(); ++i) {
42 const MoveOperands& move = *moves_.Get(i);
43 // Skip constants to perform them last. They don't block other moves
44 // and skipping such moves with register destinations keeps those
45 // registers free for the whole algorithm.
46 if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
47 PerformMove(i);
48 }
49 }
50
51 // Perform the moves with constant sources.
52 for (size_t i = 0; i < moves_.Size(); ++i) {
53 MoveOperands* move = moves_.Get(i);
54 if (!move->IsEliminated()) {
55 DCHECK(move->GetSource().IsConstant());
56 EmitMove(i);
57 // Eliminate the move, in case following moves need a scratch register.
58 move->Eliminate();
59 }
60 }
61
62 moves_.Reset();
63 }
64
LowOf(Location location)65 Location LowOf(Location location) {
66 if (location.IsRegisterPair()) {
67 return Location::RegisterLocation(location.low());
68 } else if (location.IsFpuRegisterPair()) {
69 return Location::FpuRegisterLocation(location.low());
70 } else if (location.IsDoubleStackSlot()) {
71 return Location::StackSlot(location.GetStackIndex());
72 } else {
73 return Location::NoLocation();
74 }
75 }
76
HighOf(Location location)77 Location HighOf(Location location) {
78 if (location.IsRegisterPair()) {
79 return Location::RegisterLocation(location.high());
80 } else if (location.IsFpuRegisterPair()) {
81 return Location::FpuRegisterLocation(location.high());
82 } else if (location.IsDoubleStackSlot()) {
83 return Location::StackSlot(location.GetHighStackIndex(4));
84 } else {
85 return Location::NoLocation();
86 }
87 }
88
89 // Update the source of `move`, knowing that `updated_location` has been swapped
90 // with `new_source`. Note that `updated_location` can be a pair, therefore if
91 // `move` is non-pair, we need to extract which register to use.
UpdateSourceOf(MoveOperands * move,Location updated_location,Location new_source)92 static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
93 Location source = move->GetSource();
94 if (LowOf(updated_location).Equals(source)) {
95 move->SetSource(LowOf(new_source));
96 } else if (HighOf(updated_location).Equals(source)) {
97 move->SetSource(HighOf(new_source));
98 } else {
99 DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
100 move->SetSource(new_source);
101 }
102 }
103
PerformMove(size_t index)104 MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
105 // Each call to this function performs a move and deletes it from the move
106 // graph. We first recursively perform any move blocking this one. We
107 // mark a move as "pending" on entry to PerformMove in order to detect
108 // cycles in the move graph. We use operand swaps to resolve cycles,
109 // which means that a call to PerformMove could change any source operand
110 // in the move graph.
111
112 MoveOperands* move = moves_.Get(index);
113 DCHECK(!move->IsPending());
114 if (move->IsRedundant()) {
115 // Because we swap register pairs first, following, un-pending
116 // moves may become redundant.
117 move->Eliminate();
118 return nullptr;
119 }
120
121 // Clear this move's destination to indicate a pending move. The actual
122 // destination is saved in a stack-allocated local. Recursion may allow
123 // multiple moves to be pending.
124 DCHECK(!move->GetSource().IsInvalid());
125 Location destination = move->MarkPending();
126
127 // Perform a depth-first traversal of the move graph to resolve
128 // dependencies. Any unperformed, unpending move with a source the same
129 // as this one's destination blocks this one so recursively perform all
130 // such moves.
131 MoveOperands* required_swap = nullptr;
132 for (size_t i = 0; i < moves_.Size(); ++i) {
133 const MoveOperands& other_move = *moves_.Get(i);
134 if (other_move.Blocks(destination) && !other_move.IsPending()) {
135 // Though PerformMove can change any source operand in the move graph,
136 // calling `PerformMove` cannot create a blocking move via a swap
137 // (this loop does not miss any).
138 // For example, assume there is a non-blocking move with source A
139 // and this move is blocked on source B and there is a swap of A and
140 // B. Then A and B must be involved in the same cycle (or they would
141 // not be swapped). Since this move's destination is B and there is
142 // only a single incoming edge to an operand, this move must also be
143 // involved in the same cycle. In that case, the blocking move will
144 // be created but will be "pending" when we return from PerformMove.
145 required_swap = PerformMove(i);
146
147 if (required_swap == move) {
148 // If this move is required to swap, we do so without looking
149 // at the next moves. Swapping is not blocked by anything, it just
150 // updates other moves's source.
151 break;
152 } else if (required_swap == moves_.Get(i)) {
153 // If `other_move` was swapped, we iterate again to find a new
154 // potential cycle.
155 required_swap = nullptr;
156 i = 0;
157 } else if (required_swap != nullptr) {
158 // A move is required to swap. We walk back the cycle to find the
159 // move by just returning from this `PerforrmMove`.
160 moves_.Get(index)->ClearPending(destination);
161 return required_swap;
162 }
163 }
164 }
165
166 // We are about to resolve this move and don't need it marked as
167 // pending, so restore its destination.
168 move->ClearPending(destination);
169
170 // This move's source may have changed due to swaps to resolve cycles and
171 // so it may now be the last move in the cycle. If so remove it.
172 if (move->GetSource().Equals(destination)) {
173 move->Eliminate();
174 DCHECK(required_swap == nullptr);
175 return nullptr;
176 }
177
178 // The move may be blocked on a (at most one) pending move, in which case
179 // we have a cycle. Search for such a blocking move and perform a swap to
180 // resolve it.
181 bool do_swap = false;
182 if (required_swap != nullptr) {
183 DCHECK_EQ(required_swap, move);
184 do_swap = true;
185 } else {
186 for (size_t i = 0; i < moves_.Size(); ++i) {
187 const MoveOperands& other_move = *moves_.Get(i);
188 if (other_move.Blocks(destination)) {
189 DCHECK(other_move.IsPending());
190 if (!move->Is64BitMove() && other_move.Is64BitMove()) {
191 // We swap 64bits moves before swapping 32bits moves. Go back from the
192 // cycle by returning the move that must be swapped.
193 return moves_.Get(i);
194 }
195 do_swap = true;
196 break;
197 }
198 }
199 }
200
201 if (do_swap) {
202 EmitSwap(index);
203 // Any unperformed (including pending) move with a source of either
204 // this move's source or destination needs to have their source
205 // changed to reflect the state of affairs after the swap.
206 Location source = move->GetSource();
207 Location swap_destination = move->GetDestination();
208 move->Eliminate();
209 for (size_t i = 0; i < moves_.Size(); ++i) {
210 const MoveOperands& other_move = *moves_.Get(i);
211 if (other_move.Blocks(source)) {
212 UpdateSourceOf(moves_.Get(i), source, swap_destination);
213 } else if (other_move.Blocks(swap_destination)) {
214 UpdateSourceOf(moves_.Get(i), swap_destination, source);
215 }
216 }
217 // If the swap was required because of a 64bits move in the middle of a cycle,
218 // we return the swapped move, so that the caller knows it needs to re-iterate
219 // its dependency loop.
220 return required_swap;
221 } else {
222 // This move is not blocked.
223 EmitMove(index);
224 move->Eliminate();
225 DCHECK(required_swap == nullptr);
226 return nullptr;
227 }
228 }
229
IsScratchLocation(Location loc)230 bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
231 for (size_t i = 0; i < moves_.Size(); ++i) {
232 if (moves_.Get(i)->Blocks(loc)) {
233 return false;
234 }
235 }
236
237 for (size_t i = 0; i < moves_.Size(); ++i) {
238 if (moves_.Get(i)->GetDestination().Equals(loc)) {
239 return true;
240 }
241 }
242
243 return false;
244 }
245
AllocateScratchRegister(int blocked,int register_count,int if_scratch,bool * spilled)246 int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
247 int register_count,
248 int if_scratch,
249 bool* spilled) {
250 DCHECK_NE(blocked, if_scratch);
251 int scratch = -1;
252 for (int reg = 0; reg < register_count; ++reg) {
253 if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
254 scratch = reg;
255 break;
256 }
257 }
258
259 if (scratch == -1) {
260 *spilled = true;
261 scratch = if_scratch;
262 } else {
263 *spilled = false;
264 }
265
266 return scratch;
267 }
268
269
ScratchRegisterScope(ParallelMoveResolverWithSwap * resolver,int blocked,int if_scratch,int number_of_registers)270 ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
271 ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
272 : resolver_(resolver),
273 reg_(kNoRegister),
274 spilled_(false) {
275 reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
276
277 if (spilled_) {
278 resolver->SpillScratch(reg_);
279 }
280 }
281
282
~ScratchRegisterScope()283 ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
284 if (spilled_) {
285 resolver_->RestoreScratch(reg_);
286 }
287 }
288
EmitNativeCode(HParallelMove * parallel_move)289 void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
290 DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
291 DCHECK(moves_.IsEmpty());
292 DCHECK(scratches_.IsEmpty());
293
294 // Backend dependent initialization.
295 PrepareForEmitNativeCode();
296
297 // Build up a worklist of moves.
298 BuildInitialMoveList(parallel_move);
299
300 for (size_t i = 0; i < moves_.Size(); ++i) {
301 const MoveOperands& move = *moves_.Get(i);
302 // Skip constants to perform them last. They don't block other moves and
303 // skipping such moves with register destinations keeps those registers
304 // free for the whole algorithm.
305 if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
306 PerformMove(i);
307 }
308 }
309
310 // Perform the moves with constant sources and register destinations with UpdateMoveSource()
311 // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit
312 // from changing the constant sources to stack locations.
313 for (size_t i = 0; i < moves_.Size(); ++i) {
314 MoveOperands* move = moves_.Get(i);
315 Location destination = move->GetDestination();
316 if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
317 Location source = move->GetSource();
318 EmitMove(i);
319 move->Eliminate();
320 // This may introduce additional instruction dependency, but reduce number
321 // of moves and possible literal loads. For example,
322 // Original moves:
323 // 1234.5678 -> D0
324 // 1234.5678 -> D1
325 // Updated moves:
326 // 1234.5678 -> D0
327 // D0 -> D1
328 UpdateMoveSource(source, destination);
329 }
330 }
331
332 // Perform the rest of the moves.
333 for (size_t i = 0; i < moves_.Size(); ++i) {
334 MoveOperands* move = moves_.Get(i);
335 if (!move->IsEliminated()) {
336 EmitMove(i);
337 move->Eliminate();
338 }
339 }
340
341 // All pending moves that we have added for resolve cycles should be performed.
342 DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
343
344 // Backend dependent cleanup.
345 FinishEmitNativeCode();
346
347 moves_.Reset();
348 scratches_.Reset();
349 }
350
GetScratchLocation(Location::Kind kind)351 Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
352 for (size_t i = 0; i < scratches_.Size(); ++i) {
353 Location loc = scratches_.Get(i);
354 if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
355 return loc;
356 }
357 }
358 for (size_t i = 0; i < moves_.Size(); ++i) {
359 Location loc = moves_.Get(i)->GetDestination();
360 if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
361 return loc;
362 }
363 }
364 return Location::NoLocation();
365 }
366
AddScratchLocation(Location loc)367 void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
368 if (kIsDebugBuild) {
369 for (size_t i = 0; i < scratches_.Size(); ++i) {
370 DCHECK(!loc.Equals(scratches_.Get(i)));
371 }
372 }
373 scratches_.Add(loc);
374 }
375
RemoveScratchLocation(Location loc)376 void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
377 DCHECK(!IsBlockedByMoves(loc));
378 for (size_t i = 0; i < scratches_.Size(); ++i) {
379 if (loc.Equals(scratches_.Get(i))) {
380 scratches_.DeleteAt(i);
381 break;
382 }
383 }
384 }
385
PerformMove(size_t index)386 void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
387 // Each call to this function performs a move and deletes it from the move
388 // graph. We first recursively perform any move blocking this one. We mark
389 // a move as "pending" on entry to PerformMove in order to detect cycles
390 // in the move graph. We use scratch location to resolve cycles, also
391 // additional pending moves might be added. After move has been performed,
392 // we will update source operand in the move graph to reduce dependencies in
393 // the graph.
394
395 MoveOperands* move = moves_.Get(index);
396 DCHECK(!move->IsPending());
397 DCHECK(!move->IsEliminated());
398 if (move->IsRedundant()) {
399 // Previous operations on the list of moves have caused this particular move
400 // to become a no-op, so we can safely eliminate it. Consider for example
401 // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
402 // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
403 // used as the scratch location, the move (1 -> 2) will occur while resolving
404 // the cycle. When that move is emitted, the code will update moves with a '1'
405 // as their source to use '2' instead (see `UpdateMoveSource()`. In our example
406 // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
407 // eliminated here.
408 move->Eliminate();
409 return;
410 }
411
412 // Clear this move's destination to indicate a pending move. The actual
413 // destination is saved in a stack-allocated local. Recursion may allow
414 // multiple moves to be pending.
415 DCHECK(!move->GetSource().IsInvalid());
416 Location destination = move->MarkPending();
417
418 // Perform a depth-first traversal of the move graph to resolve
419 // dependencies. Any unperformed, unpending move with a source the same
420 // as this one's destination blocks this one so recursively perform all
421 // such moves.
422 for (size_t i = 0; i < moves_.Size(); ++i) {
423 const MoveOperands& other_move = *moves_.Get(i);
424 if (other_move.Blocks(destination) && !other_move.IsPending()) {
425 PerformMove(i);
426 }
427 }
428
429 // We are about to resolve this move and don't need it marked as
430 // pending, so restore its destination.
431 move->ClearPending(destination);
432
433 // No one else should write to the move destination when the it is pending.
434 DCHECK(!move->IsRedundant());
435
436 Location source = move->GetSource();
437 // The move may be blocked on several pending moves, in case we have a cycle.
438 if (IsBlockedByMoves(destination)) {
439 // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
440 // sequence:
441 // (C -> scratch) # Emit right now.
442 // (A -> B) (B -> C) # Unblocked.
443 // (scratch -> A) # Add to pending_moves_, blocked by (A -> B).
444 Location::Kind kind = source.GetKind();
445 DCHECK_NE(kind, Location::kConstant);
446 Location scratch = AllocateScratchLocationFor(kind);
447 // We only care about the move size.
448 Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
449 // Perform (C -> scratch)
450 move->SetDestination(scratch);
451 EmitMove(index);
452 move->Eliminate();
453 UpdateMoveSource(source, scratch);
454 // Add (scratch -> A).
455 AddPendingMove(scratch, destination, type);
456 } else {
457 // This move is not blocked.
458 EmitMove(index);
459 move->Eliminate();
460 UpdateMoveSource(source, destination);
461 }
462
463 // Moves in the pending list should not block any other moves. But performing
464 // unblocked moves in the pending list can free scratch registers, so we do this
465 // as early as possible.
466 MoveOperands* pending_move;
467 while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
468 Location pending_source = pending_move->GetSource();
469 Location pending_destination = pending_move->GetDestination();
470 // We do not depend on the pending move index. So just delete the move instead
471 // of eliminating it to make the pending list cleaner.
472 DeletePendingMove(pending_move);
473 move->SetSource(pending_source);
474 move->SetDestination(pending_destination);
475 EmitMove(index);
476 move->Eliminate();
477 UpdateMoveSource(pending_source, pending_destination);
478 // Free any unblocked locations in the scratch location list.
479 for (size_t i = 0; i < scratches_.Size(); ++i) {
480 Location scratch = scratches_.Get(i);
481 // Only scratch overlapping with performed move source can be unblocked.
482 if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
483 FreeScratchLocation(pending_source);
484 }
485 }
486 }
487 }
488
UpdateMoveSource(Location from,Location to)489 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
490 // This function is used to reduce the dependencies in the graph after
491 // (from -> to) has been performed. Since we ensure there is no move with the same
492 // destination, (to -> X) can not be blocked while (from -> X) might still be
493 // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
494 // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
495 // a dependency between the two. If we update the source location from 1 to 2, we
496 // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
497 //
498 // This is not something we must do, but we can use fewer scratch locations with
499 // this trick. For example, we can avoid using additional scratch locations for
500 // moves (0 -> 1), (1 -> 2), (1 -> 0).
501 for (size_t i = 0; i < moves_.Size(); ++i) {
502 MoveOperands* move = moves_.Get(i);
503 if (move->GetSource().Equals(from)) {
504 move->SetSource(to);
505 }
506 }
507 }
508
AddPendingMove(Location source,Location destination,Primitive::Type type)509 void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
510 Location destination, Primitive::Type type) {
511 pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
512 }
513
DeletePendingMove(MoveOperands * move)514 void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
515 pending_moves_.Delete(move);
516 }
517
GetUnblockedPendingMove(Location loc)518 MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
519 for (size_t i = 0; i < pending_moves_.Size(); ++i) {
520 MoveOperands* move = pending_moves_.Get(i);
521 Location destination = move->GetDestination();
522 // Only moves with destination overlapping with input loc can be unblocked.
523 if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
524 return move;
525 }
526 }
527 return nullptr;
528 }
529
IsBlockedByMoves(Location loc)530 bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
531 for (size_t i = 0; i < pending_moves_.Size(); ++i) {
532 if (pending_moves_.Get(i)->Blocks(loc)) {
533 return true;
534 }
535 }
536 for (size_t i = 0; i < moves_.Size(); ++i) {
537 if (moves_.Get(i)->Blocks(loc)) {
538 return true;
539 }
540 }
541 return false;
542 }
543
544 // So far it is only used for debugging purposes to make sure all pending moves
545 // have been performed.
GetNumberOfPendingMoves()546 size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
547 return pending_moves_.Size();
548 }
549
550 } // namespace art
551