1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <iostream>
17 
18 #include "parallel_move_resolver.h"
19 #include "nodes.h"
20 
21 namespace art {
22 
BuildInitialMoveList(HParallelMove * parallel_move)23 void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
24   // Perform a linear sweep of the moves to add them to the initial list of
25   // moves to perform, ignoring any move that is redundant (the source is
26   // the same as the destination, the destination is ignored and
27   // unallocated, or the move was already eliminated).
28   for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
29     MoveOperands* move = parallel_move->MoveOperandsAt(i);
30     if (!move->IsRedundant()) {
31       moves_.Add(move);
32     }
33   }
34 }
35 
EmitNativeCode(HParallelMove * parallel_move)36 void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
37   DCHECK(moves_.IsEmpty());
38   // Build up a worklist of moves.
39   BuildInitialMoveList(parallel_move);
40 
41   for (size_t i = 0; i < moves_.Size(); ++i) {
42     const MoveOperands& move = *moves_.Get(i);
43     // Skip constants to perform them last.  They don't block other moves
44     // and skipping such moves with register destinations keeps those
45     // registers free for the whole algorithm.
46     if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
47       PerformMove(i);
48     }
49   }
50 
51   // Perform the moves with constant sources.
52   for (size_t i = 0; i < moves_.Size(); ++i) {
53     MoveOperands* move = moves_.Get(i);
54     if (!move->IsEliminated()) {
55       DCHECK(move->GetSource().IsConstant());
56       EmitMove(i);
57       // Eliminate the move, in case following moves need a scratch register.
58       move->Eliminate();
59     }
60   }
61 
62   moves_.Reset();
63 }
64 
LowOf(Location location)65 Location LowOf(Location location) {
66   if (location.IsRegisterPair()) {
67     return Location::RegisterLocation(location.low());
68   } else if (location.IsFpuRegisterPair()) {
69     return Location::FpuRegisterLocation(location.low());
70   } else if (location.IsDoubleStackSlot()) {
71     return Location::StackSlot(location.GetStackIndex());
72   } else {
73     return Location::NoLocation();
74   }
75 }
76 
HighOf(Location location)77 Location HighOf(Location location) {
78   if (location.IsRegisterPair()) {
79     return Location::RegisterLocation(location.high());
80   } else if (location.IsFpuRegisterPair()) {
81     return Location::FpuRegisterLocation(location.high());
82   } else if (location.IsDoubleStackSlot()) {
83     return Location::StackSlot(location.GetHighStackIndex(4));
84   } else {
85     return Location::NoLocation();
86   }
87 }
88 
89 // Update the source of `move`, knowing that `updated_location` has been swapped
90 // with `new_source`. Note that `updated_location` can be a pair, therefore if
91 // `move` is non-pair, we need to extract which register to use.
UpdateSourceOf(MoveOperands * move,Location updated_location,Location new_source)92 static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
93   Location source = move->GetSource();
94   if (LowOf(updated_location).Equals(source)) {
95     move->SetSource(LowOf(new_source));
96   } else if (HighOf(updated_location).Equals(source)) {
97     move->SetSource(HighOf(new_source));
98   } else {
99     DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
100     move->SetSource(new_source);
101   }
102 }
103 
// Performs the move at `index` and deletes it from the move graph, first
// recursively performing any move that blocks it. Returns nullptr when the
// move is done, or the move that still needs to be swapped when a 64-bit
// move inside a cycle must swap before this one; callers unwind the
// recursion back to that move.
MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph.  We first recursively perform any move blocking this one.  We
  // mark a move as "pending" on entry to PerformMove in order to detect
  // cycles in the move graph.  We use operand swaps to resolve cycles,
  // which means that a call to PerformMove could change any source operand
  // in the move graph.

  MoveOperands* move = moves_.Get(index);
  DCHECK(!move->IsPending());
  if (move->IsRedundant()) {
    // Because we swap register pairs first, following, un-pending
    // moves may become redundant.
    move->Eliminate();
    return nullptr;
  }

  // Clear this move's destination to indicate a pending move.  The actual
  // destination is saved in a stack-allocated local.  Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies.  Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  MoveOperands* required_swap = nullptr;
  for (size_t i = 0; i < moves_.Size(); ++i) {
    const MoveOperands& other_move = *moves_.Get(i);
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      // Though PerformMove can change any source operand in the move graph,
      // calling `PerformMove` cannot create a blocking move via a swap
      // (this loop does not miss any).
      // For example, assume there is a non-blocking move with source A
      // and this move is blocked on source B and there is a swap of A and
      // B.  Then A and B must be involved in the same cycle (or they would
      // not be swapped).  Since this move's destination is B and there is
      // only a single incoming edge to an operand, this move must also be
      // involved in the same cycle.  In that case, the blocking move will
      // be created but will be "pending" when we return from PerformMove.
      required_swap = PerformMove(i);

      if (required_swap == move) {
        // If this move is required to swap, we do so without looking
        // at the next moves. Swapping is not blocked by anything, it just
        // updates the other moves' sources.
        break;
      } else if (required_swap == moves_.Get(i)) {
        // If `other_move` was swapped, we iterate again to find a new
        // potential cycle.
        required_swap = nullptr;
        i = 0;
      } else if (required_swap != nullptr) {
        // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
        moves_.Get(index)->ClearPending(destination);
        return required_swap;
      }
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // This move's source may have changed due to swaps to resolve cycles and
  // so it may now be the last move in the cycle.  If so remove it.
  if (move->GetSource().Equals(destination)) {
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }

  // The move may be blocked on a (at most one) pending move, in which case
  // we have a cycle.  Search for such a blocking move and perform a swap to
  // resolve it.
  bool do_swap = false;
  if (required_swap != nullptr) {
    DCHECK_EQ(required_swap, move);
    do_swap = true;
  } else {
    for (size_t i = 0; i < moves_.Size(); ++i) {
      const MoveOperands& other_move = *moves_.Get(i);
      if (other_move.Blocks(destination)) {
        DCHECK(other_move.IsPending());
        if (!move->Is64BitMove() && other_move.Is64BitMove()) {
          // We swap 64bits moves before swapping 32bits moves. Go back from the
          // cycle by returning the move that must be swapped.
          return moves_.Get(i);
        }
        do_swap = true;
        break;
      }
    }
  }

  if (do_swap) {
    EmitSwap(index);
    // Any unperformed (including pending) move with a source of either
    // this move's source or destination needs to have their source
    // changed to reflect the state of affairs after the swap.
    Location source = move->GetSource();
    Location swap_destination = move->GetDestination();
    move->Eliminate();
    for (size_t i = 0; i < moves_.Size(); ++i) {
      const MoveOperands& other_move = *moves_.Get(i);
      if (other_move.Blocks(source)) {
        UpdateSourceOf(moves_.Get(i), source, swap_destination);
      } else if (other_move.Blocks(swap_destination)) {
        UpdateSourceOf(moves_.Get(i), swap_destination, source);
      }
    }
    // If the swap was required because of a 64bits move in the middle of a cycle,
    // we return the swapped move, so that the caller knows it needs to re-iterate
    // its dependency loop.
    return required_swap;
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }
}
229 
IsScratchLocation(Location loc)230 bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
231   for (size_t i = 0; i < moves_.Size(); ++i) {
232     if (moves_.Get(i)->Blocks(loc)) {
233       return false;
234     }
235   }
236 
237   for (size_t i = 0; i < moves_.Size(); ++i) {
238     if (moves_.Get(i)->GetDestination().Equals(loc)) {
239       return true;
240     }
241   }
242 
243   return false;
244 }
245 
AllocateScratchRegister(int blocked,int register_count,int if_scratch,bool * spilled)246 int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
247                                                           int register_count,
248                                                           int if_scratch,
249                                                           bool* spilled) {
250   DCHECK_NE(blocked, if_scratch);
251   int scratch = -1;
252   for (int reg = 0; reg < register_count; ++reg) {
253     if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
254       scratch = reg;
255       break;
256     }
257   }
258 
259   if (scratch == -1) {
260     *spilled = true;
261     scratch = if_scratch;
262   } else {
263     *spilled = false;
264   }
265 
266   return scratch;
267 }
268 
269 
ScratchRegisterScope(ParallelMoveResolverWithSwap * resolver,int blocked,int if_scratch,int number_of_registers)270 ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
271     ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
272     : resolver_(resolver),
273       reg_(kNoRegister),
274       spilled_(false) {
275   reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
276 
277   if (spilled_) {
278     resolver->SpillScratch(reg_);
279   }
280 }
281 
282 
// Restore the original value of the scratch register if the constructor
// had to spill it.
ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
  if (spilled_) {
    resolver_->RestoreScratch(reg_);
  }
}
288 
// Emit native code for all moves of `parallel_move` without using swaps:
// cycles are broken via scratch locations and a pending-move list instead.
void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
  DCHECK(moves_.IsEmpty());
  DCHECK(scratches_.IsEmpty());

  // Backend dependent initialization.
  PrepareForEmitNativeCode();

  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  for (size_t i = 0; i < moves_.Size(); ++i) {
    const MoveOperands& move = *moves_.Get(i);
    // Skip constants to perform them last. They don't block other moves and
    // skipping such moves with register destinations keeps those registers
    // free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources and register destinations with UpdateMoveSource()
  // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit
  // from changing the constant sources to stack locations.
  for (size_t i = 0; i < moves_.Size(); ++i) {
    MoveOperands* move = moves_.Get(i);
    Location destination = move->GetDestination();
    if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
      Location source = move->GetSource();
      EmitMove(i);
      move->Eliminate();
      // This may introduce additional instruction dependency, but reduce number
      // of moves and possible literal loads. For example,
      // Original moves:
      //   1234.5678 -> D0
      //   1234.5678 -> D1
      // Updated moves:
      //   1234.5678 -> D0
      //   D0 -> D1
      UpdateMoveSource(source, destination);
    }
  }

  // Perform the rest of the moves (constant sources with stack destinations).
  for (size_t i = 0; i < moves_.Size(); ++i) {
    MoveOperands* move = moves_.Get(i);
    if (!move->IsEliminated()) {
      EmitMove(i);
      move->Eliminate();
    }
  }

  // All pending moves that we have added for resolve cycles should be performed.
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);

  // Backend dependent cleanup.
  FinishEmitNativeCode();

  moves_.Reset();
  scratches_.Reset();
}
350 
GetScratchLocation(Location::Kind kind)351 Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
352   for (size_t i = 0; i < scratches_.Size(); ++i) {
353     Location loc = scratches_.Get(i);
354     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
355       return loc;
356     }
357   }
358   for (size_t i = 0; i < moves_.Size(); ++i) {
359     Location loc = moves_.Get(i)->GetDestination();
360     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
361       return loc;
362     }
363   }
364   return Location::NoLocation();
365 }
366 
// Register `loc` as a scratch location. A location must not be added twice.
void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
  if (kIsDebugBuild) {
    // Duplicate scratch entries would confuse RemoveScratchLocation, which
    // deletes only the first match.
    for (size_t i = 0; i < scratches_.Size(); ++i) {
      DCHECK(!loc.Equals(scratches_.Get(i)));
    }
  }
  scratches_.Add(loc);
}
375 
RemoveScratchLocation(Location loc)376 void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
377   DCHECK(!IsBlockedByMoves(loc));
378   for (size_t i = 0; i < scratches_.Size(); ++i) {
379     if (loc.Equals(scratches_.Get(i))) {
380       scratches_.DeleteAt(i);
381       break;
382     }
383   }
384 }
385 
// Performs the move at `index` (and, recursively, any move blocking it) and
// removes it from the move graph. Cycles are resolved by routing one move
// through a scratch location and queueing the final hop on pending_moves_.
void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We mark
  // a move as "pending" on entry to PerformMove in order to detect cycles
  // in the move graph. We use scratch location to resolve cycles, also
  // additional pending moves might be added. After move has been performed,
  // we will update source operand in the move graph to reduce dependencies in
  // the graph.

  MoveOperands* move = moves_.Get(index);
  DCHECK(!move->IsPending());
  DCHECK(!move->IsEliminated());
  if (move->IsRedundant()) {
    // Previous operations on the list of moves have caused this particular move
    // to become a no-op, so we can safely eliminate it. Consider for example
    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
    // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
    // used as the scratch location, the move (1 -> 2) will occur while resolving
    // the cycle. When that move is emitted, the code will update moves with a '1'
    // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
    // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
    // eliminated here.
    move->Eliminate();
    return;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  for (size_t i = 0; i < moves_.Size(); ++i) {
    const MoveOperands& other_move = *moves_.Get(i);
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      PerformMove(i);
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // No one else should write to the move destination when it is pending.
  DCHECK(!move->IsRedundant());

  Location source = move->GetSource();
  // The move may be blocked on several pending moves, in case we have a cycle.
  if (IsBlockedByMoves(destination)) {
    // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to following
    // sequence:
    // (C -> scratch)     # Emit right now.
    // (A -> B) (B -> C)  # Unblocked.
    // (scratch -> A)     # Add to pending_moves_, blocked by (A -> B).
    Location::Kind kind = source.GetKind();
    DCHECK_NE(kind, Location::kConstant);
    Location scratch = AllocateScratchLocationFor(kind);
    // We only care about the move size.
    Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
    // Perform (C -> scratch)
    move->SetDestination(scratch);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, scratch);
    // Add (scratch -> A).
    AddPendingMove(scratch, destination, type);
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, destination);
  }

  // Moves in the pending list should not block any other moves. But performing
  // unblocked moves in the pending list can free scratch registers, so we do this
  // as early as possible.
  MoveOperands* pending_move;
  while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
    Location pending_source = pending_move->GetSource();
    Location pending_destination = pending_move->GetDestination();
    // We do not depend on the pending move index. So just delete the move instead
    // of eliminating it to make the pending list cleaner.
    DeletePendingMove(pending_move);
    // Reuse this (already eliminated) slot to emit the pending move.
    move->SetSource(pending_source);
    move->SetDestination(pending_destination);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(pending_source, pending_destination);
    // Free any unblocked locations in the scratch location list.
    for (size_t i = 0; i < scratches_.Size(); ++i) {
      Location scratch = scratches_.Get(i);
      // Only scratch overlapping with performed move source can be unblocked.
      if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
        FreeScratchLocation(pending_source);
      }
    }
  }
}
488 
UpdateMoveSource(Location from,Location to)489 void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
490   // This function is used to reduce the dependencies in the graph after
491   // (from -> to) has been performed. Since we ensure there is no move with the same
492   // destination, (to -> X) can not be blocked while (from -> X) might still be
493   // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
494   // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
495   // a dependency between the two. If we update the source location from 1 to 2, we
496   // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
497   //
498   // This is not something we must do, but we can use fewer scratch locations with
499   // this trick. For example, we can avoid using additional scratch locations for
500   // moves (0 -> 1), (1 -> 2), (1 -> 0).
501   for (size_t i = 0; i < moves_.Size(); ++i) {
502     MoveOperands* move = moves_.Get(i);
503     if (move->GetSource().Equals(from)) {
504       move->SetSource(to);
505     }
506   }
507 }
508 
// Queue a (source -> destination) move of the given type on the pending
// list; it is emitted later, once its destination is no longer read.
void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
    Location destination, Primitive::Type type) {
  pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
}
513 
// Remove `move` from the pending list (rather than eliminating it in
// place), keeping the list free of dead entries.
void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
  pending_moves_.Delete(move);
}
517 
GetUnblockedPendingMove(Location loc)518 MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
519   for (size_t i = 0; i < pending_moves_.Size(); ++i) {
520     MoveOperands* move = pending_moves_.Get(i);
521     Location destination = move->GetDestination();
522     // Only moves with destination overlapping with input loc can be unblocked.
523     if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
524       return move;
525     }
526   }
527   return nullptr;
528 }
529 
// Whether `loc` is still read by any move — pending or regular — and thus
// must not be clobbered yet.
bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
  for (size_t i = 0; i < pending_moves_.Size(); ++i) {
    if (pending_moves_.Get(i)->Blocks(loc)) {
      return true;
    }
  }
  for (size_t i = 0; i < moves_.Size(); ++i) {
    if (moves_.Get(i)->Blocks(loc)) {
      return true;
    }
  }
  return false;
}
543 
// So far it is only used for debugging purposes to make sure all pending moves
// have been performed.
size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
  return pending_moves_.Size();
}
549 
550 }  // namespace art
551