// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/memory-optimizer.h"

#include "src/compiler/js-graph.h"
#include "src/compiler/linkage.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"

namespace v8 {
namespace internal {
namespace compiler {

MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone)
    : jsgraph_(jsgraph),
      empty_state_(AllocationState::Empty(zone)),
      pending_(zone),
      tokens_(zone),
      zone_(zone) {}

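// Walks the effect chains starting at the graph's Start node, threading an
// AllocationState through every effect edge. EffectPhis are handled as
// pending merges (see EnqueueMerge below); all other effectful nodes are
// processed from the {tokens_} worklist until it is empty.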
void MemoryOptimizer::Optimize() {
  EnqueueUses(graph()->start(), empty_state());
  while (!tokens_.empty()) {
    Token const token = tokens_.front();
    tokens_.pop();
    VisitNode(token.node, token.state);
  }
  DCHECK(pending_.empty());
  DCHECK(tokens_.empty());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(nullptr) {
  node_ids_.insert(node->id());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Node* size, Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(size) {
  node_ids_.insert(node->id());
}

void MemoryOptimizer::AllocationGroup::Add(Node* node) {
  node_ids_.insert(node->id());
}

bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const {
  return node_ids_.find(node->id()) != node_ids_.end();
}

MemoryOptimizer::AllocationState::AllocationState()
    : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group)
    : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group,
                                                  int size, Node* top)
    : group_(group), size_(size), top_(top) {}

bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const {
  return group() && group()->IsNewSpaceAllocation();
}

void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) {
  DCHECK(!node->IsDead());
  DCHECK_LT(0, node->op()->EffectInputCount());
  switch (node->opcode()) {
    case IrOpcode::kAllocate:
      return VisitAllocate(node, state);
    case IrOpcode::kCall:
      return VisitCall(node, state);
    case IrOpcode::kLoadElement:
      return VisitLoadElement(node, state);
    case IrOpcode::kLoadField:
      return VisitLoadField(node, state);
    case IrOpcode::kStoreElement:
      return VisitStoreElement(node, state);
    case IrOpcode::kStoreField:
      return VisitStoreField(node, state);
    case IrOpcode::kCheckedLoad:
    case IrOpcode::kCheckedStore:
    case IrOpcode::kDeoptimizeIf:
    case IrOpcode::kDeoptimizeUnless:
    case IrOpcode::kIfException:
    case IrOpcode::kLoad:
    case IrOpcode::kStore:
    case IrOpcode::kRetain:
    case IrOpcode::kUnsafePointerAdd:
      return VisitOtherEffect(node, state);
    default:
      break;
  }
  DCHECK_EQ(0, node->op()->EffectOutputCount());
}

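// Lowers an Allocate node to raw bump-pointer allocation. If the requested
// size is a small compile-time constant and the incoming {state} has an open
// allocation group with matching pretenuring, the allocation is folded into
// that group by widening the group's reservation; otherwise a fresh group is
// started, with a call to the Allocate stub as slow path when the
// bump-pointer check fails.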
void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kAllocate, node->opcode());
  Node* value;
  Node* size = node->InputAt(0);
  Node* effect = node->InputAt(1);
  Node* control = node->InputAt(2);
  PretenureFlag pretenure = PretenureFlagOf(node->op());

  // Propagate tenuring from outer allocations to inner allocations, i.e.
  // when we allocate an object in old space and store a newly allocated
  // child object into the pretenured object, then the newly allocated
  // child object also should get pretenured to old space.
  if (pretenure == TENURED) {
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 0) {
        Node* const child = user->InputAt(1);
        if (child->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(child->op()) == NOT_TENURED) {
          NodeProperties::ChangeOp(child, node->op());
          break;
        }
      }
    }
  } else {
    DCHECK_EQ(NOT_TENURED, pretenure);
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 1) {
        Node* const parent = user->InputAt(0);
        if (parent->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(parent->op()) == TENURED) {
          pretenure = TENURED;
          break;
        }
      }
    }
  }

  // Determine the top/limit addresses.
  Node* top_address = jsgraph()->ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_top_address(isolate())
          : ExternalReference::old_space_allocation_top_address(isolate()));
  Node* limit_address = jsgraph()->ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_limit_address(isolate())
          : ExternalReference::old_space_allocation_limit_address(isolate()));

  // Check if we can fold this allocation into a previous allocation
  // represented by the incoming {state}.
  Int32Matcher m(size);
  if (m.HasValue() && m.Value() < kMaxRegularHeapObjectSize) {
    int32_t const object_size = m.Value();
    if (state->size() <= kMaxRegularHeapObjectSize - object_size &&
        state->group()->pretenure() == pretenure) {
      // We can fold this Allocate {node} into the allocation {group}
      // represented by the given {state}. Compute the upper bound for
      // the new {state}.
      int32_t const state_size = state->size() + object_size;

      // Update the reservation check to the actual maximum upper bound.
      AllocationGroup* const group = state->group();
      if (OpParameter<int32_t>(group->size()) < state_size) {
        NodeProperties::ChangeOp(group->size(),
                                 common()->Int32Constant(state_size));
      }

      // Update the allocation top with the new object allocation.
      // TODO(bmeurer): Defer writing back top as much as possible.
      Node* top = graph()->NewNode(machine()->IntAdd(), state->top(),
                                   jsgraph()->IntPtrConstant(object_size));
      effect = graph()->NewNode(
          machine()->Store(StoreRepresentation(
              MachineType::PointerRepresentation(), kNoWriteBarrier)),
          top_address, jsgraph()->IntPtrConstant(0), top, effect, control);

      // Compute the effective inner allocated address.
      value = graph()->NewNode(
          machine()->BitcastWordToTagged(),
          graph()->NewNode(machine()->IntAdd(), state->top(),
                           jsgraph()->IntPtrConstant(kHeapObjectTag)));

      // Extend the allocation {group}.
      group->Add(value);
      state = AllocationState::Open(group, state_size, top, zone());
    } else {
      // Setup a mutable reservation size node; will be patched as we fold
      // additional allocations into this new group.
      Node* size = graph()->NewNode(common()->Int32Constant(object_size));

      // Load allocation top and limit.
      Node* top = effect =
          graph()->NewNode(machine()->Load(MachineType::Pointer()),
                           top_address, jsgraph()->IntPtrConstant(0), effect,
                           control);
      Node* limit = effect = graph()->NewNode(
          machine()->Load(MachineType::Pointer()), limit_address,
          jsgraph()->IntPtrConstant(0), effect, control);

      // Check if we need to collect garbage before we can start bump pointer
      // allocation (always done for folded allocations).
      Node* check = graph()->NewNode(
          machine()->UintLessThan(),
          graph()->NewNode(
              machine()->IntAdd(), top,
              machine()->Is64()
                  ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size)
                  : size),
          limit);
      Node* branch =
          graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control);

      Node* if_true = graph()->NewNode(common()->IfTrue(), branch);
      Node* etrue = effect;
      Node* vtrue = top;

      Node* if_false = graph()->NewNode(common()->IfFalse(), branch);
      Node* efalse = effect;
      Node* vfalse;
      {
        Node* target = pretenure == NOT_TENURED
                           ? jsgraph()->AllocateInNewSpaceStubConstant()
                           : jsgraph()->AllocateInOldSpaceStubConstant();
        if (!allocate_operator_.is_set()) {
          CallDescriptor* descriptor =
              Linkage::GetAllocateCallDescriptor(graph()->zone());
          allocate_operator_.set(common()->Call(descriptor));
        }
        vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target,
                                           size, efalse, if_false);
        vfalse = graph()->NewNode(machine()->IntSub(), vfalse,
                                  jsgraph()->IntPtrConstant(kHeapObjectTag));
      }

      control = graph()->NewNode(common()->Merge(2), if_true, if_false);
      effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control);
      value = graph()->NewNode(
          common()->Phi(MachineType::PointerRepresentation(), 2), vtrue,
          vfalse, control);

      // Compute the new top and write it back.
      top = graph()->NewNode(machine()->IntAdd(), value,
                             jsgraph()->IntPtrConstant(object_size));
      effect = graph()->NewNode(
          machine()->Store(StoreRepresentation(
              MachineType::PointerRepresentation(), kNoWriteBarrier)),
          top_address, jsgraph()->IntPtrConstant(0), top, effect, control);

      // Compute the initial object address.
      value = graph()->NewNode(
          machine()->BitcastWordToTagged(),
          graph()->NewNode(machine()->IntAdd(), value,
                           jsgraph()->IntPtrConstant(kHeapObjectTag)));

      // Start a new allocation group.
      AllocationGroup* group =
          new (zone()) AllocationGroup(value, pretenure, size, zone());
      state = AllocationState::Open(group, object_size, top, zone());
    }
  } else {
    // Load allocation top and limit.
    Node* top = effect =
        graph()->NewNode(machine()->Load(MachineType::Pointer()), top_address,
                         jsgraph()->IntPtrConstant(0), effect, control);
    Node* limit = effect =
        graph()->NewNode(machine()->Load(MachineType::Pointer()),
                         limit_address, jsgraph()->IntPtrConstant(0), effect,
                         control);

    // Compute the new top.
    Node* new_top = graph()->NewNode(
        machine()->IntAdd(), top,
        machine()->Is64()
            ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size)
            : size);

    // Check if we can do bump pointer allocation here.
    Node* check = graph()->NewNode(machine()->UintLessThan(), new_top, limit);
    Node* branch =
        graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control);

    Node* if_true = graph()->NewNode(common()->IfTrue(), branch);
    Node* etrue = effect;
    Node* vtrue;
    {
      etrue = graph()->NewNode(
          machine()->Store(StoreRepresentation(
              MachineType::PointerRepresentation(), kNoWriteBarrier)),
          top_address, jsgraph()->IntPtrConstant(0), new_top, etrue, if_true);
      vtrue = graph()->NewNode(
          machine()->BitcastWordToTagged(),
          graph()->NewNode(machine()->IntAdd(), top,
                           jsgraph()->IntPtrConstant(kHeapObjectTag)));
    }

    Node* if_false = graph()->NewNode(common()->IfFalse(), branch);
    Node* efalse = effect;
    Node* vfalse;
    {
      Node* target = pretenure == NOT_TENURED
                         ? jsgraph()->AllocateInNewSpaceStubConstant()
                         : jsgraph()->AllocateInOldSpaceStubConstant();
      if (!allocate_operator_.is_set()) {
        CallDescriptor* descriptor =
            Linkage::GetAllocateCallDescriptor(graph()->zone());
        allocate_operator_.set(common()->Call(descriptor));
      }
      vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target,
                                         size, efalse, if_false);
    }

    control = graph()->NewNode(common()->Merge(2), if_true, if_false);
    effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control);
    value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTaggedPointer, 2), vtrue, vfalse,
        control);

    // Create an unfoldable allocation group.
    AllocationGroup* group =
        new (zone()) AllocationGroup(value, pretenure, zone());
    state = AllocationState::Closed(group, zone());
  }

  // Replace all effect uses of {node} with the {effect}, enqueue the
  // effect uses for further processing, and replace all value uses of
  // {node} with the {value}.
  for (Edge edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
      edge.UpdateTo(effect);
    } else {
      DCHECK(NodeProperties::IsValueEdge(edge));
      edge.UpdateTo(value);
    }
  }

  // Kill the {node} to make sure we don't leave dangling dead uses.
  node->Kill();
}

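// A call that is not marked kNoAllocate may allocate (and thus trigger a GC
// that moves the allocation top), so any cached allocation state is dropped
// and the call's effect uses continue from the empty state.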
void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCall, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadElement(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* index = node->InputAt(1);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreElement(Node* node,
                                        AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* object = node->InputAt(0);
  Node* index = node->InputAt(1);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreField(Node* node,
                                      AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* object = node->InputAt(0);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitOtherEffect(Node* node,
                                       AllocationState const* state) {
  EnqueueUses(node, state);
}

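// Turns the key of an element access into a byte offset from the object's
// untagged base address: the key is scaled by the element size and the header
// size (minus the tag) is added; on 64-bit targets the key is first
// zero-extended to a full machine word.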
Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) {
  Node* index;
  if (machine()->Is64()) {
    // On 64-bit platforms, we need to feed a Word64 index to the Load and
    // Store operators. Since LoadElement or StoreElement don't do any bounds
    // checking themselves, we can be sure that the {key} was already checked
    // and is in valid range, so we can do the further address computation on
    // Word64 below, which ideally allows us to fuse the address computation
    // with the actual memory access operation on Intel platforms.
    index = graph()->NewNode(machine()->ChangeUint32ToUint64(), key);
  } else {
    index = key;
  }
  int const element_size_shift =
      ElementSizeLog2Of(access.machine_type.representation());
  if (element_size_shift) {
    index = graph()->NewNode(machine()->WordShl(), index,
                             jsgraph()->IntPtrConstant(element_size_shift));
  }
  int const fixed_offset = access.header_size - access.tag();
  if (fixed_offset) {
    index = graph()->NewNode(machine()->IntAdd(), index,
                             jsgraph()->IntPtrConstant(fixed_offset));
  }
  return index;
}

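// The write barrier can be elided when the store target is known to be part
// of the current new-space allocation group: such an object was just
// allocated in new space, so stores into it do not need to be recorded.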
WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind(
    Node* object, AllocationState const* state,
    WriteBarrierKind write_barrier_kind) {
  if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) {
    write_barrier_kind = kNoWriteBarrier;
  }
  return write_barrier_kind;
}

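// Merges the allocation states arriving at an EffectPhi: identical states are
// kept as-is, states from the same group collapse to a closed state for that
// group, and unrelated states fall back to the empty state.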
MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates(
    AllocationStates const& states) {
  // Check if all states are the same; or at least if all allocation
  // states belong to the same allocation group.
  AllocationState const* state = states.front();
  AllocationGroup* group = state->group();
  for (size_t i = 1; i < states.size(); ++i) {
    if (states[i] != state) state = nullptr;
    if (states[i]->group() != group) group = nullptr;
  }
  if (state == nullptr) {
    if (group != nullptr) {
      // We cannot fold any more allocations into this group, but we can still
      // eliminate write barriers on stores to this group.
      // TODO(bmeurer): We could potentially just create a Phi here to merge
      // the various tops; but we need to pay special attention not to create
      // an unschedulable graph.
      state = AllocationState::Closed(group, zone());
    } else {
      // The states are from different allocation groups.
      state = empty_state();
    }
  }
  return state;
}

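// Records the {state} that arrives at effect input {index} of the EffectPhi
// {node}. For loops, the uses are enqueued with the empty state as soon as
// the loop-entry input (index 0) is seen; back-edge states are ignored. For
// plain merges, the uses are only enqueued once a state has arrived on every
// effect input, at which point the collected states are merged.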
void MemoryOptimizer::EnqueueMerge(Node* node, int index,
                                   AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode());
  int const input_count = node->InputCount() - 1;
  DCHECK_LT(0, input_count);
  Node* const control = node->InputAt(input_count);
  if (control->opcode() == IrOpcode::kLoop) {
    // For loops we always start with an empty state at the beginning.
    if (index == 0) EnqueueUses(node, empty_state());
  } else {
    DCHECK_EQ(IrOpcode::kMerge, control->opcode());
    // Check if we already know about this pending merge.
    NodeId const id = node->id();
    auto it = pending_.find(id);
    if (it == pending_.end()) {
      // Insert a new pending merge.
      it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first;
    }
    // Add the next input state.
    it->second.push_back(state);
    // Check if states for all inputs are available by now.
    if (it->second.size() == static_cast<size_t>(input_count)) {
      // All inputs to this effect merge are done, merge the states given all
      // input constraints, drop the pending merge and enqueue uses of the
      // EffectPhi {node}.
      state = MergeStates(it->second);
      EnqueueUses(node, state);
      pending_.erase(it);
    }
  }
}

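// Enqueues all effect uses of {node} for processing with the given {state};
// EffectPhi uses are routed through EnqueueMerge, everything else goes onto
// the {tokens_} worklist.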
void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) {
  for (Edge const edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
    }
  }
}

void MemoryOptimizer::EnqueueUse(Node* node, int index,
                                 AllocationState const* state) {
  if (node->opcode() == IrOpcode::kEffectPhi) {
    // An EffectPhi represents a merge of different effect chains, which
    // needs special handling depending on whether the merge is part of a
    // loop or just a normal control join.
    EnqueueMerge(node, index, state);
  } else {
    Token token = {node, state};
    tokens_.push(token);
  }
}

Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); }

Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); }

CommonOperatorBuilder* MemoryOptimizer::common() const {
  return jsgraph()->common();
}

MachineOperatorBuilder* MemoryOptimizer::machine() const {
  return jsgraph()->machine();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8