1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h"
17 
18 #include <map>
19 
20 #include "absl/container/flat_hash_set.h"
21 #include "llvm/IR/MDBuilder.h"
22 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
23 #include "tensorflow/compiler/xla/service/logical_buffer.h"
24 #include "tensorflow/compiler/xla/types.h"
25 
26 namespace xla {
27 namespace llvm_ir {
28 
29 // Sentry allocation used to represent parameters of the entry computation in
30 // alias_scope_metadata_ and noalias_metadata_.
31 static const BufferAllocation* kParameterAllocation = new BufferAllocation(
32     /*index=*/-1, /*size=*/0, LogicalBuffer::Color(0));
33 
AddAliasingInformationToIrArray(const HloInstruction & hlo,llvm_ir::IrArray * array,const ShapeIndex & index)34 void AliasAnalysis::AddAliasingInformationToIrArray(const HloInstruction& hlo,
35                                                     llvm_ir::IrArray* array,
36                                                     const ShapeIndex& index) {
37   BufferAllocation::Slice buffer_slice;
38   if (hlo.opcode() == HloOpcode::kParameter &&
39       hlo.parent() == module_.entry_computation()) {
40     // Entry computation parameters may alias with each other but may not alias
41     // with our temporary buffers.
42     buffer_slice = BufferAllocation::Slice(kParameterAllocation, 0, 0);
43   } else {
44     auto unique_slice = assignment_.GetUniqueSlice(&hlo, index);
45     if (!unique_slice.ok()) {
46       // Skip HLOs which don't have a buffer assigned or for which the
47       // buffer can't be determined statically. We cannot determine their
48       // aliasing properties in these cases.
49       return;
50     }
51     buffer_slice = unique_slice.ValueOrDie();
52   }
53 
54   if (module_.config().debug_options().xla_llvm_enable_alias_scope_metadata()) {
55     llvm::MDNode*& alias_scope_md = alias_scope_metadata_[buffer_slice];
56     if (alias_scope_md == nullptr) {
57       alias_scope_md =
58           GetAliasScopeMetadataForBuffer(buffer_slice, GetAliasDomain());
59     }
60     if (alias_scope_md != nullptr) {
61       array->AddAliasScopeMetadata(alias_scope_md);
62     }
63   }
64 
65   if (module_.config().debug_options().xla_llvm_enable_noalias_metadata()) {
66     llvm::MDNode*& noalias_md = noalias_metadata_[{buffer_slice, &hlo}];
67     if (noalias_md == nullptr) {
68       noalias_md = GetNoaliasMetadataForBuffer(buffer_slice, GetAliasDomain(),
69                                                assignment_, hlo);
70     }
71     if (noalias_md != nullptr) {
72       array->AddNoaliasMetadata(noalias_md);
73     }
74   }
75 
76   if (module_.config()
77           .debug_options()
78           .xla_llvm_enable_invariant_load_metadata()) {
79     // Parameters of the entry computation are never stored to, loading from a
80     // parameter pointer should always return the same result within a loop.
81     if (hlo.opcode() == HloOpcode::kParameter &&
82         hlo.parent() == module_.entry_computation()) {
83       array->MarkInvariantOverWholeProgram(context_);
84     }
85   }
86 }
87 
GetAliasDomain()88 llvm::MDNode* AliasAnalysis::GetAliasDomain() {
89   llvm::MDBuilder metadata_builder(*context_);
90   if (alias_domain_ == nullptr) {
91     // We use createAliasScopeDomain rather than createAnonymousAliasScopeDomain
92     // so that when functions get inlined, we continue using the one domain,
93     // rather than duplicating it (and thus having two AA domains in one
94     // function).
95     //
96     // A side-effect of this is that if you ever compile two HLO modules in the
97     // same LLVM module, they'll have the same alias scope domain.  This isn't a
98     // problem because the two HLO modules will never interact with one another.
99     alias_domain_ =
100         metadata_builder.createAliasScopeDomain("XLA global AA domain");
101   }
102   return alias_domain_;
103 }
104 
GetAliasScopeMetadataForBuffer(const BufferAllocation::Slice & buffer_slice,llvm::MDNode * domain)105 llvm::MDNode* AliasAnalysis::GetAliasScopeMetadataForBuffer(
106     const BufferAllocation::Slice& buffer_slice, llvm::MDNode* domain) {
107   // While we could synthesize an alias.scope, doing so is not more profitable
108   // than LLVM's default behavior.
109   if (buffer_slice.allocation() == kParameterAllocation) {
110     return nullptr;
111   }
112 
113   llvm::MDBuilder metadata_builder(domain->getContext());
114   llvm::MDNode* scope = metadata_builder.createAliasScope(
115       "buffer: " + buffer_slice.ToString(), domain);
116   llvm::MDNode* scope_list = llvm::MDNode::get(domain->getContext(), scope);
117   return scope_list;
118 }
119 
GetNoaliasMetadataForBuffer(const BufferAllocation::Slice & buffer_slice,llvm::MDNode * domain,const BufferAssignment & assignment,const HloInstruction & hlo)120 llvm::MDNode* AliasAnalysis::GetNoaliasMetadataForBuffer(
121     const BufferAllocation::Slice& buffer_slice, llvm::MDNode* domain,
122     const BufferAssignment& assignment, const HloInstruction& hlo) {
123   // We want to construct a list of buffers which:
124   //
125   // 1. Do not alias the given buffer.
126   // 2. Will plausibly be used in the vicinity of the given buffer.
127   //
128   // Making the noalias set overly large will result in either a massive
129   // slowdown in LLVM or LLVM will just ignore the noalias set.
130   //
131   // A plausible list of instructions are:
132   // 1. Users of the given hlo.
133   // 2. Operands of users of the given hlo.
134   // 3. Operands of the given hlo.
135   //
136   // This set can be increased as we need.
137   std::vector<const HloValue*> worklist;
138   absl::flat_hash_set<const HloInstruction*> added_to_worklist;
139   auto add_buffers_to_worklist =
140       [&](const HloInstruction* instruction) {
141         // Buffers of parameters cannot be added to the noalias set.
142         if (instruction->opcode() == HloOpcode::kParameter) {
143           return;
144         }
145         if (added_to_worklist.contains(instruction)) {
146           return;
147         }
148         added_to_worklist.insert(instruction);
149         ShapeUtil::ForEachSubshape(
150             instruction->shape(),
151             [&](const Shape& /*shape*/, const ShapeIndex& index) {
152               for (const HloValue* buffer :
153                    assignment.GetSourceBuffers(instruction, index)) {
154                 if (assignment.HasAllocation(*buffer)) {
155                   worklist.push_back(buffer);
156                 }
157               }
158             });
159       };
160 
161   for (HloInstruction* user : hlo.users()) {
162     add_buffers_to_worklist(user);
163     for (HloInstruction* operand : user->operands()) {
164       add_buffers_to_worklist(operand);
165     }
166   }
167 
168   add_buffers_to_worklist(&hlo);
169   for (HloInstruction* operand : hlo.operands()) {
170     add_buffers_to_worklist(operand);
171   }
172 
173   std::set<BufferAllocation::Slice> buffers;
174   for (const HloValue* buffer : worklist) {
175     const BufferAllocation::Slice noalias_slice =
176         assignment.GetAssignedAllocation(*buffer).GetSlice(*buffer);
177     // Our buffer must not overlap with the noalias slice.
178     if (!buffer_slice.OverlapsWith(noalias_slice)) {
179       buffers.insert(noalias_slice);
180       // Some instructions have too many operands, causing the noalias set to be
181       // too large. To reduce compilation time (b/31901575), truncate noalias
182       // sets to at most 500 elements.
183       //
184       // Future work: improvements to LLVM's scoped AA that avoid creating a
185       // MDNode set for every alias query can help to reduce the compilation
186       // time as well.
187       constexpr int kMaxNoAliasSetSize = 500;
188       if (buffers.size() >= kMaxNoAliasSetSize) {
189         break;
190       }
191     }
192   }
193 
194   // Don't bother constructing a noalias metadata node if it would be empty.
195   if (buffers.empty()) {
196     return nullptr;
197   }
198 
199   llvm::MDBuilder metadata_builder(domain->getContext());
200   std::vector<llvm::Metadata*> scopes;
201   for (const BufferAllocation::Slice noalias_slice : buffers) {
202     llvm::MDNode* scope = metadata_builder.createAliasScope(
203         "buffer: " + noalias_slice.ToString(), domain);
204     scopes.push_back(scope);
205   }
206   llvm::MDNode* noalias_list =
207       llvm::MDNode::get(domain->getContext(), AsArrayRef(scopes));
208   return noalias_list;
209 }
210 
211 }  // namespace llvm_ir
212 }  // namespace xla
213