1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
18 
19 #include <memory>
20 #include <string>
21 #include <utility>
22 
23 #include "absl/container/flat_hash_map.h"
24 #include "tensorflow/compiler/xla/service/call_graph.h"
25 #include "tensorflow/compiler/xla/service/hlo.pb.h"
26 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
27 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
28 #include "tensorflow/compiler/xla/service/hlo_module.h"
29 #include "tensorflow/compiler/xla/service/hlo_reachability.h"
30 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
31 #include "tensorflow/compiler/xla/service/hlo_value.h"
32 #include "tensorflow/compiler/xla/types.h"
33 
34 namespace xla {
35 
36 // Base class for describing a partial ordering of HLO instructions. Used to
37 // determine live range overlap of HLO instruction output buffers.
38 class HloOrdering {
39  public:
HloOrdering(const HloModule * module)40   explicit HloOrdering(const HloModule* module)
41       : module_(module), call_graph_(CallGraph::Build(module)) {}
42   virtual ~HloOrdering() = default;
43 
44   // Specify the ordering constraints between a pair of instructions a and b.
45   enum class ExecutionConstraint {
46     // Indicate a and b are the same instruction;
47     kIsSame,
48     // Indicate a runs before b;
49     kRunBefore,
50     // Only one of a or b runs each time their common ancestor is evaluated,
51     // and a is in an earlier branch than b.
52     kRunExclusiveBefore,
53     // Only one of a or b runs each time, and a is in a later branch than b.
54     kRunExclusiveAfter,
55     // Indicate a runs after b
56     kRunAfter,
57     // An order cannot be detrermined as a and b do not have a common ancestor.
58     kUnordered,
59   };
60   // Return the execution constraint between a and b.
61   HloOrdering::ExecutionConstraint GetExecutionConstraint(
62       const HloInstruction* a, const HloInstruction* b) const;
63 
64   // Returns true if instruction 'a' executes before instruction 'b'. This is
65   // not reflexive, that is, an instruction does not execute before itself.
66   bool ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const;
67 
68   // Returns whether the value 'a' is defined before the value 'b' under the
69   // given ordering.
70   bool IsDefinedBefore(const HloValue& a, const HloValue& b) const;
71 
72   // Returns whether the given use is before the given value definition under
73   // the given ordering.
74   bool UsesBeforeValueDefinition(absl::Span<const HloUse* const> uses,
75                                  const HloValue& value,
76                                  const HloDataflowAnalysis& dataflow) const;
77   // Returns whether the given values interfere. Two values interfere if they
78   // may both be simultaneously live.
79   bool MayInterfere(const HloValue& a, const HloValue& b,
80                     const HloDataflowAnalysis& dataflow) const;
81 
82   // Returns true if the live range of the given value 'a' is strictly before
83   // the live range of value 'b' using the given HLO ordering.
84   bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b,
85                                const HloDataflowAnalysis& dataflow) const;
86 
87   // Returns the sequential instruction order for the given computation, or
88   // nullptr if the computation does not have a sequential ordering.
89   virtual const HloInstructionSequence* SequentialOrder(
90       const HloComputation& computation) const = 0;
91 
92   // Return the call graph of the module used to compute ordering.
call_graph()93   const CallGraph& call_graph() const { return *call_graph_; }
94 
95   virtual string ToString() const = 0;
96 
97  protected:
98   // Returns true if instruction 'a' executes before instruction 'b'.
99   // Precondition: 'a' and 'b' are in the same computation.
100   //
101   // Derived classes should implement this method for determining order of
102   // instructions in the same computation. ExecutesBefore() analyzes the
103   // callgraph and uses this method to determine ordering of instructions in
104   // different computations.
105   virtual bool ExecutesBeforeInSameComputation(
106       const HloInstruction* a, const HloInstruction* b) const = 0;
107 
108   const HloModule* module_;
109 
110   std::unique_ptr<CallGraph> call_graph_;
111 };
112 
113 // Base class for partial orderings implemented by a map of predecessors for
114 // each instruction. Subclasses should fill in predecessors_.
115 class PredecessorHloOrdering : public HloOrdering {
116  public:
117   ~PredecessorHloOrdering() override = default;
118 
119   // Returns nullptr indicating the computation does not have a sequential
120   // ordering.
SequentialOrder(const HloComputation & computation)121   const HloInstructionSequence* SequentialOrder(
122       const HloComputation& computation) const override {
123     return nullptr;
124   }
125 
reachability_map(const HloComputation * computation)126   HloReachabilityMap& reachability_map(const HloComputation* computation) {
127     return *predecessors_.at(computation);
128   }
reachability_map(const HloComputation * computation)129   const HloReachabilityMap& reachability_map(
130       const HloComputation* computation) const {
131     return *predecessors_.at(computation);
132   }
133 
134  protected:
135   explicit PredecessorHloOrdering(const HloModule* module);
136   string ToStringHelper(const string& name) const;
137 
138   bool ExecutesBeforeInSameComputation(const HloInstruction* a,
139                                        const HloInstruction* b) const override;
140 
141   // For each computation in the module, this is the set of the instruction's
142   // predecessors. An instruction is an element of its own predecessor set.
143   //
144   // Subclasses should fill this in to define the desired ordering.
145   absl::flat_hash_map<const HloComputation*,
146                       std::unique_ptr<HloReachabilityMap>>
147       predecessors_;
148 };
149 
150 // An HLO ordering based on data dependencies in the HLO graph. In this partial
151 // order, instruction A executes before instruction B only if there is a path
152 // from A to B in the HLO graph. For example, given the following graph:
153 /*
154           param
155          /     \
156       negate   exp
157           \    /
158            add
159 */
160 // DependencyHloOrdering gives the following executes-before relations:
161 //   param executes before negate, exp, and add
162 //   negate executes before add
163 //   exp executes before add
164 //   add executes before nothing
165 // negate and exp are not ordered because the dependencies allow either to
166 // execute before the other (or in parallel). DependencyHloOrdering ordering
167 // allows maximum parallelism and enables any execution order which satisfies
168 // data dependencies. This requires pessimistic assumptions about buffer live
169 // ranges and can result in more memory used than more constrained orderings.
170 class DependencyHloOrdering : public PredecessorHloOrdering {
171  public:
172   explicit DependencyHloOrdering(const HloModule* module);
173   ~DependencyHloOrdering() override = default;
174 
175   string ToString() const override;
176 };
177 
178 // An HLO ordering based on a total order of instructions in each computation.
179 // The computation total order is a sequencing of all of its instructions in
180 // the computation (eg, {inst0, inst1, inst2,...}) as in single-threaded
181 // execution. For example, given the following HLO graph:
182 /*
183           param
184          /     \
185       negate   exp
186           \    /
187            add
188 */
189 // and the following sequence:
190 //
191 //  {param, negate, exp, add}
192 //
193 // SequentialHloOrdering gives the following executes-before relations:
194 //   param executes before negate, exp, and add
195 //   negate executes before exp and add
196 //   exp executes before add
197 //   add executes before nothing
198 // This is more constrained than DependencyHloOrdering in this example because
199 // negate and exp are ordered (negate before exp). This enables param to share
200 // the same buffer as exp (param buffer is dead after exp). Generally, this
201 // ordering enables more buffer sharing (reduced memory usage) because buffer
202 // interference is reduced relative to DependencyHloOrdering.
203 class SequentialHloOrdering : public HloOrdering {
204  public:
205   explicit SequentialHloOrdering(const HloSchedule& schedule);
206   explicit SequentialHloOrdering(HloSchedule&& schedule);
207   ~SequentialHloOrdering() override = default;
208 
209   // Returns the sequential instruction order for the given computation.
210   const HloInstructionSequence* SequentialOrder(
211       const HloComputation& computation) const override;
212 
213   string ToString() const override;
214 
215  protected:
216   void Initialize();
217 
218   bool ExecutesBeforeInSameComputation(const HloInstruction* a,
219                                        const HloInstruction* b) const override;
220 
221   const HloSchedule schedule_;
222 
223   // The position of every instruction in the HLO module in its respective
224   // computation sequence (a value of zero indicates the instruction is first in
225   // the sequence, etc). Instructions from all computations are contained in
226   // this map so more than one instruction may have the same position
227   // value. This is not a problem because ExecutesBefore also verifies
228   // instructions are in the same computation.
229   absl::flat_hash_map<const HloInstruction*, int> order_position_;
230 };
231 
232 }  // namespace xla
233 
234 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
235