1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
18 
19 #include <memory>
20 #include <string>
21 #include <utility>
22 
23 #include "absl/container/flat_hash_map.h"
24 #include "tensorflow/compiler/xla/service/call_graph.h"
25 #include "tensorflow/compiler/xla/service/hlo.pb.h"
26 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
27 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
28 #include "tensorflow/compiler/xla/service/hlo_module.h"
29 #include "tensorflow/compiler/xla/service/hlo_reachability.h"
30 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
31 #include "tensorflow/compiler/xla/service/hlo_value.h"
32 #include "tensorflow/compiler/xla/types.h"
33 
34 namespace xla {
35 
36 // Base class for describing a partial ordering of HLO instructions. Used to
37 // determine live range overlap of HLO instruction output buffers.
38 class HloOrdering {
39  public:
HloOrdering(const HloModule * module)40   HloOrdering(const HloModule* module)
41       : module_(module), call_graph_(CallGraph::Build(module)) {}
42   virtual ~HloOrdering() = default;
43 
44   // Returns true if instruction 'a' executes before instruction 'b'. This is
45   // not reflexive, that is, an instruction does not execute before itself.
46   bool ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const;
47 
48   // Returns whether the value 'a' is defined before the value 'b' under the
49   // given ordering.
50   bool IsDefinedBefore(const HloValue& a, const HloValue& b) const;
51 
52   // Returns whether the given use is before the given value definition under
53   // the given ordering.
54   bool UseIsBeforeValueDefinition(const HloUse& use, const HloValue& value,
55                                   const HloDataflowAnalysis& dataflow) const;
56   // Returns whether the given values interfere. Two values interfere if they
57   // may both be simultaneously live.
58   bool MayInterfere(const HloValue& a, const HloValue& b,
59                     const HloDataflowAnalysis& dataflow) const;
60 
61   // Returns true if the live range of the given value 'a' is strictly before
62   // the live range of value 'b' using the given HLO ordering.
63   bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b,
64                                const HloDataflowAnalysis& dataflow) const;
65 
66   // Returns the sequential instruction order for the given computation, or
67   // nullptr if the computation does not have a sequential ordering.
68   virtual const HloInstructionSequence* SequentialOrder(
69       const HloComputation& computation) const = 0;
70 
71   // Return the call graph of the module used to compute ordering.
call_graph()72   const CallGraph& call_graph() const { return *call_graph_; }
73 
74   virtual string ToString() const = 0;
75 
76  protected:
77   // Returns true if instruction 'a' executes before instruction 'b'.
78   // Precondition: 'a' and 'b' are in the same computation.
79   //
80   // Derived classes should implement this method for determining order of
81   // instructions in the same computation. ExecutesBefore() analyzes the
82   // callgraph and uses this method to determine ordering of instructions in
83   // different computations.
84   virtual bool ExecutesBeforeInSameComputation(
85       const HloInstruction* a, const HloInstruction* b) const = 0;
86 
87   const HloModule* module_;
88 
89   std::unique_ptr<CallGraph> call_graph_;
90 };
91 
92 // Base class for partial orderings implemented by a map of predecessors for
93 // each instruction. Subclasses should fill in predecessors_.
94 class PredecessorHloOrdering : public HloOrdering {
95  public:
96   ~PredecessorHloOrdering() override = default;
97 
98   // Returns nullptr indicating the computation does not have a sequential
99   // ordering.
SequentialOrder(const HloComputation & computation)100   const HloInstructionSequence* SequentialOrder(
101       const HloComputation& computation) const override {
102     return nullptr;
103   }
104 
reachability_map(const HloComputation * computation)105   HloReachabilityMap& reachability_map(const HloComputation* computation) {
106     return *predecessors_.at(computation);
107   }
reachability_map(const HloComputation * computation)108   const HloReachabilityMap& reachability_map(
109       const HloComputation* computation) const {
110     return *predecessors_.at(computation);
111   }
112 
113  protected:
114   explicit PredecessorHloOrdering(const HloModule* module);
115   string ToStringHelper(const string& name) const;
116 
117   bool ExecutesBeforeInSameComputation(const HloInstruction* a,
118                                        const HloInstruction* b) const override;
119 
120   // For each computation in the module, this is the set of the instruction's
121   // predecessors. An instruction is an element of its own predecessor set.
122   //
123   // Subclasses should fill this in to define the desired ordering.
124   absl::flat_hash_map<const HloComputation*,
125                       std::unique_ptr<HloReachabilityMap>>
126       predecessors_;
127 };
128 
129 // An HLO ordering based on data dependencies in the HLO graph. In this partial
130 // order, instruction A executes before instruction B only if there is a path
131 // from A to B in the HLO graph. For example, given the following graph:
132 /*
133           param
134          /     \
135       negate   exp
136           \    /
137            add
138 */
139 // DependencyHloOrdering gives the following executes-before relations:
140 //   param executes before negate, exp, and add
141 //   negate executes before add
142 //   exp executes before add
143 //   add executes before nothing
144 // negate and exp are not ordered because the dependencies allow either to
145 // execute before the other (or in parallel). DependencyHloOrdering ordering
146 // allows maximum parallelism and enables any execution order which satisfies
147 // data dependencies. This requires pessimistic assumptions about buffer live
148 // ranges and can result in more memory used than more constrained orderings.
149 class DependencyHloOrdering : public PredecessorHloOrdering {
150  public:
151   explicit DependencyHloOrdering(const HloModule* module);
152   ~DependencyHloOrdering() override = default;
153 
154   string ToString() const override;
155 };
156 
157 // An HLO ordering based on a total order of instructions in each computation.
158 // The computation total order is a sequencing of all of its instructions in
159 // the computation (eg, {inst0, inst1, inst2,...}) as in single-threaded
160 // execution. For example, given the following HLO graph:
161 /*
162           param
163          /     \
164       negate   exp
165           \    /
166            add
167 */
168 // and the following sequence:
169 //
170 //  {param, negate, exp, add}
171 //
172 // SequentialHloOrdering gives the following executes-before relations:
173 //   param executes before negate, exp, and add
174 //   negate executes before exp and add
175 //   exp executes before add
176 //   add executes before nothing
177 // This is more constrained than DependencyHloOrdering in this example because
178 // negate and exp are ordered (negate before exp). This enables param to share
179 // the same buffer as exp (param buffer is dead after exp). Generally, this
180 // ordering enables more buffer sharing (reduced memory usage) because buffer
181 // interference is reduced relative to DependencyHloOrdering.
182 class SequentialHloOrdering : public HloOrdering {
183  public:
184   SequentialHloOrdering(const HloSchedule& schedule);
185   SequentialHloOrdering(HloSchedule&& schedule);
186   ~SequentialHloOrdering() override = default;
187 
188   // Returns the sequential instruction order for the given computation.
189   const HloInstructionSequence* SequentialOrder(
190       const HloComputation& computation) const override;
191 
192   string ToString() const override;
193 
194  protected:
195   void Initialize();
196 
197   bool ExecutesBeforeInSameComputation(const HloInstruction* a,
198                                        const HloInstruction* b) const override;
199 
200   const HloSchedule schedule_;
201 
202   // The position of every instruction in the HLO module in its respective
203   // computation sequence (a value of zero indicates the instruction is first in
204   // the sequence, etc). Instructions from all computations are contained in
205   // this map so more than one instruction may have the same position
206   // value. This is not a problem because ExecutesBefore also verifies
207   // instructions are in the same computation.
208   absl::flat_hash_map<const HloInstruction*, int> order_position_;
209 };
210 
211 }  // namespace xla
212 
213 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
214