1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MEMORY_SCHEDULER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MEMORY_SCHEDULER_H_
18 
#include <functional>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "tensorflow/compiler/xla/service/hlo_alias_analysis.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/hlo_ordering.h"
#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
#include "tensorflow/compiler/xla/service/hlo_schedule.h"
#include "tensorflow/compiler/xla/service/logical_buffer.h"
#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
32 
33 namespace xla {
34 
35 // A memory scheduler computes an execution sequence for the HLO instructions in
36 // 'computation' that minimizes peak memory, given a points-to analysis result
37 // that describes buffer aliasing, together with a target-specific size function
38 // that maps a tensor's logical size to its padded size. peak_memory (may be
39 // nullptr) is set to the peak memory of the resulting schedule according to the
40 // HeapSimulator.
41 //
42 // TODO(yunxing): Cleanup usage of TuplePointsToAnalysis.
43 typedef std::function<StatusOr<HloInstructionSequence>(
44     HloComputation*, const TuplePointsToAnalysis&, const HloAliasAnalysis&,
45     const LogicalBuffer::SizeFunction&,
46     const absl::flat_hash_map<const HloComputation*, int64>&,
47     /*peak_memory*/ int64*)>
48     MemorySchedulerAlgorithm;
49 
50 // Scheduler for the entire module.
51 typedef std::function<StatusOr<HloSchedule>(
52     HloModule*, const TuplePointsToAnalysis&, const HloAliasAnalysis&,
53     const LogicalBuffer::SizeFunction&,
54     /*peak_memory*/ int64*)>
55     ModuleSchedulerAlgorithm;
56 
// Lift a computation scheduler into a module scheduler by calling the
// computation scheduler on all computations in a module. The returned callable
// matches the ModuleSchedulerAlgorithm signature.
ModuleSchedulerAlgorithm ComputationSchedulerToModuleScheduler(
    const MemorySchedulerAlgorithm&);
61 
// List scheduler: a greedy list-scheduling heuristic aimed at reducing peak
// memory. Parameters follow the MemorySchedulerAlgorithm contract;
// memory_by_computation maps called computations to their memory usage, and
// peak_memory (may be nullptr) receives the HeapSimulator peak for the
// resulting schedule.
StatusOr<HloInstructionSequence> ListMemoryScheduler(
    HloComputation* computation,
    const TuplePointsToAnalysis& points_to_analysis,
    const HloAliasAnalysis& alias_analysis,
    const LogicalBuffer::SizeFunction& size_function,
    const absl::flat_hash_map<const HloComputation*, int64>&
        memory_by_computation,
    int64* peak_memory);
71 
// DFS-order scheduler: emits instructions in a depth-first traversal order.
// Parameters follow the MemorySchedulerAlgorithm contract; peak_memory (may be
// nullptr) receives the HeapSimulator peak for the resulting schedule.
StatusOr<HloInstructionSequence> DFSMemoryScheduler(
    HloComputation* computation,
    const TuplePointsToAnalysis& points_to_analysis,
    const HloAliasAnalysis& alias_analysis,
    const LogicalBuffer::SizeFunction& size_function,
    const absl::flat_hash_map<const HloComputation*, int64>&
        memory_by_computation,
    int64* peak_memory);
81 
// Naive post-order scheduler: emits instructions in simple post order, with no
// attempt to optimize memory. Parameters follow the MemorySchedulerAlgorithm
// contract; peak_memory (may be nullptr) receives the HeapSimulator peak for
// the resulting schedule.
StatusOr<HloInstructionSequence> PostOrderMemoryScheduler(
    HloComputation* computation,
    const TuplePointsToAnalysis& points_to_analysis,
    const HloAliasAnalysis& alias_analysis,
    const LogicalBuffer::SizeFunction& size_function,
    const absl::flat_hash_map<const HloComputation*, int64>&
        memory_by_computation,
    int64* peak_memory);
91 
// The default scheduling algorithm. Runs the list scheduler, the DFS scheduler,
// and the post-order scheduler and chooses whichever returns a lower min-
// memory, not accounting for fragmentation. peak_memory (may be nullptr) is set
// to the peak memory of the resulting schedule according to the HeapSimulator.
StatusOr<HloInstructionSequence> DefaultMemoryScheduler(
    HloComputation* computation,
    const TuplePointsToAnalysis& points_to_analysis,
    const HloAliasAnalysis& alias_analysis,
    const LogicalBuffer::SizeFunction& size_function,
    const absl::flat_hash_map<const HloComputation*, int64>&
        memory_by_computation,
    int64* peak_memory);
104 
// Module-level counterpart of DefaultMemoryScheduler: produces an HloSchedule
// for the whole module. peak_memory (may be nullptr) receives the HeapSimulator
// peak of the resulting schedule.
StatusOr<HloSchedule> DefaultModuleScheduler(
    HloModule* module, const TuplePointsToAnalysis& points_to_analysis,
    const HloAliasAnalysis& alias_analysis,
    const LogicalBuffer::SizeFunction& size_function, int64* peak_memory);
109 
// Returns an HloSchedule which seeks to minimize the memory required for the
// module. size_function is the function returning the number of bytes required
// for a LogicalBuffer. algorithm defaults to an empty std::function, in which
// case a default scheduler is used. peak_memory (if not nullptr) is set to the
// largest peak memory (according to the HeapSimulator) of all computations in
// the module.
StatusOr<HloSchedule> ScheduleModule(
    HloModule* module, const LogicalBuffer::SizeFunction& size_function,
    const ModuleSchedulerAlgorithm& algorithm = {},
    int64* peak_memory = nullptr);
118 
// Computes the schedule for a single computation, using size_function to map a
// LogicalBuffer to its byte size.
// Currently only used by the GPU backend.
StatusOr<HloInstructionSequence> ScheduleComputation(
    HloComputation* computation,
    const LogicalBuffer::SizeFunction& size_function);
124 
// A pass which schedules the HLO instructions in a module. The HloModule's
// schedule field is set to the resulting HloSchedule using
// HloModule::set_schedule.
class HloMemoryScheduler : public HloModulePass {
 public:
  // size_function is the function returning the number of bytes required for a
  // LogicalBuffer. algorithm is the memory scheduling algorithm to use. If not
  // specified, then DefaultMemoryScheduler is used.
  HloMemoryScheduler(const LogicalBuffer::SizeFunction& size_function,
                     const ModuleSchedulerAlgorithm& algorithm = {});

  ~HloMemoryScheduler() override = default;

  absl::string_view name() const override { return "hlo-memory-scheduler"; }

  // Computes a schedule for 'module' and attaches it to the module.
  StatusOr<bool> Run(HloModule* module) override;

 private:
  // Maps a LogicalBuffer to the number of bytes it requires.
  LogicalBuffer::SizeFunction size_function_;

  // Scheduling algorithm; empty means DefaultMemoryScheduler is used (see
  // constructor comment).
  ModuleSchedulerAlgorithm algorithm_;
};
147 
// A pass which produces a naive, but correct schedule. The schedule is produced
// using a DFS traversal of the graph with no attempt to minimize memory use.
class HloTrivialScheduler : public HloModulePass {
 public:
  absl::string_view name() const override { return "hlo-trivial-scheduler"; }

  // Attaches the trivially computed schedule to 'module'.
  StatusOr<bool> Run(HloModule* module) override;
};
156 
// A trivial pass which clears the schedule currently set on the
// HloModule. After this pass runs HloModule::has_schedule will return false.
class HloDescheduler : public HloModulePass {
 public:
  HloDescheduler() = default;
  ~HloDescheduler() override = default;
  absl::string_view name() const override { return "hlo-descheduler"; }

  // Removes any schedule set on 'module'.
  StatusOr<bool> Run(HloModule* module) override;
};
167 
168 }  // namespace xla
169 
170 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MEMORY_SCHEDULER_H_
171