1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COMPILER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COMPILER_H_
18 
19 #include <memory>
20 #include <string>
21 #include <vector>
22 
23 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
24 #include "tensorflow/compiler/xla/service/executable.h"
25 #include "tensorflow/compiler/xla/service/gpu/gpu_device_info.h"
26 #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
27 #include "tensorflow/compiler/xla/service/gpu/ir_emitter_context.h"
28 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
29 #include "tensorflow/compiler/xla/service/hlo_module.h"
30 #include "tensorflow/compiler/xla/service/llvm_compiler.h"
31 #include "tensorflow/compiler/xla/statusor.h"
32 #include "tensorflow/compiler/xla/types.h"
33 #include "tensorflow/core/lib/hash/hash.h"
34 #include "tensorflow/core/platform/macros.h"
35 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
36 #include "tensorflow/core/platform/thread_annotations.h"
37 #include "tensorflow/stream_executor/stream_executor_pimpl.h"
38 
39 namespace xla {
40 namespace gpu {
41 
42 // The GPU compiler generates efficient GPU executables.
43 class GpuCompiler : public LLVMCompiler {
44  public:
45   GpuCompiler(se::Platform::Id platform_id, const char* target_triple,
46               const char* data_layout);
~GpuCompiler()47   ~GpuCompiler() override {}
48 
49   // Bring in
50   // StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
51   //     std::vector<std::unique_ptr<HloModule>> modules,
52   //     std::vector<std::vector<se::StreamExecutor*>>
53   //        stream_execs)
54   using LLVMCompiler::Compile;
55 
56   StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
57       std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
58       const CompileOptions& options) override;
59 
60   StatusOr<
61       std::tuple<std::unique_ptr<HloModule>, std::unique_ptr<BufferAssignment>>>
62   RunHloPassesAndBufferAssignement(std::unique_ptr<HloModule> hlo_module,
63                                    se::StreamExecutor* executor, bool optimize,
64                                    const CompileOptions& options) override;
65 
66   Status OptimizeHloModule(HloModule* hlo_module,
67                            se::StreamExecutor* stream_exec,
68                            se::DeviceMemoryAllocator* device_allocator);
69 
70   virtual Status OptimizeHloConvolutionCanonicalization(
71       HloModule* hlo_module, se::StreamExecutor* stream_exec,
72       se::DeviceMemoryAllocator* device_allocator) = 0;
73 
74   virtual Status OptimizeHloPostLayoutAssignment(
75       HloModule* hlo_module, se::StreamExecutor* stream_exec,
76       se::DeviceMemoryAllocator* device_allocator);
77 
GetCanShareBuffer()78   virtual HloDataflowAnalysis::CanShareBuffer GetCanShareBuffer() {
79     return
80         [](const HloInstruction*, const HloInstruction*,
81            const ShapeIndex&) -> absl::optional<bool> { return absl::nullopt; };
82   }
83 
84   virtual GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) = 0;
85 
86   // TODO(timshen): Replace `debug_module` with some portable debug information
87   // that accommodates both HLO and MLIR.
88   virtual StatusOr<std::pair<std::string, std::vector<uint8>>>
89   CompileTargetBinary(const HloModuleConfig& module_config,
90                       llvm::Module* llvm_module, GpuVersion gpu_version,
91                       se::StreamExecutor* stream_exec, bool relocatable,
92                       const HloModule* debug_module) = 0;
93 
94   Status PrepareHloModuleForIrEmitting(HloModule* hlo_module);
95 
96   StatusOr<std::unique_ptr<Executable>> RunBackend(
97       std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
98       const CompileOptions& options) override;
99 
100   StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
101   CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
102                      AotCompilationOptions const& options) override;
103 
104   StatusOr<std::pair<std::string, std::vector<uint8>>> CompileToTargetBinary(
105       const HloModuleConfig& module_config,
106       std::unique_ptr<llvm::Module> llvm_module,
107       se::StreamExecutor* stream_exec, const CompileOptions& options,
108       const HloModule* debug_module);
109 
PlatformId()110   se::Platform::Id PlatformId() const override { return platform_id_; }
111 
ShapeSizeBytesFunction()112   HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override {
113     // Capture just the pointer size, not the entire GpuCompiler object.
114     return [pointer_size = pointer_size_](const Shape& shape) {
115       return GetSizeOfShape(shape, pointer_size);
116     };
117   }
118 
GetSizeOfShape(const Shape & shape,int pointer_size)119   static int64 GetSizeOfShape(const Shape& shape, int pointer_size) {
120     if (shape.is_static() || shape.IsTuple()) {
121       return ShapeUtil::ByteSizeOf(shape, pointer_size);
122     }
123     // Each dynamic dimension size is represented as a S32.
124     int64 metadata_size = sizeof(int32) * shape.dimensions_size();
125     return ShapeUtil::ByteSizeOf(shape, pointer_size) + metadata_size;
126   }
127 
128  private:
LinkModules(se::StreamExecutor * stream_exec,std::vector<std::vector<uint8>> modules)129   virtual StatusOr<std::vector<uint8>> LinkModules(
130       se::StreamExecutor* stream_exec,
131       std::vector<std::vector<uint8>> modules) {
132     return Unimplemented("LinkModules is not implemented.");
133   }
134 
135   se::Platform::Id platform_id_;
136 
137   // The triple that represents our target.
138   const char* target_triple_;
139 
140   // The data layout of the emitted module.
141   const char* data_layout_;
142 
143   // The size in bytes of a pointer. Used by ShapeSizeBytesFunction.
144   const int64 pointer_size_;
145 
146   TF_DISALLOW_COPY_AND_ASSIGN(GpuCompiler);
147 };
148 
149 GpuDeviceInfo GetGpuDeviceInfo(se::StreamExecutor* stream_exec);
150 
151 // Compile `hlo_module` using XLA GPU and return the LLVM module thus generated.
152 // The GpuExecutable (and the Thunks that are part of it) are not returned.
153 StatusOr<std::unique_ptr<llvm::Module>> CompileModuleToLlvmIr(
154     HloModule* hlo_module, llvm::LLVMContext* llvm_context,
155     const std::string& target_triple, const std::string& data_layout,
156     const std::string& platform_name, GpuDeviceInfo gpu_device_info,
157     absl::optional<CudaComputeCapability> cuda_compute_capability,
158     int pointer_size);
159 
160 // Compiles the given LMHLO module to an executable.
161 // ir_emitter_context should be partially populated: buffer_assignment
162 // or buffer_allocations should not be populated, while other fields should be
163 // populated (or left empty if that field is optional).
164 //
165 // NOTE: buffer_assignment will be gone from ir_emitter_context once LMHLO
166 // transition is done.
167 StatusOr<std::unique_ptr<Executable>> CompileLmhloToExecutable(
168     GpuCompiler* compiler, mlir::ModuleOp module, std::string module_name,
169     const HloModuleConfig& module_config,
170     const Compiler::CompileOptions& options,
171     absl::string_view entry_function_name, se::StreamExecutor* stream_exec,
172     std::unique_ptr<llvm::Module> llvm_module,
173     IrEmitterContext* ir_emitter_context);
174 
175 }  // namespace gpu
176 }  // namespace xla
177 
178 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_COMPILER_H_
179