1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_
18 
19 #include <functional>
20 #include <string>
21 #include <utility>
22 
23 #include "absl/types/span.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Value.h"
26 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h"
27 #include "tensorflow/compiler/xla/service/hlo_computation.h"
28 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
29 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
30 #include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h"
31 #include "tensorflow/compiler/xla/statusor.h"
32 #include "tensorflow/compiler/xla/types.h"
33 #include "tensorflow/compiler/xla/xla_data.pb.h"
34 
35 namespace xla {
36 namespace gpu {
37 
38 class GpuElementalIrEmitter : public ElementalIrEmitter {
39  public:
40   // A NestedComputer computes an element of the output of the given computation
41   // given a Span of its input elements.
42   using NestedComputer = std::function<StatusOr<llvm::Value*>(
43       const HloComputation&, absl::Span<llvm::Value* const>)>;
44 
45   GpuElementalIrEmitter(const HloModuleConfig& hlo_module_config,
46                         llvm::Module* module, llvm::IRBuilder<>* b,
47                         NestedComputer compute_nested);
48 
49   llvm_ir::ElementGenerator MakeElementGenerator(
50       const HloInstruction* hlo,
51       const HloToElementGeneratorMap& operand_to_generator) override;
52 
53  protected:
54   StatusOr<llvm::Value*> EmitFloatBinaryOp(const HloInstruction* op,
55                                            llvm::Value* lhs_value,
56                                            llvm::Value* rhs_value) override;
57 
58   StatusOr<llvm::Value*> EmitErfcInv(PrimitiveType prim_type,
59                                      llvm::Value* value) override;
60 
61   StatusOr<llvm::Value*> EmitLog(PrimitiveType prim_type,
62                                  llvm::Value* value) override;
63 
64   StatusOr<llvm::Value*> EmitLog1p(PrimitiveType prim_type,
65                                    llvm::Value* value) override;
66 
67   StatusOr<llvm::Value*> EmitSin(PrimitiveType prim_type,
68                                  llvm::Value* value) override;
69 
70   StatusOr<llvm::Value*> EmitCos(PrimitiveType prim_type,
71                                  llvm::Value* value) override;
72 
73   StatusOr<llvm::Value*> EmitExp(PrimitiveType prim_type,
74                                  llvm::Value* value) override;
75 
76   StatusOr<llvm::Value*> EmitExpm1(PrimitiveType prim_type,
77                                    llvm::Value* value) override;
78 
79   StatusOr<llvm::Value*> EmitSqrt(PrimitiveType prim_type,
80                                   llvm::Value* value) override;
81 
82   StatusOr<llvm::Value*> EmitRsqrt(PrimitiveType prim_type,
83                                    llvm::Value* value) override;
84 
85   StatusOr<llvm::Value*> EmitPow(PrimitiveType prim_type, llvm::Value* lhs,
86                                  llvm::Value* rhs) override;
87 
88   StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs,
89                                    llvm::Value* rhs) override;
90 
91   StatusOr<llvm::Value*> EmitTanh(PrimitiveType prim_type,
92                                   llvm::Value* value) override;
93 
94   StatusOr<llvm::Value*> EmitRoundNearestAfz(PrimitiveType prim_type,
95                                              llvm::Value* value) override;
96 
97   llvm::Value* EmitThreadId() override;
98 
99  private:
100   // Emits IR for op, which must have opcode kPower.
101   StatusOr<llvm::Value*> EmitPowerOp(const HloInstruction* op,
102                                      llvm::Value* lhs_value,
103                                      llvm::Value* rhs_value);
104 
105   // Emits IR to call a device function named "callee_name" on the given
106   // operand. Returns the IR value that represents the return value.
107   llvm::Value* EmitDeviceFunctionCall(
108       const string& callee_name, absl::Span<llvm::Value* const> operands,
109       absl::Span<const PrimitiveType> input_type, PrimitiveType output_type,
110       absl::Span<const llvm::Attribute::AttrKind> attributes);
111 
112   // Emits IR to call an LLVM intrinsic of type [T] -> T.  Adjusts
113   // callee_name according to T.  Returns the IR value that represents the
114   // return value of the function.
115   StatusOr<llvm::Value*> EmitLlvmIntrinsicMathCall(
116       const string& callee_name, absl::Span<llvm::Value* const> operands,
117       absl::Span<const PrimitiveType> input_types, PrimitiveType output_type);
118 
119   // Emits IR to call a libdevice function of type [T] -> T.  Adjusts
120   // callee_name according to T.  Returns the IR value that represents the
121   // return value of the function.
122   StatusOr<llvm::Value*> EmitLibdeviceMathCall(
123       const string& callee_name, absl::Span<llvm::Value* const> operands,
124       absl::Span<const PrimitiveType> input_types, PrimitiveType output_type);
125 
126   // Emits IR to call a function of type [T] -> T.  Does not munge callee_name.
127   // Returns the IR value that represents the return value of the function.
128   StatusOr<llvm::Value*> EmitMathCall(
129       const string& callee_name, absl::Span<llvm::Value* const> operands,
130       absl::Span<const PrimitiveType> input_types, PrimitiveType output_type);
131 
132   const HloModuleConfig& hlo_module_config_;
133   NestedComputer compute_nested_;
134 };
135 
136 }  // namespace gpu
137 }  // namespace xla
138 
139 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_
140