1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_
17 #define TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_
18 
#include <functional>
#include <string>
#include <utility>

#include "tensorflow/compiler/xla/types.h"
22 
// The classes below are only forward declared so that ExecutableRunOptions can
// be linked into an XLA-compiled binary without having to link all of the
// pointed-to objects (e.g., for an ahead-of-time compiled CPU binary, the gpu
// tools don't need to be linked).
namespace stream_executor {
class Stream;
class Platform;
class DeviceMemoryAllocator;
}  // namespace stream_executor
32 
// Forward declaration only: this header refers to the Eigen thread pool device
// exclusively through pointers, so the full Eigen definition is not required.
namespace Eigen {
struct ThreadPoolDevice;
}  // namespace Eigen
36 
37 namespace xla {
38 
// Forward declarations: ExecutableRunOptions refers to these types only through
// pointers, so their full definitions are not needed in this header.
class DeviceAssignment;
class ExecutionProfile;
namespace gpu {
class GpuExecutableRunOptions;
}  // namespace gpu
44 
45 // A unique identifier for a particular "logical execution" of an XLA model.
46 //
47 // A logical execution might encompass multiple executions of one or more
48 // HloModules.  Runs that are part of the same logical execution can
49 // communicate via collective ops (e.g. kAllToAll), whereas runs that are part
50 // of different logical executions are isolated.
51 class RunId {
52  public:
53   // Creates a new, unique RunId.
54   RunId();
RunId(int64 value)55   explicit RunId(int64 value) : data_(value) {}
56 
57   RunId(const RunId&) = default;
58   RunId& operator=(const RunId&) = default;
59   friend bool operator==(const RunId& a, const RunId& b);
60   std::string ToString() const;
61   int64 ToInt() const;
62 
63   template <typename H>
AbslHashValue(H h,const RunId & id)64   friend H AbslHashValue(H h, const RunId& id) {
65     return H::combine(std::move(h), id.data_);
66   }
67 
68  private:
69   int64 data_;
70 };
71 
72 // Callback used by the GPU backend only. This is an "one-sided" version of
73 // ThenDoHostCallback that enqueues a callback onto a stream. The difference
74 // with ThenDoHostCallback is that the device does not block waiting for the
75 // callback to complete; instead the callback is scheduled by the runtime.
76 // This functionality must be provided by the caller, and hence is provided in
77 // callback form.
78 using ThenExecuteFunction =
79     std::function<void(stream_executor::Stream*, std::function<void()>)>;
80 
81 // Class containing options for running a LocalExecutable.
82 class ExecutableRunOptions {
83  public:
84   // Specifies the allocator to use during execution.
85   ExecutableRunOptions& set_allocator(
86       stream_executor::DeviceMemoryAllocator* allocator);
87   stream_executor::DeviceMemoryAllocator* allocator() const;
88 
89   // If set, this is the device to run the computation on. Valid device_ordinal
90   // values are: 0 to # of devices - 1. These values are identical to the device
91   // ordinal values used by StreamExecutor. The device must be of the same type
92   // as the executable was compiled for. A value of -1 indicates this option has
93   // not been set.
94   ExecutableRunOptions& set_device_ordinal(int device_ordinal);
95   int device_ordinal() const;
96 
97   // If set, this is the stream to run the computation on. The platform of the
98   // stream must match the platform the executable was built for.  A value of
99   // nullptr indicates the option has not been set.
100   ExecutableRunOptions& set_stream(stream_executor::Stream* stream);
101   stream_executor::Stream* stream() const;
102 
103   // If set, this is the stream to perform any pre-computation transfers on.
104   // The platform of the stream must match the platform the executable was
105   // built for.  A value of nullptr indicates the option has not been set.
106   ExecutableRunOptions& set_host_to_device_stream(
107       stream_executor::Stream* stream);
108   stream_executor::Stream* host_to_device_stream() const;
109 
110   // Sets the thread pool device on which to run Eigen subcomputations.
111   //
112   // This field must be set for XLA:CPU models that call Eigen routines, but may
113   // be null otherwise.  Routines that use this field should always CHECK (or
114   // TF_RET_CHECK) that it's not null before dereferencing it, so that users get
115   // a clean crash rather than a segfault.
116   //
117   // Does not take ownership.
118   ExecutableRunOptions& set_intra_op_thread_pool(
119       const Eigen::ThreadPoolDevice* intra_op_thread_pool);
120   const Eigen::ThreadPoolDevice* intra_op_thread_pool() const;
121 
122   // If set, profiling information is written to 'profile'.
123   ExecutionProfile* execution_profile() const;
124   ExecutableRunOptions& set_execution_profile(ExecutionProfile* profile);
125 
126   ExecutableRunOptions& set_device_assignment(
127       const DeviceAssignment* device_assignment);
128   const DeviceAssignment* device_assignment() const;
129 
130   ExecutableRunOptions& set_rng_seed(int rng_seed);
131   int rng_seed() const;
132 
set_launch_id(int32 launch_id)133   ExecutableRunOptions& set_launch_id(int32 launch_id) {
134     launch_id_ = launch_id;
135     return *this;
136   }
137 
launch_id()138   int32 launch_id() const { return launch_id_; }
139 
140   ExecutableRunOptions& set_run_id(RunId id);
141   RunId run_id() const;
142 
143   // See documentation on ThenExecuteFunction.
set_then_execute_function(ThenExecuteFunction * f)144   ExecutableRunOptions& set_then_execute_function(ThenExecuteFunction* f) {
145     then_execute_function_ = f;
146     return *this;
147   }
then_execute_function()148   ThenExecuteFunction* then_execute_function() const {
149     return then_execute_function_;
150   }
151 
152   // GPU-backend specific options. These are kept out-of-line to avoid bloating
153   // the size of this dependency for CPU-only AOT builds.
154   ExecutableRunOptions& set_gpu_executable_run_options(
155       const gpu::GpuExecutableRunOptions* gpu_executable_run_options);
156   const gpu::GpuExecutableRunOptions* gpu_executable_run_options() const;
157 
158  private:
159   stream_executor::DeviceMemoryAllocator* allocator_ = nullptr;
160   int device_ordinal_ = -1;
161   const DeviceAssignment* device_assignment_ = nullptr;
162   stream_executor::Stream* stream_ = nullptr;
163   const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr;
164   ExecutionProfile* execution_profile_ = nullptr;
165   int rng_seed_ = 0;
166   int32 launch_id_ = 0;
167   stream_executor::Stream* host_to_device_stream_ = nullptr;
168   ThenExecuteFunction* then_execute_function_ = nullptr;
169   RunId run_id_;
170   const gpu::GpuExecutableRunOptions* gpu_executable_run_options_ = nullptr;
171 };
172 
173 }  // namespace xla
174 
175 #endif  // TENSORFLOW_COMPILER_XLA_EXECUTABLE_RUN_OPTIONS_H_
176