1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_CLIENT_EXECUTABLE_BUILD_OPTIONS_H_
17 #define TENSORFLOW_COMPILER_XLA_CLIENT_EXECUTABLE_BUILD_OPTIONS_H_
18 
19 #include "absl/strings/string_view.h"
20 #include "absl/types/optional.h"
21 #include "tensorflow/compiler/xla/service/computation_placer.h"
22 #include "tensorflow/compiler/xla/shape.h"
23 #include "tensorflow/compiler/xla/util.h"
24 #include "tensorflow/compiler/xla/xla.pb.h"
25 #include "tensorflow/compiler/xla/xla_data.pb.h"
26 #include "tensorflow/core/platform/threadpool.h"
27 
namespace stream_executor {

// Forward-declared to avoid StreamExecutor dependency. This header only refers
// to the type through pointers (the device allocator accessors and member of
// ExecutableBuildOptions), so the full definition is not needed here.
// NOTE(review): the class below spells the type as se::DeviceMemoryAllocator;
// the `se` alias is presumably declared in one of the included headers --
// confirm.
class DeviceMemoryAllocator;

}  // namespace stream_executor
34 
35 namespace xla {
36 
37 // Class containing options for building an LocalExecutable with
38 // LocalClient::Compile.
39 class ExecutableBuildOptions {
40  public:
41   // If set, this is the device to build the computation for. Valid
42   // device_ordinal values are: 0 to # of devices - 1. These values are
43   // identical to the device ordinal values used by StreamExecutor. The built
44   // executable will be executable on any device equivalent to the specified
45   // device as determined by Backend::devices_equivalent(). A value of -1
46   // indicates this option has not been set.
47   ExecutableBuildOptions& set_device_ordinal(int device_ordinal);
48   int device_ordinal() const;
49 
50   // If set, this specifies the layout of the result of the computation. If not
51   // set, the service will chose the layout of the result. A Shape is used to
52   // store the layout to accommodate tuple result shapes. A value of nullptr
53   // indicates the option has not been set.
54   ExecutableBuildOptions& set_result_layout(const Shape& shape_with_layout);
55   const Shape* result_layout() const;
56 
57   // Expose access to the XLA debug options which will be passed to the
58   // compilation process.
has_debug_options()59   bool has_debug_options() const { return debug_options_.has_value(); }
debug_options()60   const DebugOptions& debug_options() const { return *debug_options_; }
61   DebugOptions* mutable_debug_options();
62 
63   // If set, this specifies an allocator that can be used to allocate temporary
64   // space on the device during compilation.  For example, the compiler might
65   // want to run various algorithms on the device and pick the fastest one -- it
66   // might allocate buffers for use by these algorithms using this allocator.
67   //
68   // This does not need to be the same as the se::DeviceMemoryAllocator passed
69   // when running the executable.
70   ExecutableBuildOptions& set_device_allocator(
71       se::DeviceMemoryAllocator* allocator);
72   se::DeviceMemoryAllocator* device_allocator() const;
73 
74   // Returns a string representation of the build options, suitable for
75   // debugging.
76   string ToString() const;
77 
78   // The number of replicas of this computation that are to be executed.
79   // Defaults to 1.
num_replicas()80   int num_replicas() const { return num_replicas_; }
81   ExecutableBuildOptions& set_num_replicas(int num_replicas);
82 
83   // The number of partitions in this computation. Defaults to 1.
num_partitions()84   int num_partitions() const { return num_partitions_; }
85   ExecutableBuildOptions& set_num_partitions(int num_partitions);
86 
87   // Indicates whether to use SPMD (true) or MPMD (false) partitioning when
88   // num_partitions > 1 and XLA is requested to partition the input program.
use_spmd_partitioning()89   bool use_spmd_partitioning() const { return use_spmd_partitioning_; }
90   ExecutableBuildOptions& set_use_spmd_partitioning(bool use_spmd_partitioning);
91 
deduplicate_hlo()92   bool deduplicate_hlo() const { return deduplicate_hlo_; }
93   ExecutableBuildOptions& set_deduplicate_hlo(bool deduplicate_hlo);
94 
broadcast_replicated_params()95   bool broadcast_replicated_params() const {
96     return broadcast_replicated_params_;
97   }
98   ExecutableBuildOptions& set_broadcast_replicated_params(
99       bool broadcast_replicated_params);
100 
101   // If set, this specifies a static device assignment for the computation.
102   // Otherwise, the computation will be compiled generically and can be run with
103   // any device assignment compatible with the computation's replica and
104   // partition counts.
has_device_assignment()105   bool has_device_assignment() const { return device_assignment_.has_value(); }
106   ExecutableBuildOptions& set_device_assignment(
107       const DeviceAssignment& device_assignment);
device_assignment()108   const DeviceAssignment& device_assignment() const {
109     CHECK(device_assignment_.has_value());
110     return device_assignment_.value();
111   }
112 
113   // Whether input and output buffers are aliased if the associated parameter is
114   // passed-through XLA modules without being changed.
alias_passthrough_params()115   bool alias_passthrough_params() const { return alias_passthrough_params_; }
set_alias_passthrough_params(bool alias_passthrough_params)116   void set_alias_passthrough_params(bool alias_passthrough_params) {
117     alias_passthrough_params_ = alias_passthrough_params;
118   }
119 
run_backend_only()120   bool run_backend_only() const { return run_backend_only_; }
121   // By default, XLA builds an executable by invoking standard compilation, i.e,
122   // running Compiler::Compile, or both Compiler::RunHloPasses and
123   // Compiler::RunBackend. When run_backend_only is set to true, XLA builds an
124   // executable by invoking only RunBackend and skip invoking RunHloPasses,
125   // which can be used to compile post-optimizations HLO modules.
set_run_backend_only(bool run_backend_only)126   ExecutableBuildOptions& set_run_backend_only(bool run_backend_only) {
127     run_backend_only_ = run_backend_only;
128     return *this;
129   }
130 
131   // Thread pool for parallel compilation.
compile_thread_pool()132   tensorflow::thread::ThreadPool* compile_thread_pool() const {
133     return compile_thread_pool_;
134   }
set_compile_thread_pool(tensorflow::thread::ThreadPool * compile_thread_pool)135   ExecutableBuildOptions& set_compile_thread_pool(
136       tensorflow::thread::ThreadPool* compile_thread_pool) {
137     compile_thread_pool_ = compile_thread_pool;
138     return *this;
139   }
140 
141  private:
142   int device_ordinal_ = -1;
143   Shape result_layout_;
144   bool result_layout_set_ = false;
145   absl::optional<DebugOptions> debug_options_;
146   se::DeviceMemoryAllocator* device_allocator_ = nullptr;
147   int num_replicas_ = 1;
148   int num_partitions_ = 1;
149   bool use_spmd_partitioning_ = false;
150   bool deduplicate_hlo_ = false;
151   bool broadcast_replicated_params_ = false;
152   absl::optional<DeviceAssignment> device_assignment_;
153   bool alias_passthrough_params_ = false;
154   bool run_backend_only_ = false;
155   tensorflow::thread::ThreadPool* compile_thread_pool_ = nullptr;
156 };
157 
// Creates an ExecutionOptions based on a given ExecutableBuildOptions and
// ProgramShape. Only the declaration lives in this header.
//
// build_options: the build options to translate into ExecutionOptions.
// program_shape: the ProgramShape, passed by pointer.
// NOTE(review): whether program_shape may be nullptr is not visible from this
// declaration -- confirm against the definition before passing null.
ExecutionOptions CreateExecutionOptions(
    const ExecutableBuildOptions& build_options,
    const ProgramShape* program_shape);
163 
164 }  // namespace xla
165 
166 #endif  // TENSORFLOW_COMPILER_XLA_CLIENT_EXECUTABLE_BUILD_OPTIONS_H_
167