1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
18 
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/strings/str_cat.h"
#include "absl/types/optional.h"
#include "absl/types/span.h"
#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/service/computation_placer.h"
#include "tensorflow/compiler/xla/service/stream_pool.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/stream_executor/device_memory_allocator.h"
38 
// Forward declaration only: this header refers to Eigen::ThreadPoolDevice
// solely through a pointer, so the complete type is not required here.
namespace Eigen {
struct ThreadPoolDevice;
}  // namespace Eigen
42 
43 namespace xla {
44 
45 // Options to configure the backend when it is created.
46 class BackendOptions {
47  public:
48   // Set the platform backing the backend, or nullptr for the default platform.
49   BackendOptions& set_platform(se::Platform* platform);
50   se::Platform* platform() const;
51 
52   // Sets the thread pool size for parallel execution of an individual operator.
53   // The default value of -1 will result in initializing the thread pool with
54   // the number of threads equal to the number of cores in the system.
55   BackendOptions& set_intra_op_parallelism_threads(int num_threads);
56   int intra_op_parallelism_threads() const;
57 
58   // Sets the allowed_devices for selectively constructing stream executors
59   // on the platform.
60   BackendOptions& set_allowed_devices(
61       const absl::optional<std::set<int>>& allowed_devices);
62   const absl::optional<std::set<int>>& allowed_devices() const;
63 
64  private:
65   se::Platform* platform_ = nullptr;
66   int intra_op_parallelism_threads_ = -1;
67   absl::optional<std::set<int>> allowed_devices_;
68 };
69 
70 // Class which encapsulates an XLA backend. It includes everything necessary
71 // to compile and execute computations on a particular platform.
72 //
73 // It also offers a pooling API for creation/use of initialized streams:
74 //
75 //    StreamPool::Ptr stream = backend->BorrowStream().ConsumeValueOrDie();
76 class Backend {
77  public:
78   // Creates a new backend.
79   static StatusOr<std::unique_ptr<Backend>> CreateBackend(
80       const BackendOptions& options);
81 
82   // Creates a backend for the default platform. The default platform is defined
83   // in PlatformUtil.
84   static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend();
85 
86   ~Backend();
87 
88   // Accessors for the various objects.
platform()89   se::Platform* platform() const { return platform_; }
compiler()90   Compiler* compiler() const { return compiler_; }
memory_allocator()91   se::DeviceMemoryAllocator* memory_allocator() const {
92     return memory_allocator_.get();
93   }
transfer_manager()94   TransferManager* transfer_manager() const { return transfer_manager_; }
computation_placer()95   ComputationPlacer* computation_placer() const { return computation_placer_; }
96 
97   // Returns the number of devices of the platform type which are visible. Not
98   // all of these devices may be usable by XLA.
device_count()99   int device_count() const { return stream_executors_.size(); }
100 
101   // Returns the device ordinal number of the default device.
102   int default_device_ordinal() const;
103 
104   // Returns stream executors of all supported devices for this backend. The
105   // executors are ordered by the device ordinal.
stream_executors()106   const std::vector<se::StreamExecutor*>& stream_executors() const {
107     return stream_executors_;
108   }
109 
110   // Returns the stream executor for the given device ordinal.
111   StatusOr<se::StreamExecutor*> stream_executor(int device_ordinal) const;
112 
113   // Returns the stream executor for the default device ordinal. This stream
114   // executor can only be used when the number of computations is 1 (replication
115   // can be > 1).
default_stream_executor()116   se::StreamExecutor* default_stream_executor() const {
117     CHECK(!stream_executors_.empty());
118     return stream_executors_[0];
119   }
120 
121   // Borrows a stream for use by the caller, either by grabbing it from an
122   // internal pool, or by constructing/initializating it, and returns the result
123   // to the caller.
124   StatusOr<StreamPool::Ptr> BorrowStream(int device_ordinal);
125   StatusOr<StreamPool::Ptr> BorrowStream(se::StreamExecutor* executor);
126 
127   // Returns a function to borrow a stream, as `BorrowStream` above does.
128   // Purely for convenience, the caller could rather make this anonymous
129   // function itself.
StreamBorrower()130   std::function<StatusOr<StreamPool::Ptr>(int)> StreamBorrower() {
131     return [this](int device_ordinal) { return BorrowStream(device_ordinal); };
132   }
133 
134   // Returns whether the given device ordinal of the backend is supported.
device_ordinal_supported(int device_ordinal)135   bool device_ordinal_supported(int device_ordinal) const {
136     return (device_ordinal >= 0 && device_ordinal < device_count() &&
137             stream_executors_[device_ordinal] != nullptr);
138   }
139 
140   // Return a string identifier for the given device, eg: "GPU:3".
device_name(int device_ordinal)141   string device_name(int device_ordinal) const {
142     return absl::StrCat(platform_->Name(), ":", device_ordinal);
143   }
144 
145   // Returns true if the devices with the given ordinals are equivalent from
146   // XLA's perspective. That is, an executable compiled for one device would
147   // be equivalent to an executable compiled for the other.
148   StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b);
149 
150   // For the host platform, returns the configured eigen threadpool device to be
151   // used for scheduling work. For other platforms, returns NULL.
152   const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const;
153   tensorflow::thread::ThreadPool* eigen_intra_op_thread_pool() const;
154 
155   // Resets the devices associated with this backend.
156   Status ResetDevices();
157 
158  private:
159   Backend(se::Platform* platform, Compiler* compiler,
160           absl::Span<se::StreamExecutor* const> stream_executors,
161           TransferManager* transfer_manager,
162           ComputationPlacer* computation_placer,
163           int intra_op_parallelism_threads);
164   Backend(const Backend&) = delete;
165   Backend& operator=(const Backend&) = delete;
166 
167   se::Platform* platform_;
168   Compiler* compiler_;
169   TransferManager* transfer_manager_;
170   ComputationPlacer* computation_placer_;
171 
172   // Vector of stream executors. stream_executors_[0] is the default executor.
173   std::vector<se::StreamExecutor*> stream_executors_;
174 
175   tensorflow::mutex mu_;
176 
177   // Mapping from stream executor to stream pools, used by `BorrowStream` above.
178   absl::flat_hash_map<se::StreamExecutor*, std::unique_ptr<StreamPool>>
179       stream_pools_ TF_GUARDED_BY(mu_);
180 
181   // The default memory allocator to use.
182   std::unique_ptr<se::StreamExecutorMemoryAllocator> memory_allocator_;
183 
184   // For the CPU backend, an Eigen threadpool device for use by Eigen code.
185   struct IntraOpThreadPool;
186   std::unique_ptr<IntraOpThreadPool> intra_op_thread_pool_;
187 };
188 
189 }  // namespace xla
190 
191 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
192