1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #if !TENSORFLOW_USE_SYCL
17 #error This file must only be included when building TensorFlow with SYCL support
18 #endif
19 
20 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
21 #define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
22 
23 #include "tensorflow/core/common_runtime/local_device.h"
24 #include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
25 #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
26 #include "tensorflow/core/public/session_options.h"
27 
28 namespace tensorflow {
29 
30 class GSYCLInterface {
31   std::vector<Eigen::QueueInterface*> m_queue_interface_;  // owned
32   std::vector<Allocator*> m_cpu_allocator_;                // not owned
33   std::vector<SYCLAllocator*> m_sycl_allocator_;           // owned
34   std::vector<SYCLDeviceContext*> m_sycl_context_;         // ref counted
GSYCLInterface()35   GSYCLInterface() {
36     bool found_device = false;
37     auto device_list = Eigen::get_sycl_supported_devices();
38     // Obtain list of supported devices from Eigen
39     for (const auto& device : device_list) {
40       if (device.is_gpu()) {
41         // returns first found GPU
42         AddDevice(device);
43         found_device = true;
44       }
45     }
46 
47     if (!found_device) {
48       // Currently Intel GPU is not supported
49       LOG(WARNING) << "No OpenCL GPU found that is supported by "
50                    << "ComputeCpp/triSYCL, trying OpenCL CPU";
51     }
52 
53     for (const auto& device : device_list) {
54       if (device.is_cpu()) {
55         // returns first found CPU
56         AddDevice(device);
57         found_device = true;
58       }
59     }
60 
61     if (!found_device) {
62       LOG(WARNING) << "No OpenCL CPU found that is supported by "
63                    << "ComputeCpp/triSYCL, checking for host sycl device";
64     }
65 
66     for (const auto& device : device_list) {
67       // triSYCL only supports the host device for now
68       if (device.is_host()) {
69         LOG(WARNING) << "Found SYCL host device";
70         AddDevice(device);
71         found_device = true;
72       }
73     }
74 
75     if (!found_device) {
76       // Currently Intel GPU is not supported
77       LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU"
78                  << " supported by ComputeCPP/triSYCL was found";
79     } else {
80       LOG(INFO) << "Found following OpenCL devices:";
81       for (int i = 0; i < device_list.size(); i++) {
82         LOG(INFO) << GetShortDeviceDescription(i);
83       }
84     }
85   }
86 
~GSYCLInterface()87   ~GSYCLInterface() {
88     m_cpu_allocator_.clear();
89 
90     for (auto p : m_sycl_allocator_) {
91       p->Synchronize();
92       p->ClearSYCLDevice();
93       // Cannot delete the Allocator instances, as the Allocator lifetime
94       // needs to exceed any Tensor created by it. There is no way of
95       // knowing when all Tensors have been deallocated, as they are
96       // RefCounted and wait until all instances of a Tensor have been
97       // destroyed before calling Allocator.Deallocate. This could happen at
98       // program exit, which can set up a race condition between destroying
99       // Tensors and Allocators when the program is cleaning up.
100     }
101     m_sycl_allocator_.clear();
102 
103     for (auto p : m_sycl_context_) {
104       p->Unref();
105     }
106     m_sycl_context_.clear();
107 
108     for (auto p : m_queue_interface_) {
109       p->deallocate_all();
110       delete p;
111     }
112     m_queue_interface_.clear();
113   }
114 
AddDevice(const cl::sycl::device & d)115   void AddDevice(const cl::sycl::device& d) {
116     m_queue_interface_.push_back(new Eigen::QueueInterface(d));
117     m_cpu_allocator_.push_back(cpu_allocator());
118     m_sycl_allocator_.push_back(new SYCLAllocator(m_queue_interface_.back()));
119     m_sycl_context_.push_back(new SYCLDeviceContext());
120   }
121 
122  public:
instance()123   static const GSYCLInterface* instance() {
124     // c++11 guarantees that this will be constructed in a thread safe way
125     static const GSYCLInterface instance;
126     return &instance;
127   }
128 
129   Eigen::QueueInterface* GetQueueInterface(size_t i = 0) const {
130     if (!m_queue_interface_.empty()) {
131       return m_queue_interface_[i];
132     } else {
133       std::cerr << "No cl::sycl::device has been added" << std::endl;
134       return nullptr;
135     }
136   }
137 
138   SYCLAllocator* GetSYCLAllocator(size_t i = 0) const {
139     if (!m_sycl_allocator_.empty()) {
140       return m_sycl_allocator_[i];
141     } else {
142       std::cerr << "No cl::sycl::device has been added" << std::endl;
143       return nullptr;
144     }
145   }
146 
147   Allocator* GetCPUAllocator(size_t i = 0) const {
148     if (!m_cpu_allocator_.empty()) {
149       return m_cpu_allocator_[i];
150     } else {
151       std::cerr << "No cl::sycl::device has been added" << std::endl;
152       return nullptr;
153     }
154   }
155 
156   SYCLDeviceContext* GetSYCLContext(size_t i = 0) const {
157     if (!m_sycl_context_.empty()) {
158       return m_sycl_context_[i];
159     } else {
160       std::cerr << "No cl::sycl::device has been added" << std::endl;
161       return nullptr;
162     }
163   }
164 
165   string GetShortDeviceDescription(int device_id = 0) const {
166     Eigen::QueueInterface* queue_ptr = GetQueueInterface(device_id);
167     if (!queue_ptr) {
168       LOG(ERROR)
169           << "Device name cannot be given after Eigen QueueInterface destroyed";
170       return "";
171     }
172     auto device = queue_ptr->sycl_queue().get_device();
173     auto name = device.get_info<cl::sycl::info::device::name>();
174     auto vendor = device.get_info<cl::sycl::info::device::vendor>();
175     auto profile = device.get_info<cl::sycl::info::device::profile>();
176 
177     std::string type;
178     if (device.is_host()) {
179       type = "Host";
180     } else if (device.is_cpu()) {
181       type = "CPU";
182     } else if (device.is_gpu()) {
183       type = "GPU";
184     } else if (device.is_accelerator()) {
185       type = "Accelerator";
186     } else {
187       type = "Unknown";
188     }
189 
190     return strings::StrCat(
191         "id: ", device_id, ", type: ", type, ", name: ", name.c_str(),
192         ", vendor: ", vendor.c_str(), ", profile: ", profile.c_str());
193   }
194 };
195 
196 class SYCLDevice : public LocalDevice {
197  public:
SYCLDevice(const SessionOptions & options,const string & name,Bytes memory_limit,const DeviceLocality & locality,const string & physical_device_desc,SYCLAllocator * sycl_allocator,Allocator * cpu_allocator,SYCLDeviceContext * ctx)198   SYCLDevice(const SessionOptions& options, const string& name,
199              Bytes memory_limit, const DeviceLocality& locality,
200              const string& physical_device_desc, SYCLAllocator* sycl_allocator,
201              Allocator* cpu_allocator, SYCLDeviceContext* ctx)
202       : LocalDevice(options, Device::BuildDeviceAttributes(
203                                  name, DEVICE_SYCL, memory_limit, locality,
204                                  physical_device_desc)),
205         cpu_allocator_(cpu_allocator),
206         sycl_allocator_(sycl_allocator),
207         device_context_(ctx) {
208     set_eigen_sycl_device(sycl_allocator->getSyclDevice());
209   }
210 
211   ~SYCLDevice() override;
212 
213   void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
214   Allocator* GetAllocator(AllocatorAttributes attr) override;
215   Status MakeTensorFromProto(const TensorProto& tensor_proto,
216                              const AllocatorAttributes alloc_attrs,
217                              Tensor* tensor) override;
218 
219   Status FillContextMap(const Graph* graph,
220                         DeviceContextMap* device_context_map) override;
221 
222   Status Sync() override;
223 
224  private:
225   Allocator* cpu_allocator_;           // not owned
226   SYCLAllocator* sycl_allocator_;      // not owned
227   SYCLDeviceContext* device_context_;  // not owned
228 };
229 
230 }  // namespace tensorflow
231 
232 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
233