1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
#include "tensorflow/stream_executor/kernel_spec.h"

#include <cstring>

#include "absl/strings/string_view.h"
18 
19 namespace stream_executor {
20 
KernelLoaderSpec(absl::string_view kernelname)21 KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernelname)
22     : kernelname_(std::string(kernelname)) {}
23 
OnDiskKernelLoaderSpec(absl::string_view filename,absl::string_view kernelname)24 OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(absl::string_view filename,
25                                                absl::string_view kernelname)
26     : KernelLoaderSpec(kernelname), filename_(std::string(filename)) {}
27 
CudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)28 CudaPtxOnDisk::CudaPtxOnDisk(absl::string_view filename,
29                              absl::string_view kernelname)
30     : OnDiskKernelLoaderSpec(filename, kernelname) {}
31 
CudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)32 CudaCubinOnDisk::CudaCubinOnDisk(absl::string_view filename,
33                                  absl::string_view kernelname)
34     : OnDiskKernelLoaderSpec(filename, kernelname) {}
35 
CudaCubinInMemory(const char * bytes,absl::string_view kernelname)36 CudaCubinInMemory::CudaCubinInMemory(const char *bytes,
37                                      absl::string_view kernelname)
38     : KernelLoaderSpec(kernelname), bytes_(bytes) {}
39 
// Strict "less than" ordering on (major, minor) compute capability pairs:
// the major components decide unless they are equal, in which case the minor
// components are compared.
bool CompareComputeCapability(const std::tuple<int, int> &lhs,
                              const std::tuple<int, int> &rhs) {
  const int lhs_major = std::get<0>(lhs);
  const int rhs_major = std::get<0>(rhs);
  if (lhs_major != rhs_major) {
    return lhs_major < rhs_major;
  }
  return std::get<1>(lhs) < std::get<1>(rhs);
}
46 
47 const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0};
48 
CudaPtxInMemory(absl::string_view ptx,absl::string_view kernel_name,bool ptx_compressed)49 CudaPtxInMemory::CudaPtxInMemory(absl::string_view ptx,
50                                  absl::string_view kernel_name,
51                                  bool ptx_compressed)
52     : KernelLoaderSpec(kernel_name),
53       ptx_by_compute_capability_(CompareComputeCapability) {
54   if (ptx_compressed) {
55     // Lazy decompression. Put an empty string in decompressed_ptx_ showing that
56     // the original ptx is compressed.
57     decompressed_ptx_[ptx.data()] = "";
58   }
59   ptx_by_compute_capability_[kMinimumCapability] = ptx.data();
60 }
61 
CudaPtxInMemory(const std::initializer_list<CudaPtxInMemory::PtxSpec> & spec_list,absl::string_view kernel_name,bool ptx_compressed)62 CudaPtxInMemory::CudaPtxInMemory(
63     const std::initializer_list<CudaPtxInMemory::PtxSpec> &spec_list,
64     absl::string_view kernel_name, bool ptx_compressed)
65     : KernelLoaderSpec(kernel_name),
66       ptx_by_compute_capability_(CompareComputeCapability) {
67   for (const auto &spec : spec_list) {
68     int major, minor;
69     absl::string_view ptx;
70     std::tie(major, minor, ptx) = spec;
71     if (ptx_compressed) {
72       // Lazy decompression. Put an empty string in decompressed_ptx_ showing
73       // that the original ptx is compressed.
74       decompressed_ptx_[ptx.data()] = "";
75     }
76     ptx_by_compute_capability_[std::tuple<int, int>{major, minor}] = ptx.data();
77   }
78 }
79 
DecompressPtx(const char * ptx)80 std::string CudaPtxInMemory::DecompressPtx(const char *ptx) {
81   // Get the length of the PTX string from the beginning of the buffer.
82   uint64 ptx_length = *reinterpret_cast<const uint64 *>(ptx);
83   // Get the PTX string from the buffer with offset and length.
84   std::string compressed_ptx(ptx + sizeof(uint64),
85                              ptx + sizeof(uint64) + ptx_length);
86   std::string decompressed_ptx;
87   // Decompress the PTX string with bzip2.
88   LOG(FATAL) << "bzip2 decompression is not supported yet.";
89   return decompressed_ptx;
90 }
91 
default_text() const92 const char *CudaPtxInMemory::default_text() const {
93   if (ptx_by_compute_capability_.empty()) {
94     return nullptr;
95   }
96 
97   absl::MutexLock lock(&mu_);
98 
99   auto ptx = ptx_by_compute_capability_.begin()->second;
100   // Check if there is an entry in decompressed ptx table.
101   auto decompressed_ptx_iter = decompressed_ptx_.find(ptx);
102   if (decompressed_ptx_iter != decompressed_ptx_.end()) {
103     // If the decompressed string is empty, which means the ptx hasn't been
104     // decompressed, decompress it here.
105     if (decompressed_ptx_iter->second.empty()) {
106       decompressed_ptx_iter->second = DecompressPtx(ptx);
107     }
108     return decompressed_ptx_iter->second.c_str();
109   }
110   return ptx;
111 }
112 
original_default_text() const113 const char *CudaPtxInMemory::original_default_text() const {
114   if (ptx_by_compute_capability_.empty()) {
115     return nullptr;
116   }
117 
118   return ptx_by_compute_capability_.begin()->second;
119 }
120 
text(int compute_capability_major,int compute_capability_minor) const121 const char *CudaPtxInMemory::text(int compute_capability_major,
122                                   int compute_capability_minor) const {
123   std::tuple<int, int> capability{compute_capability_major,
124                                   compute_capability_minor};
125 
126   auto ptx_iter = ptx_by_compute_capability_.find(capability);
127   if (ptx_iter == ptx_by_compute_capability_.end()) {
128     return nullptr;
129   }
130 
131   absl::MutexLock lock(&mu_);
132 
133   // Check if there is an entry in decompressed ptx table.
134   auto decompressed_ptx_iter = decompressed_ptx_.find(ptx_iter->second);
135   if (decompressed_ptx_iter != decompressed_ptx_.end()) {
136     // If the decompressed string is empty, which means the ptx hasn't been
137     // decompressed, decompress it here.
138     if (decompressed_ptx_iter->second.empty()) {
139       decompressed_ptx_iter->second = DecompressPtx(ptx_iter->second);
140     }
141     return decompressed_ptx_iter->second.c_str();
142   }
143   return ptx_iter->second;
144 }
145 
original_text(int compute_capability_major,int compute_capability_minor) const146 const char *CudaPtxInMemory::original_text(int compute_capability_major,
147                                            int compute_capability_minor) const {
148   std::tuple<int, int> capability{compute_capability_major,
149                                   compute_capability_minor};
150 
151   auto ptx_iter = ptx_by_compute_capability_.find(capability);
152   if (ptx_iter == ptx_by_compute_capability_.end()) {
153     return nullptr;
154   }
155 
156   return ptx_iter->second;
157 }
158 
OpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)159 OpenCLTextOnDisk::OpenCLTextOnDisk(absl::string_view filename,
160                                    absl::string_view kernelname)
161     : OnDiskKernelLoaderSpec(filename, kernelname) {}
162 
OpenCLTextInMemory(absl::string_view text,absl::string_view kernelname)163 OpenCLTextInMemory::OpenCLTextInMemory(absl::string_view text,
164                                        absl::string_view kernelname)
165     : KernelLoaderSpec(kernelname), text_(text) {}
166 
OpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)167 OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(absl::string_view filename,
168                                        absl::string_view kernelname)
169     : OnDiskKernelLoaderSpec(filename, kernelname) {}
170 
AddOpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)171 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk(
172     absl::string_view filename, absl::string_view kernelname) {
173   CHECK(ocl_text_on_disk_ == nullptr);
174   ocl_text_on_disk_.reset(new OpenCLTextOnDisk{filename, kernelname});
175   return this;
176 }
177 
AddOpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)178 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk(
179     absl::string_view filename, absl::string_view kernelname) {
180   CHECK(ocl_binary_on_disk_ == nullptr);
181   ocl_binary_on_disk_.reset(new OpenCLBinaryOnDisk{filename, kernelname});
182   return this;
183 }
184 
AddOpenCLTextInMemory(absl::string_view filename,absl::string_view kernelname)185 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory(
186     absl::string_view filename, absl::string_view kernelname) {
187   CHECK(ocl_text_in_memory_ == nullptr);
188   ocl_text_in_memory_.reset(new OpenCLTextInMemory{filename, kernelname});
189   return this;
190 }
191 
AddCudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)192 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk(
193     absl::string_view filename, absl::string_view kernelname) {
194   CHECK(cuda_ptx_on_disk_ == nullptr);
195   cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernelname});
196   return this;
197 }
198 
AddCudaCubinInMemory(const char * bytes,absl::string_view kernelname)199 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory(
200     const char *bytes, absl::string_view kernelname) {
201   CHECK(cuda_cubin_in_memory_ == nullptr);
202   cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernelname});
203   return this;
204 }
205 
AddCudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)206 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk(
207     absl::string_view filename, absl::string_view kernelname) {
208   CHECK(cuda_cubin_on_disk_ == nullptr);
209   cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, kernelname});
210   return this;
211 }
212 
AddCudaPtxInMemory(absl::string_view ptx,absl::string_view kernelname)213 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
214     absl::string_view ptx, absl::string_view kernelname) {
215   CHECK(cuda_ptx_in_memory_ == nullptr);
216   cuda_ptx_in_memory_.reset(
217       new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */});
218   return this;
219 }
220 
AddCudaCompressedPtxInMemory(absl::string_view ptx,absl::string_view kernelname)221 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
222     absl::string_view ptx, absl::string_view kernelname) {
223   CHECK(cuda_ptx_in_memory_ == nullptr);
224   cuda_ptx_in_memory_.reset(
225       new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */});
226   return this;
227 }
228 
AddCudaPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)229 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
230     std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
231     absl::string_view kernelname) {
232   CHECK(cuda_ptx_in_memory_ == nullptr);
233   cuda_ptx_in_memory_.reset(
234       new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */});
235   return this;
236 }
237 
AddCudaCompressedPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)238 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
239     std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
240     absl::string_view kernelname) {
241   CHECK(cuda_ptx_in_memory_ == nullptr);
242   cuda_ptx_in_memory_.reset(
243       new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */});
244   return this;
245 }
246 
MultiKernelLoaderSpec(size_t arity)247 MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {}
248 
249 }  // namespace stream_executor
250