1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
// The CUDA implementation of the StreamExecutor KernelInterface functionality.
// This header declares GpuKernel, which wraps a GpuFunctionHandle; CUDA
// inclusions are ideally confined to the GPU implementation files.
//
// The notions from the StreamExecutor basically correspond to the CUDA streams
// programming model provided by the libcuda.so driver APIs, so we don't have
// to do much more than wrap the calls to the libraries appropriately.
22 #ifndef TENSORFLOW_STREAM_EXECUTOR_GPU_GPU_KERNEL_H_
23 #define TENSORFLOW_STREAM_EXECUTOR_GPU_GPU_KERNEL_H_
24 
25 #include "tensorflow/stream_executor/gpu/gpu_driver.h"
26 #include "tensorflow/stream_executor/kernel_cache_config.h"
27 #include "tensorflow/stream_executor/platform/logging.h"
28 #include "tensorflow/stream_executor/platform/port.h"
29 #include "tensorflow/stream_executor/stream_executor_internal.h"
30 
31 namespace stream_executor {
32 namespace gpu {
33 
34 // Wraps a GpuFunctionHandle to implement the platform-independent
35 // KernelInterface.
36 class GpuKernel : public internal::KernelInterface {
37  public:
GpuKernel()38   GpuKernel()
39       : gpu_function_(nullptr),
40         arity_(0),
41         preferred_cache_config_(KernelCacheConfig::kNoPreference) {}
42 
43   // Note that the function is unloaded when the module is unloaded, and the
44   // module that the function is contained in is owned by the GpuExecutor.
~GpuKernel()45   ~GpuKernel() override {}
46 
47   // As arity cannot be reflected upon using the CUDA API, the arity is
48   // explicitly set during the GpuExecutor::GetKernel initialization process.
set_arity(unsigned arity)49   void set_arity(unsigned arity) { arity_ = arity; }
Arity()50   unsigned Arity() const override { return arity_; }
51 
52   // Returns the GpuFunctionHandle value for passing to the CUDA API.
AsGpuFunctionHandle()53   GpuFunctionHandle AsGpuFunctionHandle() const {
54     DCHECK(gpu_function_ != nullptr);
55     return const_cast<GpuFunctionHandle>(gpu_function_);
56   }
57 
58   // Returns the slot that the GpuFunctionHandle is stored within for this
59   // object, for the CUDA API which wants to load into a GpuFunctionHandle*.
gpu_function_ptr()60   GpuFunctionHandle* gpu_function_ptr() { return &gpu_function_; }
61 
62   // CUDA supports setting the preferred cache configuration of a
63   // GpuFunctionHandle (more-or-less equivalent to a GpuKernel). We support this
64   // via the below functions; users can set a preference, and that is applied
65   // when the kernel is [lazy-]loaded (in GpuExecutor::Launch). The alternative
66   // would be to load the kernel & set the preference when the user calls the
67   // setter below; either approach is valid. Sets the current kernel cache
68   // configuration preference.
SetPreferredCacheConfig(KernelCacheConfig config)69   void SetPreferredCacheConfig(KernelCacheConfig config) override {
70     preferred_cache_config_ = config;
71   }
72 
73   // Returns the current kernel cache configuration preference.
GetPreferredCacheConfig()74   KernelCacheConfig GetPreferredCacheConfig() const override {
75     return preferred_cache_config_;
76   }
77 
78   // Returns the current kernel cache configuration preference as a
79   // CUfunc_cache.
80   GpuFuncCachePreference GetGpuCacheConfig() const;
81 
82  private:
83   GpuFunctionHandle gpu_function_;  // Wrapped CUDA kernel handle.
84   unsigned arity_;  // Number of formal parameters the kernel takes.
85 
86   // Preferred (but not required) cache configuration for this kernel.
87   KernelCacheConfig preferred_cache_config_;
88 };
89 
90 // Given a platform-independent kernel datatype, returns the (const) internal
91 // CUDA platform implementation pointer.
AsGpuKernel(const KernelBase * kernel)92 inline const GpuKernel* AsGpuKernel(const KernelBase* kernel) {
93   return static_cast<const GpuKernel*>(kernel->implementation());
94 }
95 
96 // Given a platform-independent kernel datatype, returns the (non-const)
97 // internal CUDA platform implementation pointer.
AsGpuKernel(KernelBase * kernel)98 inline GpuKernel* AsGpuKernel(KernelBase* kernel) {
99   return static_cast<GpuKernel*>(kernel->implementation());
100 }
101 
102 }  // namespace gpu
103 }  // namespace stream_executor
104 
105 #endif  // TENSORFLOW_STREAM_EXECUTOR_GPU_GPU_KERNEL_H_
106