1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
17 #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
18 
#include <atomic>
#include <functional>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/process_state.h"
#include "tensorflow/core/common_runtime/shared_counter.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/protobuf/config.pb.h"
32 
33 namespace tensorflow {
34 
// Forward declarations.  Allocator and SharedCounter are referenced by
// GPUProcessState below; PoolAllocator is not referenced by any declaration
// in this header but is kept for code that may rely on it being declared
// here.
class Allocator;
class PoolAllocator;
class SharedCounter;
38 
39 // Singleton that manages per-process state when GPUs are present.
40 class GPUProcessState {
41  public:
42   // If ps == nullptr, returns pointer to the single instance of this class to
43   // be used within this process.
44   //
45   // If ps != nullptrs, accepts a value to be returned by all subsequent calls.
46   // A non-null ps may ONLY be provided during program static storage
47   // initialization.  Must not be called more than once with a non-null ps.
48   //
49   // If a derived class of GPUProcessState is ever used in a process, it must
50   // always be used in place of this class.  In order to ensure that existing
51   // calls to GPUProcessState::singleton() all resolve to the derived instance
52   // instead, this function must be called once during startup, supplying the
53   // derived instance value, prior to any accessor call to this function.
54   static GPUProcessState* singleton(GPUProcessState* ps = nullptr);
55 
56   // Query whether any GPU device has been created so far.
57   // Disable thread safety analysis since a race is benign here.
HasGPUDevice()58   bool HasGPUDevice() const NO_THREAD_SAFETY_ANALYSIS {
59     return gpu_device_enabled_;
60   }
61 
62   // Set the flag to indicate a GPU device has been created.
63   // Disable thread safety analysis since a race is benign here.
EnableGPUDevice()64   void EnableGPUDevice() NO_THREAD_SAFETY_ANALYSIS {
65     gpu_device_enabled_ = true;
66   }
67 
68   // Returns the one GPU allocator used for the indexed GPU.
69   // Note that this is a system GPU index, not (necessarily) a brain
70   // device index.
71   //
72   // 'total_bytes' is the total number of bytes that should be made
73   // available to the allocator.  The first call to this function for
74   // a given tf_gpu_id creates the allocator, so only the total_bytes
75   // used on that first call is used.
76   //
77   // "Allocator type" describes the type of algorithm to use for the
78   // underlying allocator.  REQUIRES: Must be a valid type (see
79   // config.proto for the list of supported strings.).
80   //
81   // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in the
82   // current system environment.  Otherwise returns nullptr.
83   virtual Allocator* GetGPUAllocator(const GPUOptions& options,
84                                      TfGpuId tf_gpu_id, size_t total_bytes);
85 
86   virtual Allocator* GetGpuHostAllocator(int numa_node);
87 
88   // Registers a Visitor to be invoked on new chunks of memory allocated by the
89   // SubAllocator of every GPU proximate to the specified bus.  The AllocVisitor
90   // is provided with a memory pointer, a GPU id, and the size of the area it
91   // identifies.  The pointer is not guaranteed to be valid after the call
92   // terminates.  The intention is for this interface to be used for network
93   // device memory registration.  "bus_id" is platform-specific.  On many
94   // platforms it should be 0.  On machines with multiple PCIe buses, it should
95   // be the index of one of the PCIe buses (maybe the NUMA node at which the
96   // PCIe is rooted).  If the bus_id is invalid, results are undefined.
97   virtual void AddGPUAllocVisitor(int bus_id,
98                                   const SubAllocator::Visitor& visitor);
99 
100   // Registers a Visitor to be invoked on new chunks of memory allocated by
101   // the SubAllocator of the GpuHostAllocator for the given numa_node.
102   virtual void AddGpuHostAllocVisitor(int numa_node,
103                                       const SubAllocator::Visitor& visitor);
104 
105   // Registers a Visitor to be invoked on each chunk handed back for freeing to
106   // the SubAllocator of the GpuHostAllocator for the given numa_node.
107   virtual void AddGpuHostFreeVisitor(int numa_node,
108                                      const SubAllocator::Visitor& visitor);
109 
110   // Returns bus_id for the given GPU id.
111   virtual int BusIdForGPU(TfGpuId tf_gpu_id);
112 
113   SharedCounter* GPUAllocatorCounter(TfGpuId tf_gpu_id);
114 
115  protected:
116   // GPUProcessState is a singleton that should not normally be deleted except
117   // at process shutdown.
118   GPUProcessState();
~GPUProcessState()119   virtual ~GPUProcessState() {}
120   friend class GPUDeviceTest;
121 
122   // Helper method for unit tests to reset the ProcessState singleton by
123   // cleaning up everything. Never use in production.
124   virtual void TestOnlyReset();
125 
mem_desc_map()126   ProcessState::MDMap* mem_desc_map() {
127     if (process_state_) return &process_state_->mem_desc_map_;
128     return nullptr;
129   }
130 
131   static GPUProcessState* instance_;
132   ProcessState* process_state_;  // Not owned.
133   bool gpu_device_enabled_;
134 
135   mutex mu_;
136 
137   struct AllocatorParts {
138     std::unique_ptr<Allocator> allocator;
139     std::unique_ptr<SharedCounter> counter;
140     SubAllocator* sub_allocator;  // owned by allocator
141     std::unique_ptr<Allocator> recording_allocator;
142   };
143   std::vector<AllocatorParts> gpu_allocators_ GUARDED_BY(mu_);
144   std::vector<std::vector<SubAllocator::Visitor>> gpu_visitors_ GUARDED_BY(mu_);
145 
146   std::vector<AllocatorParts> gpu_host_allocators_ GUARDED_BY(mu_);
147   std::vector<std::vector<SubAllocator::Visitor>> gpu_host_alloc_visitors_
148       GUARDED_BY(mu_);
149   std::vector<std::vector<SubAllocator::Visitor>> gpu_host_free_visitors_
150       GUARDED_BY(mu_);
151 };
152 
153 }  // namespace tensorflow
154 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
155