1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_
17 #define TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_
18 
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/stream_executor/device_memory.h"
#include "tensorflow/stream_executor/device_memory_allocator.h"
#include "tensorflow/stream_executor/lib/statusor.h"
#include "tensorflow/stream_executor/platform.h"
24 
25 namespace stream_executor {
26 
// Adapter class that exposes a Tensorflow allocator through the
// DeviceMemoryAllocator interface.
//
// The adapter stores a raw pointer to the wrapped allocator and does not take
// ownership of it, so the allocator presumably has to outlive the adapter.
//
// Assumes that the Tensorflow allocator permits asynchronous deallocation:
// see comment on `AllowsAsynchronousDeallocation()`.
class TfAllocatorAdapter : public DeviceMemoryAllocator {
 public:
  // wrapped: the Tensorflow allocator to adapt (not owned).
  // stream: the only Stream on which the allocator may be used. If non-null,
  // the allocator must not be used on any other stream.
  TfAllocatorAdapter(tensorflow::Allocator *wrapped, Stream *stream);

  // Constructor for the cases where `stream` cannot be provided; the adapter
  // is given only the `platform` it belongs to.
  TfAllocatorAdapter(tensorflow::Allocator *wrapped, Platform *platform);

  ~TfAllocatorAdapter() override;

  // DeviceMemoryAllocator override; defined out of line.
  port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
                                              bool retry_on_failure,
                                              int64 memory_space) override;

  // DeviceMemoryAllocator override; defined out of line.
  port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override;

  // The Tensorflow BFC allocator used on GPU allows host-side deallocation
  // before GPU execution takes place. Tensorflow uses the ordering of the main
  // compute stream to enforce a happens-before relationship between a memory
  // allocation and code that reuses the same memory. If Tensorflow adds
  // support for multiple GPU streams or allocators with different ordering
  // requirements, this code may need to change.
  // (This attribute has no effect on CPU.)
  bool AllowsAsynchronousDeallocation() const override { return true; }

  // DeviceMemoryAllocator override; defined out of line. Presumably yields the
  // stream supplied at construction -- confirm against the implementation.
  port::StatusOr<Stream *> GetStream(int device_ordinal) override;

 private:
  // The wrapped Tensorflow allocator (not owned).
  tensorflow::Allocator *wrapped_;
  // Stream associated with this adapter; presumably null when the adapter was
  // built from a Platform only -- confirm against the constructor definitions.
  Stream *stream_;
};
63 
64 // Adapter class that wraps per-device TF allocators with corresponding streams
65 // as a TfAllocatorAdapter. Assumes that the Tensorflow allocator permits
66 // asynchronous deallocation; see comment on `AllowsAsynchronousDeallocation()`.
67 class MultiDeviceAdapter : public DeviceMemoryAllocator {
68  public:
69   using AllocatorWithStream =
70       std::pair<std::unique_ptr<tensorflow::Allocator>, Stream *>;
MultiDeviceAdapter(const Platform * platform,std::vector<AllocatorWithStream> tf_allocators)71   MultiDeviceAdapter(const Platform *platform,
72                      std::vector<AllocatorWithStream> tf_allocators)
73       : DeviceMemoryAllocator(platform) {
74     tf_allocators_.reserve(tf_allocators.size());
75     for (AllocatorWithStream &p : tf_allocators) {
76       per_device_allocators_.emplace_back(p.first.get(), p.second);
77       tf_allocators_.push_back(std::move(p.first));
78     }
79   }
80 
Allocate(int device_ordinal,uint64 size,bool retry_on_failure,int64 memory_space)81   port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size,
82                                               bool retry_on_failure,
83                                               int64 memory_space) override {
84     CHECK_LT(device_ordinal, per_device_allocators_.size());
85     return per_device_allocators_[device_ordinal].Allocate(
86         device_ordinal, size, retry_on_failure, memory_space);
87   }
88 
Deallocate(int device_ordinal,DeviceMemoryBase mem)89   port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override {
90     CHECK_LT(device_ordinal, per_device_allocators_.size());
91     return per_device_allocators_[device_ordinal].Deallocate(device_ordinal,
92                                                              mem);
93   }
94 
95   // The Tensorflow BFC allocator used on GPU allows host-side deallocation
96   // before GPU execution takes place. Tensorflow uses the ordering of the main
97   // compute stream to enforce a happens-before relationship between a memory
98   // allocation and code that reuses the same memory. If Tensorflow adds
99   // support for multiple GPU streams or allocators with different ordering
100   // requirements, this code may need to change.
101   // (This attribute has no effect on CPU.)
AllowsAsynchronousDeallocation()102   bool AllowsAsynchronousDeallocation() const override { return true; }
103 
GetStream(int device_ordinal)104   port::StatusOr<Stream *> GetStream(int device_ordinal) override {
105     return per_device_allocators_[device_ordinal].GetStream(device_ordinal);
106   }
107 
108  private:
109   std::vector<TfAllocatorAdapter> per_device_allocators_;
110   // The wrapped TF allocators backing per_device_allocators_
111   // (TfAllocatorAdapter does not take ownership of its underlying Allocator).
112   std::vector<std::unique_ptr<tensorflow::Allocator>> tf_allocators_;
113 };
114 
115 }  // namespace stream_executor
116 
117 #endif  // TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_
118