1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_BUFFER_ALLOCATIONS_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_BUFFER_ALLOCATIONS_H_
18 
19 #include <memory>
20 #include <set>
21 #include <vector>
22 
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/types/span.h"
25 #include "tensorflow/compiler/xla/service/buffer_assignment.h"
26 #include "tensorflow/compiler/xla/service/device_memory_allocator.h"
27 #include "tensorflow/compiler/xla/statusor.h"
28 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
29 
30 namespace xla {
31 namespace gpu {
32 
33 // A thread-compatible class that encapsulates the base addresses of the
34 // allocated device buffers.
35 class BufferAllocations {
36  public:
37   // This inner class encapsulates methods that build a BufferAllocations from
38   // the given buffer assignment.
39   class Builder {
40    public:
41     // Registers preallocated buffers (such as parameter addresses and
42     // user-specified result buffers) to the given buffer index. The builder
43     // will skip allocating buffers for registered buffer indices.
44     void RegisterBuffer(BufferAllocation::Index index,
45                         se::DeviceMemoryBase address);
46 
47     // Builds a BufferAllocations object from the given buffer assignment.
48     // `memory_allocator` is what this function uses to allocate device memory.
49     // `device_ordinal` is the number of the device this function allocates
50     // memory on.
51     StatusOr<std::unique_ptr<BufferAllocations>> Build(
52         const BufferAssignment* buffer_assignment, int device_ordinal,
53         DeviceMemoryAllocator* memory_allocator);
54 
55    private:
56     absl::flat_hash_map<BufferAllocation::Index, se::DeviceMemoryBase>
57         registered_buffers_;
58   };
59 
60   ~BufferAllocations();
61 
62   BufferAllocations(const BufferAllocations&) = delete;
63   BufferAllocations& operator=(const BufferAllocations&) = delete;
64 
memory_allocator()65   DeviceMemoryAllocator* memory_allocator() const { return memory_allocator_; }
device_ordinal()66   int device_ordinal() const { return device_ordinal_; }
67 
68   // Returns the device address of buffer `buffer_index`. `buffer_index` must be
69   // a valid index, i.e., in [0, buffer_count). This function returns null if
70   // `buffer_index` is not assigned to a buffer address.
71   se::DeviceMemoryBase GetDeviceAddress(
72       BufferAllocation::Index buffer_index) const;
73 
74   // Same as above, but also adjusts the returned address for the offset and
75   // size contained in the given slice.
76   se::DeviceMemoryBase GetDeviceAddress(
77       const BufferAllocation::Slice& buffer_slice) const;
78 
GetTempBufferBase()79   se::DeviceMemoryBase GetTempBufferBase() const { return temp_buffer_base_; }
80 
81   // Tears down all buffers allocated by this object that are not in
82   // `live_addresses`.
83   Status TearDown(const std::set<se::DeviceMemoryBase>& live_addresses);
84 
85  private:
BufferAllocations(BufferAllocation::Index buffer_count,int device_ordinal,DeviceMemoryAllocator * memory_allocator,const BufferAssignment * buffer_assignment)86   BufferAllocations(BufferAllocation::Index buffer_count, int device_ordinal,
87                     DeviceMemoryAllocator* memory_allocator,
88                     const BufferAssignment* buffer_assignment)
89       : buffers_(buffer_count),
90         device_ordinal_(device_ordinal),
91         memory_allocator_(memory_allocator),
92         buffer_assignment_(buffer_assignment) {}
93 
94   // Sets the device address of buffer `buffer_index`.
95   void SetBuffer(BufferAllocation::Index buffer_index,
96                  se::DeviceMemoryBase buffer);
97 
98   // An array of device pointers that stores the address of each buffer
99   // indexed by Index. Each element can point to a temporary buffer, an
100   // input buffer, or nullptr if no buffer is needed for that Index.
101   std::vector<se::DeviceMemoryBase> buffers_;
102 
103   // The base address of the memory block that contains all temporary buffers.
104   se::DeviceMemoryBase temp_buffer_base_;
105 
106   int device_ordinal_;
107   DeviceMemoryAllocator* memory_allocator_;
108   const BufferAssignment* buffer_assignment_;
109   bool torn_down_ = false;
110 };
111 
// Reports, per its name, whether `literal` should be emitted directly in LLVM
// IR.
//
// LLVM and PTXAS don't deal well with large constants, so we only emit very
// small constants directly in LLVM IR.  Larger constants are emitted with zero
// initializers in LLVM IR and are later overwritten when the PTX/CUBIN is
// loaded.
//
// NOTE(review): the exact criterion for "very small" lives in the definition,
// which is not part of this header -- check the .cc file before relying on a
// particular threshold.
bool ShouldEmitLiteralInLlvmIr(const Literal& literal);
117 
118 }  // namespace gpu
119 }  // namespace xla
120 
121 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_BUFFER_ALLOCATIONS_H_
122