1 //===----------- MemoryManager.cpp - Target independent memory manager ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Functionality for managing target memory.
10 // It is very expensive to call alloc/free functions of target devices. The
11 // MemoryManagerTy in this file is to reduce the number of invocations of those
12 // functions by buffering allocated device memory. In this way, when a memory is
13 // not used, it will not be freed on the device directly. The buffer is
14 // organized in a number of buckets for efficient look up. A memory will go to
15 // corresponding bucket based on its size. When a new memory request comes in,
16 // it will first check whether there is free memory of same size. If yes,
17 // returns it directly. Otherwise, allocate one on device.
18 //
19 // It also provides a way to opt out the memory manager. Memory
20 // allocation/deallocation will only be managed if the requested size is less
21 // than SizeThreshold, which can be configured via an environment variable
22 // LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD.
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "MemoryManager.h"
27 #include "device.h"
28 #include "private.h"
29 #include "rtl.h"
30
31 namespace {
32 constexpr const size_t BucketSize[] = {
33 0, 1U << 2, 1U << 3, 1U << 4, 1U << 5, 1U << 6, 1U << 7,
34 1U << 8, 1U << 9, 1U << 10, 1U << 11, 1U << 12, 1U << 13};
35
36 constexpr const int NumBuckets = sizeof(BucketSize) / sizeof(BucketSize[0]);
37
38 /// The threshold to manage memory using memory manager. If the request size is
39 /// larger than \p SizeThreshold, the allocation will not be managed by the
40 /// memory manager. This variable can be configured via an env \p
41 /// LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD. By default, the value is 8KB.
42 size_t SizeThreshold = 1U << 13;
43
44 /// Find the previous number that is power of 2 given a number that is not power
45 /// of 2.
floorToPowerOfTwo(size_t Num)46 size_t floorToPowerOfTwo(size_t Num) {
47 Num |= Num >> 1;
48 Num |= Num >> 2;
49 Num |= Num >> 4;
50 Num |= Num >> 8;
51 Num |= Num >> 16;
52 #if INTPTR_MAX == INT64_MAX
53 Num |= Num >> 32;
54 #elif INTPTR_MAX == INT32_MAX
55 // Do nothing with 32-bit
56 #else
57 #error Unsupported architecture
58 #endif
59 Num += 1;
60 return Num >> 1;
61 }
62
63 /// Find a suitable bucket
findBucket(size_t Size)64 int findBucket(size_t Size) {
65 const size_t F = floorToPowerOfTwo(Size);
66
67 DP("findBucket: Size %zu is floored to %zu.\n", Size, F);
68
69 int L = 0, H = NumBuckets - 1;
70 while (H - L > 1) {
71 int M = (L + H) >> 1;
72 if (BucketSize[M] == F)
73 return M;
74 if (BucketSize[M] > F)
75 H = M - 1;
76 else
77 L = M;
78 }
79
80 assert(L >= 0 && L < NumBuckets && "L is out of range");
81
82 DP("findBucket: Size %zu goes to bucket %d\n", Size, L);
83
84 return L;
85 }
86 } // namespace
87
MemoryManagerTy(DeviceTy & Dev,size_t Threshold)88 MemoryManagerTy::MemoryManagerTy(DeviceTy &Dev, size_t Threshold)
89 : FreeLists(NumBuckets), FreeListLocks(NumBuckets), Device(Dev) {
90 if (Threshold)
91 SizeThreshold = Threshold;
92 }
93
~MemoryManagerTy()94 MemoryManagerTy::~MemoryManagerTy() {
95 // TODO: There is a little issue that target plugin is destroyed before this
96 // object, therefore the memory free will not succeed.
97 // Deallocate all memory in map
98 for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end(); ++Itr) {
99 assert(Itr->second.Ptr && "nullptr in map table");
100 deleteOnDevice(Itr->second.Ptr);
101 }
102 }
103
allocateOnDevice(size_t Size,void * HstPtr) const104 void *MemoryManagerTy::allocateOnDevice(size_t Size, void *HstPtr) const {
105 return Device.RTL->data_alloc(Device.RTLDeviceID, Size, HstPtr);
106 }
107
deleteOnDevice(void * Ptr) const108 int MemoryManagerTy::deleteOnDevice(void *Ptr) const {
109 return Device.RTL->data_delete(Device.RTLDeviceID, Ptr);
110 }
111
freeAndAllocate(size_t Size,void * HstPtr)112 void *MemoryManagerTy::freeAndAllocate(size_t Size, void *HstPtr) {
113 std::vector<void *> RemoveList;
114
115 // Deallocate all memory in FreeList
116 for (int I = 0; I < NumBuckets; ++I) {
117 FreeListTy &List = FreeLists[I];
118 std::lock_guard<std::mutex> Lock(FreeListLocks[I]);
119 if (List.empty())
120 continue;
121 for (const NodeTy &N : List) {
122 deleteOnDevice(N.Ptr);
123 RemoveList.push_back(N.Ptr);
124 }
125 FreeLists[I].clear();
126 }
127
128 // Remove all nodes in the map table which have been released
129 if (!RemoveList.empty()) {
130 std::lock_guard<std::mutex> LG(MapTableLock);
131 for (void *P : RemoveList)
132 PtrToNodeTable.erase(P);
133 }
134
135 // Try allocate memory again
136 return allocateOnDevice(Size, HstPtr);
137 }
138
allocateOrFreeAndAllocateOnDevice(size_t Size,void * HstPtr)139 void *MemoryManagerTy::allocateOrFreeAndAllocateOnDevice(size_t Size,
140 void *HstPtr) {
141 void *TgtPtr = allocateOnDevice(Size, HstPtr);
142 // We cannot get memory from the device. It might be due to OOM. Let's
143 // free all memory in FreeLists and try again.
144 if (TgtPtr == nullptr) {
145 DP("Failed to get memory on device. Free all memory in FreeLists and "
146 "try again.\n");
147 TgtPtr = freeAndAllocate(Size, HstPtr);
148 }
149
150 #ifdef OMPTARGET_DEBUG
151 if (TgtPtr == nullptr)
152 DP("Still cannot get memory on device probably because the device is "
153 "OOM.\n");
154 #endif
155
156 return TgtPtr;
157 }
158
allocate(size_t Size,void * HstPtr)159 void *MemoryManagerTy::allocate(size_t Size, void *HstPtr) {
160 // If the size is zero, we will not bother the target device. Just return
161 // nullptr directly.
162 if (Size == 0)
163 return nullptr;
164
165 DP("MemoryManagerTy::allocate: size %zu with host pointer " DPxMOD ".\n",
166 Size, DPxPTR(HstPtr));
167
168 // If the size is greater than the threshold, allocate it directly from
169 // device.
170 if (Size > SizeThreshold) {
171 DP("%zu is greater than the threshold %zu. Allocate it directly from "
172 "device\n",
173 Size, SizeThreshold);
174 void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
175
176 DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
177
178 return TgtPtr;
179 }
180
181 NodeTy *NodePtr = nullptr;
182
183 // Try to get a node from FreeList
184 {
185 const int B = findBucket(Size);
186 FreeListTy &List = FreeLists[B];
187
188 NodeTy TempNode(Size, nullptr);
189 std::lock_guard<std::mutex> LG(FreeListLocks[B]);
190 FreeListTy::const_iterator Itr = List.find(TempNode);
191
192 if (Itr != List.end()) {
193 NodePtr = &Itr->get();
194 List.erase(Itr);
195 }
196 }
197
198 #ifdef OMPTARGET_DEBUG
199 if (NodePtr != nullptr)
200 DP("Find one node " DPxMOD " in the bucket.\n", DPxPTR(NodePtr));
201 #endif
202
203 // We cannot find a valid node in FreeLists. Let's allocate on device and
204 // create a node for it.
205 if (NodePtr == nullptr) {
206 DP("Cannot find a node in the FreeLists. Allocate on device.\n");
207 // Allocate one on device
208 void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
209
210 if (TgtPtr == nullptr)
211 return nullptr;
212
213 // Create a new node and add it into the map table
214 {
215 std::lock_guard<std::mutex> Guard(MapTableLock);
216 auto Itr = PtrToNodeTable.emplace(TgtPtr, NodeTy(Size, TgtPtr));
217 NodePtr = &Itr.first->second;
218 }
219
220 DP("Node address " DPxMOD ", target pointer " DPxMOD ", size %zu\n",
221 DPxPTR(NodePtr), DPxPTR(TgtPtr), Size);
222 }
223
224 assert(NodePtr && "NodePtr should not be nullptr at this point");
225
226 return NodePtr->Ptr;
227 }
228
free(void * TgtPtr)229 int MemoryManagerTy::free(void *TgtPtr) {
230 DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));
231
232 NodeTy *P = nullptr;
233
234 // Look it up into the table
235 {
236 std::lock_guard<std::mutex> G(MapTableLock);
237 auto Itr = PtrToNodeTable.find(TgtPtr);
238
239 // We don't remove the node from the map table because the map does not
240 // change.
241 if (Itr != PtrToNodeTable.end())
242 P = &Itr->second;
243 }
244
245 // The memory is not managed by the manager
246 if (P == nullptr) {
247 DP("Cannot find its node. Delete it on device directly.\n");
248 return deleteOnDevice(TgtPtr);
249 }
250
251 // Insert the node to the free list
252 const int B = findBucket(P->Size);
253
254 DP("Found its node " DPxMOD ". Insert it to bucket %d.\n", DPxPTR(P), B);
255
256 {
257 std::lock_guard<std::mutex> G(FreeListLocks[B]);
258 FreeLists[B].insert(*P);
259 }
260
261 return OFFLOAD_SUCCESS;
262 }
263