/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/common_runtime/scoped_allocator.h"

#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/platform/dynamic_annotations.h"

namespace tensorflow {

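// Constructs a ScopedAllocator that hands out precomputed, disjoint slices
// of `backing_tensor`, one per field.  The backing tensor's buffer is Ref'd
// until this allocator is destroyed, and the owning container is Ref'd until
// all expected allocation calls have been made.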
ScopedAllocator::ScopedAllocator(const Tensor& backing_tensor, int32 scope_id,
                                 const string& name,
                                 const gtl::ArraySlice<Field> fields,
                                 int32 expected_call_count,
                                 ScopedAllocatorContainer* container)
    : backing_tensor_(backing_tensor),
      tbuf_(backing_tensor_.buf_),
      id_(scope_id),
      name_(name),
      container_(container),
      fields_(fields.begin(), fields.end()),
      expected_call_count_(expected_call_count),
      live_alloc_count_(0) {
  // Hold this until all aliases have been deallocated.
  tbuf_->Ref();
  // Hold this until all expected_calls have been made.
  container->Ref();
  CHECK_GE(tbuf_->size(), fields.back().offset + fields.back().bytes_requested);
}

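// Releases the reference on the backing tensor buffer taken in the
// constructor; logs if not all expected allocation calls were made.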
ScopedAllocator::~ScopedAllocator() {
  mutex_lock l(mu_);
  VLOG(1) << "~ScopedAllocator " << this << " tbuf_ " << tbuf_ << " data "
          << static_cast<void*>(tbuf_->data());
  // Unless graph execution was incomplete (interrupted by an error status or
  // by a control-flow branch crossing the ScopedAllocator region), we expect
  // expected_call_count_ == 0 at this point.
  if (VLOG_IS_ON(1)) {
    if (expected_call_count_ > 0)
      VLOG(1) << "expected_call_count_ = " << expected_call_count_
              << " at deallocation";
  }
  if (tbuf_) tbuf_->Unref();
}

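// Returns a pointer into the backing tensor at the precomputed offset for
// `field_index`, or nullptr (with an error log) if the expected number of
// calls is already exhausted, the field index is out of range, or `num_bytes`
// does not match the field's precalculated size.  When the final expected
// allocation is served, this allocator drops itself from the container and
// releases the container reference taken in the constructor.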
void* ScopedAllocator::AllocateRaw(int32 field_index, size_t num_bytes) {
  VLOG(1) << "ScopedAllocator index " << id_ << " AllocateRaw "
          << "field " << field_index << " num_bytes " << num_bytes;
  void* ptr = nullptr;
  const Field* field = nullptr;
  {
    mutex_lock l(mu_);
    if (expected_call_count_ <= 0) {
      LOG(ERROR) << "Scoped allocator " << name_
                 << " could not satisfy request for " << num_bytes
                 << " bytes, expected uses exhausted.";
      return nullptr;
    }

    int32_t num_fields = static_cast<int32>(fields_.size());
    if (field_index >= num_fields) {
      LOG(ERROR) << "ScopedAllocator " << name_
                 << " received unexpected field number " << field_index;
      return nullptr;
    }

    field = &fields_[field_index];
    if (num_bytes != field->bytes_requested) {
      LOG(ERROR) << "ScopedAllocator " << name_ << " got request for "
                 << num_bytes << " bytes from field " << field_index
                 << " which has precalculated size " << field->bytes_requested
                 << " and offset " << field->offset;
      return nullptr;
    }

    ptr = static_cast<void*>((tbuf_->template base<char>() + field->offset));

    ++live_alloc_count_;
    --expected_call_count_;
    if (0 == expected_call_count_) {
      for (auto& f : fields_) {
        container_->Drop(f.scope_id, this);
      }
      container_->Drop(id_, this);
      container_->Unref();
      container_ = nullptr;
    }
  }
  VLOG(2) << "AllocateRaw returning " << ptr << " bytes_requested "
          << field->bytes_requested << " bytes_allocated "
          << field->bytes_allocated;

  // If there is overshoot due to alignment, let MSAN believe that the padding
  // is initialized.  This is okay because we do not use this memory region for
  // anything meaningful.
  if (field->bytes_allocated > field->bytes_requested) {
    size_t extra_bytes = field->bytes_allocated - field->bytes_requested;
    void* extra_buf = static_cast<void*>(static_cast<char*>(ptr) +
                                         field->bytes_allocated - extra_bytes);
    VLOG(2) << "AllocateRaw requested " << num_bytes
            << " bytes which is not divisible by kAllocatorAlignment="
            << Allocator::kAllocatorAlignment << " and hence we allocated "
            << field->bytes_allocated << ". Annotating " << extra_bytes
            << " bytes starting at " << extra_buf
            << " with TF_ANNOTATE_MEMORY_IS_INITIALIZED";
    TF_ANNOTATE_MEMORY_IS_INITIALIZED(extra_buf, extra_bytes);
  }

  return ptr;
}

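// Verifies that `p` was handed out by AllocateRaw, then decrements the live
// allocation count.  Once there are no live allocations and no further
// expected calls, the ScopedAllocator deletes itself.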
void ScopedAllocator::DeallocateRaw(void* p) {
  CHECK(VerifyPointer(p));

  bool dead = false;
  {
    mutex_lock l(mu_);
    CHECK_GT(live_alloc_count_, 0);
    if (0 == --live_alloc_count_) {
      if (0 == expected_call_count_) {
        dead = true;
      }
    }
  }
  if (dead) {
    delete this;
  }
}

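// Returns true iff `p` is the start of one of this allocator's fields within
// the backing tensor buffer.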
bool ScopedAllocator::VerifyPointer(const void* p) {
  void* base = tbuf_->data();
  CHECK_GE(p, base);
  for (auto& f : fields_) {
    void* f_ptr = static_cast<void*>(static_cast<char*>(base) + f.offset);
    if (f_ptr == p) {
      return true;
    }
  }
  VLOG(1) << "ScopedAllocator index " << id_ << " VerifyPointer for p=" << p
          << " failed.";
  return false;
}

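// Returns true iff `t`'s buffer starts at one of this allocator's field
// offsets within the backing tensor.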
bool ScopedAllocator::VerifyTensor(const Tensor* t) {
  return VerifyPointer(t->buf_->data());
}

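// A ScopedAllocatorInstance forwards a single Allocate/Deallocate pair for
// one field of its parent ScopedAllocator.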
ScopedAllocatorInstance::ScopedAllocatorInstance(ScopedAllocator* sa,
                                                 int32 field_index)
    : scoped_allocator_(sa),
      field_index_(field_index),
      allocated_(false),
      deallocated_(false),
      in_table_(true) {
  VLOG(1) << "new ScopedAllocatorInstance " << this << " on SA " << sa
          << " field_index " << field_index;
}

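// Marks this instance as removed from the parent container's table.  The
// instance deletes itself only after it is both out of the table and its
// single allocate/deallocate cycle is complete.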
void ScopedAllocatorInstance::DropFromTable() {
  bool del = false;
  {
    mutex_lock l(mu_);
    CHECK(in_table_);
    in_table_ = false;
    VLOG(2) << "ScopedAllocatorInstance::DropFromTable " << this
            << " allocated_ " << allocated_ << " deallocated_ " << deallocated_
            << " in_table_ " << in_table_;
    // Single use is complete when it is allocated and deallocated.
    // This check prevents a race between Allocating the tensor slice and
    // Dropping it from the parent container's table.
    if (allocated_ && deallocated_) {
      del = true;
    }
  }
  if (del) delete this;
}

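// Forwards the allocation to the parent ScopedAllocator for this instance's
// field.  The `alignment` argument is unused here; the field's offset within
// the backing tensor was computed in advance.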
void* ScopedAllocatorInstance::AllocateRaw(size_t alignment, size_t num_bytes) {
  void* ptr = scoped_allocator_->AllocateRaw(field_index_, num_bytes);
  {
    mutex_lock l(mu_);
    if (nullptr == ptr) {
      VLOG(2) << "ScopedAllocatorInstance::AllocateRaw " << this
              << " call to underlying ScopedAllocator unsuccessful,"
              << " allocated_ " << allocated_ << " deallocated_ "
              << deallocated_ << " in_table_ " << in_table_
              << " returning nullptr.";
    } else {
      allocated_ = true;
      VLOG(2) << "ScopedAllocatorInstance::AllocateRaw " << this
              << " allocated_ " << allocated_ << " deallocated_ "
              << deallocated_ << " in_table_ " << in_table_
              << " returning ptr = " << ptr;
    }
  }
  return ptr;
}

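// Forwards the deallocation to the parent ScopedAllocator and marks this
// instance's single use as complete.  Deletes itself if it has already been
// dropped from the container's table.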
void ScopedAllocatorInstance::DeallocateRaw(void* p) {
  scoped_allocator_->DeallocateRaw(p);
  bool del = false;
  {
    mutex_lock l(mu_);
    CHECK(allocated_);
    deallocated_ = true;
    VLOG(2) << "ScopedAllocatorInstance::DeallocateRaw " << this
            << " allocated_ " << allocated_ << " deallocated_ " << deallocated_
            << " in_table_ " << in_table_;
    // Single use is now complete, but only delete this instance when it is
    // no longer in a ScopedAllocatorContainer's table.
    if (!in_table_) {
      del = true;
    }
  }
  if (del) delete this;
}

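// Debug name: the parent allocator's name plus this instance's field index.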
string ScopedAllocatorInstance::Name() {
  return strings::StrCat(scoped_allocator_->name(), "_field_", field_index_);
}

}  // namespace tensorflow