1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <atomic>
17 
18 #include "tensorflow/core/framework/resource_mgr.h"
19 
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29 
30 namespace tensorflow {
31 
32 // Used to generate unique names for anonymous variables
33 static std::atomic<int64> current_id_;
34 
MakeResourceHandle(OpKernelContext * ctx,const string & container,const string & name,const TypeIndex & type_index)35 ResourceHandle MakeResourceHandle(OpKernelContext* ctx, const string& container,
36                                   const string& name,
37                                   const TypeIndex& type_index) {
38   ResourceHandle result;
39   result.set_device(ctx->device()->attributes().name());
40   string actual_container;
41   if (!container.empty()) {
42     actual_container = container;
43   } else {
44     actual_container = ctx->resource_manager()->default_container();
45   }
46   result.set_container(actual_container);
47   if (name == ResourceHandle::ANONYMOUS_NAME) {
48     result.set_name(strings::StrCat("_AnonymousVar", current_id_.fetch_add(1)));
49   } else {
50     result.set_name(name);
51   }
52   result.set_hash_code(type_index.hash_code());
53   result.set_maybe_type_name(type_index.name());
54   return result;
55 }
56 
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)57 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
58                                   const string& container, const string& name,
59                                   const TypeIndex& type_index) {
60   Tensor* handle;
61   TF_RETURN_IF_ERROR(
62       context->allocate_output(output_index, TensorShape({}), &handle));
63   handle->scalar<ResourceHandle>()() =
64       MakeResourceHandle(context, container, name, type_index);
65   return Status::OK();
66 }
67 
68 namespace internal {
69 
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)70 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
71   if (ctx->device()->attributes().name() != p.device()) {
72     return errors::InvalidArgument(
73         "Trying to access resource ", p.name(), " located in device ",
74         p.device(), " from device ", ctx->device()->attributes().name());
75   }
76   return Status::OK();
77 }
78 
79 }  // end namespace internal
80 
InsertDebugTypeName(uint64 hash_code,const string & type_name)81 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
82                                         const string& type_name) {
83   auto iter = debug_type_names_.emplace(hash_code, type_name);
84   if (iter.first->second != type_name) {
85     return errors::AlreadyExists("Duplicate hash code found for type ",
86                                  type_name);
87   }
88   return Status::OK();
89 }
90 
DebugTypeName(uint64 hash_code) const91 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
92   auto type_name_iter = debug_type_names_.find(hash_code);
93   if (type_name_iter == debug_type_names_.end()) {
94     return "<unknown>";
95   } else {
96     return type_name_iter->second.c_str();
97   }
98 }
99 
ResourceMgr()100 ResourceMgr::ResourceMgr() : default_container_("localhost") {}
101 
ResourceMgr(const string & default_container)102 ResourceMgr::ResourceMgr(const string& default_container)
103     : default_container_(default_container) {}
104 
~ResourceMgr()105 ResourceMgr::~ResourceMgr() { Clear(); }
106 
Clear()107 void ResourceMgr::Clear() {
108   mutex_lock l(mu_);
109   for (const auto& p : containers_) {
110     for (const auto& q : *p.second) {
111       q.second->Unref();
112     }
113     delete p.second;
114   }
115   containers_.clear();
116 }
117 
DebugString() const118 string ResourceMgr::DebugString() const {
119   mutex_lock l(mu_);
120   struct Line {
121     const string* container;
122     const string type;
123     const string* resource;
124     const string detail;
125   };
126   std::vector<Line> lines;
127   for (const auto& p : containers_) {
128     const string& container = p.first;
129     for (const auto& q : *p.second) {
130       const Key& key = q.first;
131       const char* type = DebugTypeName(key.first);
132       const string& resource = key.second;
133       Line l{&container, port::Demangle(type), &resource,
134              q.second->DebugString()};
135       lines.push_back(l);
136     }
137   }
138   std::vector<string> text;
139   text.reserve(lines.size());
140   for (const Line& line : lines) {
141     text.push_back(strings::Printf(
142         "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
143         line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
144   }
145   std::sort(text.begin(), text.end());
146   return str_util::Join(text, "\n");
147 }
148 
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)149 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
150                              const string& name, ResourceBase* resource) {
151   Container** b = &containers_[container];
152   if (*b == nullptr) {
153     *b = new Container;
154   }
155   if ((*b)->insert({{type.hash_code(), name}, resource}).second) {
156     TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
157     return Status::OK();
158   }
159   resource->Unref();
160   return errors::AlreadyExists("Resource ", container, "/", name, "/",
161                                type.name());
162 }
163 
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const164 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
165                              const string& name,
166                              ResourceBase** resource) const {
167   const Container* b = gtl::FindPtrOrNull(containers_, container);
168   if (b == nullptr) {
169     return errors::NotFound("Container ", container,
170                             " does not exist. (Could not find resource: ",
171                             container, "/", name, ")");
172   }
173   auto r = gtl::FindPtrOrNull(*b, {type.hash_code(), name});
174   if (r == nullptr) {
175     return errors::NotFound("Resource ", container, "/", name, "/", type.name(),
176                             " does not exist.");
177   }
178   *resource = const_cast<ResourceBase*>(r);
179   (*resource)->Ref();
180   return Status::OK();
181 }
182 
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)183 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
184                              const string& resource_name,
185                              const string& type_name) {
186   ResourceBase* base = nullptr;
187   {
188     mutex_lock l(mu_);
189     Container* b = gtl::FindPtrOrNull(containers_, container);
190     if (b == nullptr) {
191       return errors::NotFound("Container ", container, " does not exist.");
192     }
193     auto iter = b->find({type_hash_code, resource_name});
194     if (iter == b->end()) {
195       return errors::NotFound("Resource ", container, "/", resource_name, "/",
196                               type_name, " does not exist.");
197     }
198     base = iter->second;
199     b->erase(iter);
200   }
201   CHECK(base != nullptr);
202   base->Unref();
203   return Status::OK();
204 }
205 
DoDelete(const string & container,TypeIndex type,const string & resource_name)206 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
207                              const string& resource_name) {
208   return DoDelete(container, type.hash_code(), resource_name, type.name());
209 }
210 
Delete(const ResourceHandle & handle)211 Status ResourceMgr::Delete(const ResourceHandle& handle) {
212   return DoDelete(handle.container(), handle.hash_code(), handle.name(),
213                   "<unknown>");
214 }
215 
Cleanup(const string & container)216 Status ResourceMgr::Cleanup(const string& container) {
217   {
218     tf_shared_lock l(mu_);
219     if (!gtl::FindOrNull(containers_, container)) {
220       // Nothing to cleanup.
221       return Status::OK();
222     }
223   }
224   Container* b = nullptr;
225   {
226     mutex_lock l(mu_);
227     auto iter = containers_.find(container);
228     if (iter == containers_.end()) {
229       // Nothing to cleanup, it's OK (concurrent cleanup).
230       return Status::OK();
231     }
232     b = iter->second;
233     containers_.erase(iter);
234   }
235   CHECK(b != nullptr);
236   for (const auto& p : *b) {
237     p.second->Unref();
238   }
239   delete b;
240   return Status::OK();
241 }
242 
IsValidContainerName(StringPiece s)243 static bool IsValidContainerName(StringPiece s) {
244   using ::tensorflow::strings::Scanner;
245   return Scanner(s)
246       .One(Scanner::LETTER_DIGIT_DOT)
247       .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
248       .Eos()
249       .GetResult();
250 }
251 
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)252 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
253                            bool use_node_name_as_default) {
254   CHECK(rmgr);
255   rmgr_ = rmgr;
256   string attr_container;
257   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
258   if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
259     return errors::InvalidArgument("container contains invalid characters: ",
260                                    attr_container);
261   }
262   string attr_shared_name;
263   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
264   if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
265     return errors::InvalidArgument("shared_name cannot start with '_':",
266                                    attr_shared_name);
267   }
268   if (!attr_container.empty()) {
269     container_ = attr_container;
270   } else {
271     container_ = rmgr_->default_container();
272   }
273   if (!attr_shared_name.empty()) {
274     name_ = attr_shared_name;
275   } else if (use_node_name_as_default) {
276     name_ = ndef.name();
277   } else {
278     resource_is_private_to_kernel_ = true;
279     static std::atomic<int64> counter(0);
280     name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
281   }
282   return Status::OK();
283 }
284 
DebugString() const285 string ContainerInfo::DebugString() const {
286   return strings::StrCat("[", container(), ",", name(), ",",
287                          resource_is_private_to_kernel() ? "private" : "public",
288                          "]");
289 }
290 
HandleFromInput(OpKernelContext * ctx,int input)291 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
292   return ctx->input(input).flat<ResourceHandle>()(0);
293 }
294 
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)295 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
296                        ResourceHandle* handle) {
297   const Tensor* tensor;
298   TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
299   *handle = tensor->flat<ResourceHandle>()(0);
300   return Status::OK();
301 }
302 
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)303 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
304   TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
305   return ctx->resource_manager()->Delete(p);
306 }
307 
ResourceHandlesShape(shape_inference::InferenceContext * c)308 Status ResourceHandlesShape(shape_inference::InferenceContext* c) {
309   int n;
310   TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
311   for (int i = 0; i < n; ++i) {
312     c->set_output(i, c->Scalar());
313   }
314   return Status::OK();
315 }
316 
317 }  //  end namespace tensorflow
318