1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/common_runtime/process_util.h"
17 
18 #ifdef INTEL_MKL
19 #ifdef _OPENMP
20 #include <omp.h>
21 #endif  // _OPENMP
22 #endif  // INTEL_MKL
23 #include <string.h>
24 
25 #include "tensorflow/core/lib/core/threadpool.h"
26 #include "tensorflow/core/platform/byte_order.h"
27 #include "tensorflow/core/platform/cpu_info.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/platform/tracing.h"
30 #include "tensorflow/core/platform/types.h"
31 #include "tensorflow/core/util/util.h"
32 
33 namespace tensorflow {
34 
35 namespace {
36 
DefaultNumInterOpThreads()37 int32 DefaultNumInterOpThreads() {
38   // Use environment setting if specified (init once)
39   static int env_num_threads = NumInterOpThreadsFromEnvironment();
40   if (env_num_threads > 0) {
41     return env_num_threads;
42   }
43 
44   // Default to using the number of cores available in the process.
45   return port::NumSchedulableCPUs();
46 }
47 
InitComputePool(const SessionOptions & options)48 static thread::ThreadPool* InitComputePool(const SessionOptions& options) {
49   int32 inter_op_parallelism_threads =
50       options.config.inter_op_parallelism_threads();
51   if (inter_op_parallelism_threads == 0) {
52     inter_op_parallelism_threads = DefaultNumInterOpThreads();
53   }
54   return new thread::ThreadPool(Env::Default(), "Compute",
55                                 inter_op_parallelism_threads);
56 }
57 
58 }  // namespace
59 
ComputePool(const SessionOptions & options)60 thread::ThreadPool* ComputePool(const SessionOptions& options) {
61   static thread::ThreadPool* compute_pool = InitComputePool(options);
62   return compute_pool;
63 }
64 
NumInterOpThreadsFromEnvironment()65 int32 NumInterOpThreadsFromEnvironment() {
66   int32 num;
67   const char* val = std::getenv("TF_NUM_INTEROP_THREADS");
68   return (val && strings::safe_strto32(val, &num)) ? num : 0;
69 }
70 
NumIntraOpThreadsFromEnvironment()71 int32 NumIntraOpThreadsFromEnvironment() {
72   int32 num;
73   const char* val = std::getenv("TF_NUM_INTRAOP_THREADS");
74   return (val && strings::safe_strto32(val, &num)) ? num : 0;
75 }
76 
NumInterOpThreadsFromSessionOptions(const SessionOptions & options)77 int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
78   const int32 inter_op = options.config.inter_op_parallelism_threads();
79   if (inter_op != 0) return inter_op;
80 #ifdef INTEL_MKL
81   if (!DisableMKL()) {
82     // MKL library executes ops in parallel using OMP threads
83     // Set inter_op conservatively to avoid thread oversubscription that could
84     // lead to severe perf degradations and OMP resource exhaustion
85     int mkl_intra_op = 1;
86 #ifdef _OPENMP
87     mkl_intra_op = omp_get_max_threads();
88 #endif  // _OPENMP
89     DCHECK_GE(mkl_intra_op, 1);
90     const int32 mkl_inter_op = std::max(
91         (DefaultNumInterOpThreads() + mkl_intra_op - 1) / mkl_intra_op, 2);
92     VLOG(0)
93         << "Creating new thread pool with default inter op setting: "
94         << mkl_inter_op
95         << ". Tune using inter_op_parallelism_threads for best performance.";
96     return mkl_inter_op;
97   }
98 #endif  // INTEL_MKL
99   return DefaultNumInterOpThreads();
100 }
101 
NewThreadPoolFromSessionOptions(const SessionOptions & options)102 thread::ThreadPool* NewThreadPoolFromSessionOptions(
103     const SessionOptions& options) {
104   const int32 num_threads = NumInterOpThreadsFromSessionOptions(options);
105   VLOG(1) << "Direct session inter op parallelism threads: " << num_threads;
106   return new thread::ThreadPool(options.env, "Compute", num_threads);
107 }
108 
SchedClosure(std::function<void ()> closure)109 void SchedClosure(std::function<void()> closure) {
110   if (!tracing::EventCollector::IsEnabled()) {
111     return Env::Default()->SchedClosure(std::move(closure));
112   }
113   uint64 id = tracing::GetUniqueArg();
114   tracing::RecordEvent(tracing::EventCategory::kScheduleClosure, id);
115 
116   Env::Default()->SchedClosure(std::bind(
117       [id](std::function<void()> closure) {
118         tracing::ScopedRegion region(tracing::EventCategory::kRunClosure, id);
119         closure();
120       },
121       std::move(closure)));
122 }
123 
SchedNonBlockingClosureAfter(int64 micros,std::function<void ()> closure)124 void SchedNonBlockingClosureAfter(int64 micros, std::function<void()> closure) {
125   Env::Default()->SchedClosureAfter(micros, std::move(closure));
126 }
127 
128 }  // namespace tensorflow
129