1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/grappler/clusters/utils.h"
17 
18 #include "third_party/eigen3/Eigen/Core"
19 
20 #if GOOGLE_CUDA
21 #include "cuda/include/cuda.h"
22 #include "cuda/include/cuda_runtime_api.h"
23 #include "cuda/include/cudnn.h"
24 #endif
25 
26 #if TENSORFLOW_USE_ROCM
27 #include "rocm/include/hip/hip_runtime.h"
28 #endif
29 
30 #ifdef EIGEN_USE_LIBXSMM
31 #include "include/libxsmm.h"
32 #endif
33 
34 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
35 #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
36 #include "tensorflow/core/lib/core/status.h"
37 #include "tensorflow/core/lib/strings/numbers.h"
38 #include "tensorflow/core/lib/strings/strcat.h"
39 #include "tensorflow/core/platform/byte_order.h"
40 #include "tensorflow/core/platform/cpu_info.h"
41 #include "tensorflow/core/platform/mem.h"
42 
43 namespace tensorflow {
44 namespace grappler {
45 
GetLocalCPUInfo()46 DeviceProperties GetLocalCPUInfo() {
47   DeviceProperties device;
48   device.set_type("CPU");
49 
50   device.set_vendor(port::CPUVendorIDString());
51   // Combine cpu family and model into the model string.
52   device.set_model(
53       strings::StrCat((port::CPUFamily() << 4) + port::CPUModelNum()));
54   device.set_frequency(port::NominalCPUFrequency() * 1e-6);
55   device.set_num_cores(port::NumSchedulableCPUs());
56   device.set_l1_cache_size(Eigen::l1CacheSize());
57   device.set_l2_cache_size(Eigen::l2CacheSize());
58   device.set_l3_cache_size(Eigen::l3CacheSize());
59 
60   int64 free_mem = port::AvailableRam();
61   if (free_mem < INT64_MAX) {
62     device.set_memory_size(free_mem);
63   }
64 
65   (*device.mutable_environment())["cpu_instruction_set"] =
66       Eigen::SimdInstructionSetsInUse();
67 
68   (*device.mutable_environment())["eigen"] = strings::StrCat(
69       EIGEN_WORLD_VERSION, ".", EIGEN_MAJOR_VERSION, ".", EIGEN_MINOR_VERSION);
70 #ifdef EIGEN_USE_LIBXSMM
71   (*device.mutable_environment())["libxsmm"] = LIBXSMM_VERSION;
72 #endif
73 
74   return device;
75 }
76 
GetLocalGPUInfo(PlatformGpuId platform_gpu_id)77 DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) {
78   DeviceProperties device;
79   device.set_type("GPU");
80 
81 #if GOOGLE_CUDA
82   cudaDeviceProp properties;
83   cudaError_t error =
84       cudaGetDeviceProperties(&properties, platform_gpu_id.value());
85   if (error != cudaSuccess) {
86     device.set_type("UNKNOWN");
87     LOG(ERROR) << "Failed to get device properties, error code: " << error;
88     return device;
89   }
90 
91   device.set_vendor("NVIDIA");
92   device.set_model(properties.name);
93   device.set_frequency(properties.clockRate * 1e-3);
94   device.set_num_cores(properties.multiProcessorCount);
95   device.set_num_registers(properties.regsPerMultiprocessor);
96   // For compute capability less than 5, l1 cache size is configurable to
97   // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For
98   // compute capability larger or equal to 5, l1 cache (unified with texture
99   // cache) size is 24 KB. This number may need to be updated for future
100   // compute capabilities.
101   device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024);
102   device.set_l2_cache_size(properties.l2CacheSize);
103   device.set_l3_cache_size(0);
104   device.set_shared_memory_size_per_multiprocessor(
105       properties.sharedMemPerMultiprocessor);
106   device.set_memory_size(properties.totalGlobalMem);
107   // 8 is the number of bits per byte. 2 is accounted for
108   // double data rate (DDR).
109   device.set_bandwidth(properties.memoryBusWidth / 8 *
110                        properties.memoryClockRate * 2);
111 
112   (*device.mutable_environment())["architecture"] =
113       strings::StrCat(properties.major, ".", properties.minor);
114   (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION);
115   (*device.mutable_environment())["cudnn"] = strings::StrCat(CUDNN_VERSION);
116 
117 #elif TENSORFLOW_USE_ROCM
118   hipDeviceProp_t properties;
119   hipError_t error =
120       hipGetDeviceProperties(&properties, platform_gpu_id.value());
121   if (error != hipSuccess) {
122     device.set_type("UNKNOWN");
123     LOG(ERROR) << "Failed to get device properties, error code: " << error;
124     return device;
125   }
126 
127   // ROCM TODO review if numbers here are valid
128   device.set_vendor("Advanced Micro Devices, Inc");
129   device.set_model(properties.name);
130   device.set_frequency(properties.clockRate * 1e-3);
131   device.set_num_cores(properties.multiProcessorCount);
132   device.set_num_registers(properties.regsPerBlock);
133   device.set_l1_cache_size(16 * 1024);
134   device.set_l2_cache_size(properties.l2CacheSize);
135   device.set_l3_cache_size(0);
136   device.set_shared_memory_size_per_multiprocessor(
137       properties.maxSharedMemoryPerMultiProcessor);
138   device.set_memory_size(properties.totalGlobalMem);
139   // 8 is the number of bits per byte. 2 is accounted for
140   // double data rate (DDR).
141   device.set_bandwidth(properties.memoryBusWidth / 8 *
142                        properties.memoryClockRate * 2);
143 
144   (*device.mutable_environment())["architecture"] =
145       strings::StrCat("gfx", properties.gcnArch);
146 #endif
147 
148   return device;
149 }
150 
GetDeviceInfo(const DeviceNameUtils::ParsedName & device)151 DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) {
152   DeviceProperties unknown;
153   unknown.set_type("UNKNOWN");
154 
155   if (device.type == "CPU") {
156     return GetLocalCPUInfo();
157   } else if (device.type == "GPU") {
158     if (device.has_id) {
159       TfGpuId tf_gpu_id(device.id);
160       PlatformGpuId platform_gpu_id;
161       Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
162       if (!s.ok()) {
163         LOG(ERROR) << s;
164         return unknown;
165       }
166       return GetLocalGPUInfo(platform_gpu_id);
167     } else {
168       return GetLocalGPUInfo(PlatformGpuId(0));
169     }
170   }
171   return unknown;
172 }
173 
174 }  // end namespace grappler
175 }  // end namespace tensorflow
176