1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_STREAM_EXECUTOR_GPU_REDZONE_ALLOCATOR_H_
17 #define TENSORFLOW_STREAM_EXECUTOR_GPU_REDZONE_ALLOCATOR_H_
18 
19 #include <vector>
20 
21 #include "tensorflow/core/lib/math/math_util.h"
22 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
23 #include "tensorflow/stream_executor/device_memory_allocator.h"
24 #include "tensorflow/stream_executor/gpu/asm_compiler.h"
25 #include "tensorflow/stream_executor/gpu/gpu_asm_opts.h"
26 
27 namespace stream_executor {
28 
29 // An allocator that allocates a bit of extra memory around the beginning/end of
30 // every allocation and can check that this memory is unmodified.
31 //
32 // This can be used to check for out-of-bounds writes, and, if the redzone is
33 // filled with a sufficiently "ugly" pattern, may also be able to check for
34 // out-of-bounds reads.  The default fill pattern of -1 is an unusual NaN
35 // pattern when interpreted as a floating-point number, so hopefully works for
36 // out-of-bounds reads and writes in those cases.
37 //
38 // This class implements ScratchAllocator, so can be used to allocate temp
39 // memory for cudnn convolutions.
40 class RedzoneAllocator : public ScratchAllocator {
41  public:
42   static constexpr int64 kDefaultMemoryLimit = 1LL << 32;  // 4GB
43   static constexpr int64 kDefaultRedzoneSize =
44       1LL << 23;  // 8MiB per side, 16MiB total.
45   static constexpr uint8 kDefaultRedzonePattern = -1;
46   RedzoneAllocator(Stream* stream, DeviceMemoryAllocator* memory_allocator,
47                    GpuAsmOpts gpu_compilation_opts_,
48                    int64 memory_limit = kDefaultMemoryLimit,
49                    int64 redzone_size = kDefaultRedzoneSize,
50                    uint8 redzone_pattern = kDefaultRedzonePattern);
51 
52   // Redzones don't count towards the memory limit.
GetMemoryLimitInBytes()53   int64 GetMemoryLimitInBytes() override { return memory_limit_; }
54 
TotalAllocatedBytesExcludingRedzones()55   int64 TotalAllocatedBytesExcludingRedzones() const {
56     return allocated_bytes_excluding_redzones_;
57   }
58 
59   port::StatusOr<DeviceMemory<uint8>> AllocateBytes(int64 byte_size) override;
60 
61   // Non-empty redzone check status implies that there was a write into a
62   // redzone, with a string communicating the location of the write.
63   struct RedzoneCheckStatus {
64     RedzoneCheckStatus() = default;
65 
RedzoneCheckStatusRedzoneCheckStatus66     RedzoneCheckStatus(absl::string_view buffer_name, void* user_buffer_address,
67                        int64 offset, uint64 expected_value, uint64 actual_value)
68         : buffer_name(buffer_name),
69           user_buffer_address(user_buffer_address),
70           offset(offset),
71           expected_value(expected_value),
72           actual_value(actual_value) {}
73 
OKRedzoneCheckStatus74     static RedzoneCheckStatus OK() { return {}; }
75 
okRedzoneCheckStatus76     bool ok() { return user_buffer_address == nullptr; }
77 
78     std::string RedzoneFailureMsg() const;
79 
80     std::string buffer_name = {};
81     void* user_buffer_address = nullptr;
82     int64 offset = 0;
83     uint64 expected_value = 0;
84     uint64 actual_value = 0;
85   };
86 
87   // Determines whether redzones around all allocated buffers are unmodified.
88   //
89   // Reinitializes redzones to the expected value, so that the same buffer
90   // could be reused for multiple checks.
91   //
92   // Returns:
93   //
94   //  - RedzoneCheckStatus::OK() if everything went well.
95   //  - RedzoneCheckStatus with a non-empty error message iff a write into a
96   //    redzone has been detected.
97   //  - A stream error, if loading or launching the kernel has failed.
98   port::StatusOr<RedzoneCheckStatus> CheckRedzones() const;
99 
100  private:
101   const int device_ordinal_;
102   Stream* stream_;
103 
104   // Memory limit of the allocator in bytes.
105   const int64 memory_limit_;
106 
107   // Redzone size on *one side* of allocation in bytes.
108   //
109   // Must be a multiple of kXlaAllocatedBufferAlignBytes, otherwise the buffers
110   // returned to users will be misaligned.
111   const int64 redzone_size_;
112 
113   const uint8 redzone_pattern_;
114   DeviceMemoryAllocator* memory_allocator_;
115   GpuAsmOpts gpu_compilation_opts_;
116 
117   // The second element of the pair is the size of the user allocation.  This
118   // isn't necessarily just first.size() - 2 * redzone_size_ because when the
119   // user allocation size is not a multiple of 4 bytes, we round up the size of
120   // the RHS redzone.
121   //
122   // ScratchAllocators need to free all allocated memory on destruction so we
123   // use `OwningDeviceMemory` here.
124   std::vector<std::pair<OwningDeviceMemory, int64>> allocated_buffers_;
125 
126   int64 allocated_bytes_excluding_redzones_ = 0;
127 };
128 
129 }  // namespace stream_executor
130 
131 #endif  // TENSORFLOW_STREAM_EXECUTOR_GPU_REDZONE_ALLOCATOR_H_
132