Home
last modified time | relevance | path

Searched refs:DeviceMemoryBase (Results 1 – 25 of 151) sorted by relevance

1234567

/external/tensorflow/tensorflow/stream_executor/
Ddevice_memory.h57 class DeviceMemoryBase {
62 explicit DeviceMemoryBase(void *opaque = nullptr, uint64 size = 0)
75 bool operator<(const DeviceMemoryBase &other) const {
95 bool IsSameAs(const DeviceMemoryBase &other) const { in IsSameAs()
122 class DeviceMemory final : public DeviceMemoryBase {
125 DeviceMemory() : DeviceMemoryBase(nullptr, 0) {} in DeviceMemory()
130 explicit DeviceMemory(const DeviceMemoryBase &other) in DeviceMemory()
131 : DeviceMemoryBase(const_cast<DeviceMemoryBase &>(other).opaque(), in DeviceMemory()
156 DeviceMemoryBase::Reset(opaque, bytes); in ResetFromByteSize()
168 DeviceMemory(void *opaque, uint64 size) : DeviceMemoryBase(opaque, size) {} in DeviceMemory()
[all …]
Dstream_executor_internal.h198 virtual DeviceMemoryBase Allocate(uint64 size, int64 memory_space) = 0;
199 DeviceMemoryBase Allocate(uint64 size) { in Allocate()
202 virtual void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset,
204 virtual void Deallocate(DeviceMemoryBase *mem) = 0;
219 virtual port::Status SynchronousMemZero(DeviceMemoryBase *location,
221 virtual port::Status SynchronousMemSet(DeviceMemoryBase *location, int value,
223 virtual port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
226 const DeviceMemoryBase &gpu_src,
229 DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src,
231 virtual port::Status MemZero(Stream *stream, DeviceMemoryBase *location,
[all …]
Dstream_executor_pimpl.h182 port::StatusOr<DeviceMemoryBase> GetUntypedSymbol(
190 void Deallocate(DeviceMemoryBase *mem);
237 port::Status SynchronousMemZero(DeviceMemoryBase *location,
242 port::Status SynchronousMemSet(DeviceMemoryBase *location, int value,
249 bool SynchronousMemcpy(DeviceMemoryBase *device_dst, const void *host_src,
256 bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &device_src,
261 DeviceMemoryBase *device_dst);
268 DeviceMemoryBase *device_dst) { in SynchronousMemcpyH2D()
275 port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &device_src,
291 bool SynchronousMemcpy(DeviceMemoryBase *device_dst,
[all …]
Dtemporary_memory_manager.cc31 DeviceMemoryBase device_memory = it->first; in ForceDeallocateAll()
37 const DeviceMemoryBase& device_memory, uint64 generation, bool must_exist) { in MarkFinalized()
55 DeviceMemoryBase device_memory = it->first; in DeallocateFinalizedTemporaries()
66 bool TemporaryMemoryManager::IsFinalized(const DeviceMemoryBase& device_memory, in IsFinalized()
82 bool TemporaryMemoryManager::HasAllocated(const DeviceMemoryBase& device_memory, in HasAllocated()
96 DeviceMemoryBase device_memory = in AllocateArrayBase()
Dtemporary_device_memory.h62 DeviceMemoryBase* mutable_device_memory();
65 const DeviceMemoryBase& device_memory() const;
93 TemporaryDeviceMemoryBase(Stream* parent, DeviceMemoryBase device_memory,
97 DeviceMemoryBase device_memory_;
Dstream_executor_pimpl.cc210 void StreamExecutor::Deallocate(DeviceMemoryBase *mem) { in Deallocate()
279 const dnn::BatchDescriptor &input_descriptor, DeviceMemoryBase input_data, in GetMIOpenConvolveAlgorithms()
281 DeviceMemoryBase filter_data, const dnn::BatchDescriptor &output_descriptor, in GetMIOpenConvolveAlgorithms()
282 DeviceMemoryBase output_data, in GetMIOpenConvolveAlgorithms()
487 DeviceMemoryBase StreamExecutor::Allocate(uint64 size, int64 memory_space) { in Allocate()
494 return DeviceMemoryBase(); in Allocate()
496 DeviceMemoryBase buf = implementation_->Allocate(size, memory_space); in Allocate()
505 port::StatusOr<DeviceMemoryBase> StreamExecutor::GetUntypedSymbol( in GetUntypedSymbol()
512 return DeviceMemoryBase(opaque, bytes); in GetUntypedSymbol()
591 port::Status StreamExecutor::SynchronousMemZero(DeviceMemoryBase *location, in SynchronousMemZero()
[all …]
/external/tensorflow/tensorflow/compiler/xla/service/interpreter/
Dexecutor.h49 using Args = absl::Span<const DeviceMemoryBase>;
70 DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
71 void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes,
73 void Deallocate(DeviceMemoryBase *mem) override;
82 bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &dev_src,
84 bool Memcpy(Stream *stream, DeviceMemoryBase *dev_dst, const void *host_src,
86 bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *pop_dst, in MemcpyDeviceToDevice()
87 const DeviceMemoryBase &host_src, in MemcpyDeviceToDevice()
92 port::Status MemZero(Stream *stream, DeviceMemoryBase *location, in MemZero()
96 port::Status Memset(Stream *stream, DeviceMemoryBase *location, uint8 pattern, in Memset()
[all …]
Dexecutor.cc36 DeviceMemoryBase XlaInterpreterExecutor::Allocate(uint64 size, in Allocate()
38 return DeviceMemoryBase(new char[size], size); in Allocate()
41 void *XlaInterpreterExecutor::GetSubBuffer(DeviceMemoryBase *parent, in GetSubBuffer()
47 void XlaInterpreterExecutor::Deallocate(DeviceMemoryBase *mem) { in Deallocate()
52 const DeviceMemoryBase &dev_src, in Memcpy()
61 bool XlaInterpreterExecutor::Memcpy(Stream *stream, DeviceMemoryBase *dev_dst, in Memcpy()
71 DeviceMemoryBase *dev_dst, const void *host_src, uint64 size) { in SynchronousMemcpy()
77 void *host_dst, const DeviceMemoryBase &dev_src, uint64 size) { in SynchronousMemcpy()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dgpu_conv_runner.h85 se::DeviceMemoryBase bias_buf;
86 se::DeviceMemoryBase side_input_buf; // nullable
89 se::DeviceMemoryBase input_buf;
90 se::DeviceMemoryBase filter_buf;
91 se::DeviceMemoryBase output_buf;
111 absl::Span<se::DeviceMemoryBase> operand_buffers,
112 se::DeviceMemoryBase result_buffer,
113 se::DeviceMemoryBase scratch_buf, se::Stream* stream,
117 absl::Span<se::DeviceMemoryBase> operand_buffers,
118 se::DeviceMemoryBase result_buffer,
[all …]
Dcudnn_batchnorm_runner.h43 const CudnnBatchNormConfig &config, se::DeviceMemoryBase operand,
44 se::DeviceMemoryBase output, se::DeviceMemory<float> scale,
49 const CudnnBatchNormConfig &config, se::DeviceMemoryBase operand,
50 se::DeviceMemoryBase output_data, se::DeviceMemory<float> output_mean,
55 const CudnnBatchNormConfig &config, se::DeviceMemoryBase operand,
56 se::DeviceMemoryBase output_grad_data, se::DeviceMemoryBase grad_output,
Dbuffer_allocations.cc36 const std::set<se::DeviceMemoryBase>& live_addresses, in TearDown()
44 se::DeviceMemoryBase buffer_address = GetDeviceAddress(allocation.index()); in TearDown()
60 se::DeviceMemoryBase BufferAllocations::GetDeviceAddress( in GetDeviceAddress()
67 se::DeviceMemoryBase& BufferAllocations::GetMutableDeviceAddress( in GetMutableDeviceAddress()
74 se::DeviceMemoryBase BufferAllocations::GetDeviceAddress( in GetDeviceAddress()
76 se::DeviceMemoryBase base = GetDeviceAddress(buffer_slice.index()); in GetDeviceAddress()
79 return se::DeviceMemoryBase( in GetDeviceAddress()
Dbuffer_allocations.h38 BufferAllocations(absl::Span<se::DeviceMemoryBase const> buffers, in BufferAllocations()
58 se::DeviceMemoryBase GetDeviceAddress(
62 se::DeviceMemoryBase& GetMutableDeviceAddress(
67 se::DeviceMemoryBase GetDeviceAddress(
72 Status TearDown(const std::set<se::DeviceMemoryBase>& live_addresses,
89 std::vector<se::DeviceMemoryBase> buffers_;
Dcudnn_batchnorm_runner.cc32 se::DeviceMemoryBase operand;
41 se::DeviceMemoryBase output;
49 se::DeviceMemoryBase output_data;
57 se::DeviceMemoryBase output_grad_data;
58 se::DeviceMemoryBase grad_output;
115 const se::DeviceMemoryBase& operand, in AssignCommonParams()
227 const CudnnBatchNormConfig& config, se::DeviceMemoryBase operand, in RunCudnnBatchNormForwardInference()
228 se::DeviceMemoryBase output, se::DeviceMemory<float> scale, in RunCudnnBatchNormForwardInference()
256 const CudnnBatchNormConfig& config, se::DeviceMemoryBase operand, in RunCudnnBatchNormForwardTraining()
257 se::DeviceMemoryBase output_data, se::DeviceMemory<float> output_mean, in RunCudnnBatchNormForwardTraining()
[all …]
Dcudnn_batchnorm_thunk.cc55 se::DeviceMemoryBase output_base = in ExecuteOnStream()
57 se::DeviceMemoryBase operand = buffer_allocations.GetDeviceAddress(operand_); in ExecuteOnStream()
92 se::DeviceMemoryBase operand = buffer_allocations.GetDeviceAddress(operand_); in ExecuteOnStream()
93 se::DeviceMemoryBase output_data = in ExecuteOnStream()
140 se::DeviceMemoryBase operand = buffer_allocations.GetDeviceAddress(operand_); in ExecuteOnStream()
141 se::DeviceMemoryBase output_grad_data = in ExecuteOnStream()
143 se::DeviceMemoryBase grad_output = in ExecuteOnStream()
/external/tensorflow/tensorflow/stream_executor/host/
Dhost_gpu_executor.h63 DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
64 void *GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes,
66 void Deallocate(DeviceMemoryBase *mem) override;
75 bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src,
77 bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src,
79 bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst,
80 const DeviceMemoryBase &gpu_src,
83 port::Status MemZero(Stream *stream, DeviceMemoryBase *location,
85 port::Status Memset(Stream *stream, DeviceMemoryBase *location, uint8 pattern,
87 port::Status Memset32(Stream *stream, DeviceMemoryBase *location,
[all …]
Dhost_gpu_executor.cc69 DeviceMemoryBase HostExecutor::Allocate(uint64 size, int64 memory_space) { in Allocate()
74 return DeviceMemoryBase( in Allocate()
78 void *HostExecutor::GetSubBuffer(DeviceMemoryBase *parent, uint64 offset_bytes, in GetSubBuffer()
83 void HostExecutor::Deallocate(DeviceMemoryBase *mem) { in Deallocate()
87 port::Status HostExecutor::SynchronousMemZero(DeviceMemoryBase *location, in SynchronousMemZero()
93 port::Status HostExecutor::SynchronousMemSet(DeviceMemoryBase *location, in SynchronousMemSet()
100 const DeviceMemoryBase &gpu_src, uint64 size) { in Memcpy()
109 bool HostExecutor::Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, in Memcpy()
120 DeviceMemoryBase *gpu_dst, in MemcpyDeviceToDevice()
121 const DeviceMemoryBase &gpu_src, in MemcpyDeviceToDevice()
[all …]
/external/tensorflow/tensorflow/stream_executor/tpu/
Dtpu_executor.h51 using DeviceMemoryBase = ::stream_executor::DeviceMemoryBase; variable
69 DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
88 void Deallocate(const DeviceMemoryBase& memory);
90 void Deallocate(DeviceMemoryBase* memory) override;
121 const ::stream_executor::DeviceMemoryBase& device_src,
124 bool Memcpy(Stream* stream, ::stream_executor::DeviceMemoryBase* device_dst,
128 ::stream_executor::DeviceMemoryBase* gpu_dst,
129 const ::stream_executor::DeviceMemoryBase& host_src,
135 Status SynchronousMemcpy(::stream_executor::DeviceMemoryBase* device_dst,
138 void* host_dst, const ::stream_executor::DeviceMemoryBase& device_src,
[all …]
/external/tensorflow/tensorflow/stream_executor/gpu/
Dgpu_executor.h89 DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override;
91 void* GetSubBuffer(DeviceMemoryBase* mem, uint64 offset_bytes,
94 void Deallocate(DeviceMemoryBase* mem) override;
122 port::Status SynchronousMemZero(DeviceMemoryBase* location,
125 port::Status SynchronousMemSet(DeviceMemoryBase* location, int value,
128 port::Status SynchronousMemcpy(DeviceMemoryBase* gpu_dst,
132 const DeviceMemoryBase& gpu_src,
135 port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase* gpu_dst,
136 const DeviceMemoryBase& gpu_src,
139 port::Status MemZero(Stream* stream, DeviceMemoryBase* location,
[all …]
/external/tensorflow/tensorflow/compiler/xla/service/
Dshaped_buffer.h74 const se::DeviceMemoryBase& root_buffer() const { in root_buffer()
80 const se::DeviceMemoryBase& buffer(const ShapeIndex& index) const { in buffer()
85 void set_buffer(const se::DeviceMemoryBase& buffer, const ShapeIndex& index) { in set_buffer()
92 void set_buffers(ShapeTree<se::DeviceMemoryBase> buffers) { in set_buffers()
116 const ShapeTree<se::DeviceMemoryBase>& buffers() const { return buffers_; } in buffers()
117 ShapeTree<se::DeviceMemoryBase>& buffers() { return buffers_; } in buffers()
136 ShapeTree<se::DeviceMemoryBase> buffers_;
188 *buffers_.mutable_element(index) = se::DeviceMemoryBase(); in set_buffer()
Dtransfer_manager.h174 const se::DeviceMemoryBase& dest,
178 const se::DeviceMemoryBase& source,
184 const se::DeviceMemoryBase& dest,
188 const se::DeviceMemoryBase& source,
276 const se::DeviceMemoryBase& device_buffer) const { in CanBufferBeAccessedNow()
302 se::Stream* stream, absl::Span<const se::DeviceMemoryBase> elements,
303 const Shape& shape, se::DeviceMemoryBase* region) = 0;
311 const se::DeviceMemoryBase& source,
320 se::DeviceMemoryBase* destination);
Dtransfer_manager.cc115 se::Stream* stream, const Shape& shape, const se::DeviceMemoryBase& source, in TransferArrayFromDevice()
144 const se::DeviceMemoryBase& dest, in TransferArrayToDevice()
159 const se::DeviceMemoryBase& dest, in TransferArrayToDeviceAsync()
180 se::Stream* stream, const Shape& shape, const se::DeviceMemoryBase& source, in TransferArrayFromDevice()
212 [&](const ShapeIndex& index, se::DeviceMemoryBase* buffer) { in ReadDynamicShapes()
302 se::DeviceMemoryBase device_memory = device_buffer.buffer(index); in WriteTupleIndexTablesAsync()
306 std::vector<se::DeviceMemoryBase> elements; in WriteTupleIndexTablesAsync()
328 se::DeviceMemoryBase device_memory = device_buffer.buffer({}); in WriteRootTupleIndexTable()
332 std::vector<se::DeviceMemoryBase> elements; in WriteRootTupleIndexTable()
347 se::DeviceMemoryBase device_memory = in WriteRootTupleIndexTable()
[all …]
Dmaybe_owning_device_memory.h33 explicit MaybeOwningDeviceMemory(tensorflow::se::DeviceMemoryBase unowned) in MaybeOwningDeviceMemory()
38 MaybeOwningDeviceMemory& operator=(tensorflow::se::DeviceMemoryBase unowned) {
52 tensorflow::se::DeviceMemoryBase AsDeviceMemoryBase() const;
69 tensorflow::se::DeviceMemoryBase>
/external/tensorflow/tensorflow/core/kernels/
Dgpu_utils.h53 se::DeviceMemoryBase WrapRedzoneBestEffort(se::RedzoneAllocator* rz_allocator,
54 se::DeviceMemoryBase buffer);
64 se::DeviceMemoryBase wrapped(const_cast<T*>(cuda_memory), size * sizeof(T)); in AsDeviceMemory()
219 se::DeviceMemoryBase input_buffer,
220 se::DeviceMemoryBase filter_buffer,
221 se::DeviceMemoryBase output_buffer,
231 se::dnn::DataType element_type, se::DeviceMemoryBase input_buffer,
232 se::DeviceMemoryBase filter_buffer, se::DeviceMemoryBase output_buffer,
233 se::DeviceMemoryBase bias_buffer, se::DeviceMemoryBase side_input_buffer,
Dgpu_utils.cc40 se::DeviceMemoryBase WrapRedzoneBestEffort(se::RedzoneAllocator* rz_allocator, in WrapRedzoneBestEffort()
41 se::DeviceMemoryBase buffer) { in WrapRedzoneBestEffort()
56 return se::DeviceMemoryBase(output_rz_or.ValueOrDie()); in WrapRedzoneBestEffort()
128 se::DeviceMemoryBase input_buffer, in LogConvAutotuneResults()
129 se::DeviceMemoryBase filter_buffer, in LogConvAutotuneResults()
130 se::DeviceMemoryBase output_buffer, in LogConvAutotuneResults()
171 se::dnn::DataType element_type, se::DeviceMemoryBase input_buffer, in LogFusedConvForwardAutotuneResults()
172 se::DeviceMemoryBase filter_buffer, se::DeviceMemoryBase output_buffer, in LogFusedConvForwardAutotuneResults()
173 se::DeviceMemoryBase bias_buffer, se::DeviceMemoryBase side_input_buffer, in LogFusedConvForwardAutotuneResults()
/external/tensorflow/tensorflow/compiler/xla/pjrt/
Dtracked_device_buffer.cc101 ShapeTree<se::DeviceMemoryBase>::iterator iterator = in FromScopedShapedBuffer()
103 std::vector<se::DeviceMemoryBase> buffers; in FromScopedShapedBuffer()
110 iterator->second = se::DeviceMemoryBase(); in FromScopedShapedBuffer()
116 absl::Span<se::DeviceMemoryBase>(buffers), definition_events, in FromScopedShapedBuffer()
123 ShapeTree<se::DeviceMemoryBase>::iterator iterator = in AsShapedBuffer()
125 for (const se::DeviceMemoryBase& buf : device_memory_) { in AsShapedBuffer()
140 for (const se::DeviceMemoryBase& buf : device_memory_) { in AddToInputAsImmutable()
153 for (const se::DeviceMemoryBase& buf : device_memory_) { in AddToInputAsDonated()
165 absl::Span<se::DeviceMemoryBase const> device_memory, in TrackedDeviceBuffer()
178 for (const se::DeviceMemoryBase& buffer : device_memory_) { in ~TrackedDeviceBuffer()

1234567