1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/python/py_buffer.h"
17 
18 #include "absl/base/casts.h"
19 #include "pybind11/pybind11.h"
20 #include "pybind11/pytypes.h"
21 #include "tensorflow/compiler/xla/pjrt/pjrt_client.h"
22 #include "tensorflow/compiler/xla/python/python_ref_manager.h"
23 #include "tensorflow/compiler/xla/python/types.h"
24 #include "tensorflow/compiler/xla/util.h"
25 
26 namespace xla {
27 
28 namespace py = pybind11;
29 
PyBuffer(std::shared_ptr<PyClient> client,std::unique_ptr<PjRtBuffer> buffer,std::shared_ptr<Traceback> traceback)30 PyBuffer::PyBuffer(std::shared_ptr<PyClient> client,
31                    std::unique_ptr<PjRtBuffer> buffer,
32                    std::shared_ptr<Traceback> traceback)
33     : client_(std::move(client)),
34       buffer_(std::move(buffer)),
35       traceback_(std::move(traceback)) {
36   CHECK(PyGILState_Check());
37   next_ = client_->buffers_;
38   client_->buffers_ = this;
39   prev_ = nullptr;
40   if (next_) {
41     next_->prev_ = this;
42   }
43 }
44 
~PyBuffer()45 PyBuffer::~PyBuffer() {
46   CHECK(PyGILState_Check());
47   if (client_->buffers_ == this) {
48     client_->buffers_ = next_;
49   }
50   if (prev_) {
51     prev_->next_ = next_;
52   }
53   if (next_) {
54     next_->prev_ = prev_;
55   }
56 }
57 
python_shape() const58 pybind11::tuple PyBuffer::python_shape() const {
59   return IntSpanToTuple(buffer()->on_device_shape().dimensions());
60 }
61 
python_dtype() const62 pybind11::dtype PyBuffer::python_dtype() const {
63   PrimitiveType primitive = buffer()->on_device_shape().element_type();
64   return PrimitiveTypeToDtype(primitive).ValueOrDie();
65 }
66 
device() const67 ClientAndPtr<PjRtDevice> PyBuffer::device() const {
68   return WrapWithClient(client_, buffer_->device());
69 }
70 
CopyToDevice(const ClientAndPtr<PjRtDevice> & dst_device) const71 StatusOr<std::unique_ptr<PyBuffer>> PyBuffer::CopyToDevice(
72     const ClientAndPtr<PjRtDevice>& dst_device) const {
73   CHECK(dst_device.get() != nullptr);
74   GlobalPyRefManager()->CollectGarbage();
75   std::unique_ptr<PjRtBuffer> out;
76   {
77     py::gil_scoped_release gil_release;
78     TF_ASSIGN_OR_RETURN(out, buffer_->CopyToDevice(dst_device.get()));
79   }
80   auto traceback = Traceback::Get();
81   return std::make_unique<PyBuffer>(dst_device.client, std::move(out),
82                                     std::move(traceback));
83 }
84 
BlockHostUntilReady()85 Status PyBuffer::BlockHostUntilReady() {
86   GlobalPyRefManager()->CollectGarbage();
87   py::gil_scoped_release gil_release;
88   return buffer_->BlockHostUntilReady();
89 }
90 
CopyToHostAsync()91 Status PyBuffer::CopyToHostAsync() {
92   if (!buffer_->IsOnCpu() && !host_value_) {
93     std::shared_ptr<HostValue> host_value = std::make_shared<HostValue>();
94     host_value_ = host_value;
95     py::gil_scoped_release gil;
96     host_value->value = std::make_shared<Literal>(
97         ShapeUtil::DeviceShapeToHostShape(buffer_->on_device_shape()));
98     Literal* literal = host_value->value.get();
99     buffer_->ToLiteral(literal,
100                        [host_value{std::move(host_value)}](Status status) {
101                          host_value->status = std::move(status);
102                          host_value->ready.Notify();
103                        });
104   }
105   return Status::OK();
106 }
107 
AsNumPyArray(py::handle this_obj)108 StatusOr<pybind11::object> PyBuffer::AsNumPyArray(py::handle this_obj) {
109   if (buffer_->IsDeleted()) {
110     return InvalidArgument("DeviceArray has been deleted.");
111   }
112   TF_RET_CHECK(buffer_->on_device_shape().IsArray());
113   // On CPU, we can return the value in a zero-copy way.
114   if (buffer_->IsOnCpu()) {
115     TF_ASSIGN_OR_RETURN(
116         py::dtype dtype,
117         PrimitiveTypeToDtype(buffer_->on_device_shape().element_type()));
118     // Objects that must be kept alive while the array is alive.
119     struct Hold {
120       py::object buffer;
121       std::unique_ptr<PjRtBuffer::ExternalReference> external_reference_hold;
122     };
123     auto hold = std::make_unique<Hold>();
124     TF_ASSIGN_OR_RETURN(hold->external_reference_hold,
125                         buffer_->AcquireExternalReference());
126     hold->buffer = py::reinterpret_borrow<py::object>(this_obj);
127     void* data = hold->external_reference_hold->OpaqueDeviceMemoryDataPointer();
128     py::capsule hold_capsule(hold.release(),
129                              [](void* h) { delete static_cast<Hold*>(h); });
130     py::array array(dtype, buffer_->on_device_shape().dimensions(),
131                     ByteStridesForShape(buffer_->on_device_shape()), data,
132                     hold_capsule);
133     array.attr("flags").attr("writeable") = Py_False;
134     {
135       py::gil_scoped_release gil;
136       TF_RETURN_IF_ERROR(buffer_->BlockHostUntilReady());
137     }
138     return array;
139   }
140 
141   TF_RETURN_IF_ERROR(CopyToHostAsync());
142   if (!host_value_->ready.HasBeenNotified()) {
143     py::gil_scoped_release gil;
144     host_value_->ready.WaitForNotification();
145   }
146   TF_RETURN_IF_ERROR(host_value_->status);
147   TF_ASSIGN_OR_RETURN(py::object array, LiteralToPython(host_value_->value));
148   array.attr("flags").attr("writeable") = Py_False;
149   return array;
150 }
151 
152 // TODO(zhangqiaorjc): Delete UnsafeBufferPointer.
UnsafeBufferPointer() const153 StatusOr<std::uintptr_t> PyBuffer::UnsafeBufferPointer() const {
154   if (buffer_->on_device_shape().IsTuple()) {
155     return Unimplemented(
156         "unsafe_buffer_pointer is not implemented for tuple "
157         "buffers.");
158   }
159 
160   TF_ASSIGN_OR_RETURN(
161       std::unique_ptr<PjRtBuffer::ExternalReference> external_reference_hold,
162       buffer_->AcquireExternalReference());
163   const void* ptr = external_reference_hold->OpaqueDeviceMemoryDataPointer();
164   return absl::bit_cast<std::uintptr_t>(ptr);
165 }
166 
CudaArrayInterface() const167 StatusOr<py::dict> PyBuffer::CudaArrayInterface() const {
168   // TODO(zhangqiaorjc): Differentiate between NVidia and other GPUs.
169   if (buffer_->client()->platform_id() != kGpuId) {
170     return InvalidArgument(
171         "__cuda_array_interface__ is only defined for NVidia GPU buffers.");
172   }
173   if (!buffer_->on_device_shape().IsArray()) {
174     return InvalidArgument(
175         "__cuda_array_interface__ is only defined for array buffers.");
176   }
177   if (buffer_->on_device_shape().element_type() == BF16) {
178     return InvalidArgument(
179         "__cuda_array_interface__ is not supported for bfloat16 buffers.");
180   }
181   TF_RET_CHECK(LayoutUtil::IsMonotonicWithDim0Major(
182       buffer_->on_device_shape().layout()));
183 
184   py::dict result;
185   result["shape"] = IntSpanToTuple(buffer_->on_device_shape().dimensions());
186   TF_ASSIGN_OR_RETURN(py::str typestr,
187                       TypeDescriptorForPrimitiveType(
188                           buffer_->on_device_shape().element_type()));
189   result["typestr"] = std::move(typestr);
190   TF_ASSIGN_OR_RETURN(
191       std::unique_ptr<PjRtBuffer::ExternalReference> external_reference_hold,
192       buffer_->AcquireExternalReference());
193   const void* root_ptr =
194       external_reference_hold->OpaqueDeviceMemoryDataPointer();
195   py::tuple data(2);
196   data[0] = py::int_(absl::bit_cast<std::uintptr_t>(root_ptr));
197   data[1] = py::bool_(true);  // read-only
198   result["data"] = std::move(data);
199   result["version"] = py::int_(2);
200   return result;
201 }
202 
203 // PEP 3118 buffer protocol implementation.
204 
205 namespace {
206 
207 // Extra data to be kept alive by the consumer of the buffer protocol.
208 struct ExtraBufferInfo {
ExtraBufferInfoxla::__anon7a3a1dda0311::ExtraBufferInfo209   explicit ExtraBufferInfo(
210       std::unique_ptr<PjRtBuffer::ExternalReference> external_reference_hold)
211       : external_reference_hold(std::move(external_reference_hold)) {}
212 
213   std::string format;
214   std::vector<Py_ssize_t> strides;
215   // We keep an external reference hold to the PjRtBuffer. This prevents a
216   // use-after-free in the event that Delete() is called on a buffer with an
217   // live buffer protocol view. It does however mean that Delete() sometimes
218   // won't actually delete immediately.
219   std::unique_ptr<PjRtBuffer::ExternalReference> external_reference_hold;
220 };
221 
PjRtBufferGetBuffer(PyObject * exporter,Py_buffer * view,int flags)222 int PjRtBufferGetBuffer(PyObject* exporter, Py_buffer* view, int flags) {
223   auto& buffer =
224       *py::reinterpret_borrow<py::object>(exporter).cast<PyBuffer&>().buffer();
225   Status status = [&]() {
226     // Py_buffer objects are POD C structures, so we don't need to hold the GIL.
227     // Additionally we call BlockHostUntilReady() below, which may block.
228     py::gil_scoped_release gil_release;
229 
230     if (!buffer.IsOnCpu()) {
231       return InvalidArgument(
232           "Python buffer protocol is only defined for CPU buffers.");
233     }
234     if (!buffer.on_device_shape().IsArray()) {
235       return InvalidArgument(
236           "Python buffer protocol is only defined for array buffers.");
237     }
238     // If we allowed exports of formatted BF16 buffers, consumers would get
239     // confused about the type because there is no way to describe BF16 to
240     // Python.
241     if (buffer.on_device_shape().element_type() == BF16 &&
242         ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)) {
243       return InvalidArgument(
244           "bfloat16 buffer format not supported by Python buffer protocol.");
245     }
246     if ((flags & PyBUF_WRITEABLE) == PyBUF_WRITEABLE) {
247       return InvalidArgument("XLA buffers are read-only.");
248     }
249     TF_ASSIGN_OR_RETURN(
250         std::unique_ptr<PjRtBuffer::ExternalReference> external_reference_hold,
251         buffer.AcquireExternalReference());
252     if (buffer.IsDeleted()) {
253       return InvalidArgument("Deleted buffer used in buffer protocol.");
254     }
255     const Shape& shape = buffer.on_device_shape();
256     if (((flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS ||
257          (flags & PyBUF_STRIDES) == PyBUF_ND) &&
258         !LayoutUtil::IsMonotonicWithDim0Major(shape.layout())) {
259       return InvalidArgument("Buffer is not in C-contiguous layout.");
260     } else if ((flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS &&
261                !LayoutUtil::IsMonotonicWithDim0Minor(shape.layout())) {
262       return InvalidArgument("Buffer is not in F-contiguous layout.");
263     } else if ((flags & PyBUF_ANY_CONTIGUOUS) == PyBUF_ANY_CONTIGUOUS &&
264                !LayoutUtil::IsMonotonicWithDim0Major(shape.layout()) &&
265                !LayoutUtil::IsMonotonicWithDim0Minor(shape.layout())) {
266       return InvalidArgument("Buffer is not in contiguous layout.");
267     }
268     std::memset(view, 0, sizeof(Py_buffer));
269     const void* root_ptr =
270         external_reference_hold->OpaqueDeviceMemoryDataPointer();
271     view->buf = const_cast<void*>(root_ptr);
272     auto extra =
273         absl::make_unique<ExtraBufferInfo>(std::move(external_reference_hold));
274     view->itemsize = ShapeUtil::ByteSizeOfPrimitiveType(shape.element_type());
275     view->len = ShapeUtil::ByteSizeOf(shape);
276     view->readonly = 1;
277     if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
278       TF_ASSIGN_OR_RETURN(extra->format, FormatDescriptorForPrimitiveType(
279                                              shape.element_type()));
280       view->format = const_cast<char*>(extra->format.c_str());
281     }
282     if ((flags & PyBUF_ND) == PyBUF_ND) {
283       view->ndim = shape.dimensions_size();
284       static_assert(sizeof(int64) == sizeof(Py_ssize_t),
285                     "Py_ssize_t must be 64 bits");
286       if (view->ndim != 0) {
287         view->shape = reinterpret_cast<Py_ssize_t*>(
288             const_cast<int64*>(shape.dimensions().data()));
289         if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
290           extra->strides = ByteStridesForShape(shape);
291           view->strides = extra->strides.data();
292         }
293       }
294     }
295     TF_RETURN_IF_ERROR(buffer.BlockHostUntilReady());
296     view->internal = extra.release();
297     return Status::OK();
298   }();
299   if (!status.ok()) {
300     PyErr_SetString(PyExc_BufferError, status.ToString().c_str());
301     return -1;
302   }
303   view->obj = exporter;
304   Py_INCREF(view->obj);
305   return 0;
306 }
307 
PjRtBufferReleaseBuffer(PyObject *,Py_buffer * buffer)308 void PjRtBufferReleaseBuffer(PyObject*, Py_buffer* buffer) {
309   auto extra = static_cast<ExtraBufferInfo*>(buffer->internal);
310   delete extra;
311 }
312 
__anon7a3a1dda0502() 313 PyBufferProcs PjRtBufferProcs = []() {
314   PyBufferProcs procs;
315   procs.bf_getbuffer = &PjRtBufferGetBuffer;
316   procs.bf_releasebuffer = &PjRtBufferReleaseBuffer;
317   return procs;
318 }();
319 
320 }  // namespace
321 
BufferProtocol()322 /*static*/ PyBufferProcs* PyBuffer::BufferProtocol() {
323   return &PjRtBufferProcs;
324 }
325 
SetStickyDevice(pybind11::object sticky_device)326 void PyBuffer::SetStickyDevice(pybind11::object sticky_device) {
327   if (sticky_device_ && !sticky_device_->equal(sticky_device)) {
328     throw std::invalid_argument(
329         "One cannot set again the stickyness of a buffer and needs to create "
330         "a new one or a `_DeviceArray`");
331   }
332   sticky_device_ = sticky_device;
333 }
334 
SetAval(pybind11::object aval)335 void PyBuffer::SetAval(pybind11::object aval) {
336   if (aval_ && !aval_->equal(aval)) {
337     throw std::invalid_argument(
338         "One cannot set again the aval_ of a buffer and needs to create a "
339         "new one or a `_DeviceArray`");
340   }
341   aval_ = aval;
342 }
343 
344 }  // namespace xla
345