1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
17 #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
22 #include "tensorflow/c/tf_attrtype.h"
23 #include "tensorflow/core/tpu/libtftpu.h"
24 #include "tensorflow/stream_executor/tpu/c_api_decl.h"
25 
26 extern "C" {
27 
28 SE_Platform* TpuPlatform_New();
29 void TpuPlatform_Free(SE_Platform* platform);
30 void TpuPlatform_Initialize(SE_Platform* platform, size_t options_size,
31                             const char** options_key,
32                             const char** options_value, TF_Status* status);
33 bool TpuPlatform_Initialized(SE_Platform* platform);
34 SE_StreamExecutor* TpuPlatform_GetExecutor(SE_Platform* platform,
35                                            SE_StreamExecutorConfig* config,
36                                            TF_Status* status);
37 SE_PlatformId TpuPlatform_Id(SE_Platform* platform);
38 int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform);
39 int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform);
40 bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform);
41 SE_TpuTopology* TpuPlatform_GetTopologyPtr(SE_Platform* platform);
42 SE_TpuTopology_Host* TpuPlatform_GetHostLocation(SE_Platform* platform);
43 TpuRuntimeVersion TpuPlatform_GetRuntimeVersion(SE_Platform* platform);
44 
45 void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal,
46                       SE_DeviceOptions* device_options, TF_Status* status);
47 void TpuExecutor_Free(SE_StreamExecutor* executor);
48 
49 int TpuExecutor_PlatformDeviceCount(SE_StreamExecutor* executor);
50 
51 SE_DeviceMemoryBase TpuExecutor_Allocate(SE_StreamExecutor* executor,
52                                          uint64_t size, int64_t memory_space);
53 void TpuExecutor_Deallocate(SE_StreamExecutor* executor,
54                             SE_DeviceMemoryBase* memory);
55 bool TpuExecutor_GetAllocatorStats(SE_StreamExecutor* executor,
56                                    SE_AllocatorStats* stats);
57 bool TpuExecutor_DeviceMemoryUsage(SE_StreamExecutor* executor, int64_t* free,
58                                    int64_t* total);
59 
60 bool TpuExecutor_AllocateStream(SE_StreamExecutor* executor, SE_Stream* stream);
61 void TpuExecutor_DeallocateStream(SE_StreamExecutor* executor,
62                                   SE_Stream* stream);
63 bool TpuExecutor_CreateStreamDependency(SE_StreamExecutor* executor,
64                                         SE_Stream* dependent, SE_Stream* other);
65 void TpuExecutor_GetStatus(SE_StreamExecutor* executor, SE_Stream* stream,
66                            TF_Status* status);
67 
68 SE_TpuTopology_Core* TpuExecutor_GetCoreLocation(SE_StreamExecutor* executor);
69 
70 void TpuExecutor_AllocateEvent(SE_StreamExecutor* executor, SE_Event* event,
71                                TF_Status* status);
72 void TpuExecutor_DeallocateEvent(SE_StreamExecutor* executor, SE_Event* event,
73                                  TF_Status* status);
74 int TpuExecutor_PollForEventStatus(SE_StreamExecutor* executor,
75                                    SE_Event* event);
76 void TpuExecutor_RecordEvent(SE_StreamExecutor* executor, SE_Stream* stream,
77                              SE_Event* event, TF_Status* status);
78 void TpuExecutor_WaitForEvent(SE_StreamExecutor* executor, SE_Stream* stream,
79                               SE_Event* event, TF_Status* status);
80 
81 bool TpuExecutor_AllocateTimer(SE_StreamExecutor* executor, SE_Timer* timer);
82 void TpuExecutor_DeallocateTimer(SE_StreamExecutor* executor, SE_Timer* timer);
83 bool TpuExecutor_StartTimer(SE_StreamExecutor* executor, SE_Stream* stream,
84                             SE_Timer* timer);
85 bool TpuExecutor_StopTimer(SE_StreamExecutor* executor, SE_Stream* stream,
86                            SE_Timer* timer);
87 
88 void TpuExecutor_SynchronousMemcpyToHost(SE_StreamExecutor* executor,
89                                          void* host_dst,
90                                          const SE_DeviceMemoryBase* device_src,
91                                          uint64_t size, TF_Status* status);
92 void TpuExecutor_SynchronousMemcpyFromHost(SE_StreamExecutor* executor,
93                                            SE_DeviceMemoryBase* device_dst,
94                                            const void* host_src, uint64_t size,
95                                            TF_Status* status);
96 bool TpuExecutor_MemcpyToHost(SE_StreamExecutor* executor, SE_Stream* stream,
97                               void* host_dst,
98                               const SE_DeviceMemoryBase* device_src,
99                               uint64_t size);
100 
101 bool TpuExecutor_MemcpyFromHost(SE_StreamExecutor* executor, SE_Stream* stream,
102                                 SE_DeviceMemoryBase* device_dst,
103                                 const void* host_src, uint64_t size);
104 
105 void TpuExecutor_EnqueueInfeed(SE_StreamExecutor* executor,
106                                int32_t infeed_queue_index, const uint8_t* data,
107                                int64_t size, TF_Status* status);
108 void TpuExecutor_DequeueOutfeed(SE_StreamExecutor* executor,
109                                 int32_t outfeed_queue_index, uint8_t* data,
110                                 int64_t size, TF_Status* status);
111 void TpuExecutor_WaitForInfeedReady(SE_StreamExecutor* executor,
112                                     int32_t infeed_queue_index,
113                                     TF_Status* status);
114 void TpuExecutor_WaitForOutfeedReady(SE_StreamExecutor* executor,
115                                      int32_t outfeed_queue_index,
116                                      TF_Status* status);
117 
118 void TpuExecutor_BlockHostUntilDone(SE_StreamExecutor* executor,
119                                     SE_Stream* stream, TF_Status* status);
120 void TpuExecutor_BlockUntilDoneOrFailed(SE_StreamExecutor* executor,
121                                         TF_Status* status);
122 void TpuExecutor_SyncAndForgetFailedStreams(SE_StreamExecutor* executor);
123 bool TpuExecutor_SynchronizeAllActivity(SE_StreamExecutor* executor);
124 
125 SE_Stream* TpuStream_New(SE_StreamExecutor* parent);
126 void TpuStream_Free(SE_Stream*);
127 void* TpuStream_Stream(SE_Stream*);
128 bool TpuStream_Status(SE_Stream*);
129 bool TpuStream_IsSameSharedMemoryLocation(SE_Stream*, SE_Stream*);
130 void TpuStream_EnqueueTransferHostToDevice(SE_Stream* stream,
131                                            SE_DeviceMemoryBase device_dst,
132                                            void* host_src, uint64_t size,
133                                            TF_Status* status);
134 void TpuStream_EnqueueTransferDeviceToHost(SE_Stream* stream,
135                                            SE_DeviceMemoryBase device_src,
136                                            void* host_dst, uint64_t size,
137                                            TF_Status* status);
138 void TpuStream_TpuEnqueueOnDeviceSendRecvLocal(SE_Stream* stream,
139                                                SE_DeviceMemoryBase send_buffer,
140                                                SE_DeviceMemoryBase recv_buffer,
141                                                TF_Status* status);
142 
143 SE_Event* TpuEvent_New(SE_StreamExecutor* parent);
144 void TpuEvent_Free(SE_Event*);
145 
146 SE_Timer* TpuTimer_New(SE_StreamExecutor* parent);
147 void TpuTimer_Free(SE_Timer*);
148 int64_t TpuTimer_Nanoseconds(SE_Timer*);
149 int64_t TpuTimer_Microseconds(SE_Timer*);
150 
151 TF_Status* TpuStatus_New();
152 TF_Status* TpuStatus_Create(int32_t code, const char* msg);
153 void TpuStatus_Set(TF_Status* status, int32_t code, const char* msg,
154                    int32_t len);
155 void TpuStatus_Free(TF_Status* status);
156 const char* TpuStatus_Message(TF_Status* status);
157 int TpuStatus_Code(TF_Status* status);
158 bool TpuStatus_Ok(TF_Status* status);
159 
160 SE_StreamExecutorConfig* TpuStreamExecutorConfig_Default();
161 void TpuStreamExecutorConfig_SetOrdinal(SE_StreamExecutorConfig*, int ordinal);
162 void TpuStreamExecutorConfig_Free(SE_StreamExecutorConfig*);
163 
164 SE_DeviceDescription* TpuDeviceDescription_New();
165 void TpuDeviceDescription_Free(SE_DeviceDescription* description);
166 void TpuExecutor_CreateDeviceDescription(SE_StreamExecutor* executor,
167                                          SE_DeviceDescription* description,
168                                          TF_Status* status);
169 
170 SE_DeviceOptions* TpuExecutor_NewDeviceOptions(unsigned flags);
171 void TpuExecutor_FreeDeviceOptions(SE_DeviceOptions* options);
172 
173 bool TpuExecutor_HostCallback(SE_StreamExecutor* executor, SE_Stream* stream,
174                               SE_StatusCallbackFn callback_fn, void* ctx);
175 
176 XLA_TransferManager* TpuTransferManager_New();
177 void TpuTransferManager_Free(XLA_TransferManager* manager);
178 SE_PlatformId TpuTransferManager_PlatformId(XLA_TransferManager* manager);
179 void TpuTransferManager_HostShapeToDeviceShape(XLA_TransferManager* manager,
180                                                XLA_Shape* host_shape,
181                                                XLA_Shape* device_shape);
182 void TpuTransferManager_TransferLiteralToDeviceAsync(
183     XLA_TransferManager* manager, SE_Stream* stream, XLA_Literal* literal,
184     XLA_ShapedBuffer* device_buffer, TF_Status* status);
185 void TpuTransferManager_TransferLiteralFromDevice(
186     XLA_TransferManager* manager, SE_Stream* stream,
187     XLA_ShapedBuffer* device_buffer, XLA_Literal* literal,
188     XLA_StatusCallbackFn callback, void* ctx);
189 int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager,
190                                                   XLA_Shape* shape);
191 void TpuTransferManager_ChooseCompactLayoutForShape(
192     XLA_TransferManager* manager, XLA_Shape* host_shape, XLA_Shape* output,
193     TF_Status* status);
194 bool TpuTransferManager_CanShapedBufferBeAccessedNow(
195     XLA_TransferManager* manager, SE_StreamExecutor* executor,
196     XLA_ShapedBuffer* device_buffer);
197 bool TpuTransferManager_CanBufferBeAccessedNow(
198     XLA_TransferManager* manager, SE_StreamExecutor* executor,
199     SE_DeviceMemoryBase* device_buffer);
200 void TpuTransferManager_WriteSingleTupleIndexTable(
201     XLA_TransferManager* manager, SE_Stream* stream,
202     SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape,
203     SE_DeviceMemoryBase* region, TF_Status* status);
204 void TpuTransferManager_GetInfeedLayout(XLA_Shape* shape,
205                                         XLA_Shape* infeed_shape);
206 void TpuTransferManager_LinearizeToBuffers(
207     XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array,
208     int64_t** buffers_size, int64_t* buffers_array_size, TF_Status* status);
209 void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size,
210                                     int64_t buffers_array_size);
211 void TpuTransferManager_TransferLiteralToInfeed(XLA_TransferManager* manager,
212                                                 SE_StreamExecutor* executor,
213                                                 XLA_Literal* c_literal,
214                                                 TF_Status* status);
215 void TpuTransferManager_TransferBuffersToInfeed(XLA_TransferManager* manager,
216                                                 SE_StreamExecutor* executor,
217                                                 uint32_t** buffers_array,
218                                                 int64_t* buffers_size_in_uint32,
219                                                 int64_t buffers_array_size,
220                                                 TF_Status* status);
221 void TpuTransferManager_TransferLiteralFromOutfeed(
222     XLA_TransferManager* manager, SE_StreamExecutor* executor,
223     XLA_Shape* shape /*deprecated*/, XLA_Literal* c_literal, TF_Status* status);
224 void TpuTransferManager_ResetDevices(XLA_TransferManager* manager,
225                                      SE_StreamExecutor** executors,
226                                      int64_t num_executors, TF_Status* status);
227 
228 XLA_ComputationPlacer* TpuComputationPlacer_New();
229 void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer);
230 // `assignment` should be a preallocated array of size `replicate_count` *
231 // `computation_count`. The assignment will be constructed as a 2D array where
232 // assignment[replica][computation] = device_id.
233 void TpuComputationPlacer_AssignDevices(XLA_ComputationPlacer* placer,
234                                         int replica_count,
235                                         int computation_count, int* assignment,
236                                         TF_Status* status);
237 void TpuComputationPlacer_AssignLocalDevices(SE_TpuTopology_Host* host,
238                                              int replica_count,
239                                              int computation_count,
240                                              int* assignment,
241                                              TF_Status* status);
242 
243 int TpuTopology_LogicalDevicesPerHost(SE_TpuTopology* tpu_topology,
244                                       TpuCoreTypeEnum tpu_core_type);
245 int TpuTopology_LogicalDevicesPerChip(SE_TpuTopology* tpu_topology,
246                                       TpuCoreTypeEnum tpu_core_type);
247 int TpuTopology_HostCount(SE_TpuTopology* tpu_topology);
248 int TpuTopology_ChipsPerHost(SE_TpuTopology* tpu_topology);
249 
250 int TpuTopology_ChipBounds_X(SE_TpuTopology* tpu_topology);
251 int TpuTopology_ChipBounds_Y(SE_TpuTopology* tpu_topology);
252 int TpuTopology_ChipBounds_Z(SE_TpuTopology* tpu_topology);
253 bool TpuTopology_HasChip(SE_TpuTopology* tpu_topology, int x, int y, int z);
254 SE_TpuTopology_Core* TpuTopology_CoreForId(SE_TpuTopology* tpu_topology,
255                                            TpuCoreTypeEnum tpu_core_type,
256                                            int id);
257 SE_TpuTopology_Core* TpuTopology_Core(SE_TpuTopology* tpu_topology,
258                                       TpuCoreTypeEnum tpu_core_type, int x,
259                                       int y, int z, int index);
260 int TpuTopology_NumCores(SE_TpuTopology* tpu_topology,
261                          TpuCoreTypeEnum tpu_core_type);
262 // 'cores' should be a preallocated array of size TpuTopology_NumCores.
263 void TpuTopology_Cores(SE_TpuTopology* tpu_topology,
264                        TpuCoreTypeEnum tpu_core_type,
265                        SE_TpuTopology_Core** cores);
266 int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z);
267 TpuVersionEnum TpuTopology_Version(SE_TpuTopology* tpu_topology);
268 void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location,
269                                      int* x, int* y, int* z);
270 void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location,
271                                      int* x, int* y, int* z);
272 int TpuCoreLocation_Index(SE_TpuTopology_Core* tpu_core_location);
273 int TpuCoreLocation_Id(SE_TpuTopology_Core* tpu_core_location);
274 
275 int TpuHostLocation_Id(SE_TpuTopology_Host* tpu_host_location);
276 int TpuHostLocation_NumCores(SE_TpuTopology_Host* tpu_host_location,
277                              TpuCoreTypeEnum tpu_core_type);
278 // 'cores' should be a preallocated array of size TpuHostLocation_NumCores.
279 void TpuHostLocation_Cores(SE_TpuTopology_Host* tpu_host_location,
280                            TpuCoreTypeEnum tpu_core_type,
281                            SE_TpuTopology_Core** cores);
282 
283 // C API for XLA::Compiler interface
284 
285 TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New();
286 TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler);
287 
288 TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses(
289     Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module,
290     SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator,
291     XLA_HloModule* result, TF_Status* status);
292 
293 TFTPU_CAPI_EXPORT void TpuCompiler_RunBackend(
294     Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module,
295     SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator,
296     SE_Executable** result, TF_Status* status);
297 
298 TFTPU_CAPI_EXPORT void TpuCompiler_Compile(
299     Tpu_Compiler* compiler, XLA_HloModuleGroup* se_hlo_module_group,
300     SE_StreamExecutorList* stream_exec_lists, int num_lists,
301     SE_DeviceMemoryAllocator* allocator, SE_Executable** executables,
302     TF_Status* status);
303 
304 TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler,
305                                                 XLA_Shape* c_shape);
306 
307 TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream(
308     SE_Executable* executable, SE_ExecutableRunOptions* se_options,
309     SE_ExecutionInput** se_arguments, int se_arguments_size,
310     SE_HloExecutionProfile* hlo_execution_profile,
311     SE_ExecutionOutput* se_output, TF_Status* status);
312 
313 // This frees the XLA_ShapeIndex* array allocated when se_output is returned by
314 // TpuExecutable_ExecuteAsyncOnStream.
315 TFTPU_CAPI_EXPORT void TpuExecutable_FreeXlaShapeIndexArray(
316     XLA_ShapeIndex* array);
317 
318 // This frees the SE_MaybeOwningDeviceMemory* array allocated when se_output is
319 // returned by TpuExecutable_ExecuteAsyncOnStream.
320 // Note that this only frees the heap-allocated array itself, and does not
321 // free any of the underlying device memory.
322 TFTPU_CAPI_EXPORT void TpuExecutable_FreeMaybeOwningDeviceMemoryArray(
323     SE_MaybeOwningDeviceMemory* array);
324 
325 TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable,
326                                                  const char** fingerprint,
327                                                  size_t* size);
328 
329 // Caller is responsible for freeing the returned module's proto and its
330 // config's proto.
331 TFTPU_CAPI_EXPORT XLA_HloModule
332 TpuExecutable_HloModule(SE_Executable* executable);
333 
334 TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*);
335 
336 // Converts an XLA `Shape` into its equivalent TPU `Shape` representation.
337 TFTPU_CAPI_EXPORT void XlaShapeToTpuShapeRepresentation(
338     XLA_Shape* serialized_xla_shape, int data_type, bool use_fast_memory,
339     XLA_Shape* serialized_tpu_shape, TF_Status* status);
340 
341 TFTPU_CAPI_EXPORT void XlaShapeToTpuPaddedShape(XLA_Shape* serialized_xla_shape,
342                                                 XLA_Shape* padded_shape,
343                                                 TF_Status* status);
344 
// Table with one TFTPU_ADD_FN_IN_STRUCT entry per function declared earlier in
// this header, listed in the same grouping as the declarations.
// NOTE(review): TFTPU_ADD_FN_IN_STRUCT is defined outside this file
// (presumably in libtftpu.h) and presumably adds a function-pointer member for
// the named function — confirm. If so, member order determines the struct
// layout, so new entries should be added with care rather than reordering
// existing ones.
struct TfTpu_ExecutorApiFn {
  // Platform.
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialize);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Initialized);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetExecutor);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Id);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_VisibleDeviceCount);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetHostLocation);
  TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetRuntimeVersion);

  // Executor: setup, memory, streams, events, timers, copies, infeed/outfeed,
  // synchronization.
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PlatformDeviceCount);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Allocate);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Deallocate);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetAllocatorStats);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeviceMemoryUsage);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateStream);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateStream);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateStreamDependency);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetStatus);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_GetCoreLocation);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateEvent);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateEvent);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PollForEventStatus);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_RecordEvent);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForEvent);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_AllocateTimer);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DeallocateTimer);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StartTimer);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_StopTimer);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyToHost);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronousMemcpyFromHost);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyToHost);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_MemcpyFromHost);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_EnqueueInfeed);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_DequeueOutfeed);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForInfeedReady);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_WaitForOutfeedReady);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockHostUntilDone);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_BlockUntilDoneOrFailed);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SyncAndForgetFailedStreams);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_SynchronizeAllActivity);

  // Streams.
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_Stream);
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_Status);
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_IsSameSharedMemoryLocation);
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferHostToDevice);
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_EnqueueTransferDeviceToHost);
  TFTPU_ADD_FN_IN_STRUCT(TpuStream_TpuEnqueueOnDeviceSendRecvLocal);

  // Events.
  TFTPU_ADD_FN_IN_STRUCT(TpuEvent_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuEvent_Free);

  // Timers.
  TFTPU_ADD_FN_IN_STRUCT(TpuTimer_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Nanoseconds);
  TFTPU_ADD_FN_IN_STRUCT(TpuTimer_Microseconds);

  // Status objects.
  TFTPU_ADD_FN_IN_STRUCT(TpuStatus_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Create);
  TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Set);
  TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Message);
  TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Code);
  TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Ok);

  // Stream-executor configuration.
  TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Default);
  TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_SetOrdinal);
  TFTPU_ADD_FN_IN_STRUCT(TpuStreamExecutorConfig_Free);

  // Device descriptions.
  TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuDeviceDescription_Free);

  // Remaining executor helpers: device description, device options, host
  // callback.
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_CreateDeviceDescription);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_NewDeviceOptions);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_FreeDeviceOptions);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_HostCallback);

  // Transfer manager.
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_PlatformId);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_HostShapeToDeviceShape);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToDeviceAsync);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ChooseCompactLayoutForShape);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanShapedBufferBeAccessedNow);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_CanBufferBeAccessedNow);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetInfeedLayout);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralToInfeed);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferBuffersToInfeed);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromOutfeed);
  TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_ResetDevices);

  // Computation placer.
  TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignDevices);
  TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_AssignLocalDevices);

  // Topology-wide counts.
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerHost);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerChip);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HostCount);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipsPerHost);

  // Topology chip grid and core lookups.
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_X);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Y);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_CoreForId);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost);
  TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Version);

  // Per-core location queries.
  TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates);
  TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates);
  TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index);
  TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id);

  // Per-host location queries.
  TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Id);
  TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_NumCores);
  TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Cores);

  // Compiler and executable.
  TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New);
  TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free);
  TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses);
  TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend);
  TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile);
  TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeXlaShapeIndexArray);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_FreeMaybeOwningDeviceMemoryArray);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_HloModule);
  TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free);

  // Shape conversion helpers.
  TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation);
  TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuPaddedShape);
};
495 }
496 
// The closing brace above ends the extern "C" block.
498 
499 #endif  // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_
500