1 /*===--------------------------------------------------------------------------
2  *              ATMI (Asynchronous Task and Memory Interface)
3  *
4  * This file is distributed under the MIT License. See LICENSE.txt for details.
5  *===------------------------------------------------------------------------*/
6 #include "atmi_runtime.h"
7 #include "internal.h"
8 #include "rt.h"
9 #include <hsa.h>
10 #include <hsa_ext_amd.h>
11 #include <memory>
12 
13 /*
14  * Initialize/Finalize
15  */
atmi_init()16 atmi_status_t atmi_init() { return core::Runtime::Initialize(); }
17 
atmi_finalize()18 atmi_status_t atmi_finalize() { return core::Runtime::Finalize(); }
19 
20 /*
21  * Machine Info
22  */
atmi_machine_get_info()23 atmi_machine_t *atmi_machine_get_info() {
24   return core::Runtime::GetMachineInfo();
25 }
26 
27 /*
28  * Modules
29  */
atmi_module_register_from_memory_to_place(void * module_bytes,size_t module_size,atmi_place_t place,atmi_status_t (* on_deserialized_data)(void * data,size_t size,void * cb_state),void * cb_state)30 atmi_status_t atmi_module_register_from_memory_to_place(
31     void *module_bytes, size_t module_size, atmi_place_t place,
32     atmi_status_t (*on_deserialized_data)(void *data, size_t size,
33                                           void *cb_state),
34     void *cb_state) {
35   return core::Runtime::getInstance().RegisterModuleFromMemory(
36       module_bytes, module_size, place, on_deserialized_data, cb_state);
37 }
38 
39 /*
40  * Data
41  */
42 
invoke_hsa_copy(hsa_signal_t sig,void * dest,const void * src,size_t size,hsa_agent_t agent)43 static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
44                                     const void *src, size_t size,
45                                     hsa_agent_t agent) {
46   const hsa_signal_value_t init = 1;
47   const hsa_signal_value_t success = 0;
48   hsa_signal_store_screlease(sig, init);
49 
50   hsa_status_t err =
51       hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, sig);
52   if (err != HSA_STATUS_SUCCESS) {
53     return err;
54   }
55 
56   // async_copy reports success by decrementing and failure by setting to < 0
57   hsa_signal_value_t got = init;
58   while (got == init) {
59     got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init,
60                                     UINT64_MAX, ATMI_WAIT_STATE);
61   }
62 
63   if (got != success) {
64     return HSA_STATUS_ERROR;
65   }
66 
67   return err;
68 }
69 
70 struct atmiFreePtrDeletor {
operator ()atmiFreePtrDeletor71   void operator()(void *p) {
72     atmi_free(p); // ignore failure to free
73   }
74 };
75 
atmi_memcpy_h2d(hsa_signal_t signal,void * deviceDest,const void * hostSrc,size_t size,hsa_agent_t agent)76 atmi_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
77                               const void *hostSrc, size_t size,
78                               hsa_agent_t agent) {
79   hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size);
80 
81   // hsa_memory_copy sometimes fails in situations where
82   // allocate + copy succeeds. Looks like it might be related to
83   // locking part of a read only segment. Fall back for now.
84   if (rc == HSA_STATUS_SUCCESS) {
85     return ATMI_STATUS_SUCCESS;
86   }
87 
88   void *tempHostPtr;
89   atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
90   atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
91   if (ret != ATMI_STATUS_SUCCESS) {
92     DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
93                 size);
94     return ret;
95   }
96   std::unique_ptr<void, atmiFreePtrDeletor> del(tempHostPtr);
97   memcpy(tempHostPtr, hostSrc, size);
98 
99   if (invoke_hsa_copy(signal, deviceDest, tempHostPtr, size, agent) !=
100       HSA_STATUS_SUCCESS) {
101     return ATMI_STATUS_ERROR;
102   }
103   return ATMI_STATUS_SUCCESS;
104 }
105 
atmi_memcpy_d2h(hsa_signal_t signal,void * dest,const void * deviceSrc,size_t size,hsa_agent_t agent)106 atmi_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
107                               const void *deviceSrc, size_t size,
108                               hsa_agent_t agent) {
109   hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size);
110 
111   // hsa_memory_copy sometimes fails in situations where
112   // allocate + copy succeeds. Looks like it might be related to
113   // locking part of a read only segment. Fall back for now.
114   if (rc == HSA_STATUS_SUCCESS) {
115     return ATMI_STATUS_SUCCESS;
116   }
117 
118   void *tempHostPtr;
119   atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
120   atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
121   if (ret != ATMI_STATUS_SUCCESS) {
122     DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
123                 size);
124     return ret;
125   }
126   std::unique_ptr<void, atmiFreePtrDeletor> del(tempHostPtr);
127 
128   if (invoke_hsa_copy(signal, tempHostPtr, deviceSrc, size, agent) !=
129       HSA_STATUS_SUCCESS) {
130     return ATMI_STATUS_ERROR;
131   }
132 
133   memcpy(dest, tempHostPtr, size);
134   return ATMI_STATUS_SUCCESS;
135 }
136 
atmi_free(void * ptr)137 atmi_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); }
138 
atmi_malloc(void ** ptr,size_t size,atmi_mem_place_t place)139 atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) {
140   return core::Runtime::Malloc(ptr, size, place);
141 }
142