1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H
18 #define ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H
19 
20 #include "ExecutionBurstUtils.h"
21 
22 #include <android-base/thread_annotations.h>
23 #include <android/hardware/neuralnetworks/1.0/types.h>
24 #include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
25 #include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
26 #include <android/hardware/neuralnetworks/1.2/types.h>
27 #include <fmq/MessageQueue.h>
28 #include <hidl/MQDescriptor.h>
29 #include <nnapi/IBurst.h>
30 #include <nnapi/Result.h>
31 #include <nnapi/Types.h>
32 #include <nnapi/hal/ProtectCallback.h>
33 
#include <atomic>
#include <chrono>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>
41 
42 namespace android::hardware::neuralnetworks::V1_2::utils {
43 
44 /**
45  * The ExecutionBurstServer class is responsible for waiting for and deserializing a request object
46  * from a FMQ, performing the inference, and serializing the result back across another FMQ.
47  */
48 class ExecutionBurstServer : public IBurstContext {
49     struct PrivateConstructorTag {};
50 
51   public:
52     /**
53      * Class to cache the memory objects for a burst object.
54      *
55      * This class is thread-safe.
56      */
57     class MemoryCache {
58       public:
59         // Precondition: burstExecutor != nullptr
60         // Precondition: burstCallback != nullptr
61         MemoryCache(nn::SharedBurst burstExecutor, sp<IBurstCallback> burstCallback);
62 
63         /**
64          * Get the cached memory objects corresponding to provided slot identifiers.
65          *
66          * If the slot entry is not present in the cache, this class will use IBurstCallback to
67          * retrieve those entries that are not present in the cache, then cache them.
68          *
69          * @param slots Identifiers of memory objects to be retrieved.
70          * @return A vector where each element is the memory object and a ref-counted cache "hold"
71          *     object to preserve the cache entry of the IBurst object as long as the "hold" object
72          *     is alive, otherwise GeneralError. Each element of the vector corresponds to the
73          *     element of slot.
74          */
75         nn::GeneralResult<std::vector<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>>
76         getCacheEntries(const std::vector<int32_t>& slots);
77 
78         /**
79          * Remove an entry from the cache.
80          *
81          * @param slot Identifier of the memory object to be removed from the cache.
82          */
83         void removeCacheEntry(int32_t slot);
84 
85       private:
86         nn::GeneralResult<void> ensureCacheEntriesArePresentLocked(
87                 const std::vector<int32_t>& slots) REQUIRES(mMutex);
88         nn::GeneralResult<std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>>
89         getCacheEntryLocked(int32_t slot) REQUIRES(mMutex);
90         void addCacheEntryLocked(int32_t slot, nn::SharedMemory memory) REQUIRES(mMutex);
91 
92         std::mutex mMutex;
93         std::map<int32_t, std::pair<nn::SharedMemory, nn::IBurst::OptionalCacheHold>> mCache
94                 GUARDED_BY(mMutex);
95         nn::SharedBurst kBurstExecutor;
96         const sp<IBurstCallback> kBurstCallback;
97     };
98 
99     /**
100      * Create automated context to manage FMQ-based executions.
101      *
102      * This function is intended to be used by a service to automatically:
103      * 1) Receive data from a provided FMQ
104      * 2) Execute a model with the given information
105      * 3) Send the result to the created FMQ
106      *
107      * @param callback Callback used to retrieve memories corresponding to unrecognized slots.
108      * @param requestChannel Input FMQ channel through which the client passes the request to the
109      *     service.
110      * @param resultChannel Output FMQ channel from which the client can retrieve the result of the
111      *     execution.
112      * @param burstExecutor Object which maintains a local cache of the memory pools and executes
113      *     using the cached memory pools.
114      * @param pollingTimeWindow How much time (in microseconds) the ExecutionBurstServer is allowed
115      *     to poll the FMQ before waiting on the blocking futex. Polling may result in lower
116      *     latencies at the potential cost of more power usage.
117      * @return IBurstContext Handle to the burst context.
118      */
119     static nn::GeneralResult<sp<ExecutionBurstServer>> create(
120             const sp<IBurstCallback>& callback,
121             const MQDescriptorSync<FmqRequestDatum>& requestChannel,
122             const MQDescriptorSync<FmqResultDatum>& resultChannel, nn::SharedBurst burstExecutor,
123             std::chrono::microseconds pollingTimeWindow = std::chrono::microseconds{0});
124 
125     ExecutionBurstServer(PrivateConstructorTag tag, const sp<IBurstCallback>& callback,
126                          std::unique_ptr<RequestChannelReceiver> requestChannel,
127                          std::unique_ptr<ResultChannelSender> resultChannel,
128                          nn::SharedBurst burstExecutor);
129     ~ExecutionBurstServer();
130 
131     // Used by the NN runtime to preemptively remove any stored memory. See
132     // IBurstContext::freeMemory for more information.
133     Return<void> freeMemory(int32_t slot) override;
134 
135   private:
136     // Work loop that will continue processing execution requests until the ExecutionBurstServer
137     // object is freed.
138     void task();
139 
140     nn::ExecutionResult<std::pair<hidl_vec<OutputShape>, Timing>> execute(
141             const V1_0::Request& requestWithoutPools, const std::vector<int32_t>& slotsOfPools,
142             MeasureTiming measure);
143 
144     std::thread mWorker;
145     std::atomic<bool> mTeardown{false};
146     const sp<IBurstCallback> mCallback;
147     const std::unique_ptr<RequestChannelReceiver> mRequestChannelReceiver;
148     const std::unique_ptr<ResultChannelSender> mResultChannelSender;
149     const nn::SharedBurst mBurstExecutor;
150     MemoryCache mMemoryCache;
151 };
152 
153 }  // namespace android::hardware::neuralnetworks::V1_2::utils
154 
155 #endif  // ANDROID_HARDWARE_INTERFACES_NEURALNETWORKS_1_2_UTILS_EXECUTION_BURST_SERVER_H
156