/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_IDEVICE_H
#define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_IDEVICE_H

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "nnapi/Result.h"
#include "nnapi/Types.h"

namespace android::nn {

// Forward declarations
class IBuffer;
class IPreparedModel;

/**
 * This interface represents a device driver.
 *
 * This interface is thread-safe, and any class that implements this interface must be thread-safe.
 */
class IDevice {
   public:
    /**
     * Returns the name of the driver.
     *
     * @return Name of the driver.
     */
    virtual const std::string& getName() const = 0;

    /**
     * Get the version string of the driver implementation.
     *
     * The version string must be a unique token among the set of version strings of drivers of a
     * specific device. The token identifies the device driver's implementation. The token must not
     * be confused with the feature level which is solely defined by the interface version. This API
     * is opaque to the Android framework, but the Android framework may use the information for
     * debugging or to pass on to NNAPI applications.
     *
     * Application developers sometimes have specific requirements to ensure good user experiences,
     * and they need more information to make intelligent decisions when the Android framework
     * cannot. For example, combined with the device name and other information, the token can help
     * NNAPI applications filter devices based on their needs:
     * - An application demands a certain level of performance, but a specific version of the driver
     *   cannot meet that requirement because of a performance regression. The application can
     *   disallow the driver based on the version provided.
     * - An application has a minimum precision requirement, but certain versions of the driver
     *   cannot meet that requirement because of bugs or certain optimizations. The application can
     *   filter out versions of these drivers.
     *
     * @return version The version string of the device implementation. Must have nonzero length.
     */
    virtual const std::string& getVersionString() const = 0;

    /**
     * Returns the feature level of a driver.
     *
     * @return featureLevel The Version of the NNAPI specification this driver implements.
     *     See {@link ANeuralNetworks_getFeatureLevel} and {@link FeatureLevelCode}
     *     for NNAPI specification versioning information.
     */
    virtual Version getFeatureLevel() const = 0;

    /**
     * Returns the device type of a driver.
     *
     * The device type can be used to help application developers to distribute Machine Learning
     * workloads and other workloads such as graphical rendering. E.g., for an app which renders AR
     * scenes based on real time object detection results, the developer could choose an ACCELERATOR
     * type device for ML workloads, and reserve GPU for graphical rendering.
     *
     * @return type The DeviceType of the device. Please note, this is not a bitfield of
     *     DeviceTypes. Each device must only be of a single DeviceType.
     */
    virtual DeviceType getType() const = 0;

    /**
     * Gets information about extensions supported by the driver implementation.
     *
     * Extensions of category ExtensionCategory::BASE must not appear in the list.
     *
     * All extension operations and operands must be fully supported for the extension to appear in
     * the list of supported extensions.
     *
     * @return extensions A list of supported extensions.
     */
    virtual const std::vector<Extension>& getSupportedExtensions() const = 0;

    /**
     * Gets the capabilities of a driver.
     *
     * @return capabilities Capabilities of the driver.
     */
    virtual const Capabilities& getCapabilities() const = 0;

    /**
     * Gets the caching requirements of the driver implementation.
     *
     * There are two types of cache file descriptors provided to the driver: model cache and data
     * cache.
     *
     * The data cache is for caching constant data, possibly including preprocessed and transformed
     * tensor buffers. Any modification to the data cache should have no worse effect than
     * generating bad output values at execution time.
     *
     * The model cache is for caching security-sensitive data such as compiled executable machine
     * code in the device's native binary format. A modification to the model cache may affect the
     * driver's execution behavior, and a malicious client could make use of this to execute beyond
     * the granted permission. Thus, the driver must always check whether the model cache is
     * corrupted before preparing the model from cache.
     *
     * IDevice::getNumberOfCacheFilesNeeded returns how many of each type of cache files the driver
     * implementation needs to cache a single prepared model. Returning 0 for both types indicates
     * compilation caching is not supported by this driver. The driver may still choose not to cache
     * certain compiled models even if it reports that caching is supported.
     *
     * If the device reports that caching is not supported, the user may avoid calling
     * IDevice::prepareModelFromCache or providing cache file descriptors to IDevice::prepareModel.
     *
     * @return A pair of:
     *     - numModelCache An unsigned integer indicating how many files for model cache the driver
     *       needs to cache a single prepared model. It must be less than or equal to
     *       ::android::nn::kMaxNumberOfCacheFiles.
     *     - numDataCache An unsigned integer indicating how many files for data cache the driver
     *       needs to cache a single prepared model. It must be less than or equal to
     *       ::android::nn::kMaxNumberOfCacheFiles.
     */
    virtual std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const = 0;

    /**
     * Blocks until the device is not in a bad state.
     *
     * @return Nothing on success, otherwise GeneralError.
     */
    virtual GeneralResult<void> wait() const = 0;

    /**
     * Gets the supported operations in a model.
     *
     * IDevice::getSupportedOperations indicates which operations of the top-level subgraph are
     * fully supported by the vendor driver. If an operation may not be supported for any reason,
     * IDevice::getSupportedOperations must return `false` for that operation.
     *
     * The {@link OperationType::IF} and {@link OperationType::WHILE} operations may only be fully
     * supported if the vendor driver fully supports all operations in the referenced subgraphs.
     *
     * @param model A Model whose operations--and their corresponding operands--are to be verified
     *     by the driver.
     * @return supportedOperations A list of supported operations, where `true` indicates the
     *     operation is supported and `false` indicates the operation is not supported. The index of
     *     "supported" corresponds with the index of the operation it is describing.
     */
    virtual GeneralResult<std::vector<bool>> getSupportedOperations(const Model& model) const = 0;

    /**
     * Creates a prepared model for execution.
     *
     * IDevice::prepareModel is used to make any necessary transformations or alternative
     * representations to a model for execution, possibly including transformations on the constant
     * data, optimization on the model's graph, or compilation into the device's native binary
     * format. The model itself is not changed.
     *
     * Optionally, caching information may be provided for the driver to save the prepared model to
     * cache files for faster model compilation time when the same model preparation is requested in
     * the future. There are two types of cache file handles provided to the driver: model cache and
     * data cache. For more information on the two types of cache handles, refer to
     * IDevice::getNumberOfCacheFilesNeeded.
     *
     * The file descriptors must be opened with read and write permission. A file may have any size,
     * and the corresponding file descriptor may have any offset. The driver must truncate a file to
     * zero size before writing to that file. The file descriptors may be closed by the client once
     * the preparation has finished. The driver must dup a file descriptor if it wants to get access
     * to the cache file later.
     *
     * IDevice::prepareModel must verify its inputs related to preparing the model (as opposed to
     * saving the prepared model to cache) are correct. If there is an error, IDevice::prepareModel
     * must immediately return {@link ErrorStatus::INVALID_ARGUMENT} as a GeneralError. If the
     * inputs to IDevice::prepareModel are valid and there is no error, IDevice::prepareModel must
     * prepare the model.
     *
     * The model is prepared with a priority. This priority is relative to other prepared models
     * owned by the same client. Higher priority executions may use more compute resources than
     * lower priority executions, and may preempt or starve lower priority executions.
     *
     * IDevice::prepareModel can be called with an optional deadline. If the model is not able to be
     * prepared before the provided deadline, the model preparation may be aborted, and either
     * {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned as a GeneralError.
     *
     * Optionally, the driver may save the prepared model to cache during the preparation. Any error
     * that occurs when saving to cache must not affect the status of preparing the model. Even if
     * the input arguments related to the cache may be invalid, or the driver may fail to save to
     * cache, IDevice::prepareModel must finish preparing the model. The driver may choose not to
     * save to cache even if the caching information is provided and valid.
     *
     * The only information that may be unknown to the model at this stage is the shape of the
     * tensors, which may only be known at execution time. As such, some driver services may return
     * partially prepared models, where the prepared model may only be finished when it is paired
     * with a set of inputs to the model. Note that the same prepared model object may be used with
     * different shapes of inputs on different (possibly concurrent) executions.
     *
     * @param model The model to be prepared for execution.
     * @param preference Indicates the intended execution behavior of a prepared model.
     * @param priority Priority of the prepared model relative to other prepared models owned by an
     *     application.
     * @param deadline Optional time point. If provided, prepareModel is expected to complete by
     *     this time point. If it is not able to be completed by the deadline, the execution may be
     *     aborted.
     * @param modelCache A vector of handles with each entry holding exactly one cache file
     *     descriptor for the security-sensitive cache. The length of the vector must either be 0
     *     indicating that caching information is not provided, or match numModelCache returned from
     *     IDevice::getNumberOfCacheFilesNeeded. The cache handles will be provided in the same
     *     order when retrieving the preparedModel from cache files with
     *     IDevice::prepareModelFromCache.
     * @param dataCache A vector of handles with each entry holding exactly one cache file
     *     descriptor for the constants' cache. The length of the vector must either be 0 indicating
     *     that caching information is not provided, or match numDataCache returned from
     *     IDevice::getNumberOfCacheFilesNeeded. The cache handles will be provided in the same
     *     order when retrieving the preparedModel from cache files with
     *     IDevice::prepareModelFromCache.
     * @param token A caching token of length ::android::nn::kByteSizeOfCacheToken identifying the
     *     prepared model. The same token will be provided when retrieving the prepared model from
     *     the cache files with IDevice::prepareModelFromCache. Tokens should be chosen to have a
     *     low rate of collision for a particular application. The driver cannot detect a collision;
     *     a collision will result in a failed execution or in a successful execution that produces
     *     incorrect output values. If both modelCache and dataCache are empty indicating that
     *     caching information is not provided, this token must be ignored.
     * @param hints Specifies the optional device specific compilation hints. The same token must
     *     not be repeated. It is allowed for the driver to ignore the user-provided hints.
     * @param extensionNameToPrefix The mapping between extension names and prefixes of token
     *     values.
     * @return preparedModel An IPreparedModel object representing a model that has been prepared
     *     for execution, otherwise GeneralError.
     */
    virtual GeneralResult<SharedPreparedModel> prepareModel(
            const Model& model, ExecutionPreference preference, Priority priority,
            OptionalTimePoint deadline, const std::vector<SharedHandle>& modelCache,
            const std::vector<SharedHandle>& dataCache, const CacheToken& token,
            const std::vector<nn::TokenValuePair>& hints,
            const std::vector<nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const = 0;

    /**
     * Creates a prepared model from cache files for execution.
     *
     * IDevice::prepareModelFromCache is used to retrieve a prepared model directly from cache files
     * to avoid slow model compilation time. There are two types of cache file handles provided to
     * the driver: model cache and data cache. For more information on the two types of cache
     * handles, refer to IDevice::getNumberOfCacheFilesNeeded.
     *
     * The file descriptors must be opened with read and write permission. A file may have any size,
     * and the corresponding file descriptor may have any offset. The driver must truncate a file to
     * zero size before writing to that file. The file descriptors may be closed by the client once
     * the preparation has finished. The driver must dup a file descriptor if it wants to get access
     * to the cache file later.
     *
     * IDevice::prepareModelFromCache must verify its inputs are correct, and that the
     * security-sensitive cache has not been modified since it was last written by the driver. If
     * there is an error, or if compilation caching is not supported, or if the security-sensitive
     * cache has been modified, IDevice::prepareModelFromCache must immediately return {@link
     * ErrorStatus::INVALID_ARGUMENT} as a GeneralError. If the inputs to
     * IDevice::prepareModelFromCache are valid, the security-sensitive cache is not modified, and
     * there is no error, IDevice::prepareModelFromCache must prepare the model.
     *
     * IDevice::prepareModelFromCache can be called with an optional deadline. If the model is not
     * able to be prepared before the provided deadline, the model preparation may be aborted, and
     * either {@link ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned as a GeneralError.
     *
     * The only information that may be unknown to the model at this stage is the shape of the
     * tensors, which may only be known at execution time. As such, some driver services may return
     * partially prepared models, where the prepared model may only be finished when it is paired
     * with a set of inputs to the model. Note that the same prepared model object may be used with
     * different shapes of inputs on different (possibly concurrent) executions.
     *
     * @param deadline Optional time point. If provided, prepareModel is expected to complete by
     *     this time point. If it is not able to be completed by the deadline, the execution may be
     *     aborted.
     * @param modelCache A vector of handles with each entry holding exactly one cache file
     *     descriptor for the security-sensitive cache. The length of the vector must match the
     *     numModelCache returned from IDevice::getNumberOfCacheFilesNeeded. The cache handles will
     *     be provided in the same order as with IDevice::prepareModel.
     * @param dataCache A vector of handles with each entry holding exactly one cache file
     *     descriptor for the constants' cache. The length of the vector must match the numDataCache
     *     returned from IDevice::getNumberOfCacheFilesNeeded. The cache handles will be provided in
     *     the same order as with IDevice::prepareModel.
     * @param token A caching token of length ::android::nn::kByteSizeOfCacheToken identifying the
     *     prepared model. It is the same token provided when saving the cache files with
     *     IDevice::prepareModel. Tokens should be chosen to have a low rate of collision for a
     *     particular application. The driver cannot detect a collision; a collision will result in
     *     a failed execution or in a successful execution that produces incorrect output values.
     * @return preparedModel An IPreparedModel object representing a model that has been prepared
     *     for execution, otherwise GeneralError.
     */
    virtual GeneralResult<SharedPreparedModel> prepareModelFromCache(
            OptionalTimePoint deadline, const std::vector<SharedHandle>& modelCache,
            const std::vector<SharedHandle>& dataCache, const CacheToken& token) const = 0;

    /**
     * Allocates a driver-managed buffer with the properties specified by the descriptor as well as
     * the input and output roles of prepared models.
     *
     * IDevice::allocate must verify its inputs are correct. If there is an error, or if a certain
     * role or property is not supported by the driver, IDevice::allocate must return with {@link
     * ErrorStatus::INVALID_ARGUMENT} as a GeneralError. If the allocation is successful, this
     * method must return the produced IBuffer. A successful allocation must accommodate all of the
     * specified roles and buffer properties.
     *
     * The buffer is allocated in an uninitialized state. An uninitialized buffer may only be used
     * in ways that are specified by outputRoles. A buffer is initialized after it is used as an
     * output in a successful execution, or after a successful invocation of IBuffer::copyFrom on
     * the buffer. An initialized buffer may be used according to all roles specified in inputRoles
     * and outputRoles. A buffer will return to the uninitialized state if it is used as an output
     * in a failed execution, or after a failed invocation of IBuffer::copyFrom on the buffer.
     *
     * The driver may deduce the dimensions of the buffer according to the buffer descriptor as well
     * as the input and output roles. The dimensions or rank of the buffer may be unknown at this
     * stage. As such, some driver services may only create a placeholder and defer the actual
     * allocation until execution time. Note that the same buffer may be used for different shapes
     * of outputs on different executions. When the buffer is used as an input, the input shape must
     * be the same as the output shape from the last execution using this buffer as an output.
     *
     * The driver must apply proper validation upon every usage of the buffer, and fail the
     * execution immediately if the usage is illegal.
     *
     * @param desc A buffer descriptor specifying the properties of the buffer to allocate.
     * @param preparedModels A vector of IPreparedModel objects. Must only contain IPreparedModel
     *     objects from the same IDevice as this method invoked on.
     * @param inputRoles A vector of roles with each specifying an input to a prepared model.
     * @param outputRoles A vector of roles with each specifying an output to a prepared model.
     *     Each role specified in inputRoles and outputRoles must be unique. The corresponding model
     *     operands of the roles must have the same OperandType, scale, zero point, and ExtraParams.
     *     The dimensions of the operands and the dimensions specified in the buffer descriptor must
     *     be compatible with each other. Two dimensions are incompatible if there is at least one
     *     axis that is fully specified in both but has different values.
     * @return The allocated IBuffer object. If the buffer was unable to be allocated due to an
     *     error, a GeneralError is returned instead.
     */
    virtual GeneralResult<SharedBuffer> allocate(
            const BufferDesc& desc, const std::vector<SharedPreparedModel>& preparedModels,
            const std::vector<BufferRole>& inputRoles,
            const std::vector<BufferRole>& outputRoles) const = 0;

    // Public virtual destructor to allow objects to be stored (and destroyed) as smart pointers.
    // E.g., std::unique_ptr<IDevice>.
    virtual ~IDevice() = default;

   protected:
    // Protect the non-destructor special member functions to prevent object slicing.
    IDevice() = default;
    IDevice(const IDevice&) = default;
    IDevice(IDevice&&) noexcept = default;
    IDevice& operator=(const IDevice&) = default;
    IDevice& operator=(IDevice&&) noexcept = default;
};

}  // namespace android::nn

#endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_IDEVICE_H