1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
18 #define LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
19 
20 #include "common/memory_image/data-store.h"
21 #include "common/task-spec.pb.h"
22 #include "util/strings/stringpiece.h"
23 
24 namespace libtextclassifier {
25 namespace nlp_core {
26 
27 // In-memory representation of data for a Saft model.  Provides access to a
28 // TaskSpec object (produced by the "spec" stage of the Saft training model) and
29 // to the bytes of the TaskInputs mentioned in that spec (all these bytes are in
30 // memory, no file I/O required).
31 //
32 // Technically, an InMemoryModelData is a DataStore that maps the special string
33 // kTaskSpecDataStoreEntryName to the binary serialization of a TaskSpec.  For
34 // each TaskInput (of the TaskSpec) with a file_pattern that starts with
35 // kFilePatternPrefix (see below), the same DataStore maps file_pattern to some
36 // content bytes.  This way, it is possible to have all TaskInputs in memory,
37 // while still allowing classic, on-disk TaskInputs.
38 class InMemoryModelData {
39  public:
40   // Name for the DataStore entry that stores the serialized TaskSpec for the
41   // entire model.
42   static const char kTaskSpecDataStoreEntryName[];
43 
44   // Returns prefix for TaskInput::Part::file_pattern, to distinguish those
45   // "files" from other files.
46   static const char kFilePatternPrefix[];
47 
48   // Constructs an InMemoryModelData based on a chunk of bytes.  Those bytes
49   // should have been produced by a DataStoreBuilder.
InMemoryModelData(StringPiece bytes)50   explicit InMemoryModelData(StringPiece bytes) : data_store_(bytes) {}
51 
52   // Fills *task_spec with a TaskSpec similar to the one used by
53   // DataStoreBuilder (when building the bytes used to construct this
54   // InMemoryModelData) except that each file name
55   // (TaskInput::Part::file_pattern) is replaced with a name that can be used to
56   // retrieve the corresponding file content bytes via GetBytesForInputFile().
57   //
58   // Returns true on success, false otherwise.
59   bool GetTaskSpec(TaskSpec *task_spec) const;
60 
61   // Gets content bytes for a file.  The file_name argument should be the
62   // file_pattern for a TaskInput from the TaskSpec (see GetTaskSpec()).
63   // Returns a StringPiece indicating a memory area with the content bytes.  On
64   // error, returns StringPiece(nullptr, 0).
65   StringPiece GetBytesForInputFile(const std::string &file_name) const;
66 
67  private:
68   const memory_image::DataStore data_store_;
69 };
70 
71 }  // namespace nlp_core
72 }  // namespace libtextclassifier
73 
74 #endif  // LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_IN_MEMORY_MODEL_DATA_H_
75