1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "lang_id/common/flatbuffers/model-utils.h"
18 
19 #include <string.h>
20 
21 #include <string>
22 
23 #include "lang_id/common/lite_base/logging.h"
24 #include "lang_id/common/math/checksum.h"
25 
26 namespace libtextclassifier3 {
27 namespace saft_fbs {
28 
ClearlyFailsChecksum(const Model & model)29 bool ClearlyFailsChecksum(const Model &model) {
30   if (!flatbuffers::IsFieldPresent(&model, Model::VT_CRC32)) {
31     SAFTM_LOG(WARNING)
32         << "No CRC32, most likely an old model; skip CRC32 check";
33     return false;
34   }
35   const mobile::uint32 expected_crc32 = model.crc32();
36   const mobile::uint32 actual_crc32 = ComputeCrc2Checksum(&model);
37   if (actual_crc32 != expected_crc32) {
38     SAFTM_LOG(ERROR) << "Corrupt model: different CRC32: " << actual_crc32
39                      << " vs " << expected_crc32;
40     return true;
41   }
42   SAFTM_DLOG(INFO) << "Successfully checked CRC32 " << actual_crc32;
43   return false;
44 }
45 
GetVerifiedModelFromBytes(const char * data,size_t num_bytes)46 const Model *GetVerifiedModelFromBytes(const char *data, size_t num_bytes) {
47   if ((data == nullptr) || (num_bytes == 0)) {
48     SAFTM_LOG(ERROR) << "GetModel called on an empty sequence of bytes";
49     return nullptr;
50   }
51   const uint8_t *start = reinterpret_cast<const uint8_t *>(data);
52   flatbuffers::Verifier verifier(start, num_bytes);
53   if (!VerifyModelBuffer(verifier)) {
54     SAFTM_LOG(ERROR) << "Not a valid Model flatbuffer";
55     return nullptr;
56   }
57   const Model *model = GetModel(start);
58   if (model == nullptr) {
59     return nullptr;
60   }
61   if (ClearlyFailsChecksum(*model)) {
62     return nullptr;
63   }
64   return model;
65 }
66 
GetInputByName(const Model * model,const std::string & name)67 const ModelInput *GetInputByName(const Model *model, const std::string &name) {
68   if (model == nullptr) {
69     SAFTM_LOG(ERROR) << "GetInputByName called with model == nullptr";
70     return nullptr;
71   }
72   const auto *inputs = model->inputs();
73   if (inputs == nullptr) {
74     // We should always have a list of inputs; maybe an empty one, if no inputs,
75     // but the list should be there.
76     SAFTM_LOG(ERROR) << "null inputs";
77     return nullptr;
78   }
79   for (const ModelInput *input : *inputs) {
80     if (input != nullptr) {
81       const flatbuffers::String *input_name = input->name();
82       if (input_name && input_name->str() == name) {
83         return input;
84       }
85     }
86   }
87   return nullptr;
88 }
89 
GetInputBytes(const ModelInput * input)90 mobile::StringPiece GetInputBytes(const ModelInput *input) {
91   if ((input == nullptr) || (input->data() == nullptr)) {
92     SAFTM_LOG(ERROR) << "ModelInput has no content";
93     return mobile::StringPiece(nullptr, 0);
94   }
95   const flatbuffers::Vector<uint8_t> *input_data = input->data();
96   if (input_data == nullptr) {
97     SAFTM_LOG(ERROR) << "null input data";
98     return mobile::StringPiece(nullptr, 0);
99   }
100   return mobile::StringPiece(reinterpret_cast<const char *>(input_data->data()),
101                              input_data->size());
102 }
103 
FillParameters(const Model & model,mobile::TaskContext * context)104 bool FillParameters(const Model &model, mobile::TaskContext *context) {
105   if (context == nullptr) {
106     SAFTM_LOG(ERROR) << "null context";
107     return false;
108   }
109   const auto *parameters = model.parameters();
110   if (parameters == nullptr) {
111     // We should always have a list of parameters; maybe an empty one, if no
112     // parameters, but the list should be there.
113     SAFTM_LOG(ERROR) << "null list of parameters";
114     return false;
115   }
116   for (const ModelParameter *p : *parameters) {
117     if (p == nullptr) {
118       SAFTM_LOG(ERROR) << "null parameter";
119       return false;
120     }
121     if (p->name() == nullptr) {
122       SAFTM_LOG(ERROR) << "null parameter name";
123       return false;
124     }
125     const std::string name = p->name()->str();
126     if (name.empty()) {
127       SAFTM_LOG(ERROR) << "empty parameter name";
128       return false;
129     }
130     if (p->value() == nullptr) {
131       SAFTM_LOG(ERROR) << "null parameter name";
132       return false;
133     }
134     context->SetParameter(name, p->value()->str());
135   }
136   return true;
137 }
138 
139 namespace {
140 // Updates |*crc| with the information from |s|.  Auxiliary for
141 // ComputeCrc2Checksum.
142 //
143 // The bytes from |info| are also used to update the CRC32 checksum.  |info|
144 // should be a brief tag that indicates what |s| represents.  The idea is to add
145 // some structure to the information that goes into the CRC32 computation.
146 template <typename T>
UpdateCrc(mobile::Crc32 * crc,const flatbuffers::Vector<T> * s,mobile::StringPiece info)147 void UpdateCrc(mobile::Crc32 *crc, const flatbuffers::Vector<T> *s,
148                mobile::StringPiece info) {
149   crc->Update("|");
150   crc->Update(info.data(), info.size());
151   crc->Update(":");
152   if (s == nullptr) {
153     crc->Update("empty");
154   } else {
155     crc->Update(reinterpret_cast<const char *>(s->data()),
156                 s->size() * sizeof(T));
157   }
158 }
159 }  // namespace
160 
ComputeCrc2Checksum(const Model * model)161 mobile::uint32 ComputeCrc2Checksum(const Model *model) {
162   // Implementation note: originally, I (salcianu@) thought we can just compute
163   // a CRC32 checksum of the model bytes.  Unfortunately, the expected checksum
164   // is there too (and because we don't control the flatbuffer format, we can't
165   // "arrange" for it to be placed at the head / tail of those bytes).  Instead,
166   // we traverse |model| and feed into the CRC32 computation those parts we are
167   // interested in (which excludes the crc32 field).
168   //
169   // Note: storing the checksum outside the Model would be too disruptive for
170   // the way we currently ship our models.
171   mobile::Crc32 crc;
172   if (model == nullptr) {
173     return crc.Get();
174   }
175   crc.Update("|Parameters:");
176   const auto *parameters = model->parameters();
177   if (parameters != nullptr) {
178     for (const ModelParameter *p : *parameters) {
179       if (p != nullptr) {
180         UpdateCrc(&crc, p->name(), "name");
181         UpdateCrc(&crc, p->value(), "value");
182       }
183     }
184   }
185   crc.Update("|Inputs:");
186   const auto *inputs = model->inputs();
187   if (inputs != nullptr) {
188     for (const ModelInput *input : *inputs) {
189       if (input != nullptr) {
190         UpdateCrc(&crc, input->name(), "name");
191         UpdateCrc(&crc, input->type(), "type");
192         UpdateCrc(&crc, input->sub_type(), "sub-type");
193         UpdateCrc(&crc, input->data(), "data");
194       }
195     }
196   }
197   return crc.Get();
198 }
199 
200 }  // namespace saft_fbs
}  // namespace libtextclassifier3
202