1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/policy/core/common/schema.h"
6 
7 #include <limits.h>
8 #include <stddef.h>
9 
10 #include <algorithm>
11 #include <climits>
12 #include <map>
13 #include <memory>
14 #include <utility>
15 
16 #include "base/compiler_specific.h"
17 #include "base/containers/flat_set.h"
18 #include "base/logging.h"
19 #include "base/macros.h"
20 #include "base/stl_util.h"
21 #include "base/strings/stringprintf.h"
22 #include "components/json_schema/json_schema_constants.h"
23 #include "components/json_schema/json_schema_validator.h"
24 #include "components/policy/core/common/schema_internal.h"
25 #include "third_party/re2/src/re2/re2.h"
26 
27 namespace schema = json_schema_constants;
28 
29 namespace policy {
30 
31 using internal::PropertiesNode;
32 using internal::PropertyNode;
33 using internal::RestrictionNode;
34 using internal::SchemaData;
35 using internal::SchemaNode;
36 
37 namespace {
38 
// Associates each schema "id" attribute with the index of the SchemaNode that
// declared it.
using IdMap = std::map<std::string, int>;

// Pending "$ref" fix-ups. Each entry pairs the referenced "id" with the
// address of the index slot that must receive the matching SchemaNode index
// once every ID is known.
using ReferenceList = std::vector<std::pair<std::string, int*>>;
46 
// Element counts for each of the storage arrays. They are computed up front so
// the arrays never need to grow while parsing; growing would invalidate
// pointers into their contents (the c_str() of stored strings and the
// addresses of index slots recorded for "$ref" attributes).
struct StorageSizes {
  // Every counter starts at zero.
  size_t strings = 0;
  size_t schema_nodes = 0;
  size_t property_nodes = 0;
  size_t properties_nodes = 0;
  size_t restriction_nodes = 0;
  size_t required_properties = 0;
  size_t int_enums = 0;
  size_t string_enums = 0;
};
70 
// Sentinel index meaning "no node here"; plays the role a NULL pointer would.
constexpr int kInvalid = -1;
74 
SchemaTypeToValueType(const std::string & type_string,base::Value::Type * type)75 bool SchemaTypeToValueType(const std::string& type_string,
76                            base::Value::Type* type) {
77   // Note: "any" is not an accepted type.
78   static const struct {
79     const char* schema_type;
80     base::Value::Type value_type;
81   } kSchemaToValueTypeMap[] = {
82     { schema::kArray,        base::Value::Type::LIST       },
83     { schema::kBoolean,      base::Value::Type::BOOLEAN    },
84     { schema::kInteger,      base::Value::Type::INTEGER    },
85     { schema::kNull,         base::Value::Type::NONE       },
86     { schema::kNumber,       base::Value::Type::DOUBLE     },
87     { schema::kObject,       base::Value::Type::DICTIONARY },
88     { schema::kString,       base::Value::Type::STRING     },
89   };
90   for (size_t i = 0; i < arraysize(kSchemaToValueTypeMap); ++i) {
91     if (kSchemaToValueTypeMap[i].schema_type == type_string) {
92       *type = kSchemaToValueTypeMap[i].value_type;
93       return true;
94     }
95   }
96   return false;
97 }
98 
StrategyAllowInvalidOnTopLevel(SchemaOnErrorStrategy strategy)99 bool StrategyAllowInvalidOnTopLevel(SchemaOnErrorStrategy strategy) {
100   return strategy == SCHEMA_ALLOW_INVALID ||
101          strategy == SCHEMA_ALLOW_INVALID_TOPLEVEL ||
102          strategy == SCHEMA_ALLOW_INVALID_TOPLEVEL_AND_ALLOW_UNKNOWN;
103 }
104 
StrategyAllowUnknownOnTopLevel(SchemaOnErrorStrategy strategy)105 bool StrategyAllowUnknownOnTopLevel(SchemaOnErrorStrategy strategy) {
106   return strategy != SCHEMA_STRICT;
107 }
108 
StrategyForNextLevel(SchemaOnErrorStrategy strategy)109 SchemaOnErrorStrategy StrategyForNextLevel(SchemaOnErrorStrategy strategy) {
110   static SchemaOnErrorStrategy next_level_strategy[] = {
111     SCHEMA_STRICT,         // SCHEMA_STRICT
112     SCHEMA_STRICT,         // SCHEMA_ALLOW_UNKNOWN_TOPLEVEL
113     SCHEMA_ALLOW_UNKNOWN,  // SCHEMA_ALLOW_UNKNOWN
114     SCHEMA_STRICT,         // SCHEMA_ALLOW_INVALID_TOPLEVEL
115     SCHEMA_ALLOW_UNKNOWN,  // SCHEMA_ALLOW_INVALID_TOPLEVEL_AND_ALLOW_UNKNOWN
116     SCHEMA_ALLOW_INVALID,  // SCHEMA_ALLOW_INVALID
117   };
118   return next_level_strategy[static_cast<int>(strategy)];
119 }
120 
// Records a schema validation failure: empties |error_path| when one was
// supplied (it may be null) and stores |msg| in |error|, which must not be
// null.
void SchemaErrorFound(std::string* error_path,
                      std::string* error,
                      const std::string& msg) {
  if (error_path != nullptr)
    error_path->clear();
  *error = msg;
}
128 
AddListIndexPrefixToPath(int index,std::string * path)129 void AddListIndexPrefixToPath(int index, std::string* path) {
130   if (path) {
131     if (path->empty())
132       *path = base::StringPrintf("items[%d]", index);
133     else
134       *path = base::StringPrintf("items[%d].", index) + *path;
135   }
136 }
137 
// Prepends the dictionary component |key| to |path|. An empty path becomes
// |key|; otherwise the result is "<key>.<old path>". Does nothing when |path|
// is null.
void AddDictKeyPrefixToPath(const std::string& key, std::string* path) {
  if (!path)
    return;
  *path = path->empty() ? key : key + "." + *path;
}
146 
147 }  // namespace
148 
149 // Contains the internal data representation of a Schema. This can either wrap
150 // a SchemaData owned elsewhere (currently used to wrap the Chrome schema, which
151 // is generated at compile time), or it can own its own SchemaData.
152 class Schema::InternalStorage
153     : public base::RefCountedThreadSafe<InternalStorage> {
154  public:
155   static scoped_refptr<const InternalStorage> Wrap(const SchemaData* data);
156 
157   static scoped_refptr<const InternalStorage> ParseSchema(
158       const base::DictionaryValue& schema,
159       std::string* error);
160 
data() const161   const SchemaData* data() const { return &schema_data_; }
162 
root_node() const163   const SchemaNode* root_node() const {
164     return schema(0);
165   }
166 
167   // Returns the validation_schema root node if one was generated, or nullptr.
validation_schema_root_node() const168   const SchemaNode* validation_schema_root_node() const {
169     return schema_data_.validation_schema_root_index >= 0
170                ? schema(schema_data_.validation_schema_root_index)
171                : nullptr;
172   }
173 
schema(int index) const174   const SchemaNode* schema(int index) const {
175     return schema_data_.schema_nodes + index;
176   }
177 
properties(int index) const178   const PropertiesNode* properties(int index) const {
179     return schema_data_.properties_nodes + index;
180   }
181 
property(int index) const182   const PropertyNode* property(int index) const {
183     return schema_data_.property_nodes + index;
184   }
185 
restriction(int index) const186   const RestrictionNode* restriction(int index) const {
187     return schema_data_.restriction_nodes + index;
188   }
189 
required_property(int index) const190   const char* const* required_property(int index) const {
191     return schema_data_.required_properties + index;
192   }
193 
int_enums(int index) const194   const int* int_enums(int index) const {
195     return schema_data_.int_enums + index;
196   }
197 
string_enums(int index) const198   const char* const* string_enums(int index) const {
199     return schema_data_.string_enums + index;
200   }
201 
202   // Compiles regular expression |pattern|. The result is cached and will be
203   // returned directly next time.
204   re2::RE2* CompileRegex(const std::string& pattern) const;
205 
206  private:
207   friend class base::RefCountedThreadSafe<InternalStorage>;
208 
209   InternalStorage();
210   ~InternalStorage();
211 
212   // Determines the expected |sizes| of the storage for the representation
213   // of |schema|.
214   static void DetermineStorageSizes(const base::DictionaryValue& schema,
215                                    StorageSizes* sizes);
216 
217   // Parses the JSON schema in |schema|.
218   //
219   // If |schema| has a "$ref" attribute then a pending reference is appended
220   // to the |reference_list|, and nothing else is done.
221   //
222   // Otherwise, |index| gets assigned the index of the corresponding SchemaNode
223   // in |schema_nodes_|. If the |schema| contains an "id" then that ID is mapped
224   // to the |index| in the |id_map|.
225   //
226   // If |schema| is invalid then |error| gets the error reason and false is
227   // returned. Otherwise returns true.
228   bool Parse(const base::DictionaryValue& schema,
229              int* index,
230              IdMap* id_map,
231              ReferenceList* reference_list,
232              std::string* error);
233 
234   // Helper for Parse() that gets an already assigned |schema_node| instead of
235   // an |index| pointer.
236   bool ParseDictionary(const base::DictionaryValue& schema,
237                        SchemaNode* schema_node,
238                        IdMap* id_map,
239                        ReferenceList* reference_list,
240                        std::string* error);
241 
242   // Helper for Parse() that gets an already assigned |schema_node| instead of
243   // an |index| pointer.
244   bool ParseList(const base::DictionaryValue& schema,
245                  SchemaNode* schema_node,
246                  IdMap* id_map,
247                  ReferenceList* reference_list,
248                  std::string* error);
249 
250   bool ParseEnum(const base::DictionaryValue& schema,
251                  base::Value::Type type,
252                  SchemaNode* schema_node,
253                  std::string* error);
254 
255   bool ParseRangedInt(const base::DictionaryValue& schema,
256                        SchemaNode* schema_node,
257                        std::string* error);
258 
259   bool ParseStringPattern(const base::DictionaryValue& schema,
260                           SchemaNode* schema_node,
261                           std::string* error);
262 
263   // Assigns the IDs in |id_map| to the pending references in the
264   // |reference_list|. If an ID is missing then |error| is set and false is
265   // returned; otherwise returns true.
266   static bool ResolveReferences(const IdMap& id_map,
267                                 const ReferenceList& reference_list,
268                                 std::string* error);
269 
270   // Cache for CompileRegex(), will memorize return value of every call to
271   // CompileRegex() and return results directly next time.
272   mutable std::map<std::string, std::unique_ptr<re2::RE2>> regex_cache_;
273 
274   SchemaData schema_data_;
275   std::vector<std::string> strings_;
276   std::vector<SchemaNode> schema_nodes_;
277   std::vector<PropertyNode> property_nodes_;
278   std::vector<PropertiesNode> properties_nodes_;
279   std::vector<RestrictionNode> restriction_nodes_;
280   std::vector<const char*> required_properties_;
281   std::vector<int> int_enums_;
282   std::vector<const char*> string_enums_;
283 
284   DISALLOW_COPY_AND_ASSIGN(InternalStorage);
285 };
286 
InternalStorage()287 Schema::InternalStorage::InternalStorage() {
288 }
289 
~InternalStorage()290 Schema::InternalStorage::~InternalStorage() {
291 }
292 
293 // static
Wrap(const SchemaData * data)294 scoped_refptr<const Schema::InternalStorage> Schema::InternalStorage::Wrap(
295     const SchemaData* data) {
296   InternalStorage* storage = new InternalStorage();
297   storage->schema_data_.schema_nodes = data->schema_nodes;
298   storage->schema_data_.property_nodes = data->property_nodes;
299   storage->schema_data_.properties_nodes = data->properties_nodes;
300   storage->schema_data_.restriction_nodes = data->restriction_nodes;
301   storage->schema_data_.required_properties = data->required_properties;
302   storage->schema_data_.int_enums = data->int_enums;
303   storage->schema_data_.string_enums = data->string_enums;
304   storage->schema_data_.validation_schema_root_index =
305       data->validation_schema_root_index;
306   return storage;
307 }
308 
309 // static
310 scoped_refptr<const Schema::InternalStorage>
ParseSchema(const base::DictionaryValue & schema,std::string * error)311 Schema::InternalStorage::ParseSchema(const base::DictionaryValue& schema,
312                                      std::string* error) {
313   // Determine the sizes of the storage arrays and reserve the capacity before
314   // starting to append nodes and strings. This is important to prevent the
315   // arrays from being reallocated, which would invalidate the c_str() pointers
316   // and the addresses of indices to fix.
317   StorageSizes sizes;
318   DetermineStorageSizes(schema, &sizes);
319 
320   scoped_refptr<InternalStorage> storage = new InternalStorage();
321   storage->strings_.reserve(sizes.strings);
322   storage->schema_nodes_.reserve(sizes.schema_nodes);
323   storage->property_nodes_.reserve(sizes.property_nodes);
324   storage->properties_nodes_.reserve(sizes.properties_nodes);
325   storage->restriction_nodes_.reserve(sizes.restriction_nodes);
326   storage->required_properties_.reserve(sizes.required_properties);
327   storage->int_enums_.reserve(sizes.int_enums);
328   storage->string_enums_.reserve(sizes.string_enums);
329 
330   int root_index = kInvalid;
331   IdMap id_map;
332   ReferenceList reference_list;
333   if (!storage->Parse(schema, &root_index, &id_map, &reference_list, error))
334     return nullptr;
335 
336   if (root_index == kInvalid) {
337     *error = "The main schema can't have a $ref";
338     return nullptr;
339   }
340 
341   // None of this should ever happen without having been already detected.
342   // But, if it does happen, then it will lead to corrupted memory; drop
343   // everything in that case.
344   if (root_index != 0 || sizes.strings != storage->strings_.size() ||
345       sizes.schema_nodes != storage->schema_nodes_.size() ||
346       sizes.property_nodes != storage->property_nodes_.size() ||
347       sizes.properties_nodes != storage->properties_nodes_.size() ||
348       sizes.restriction_nodes != storage->restriction_nodes_.size() ||
349       sizes.required_properties != storage->required_properties_.size() ||
350       sizes.int_enums != storage->int_enums_.size() ||
351       sizes.string_enums != storage->string_enums_.size()) {
352     *error = "Failed to parse the schema due to a Chrome bug. Please file a "
353              "new issue at http://crbug.com";
354     return nullptr;
355   }
356 
357   if (!ResolveReferences(id_map, reference_list, error))
358     return nullptr;
359 
360   SchemaData* data = &storage->schema_data_;
361   data->schema_nodes = storage->schema_nodes_.data();
362   data->property_nodes = storage->property_nodes_.data();
363   data->properties_nodes = storage->properties_nodes_.data();
364   data->restriction_nodes = storage->restriction_nodes_.data();
365   data->required_properties = storage->required_properties_.data();
366   data->int_enums = storage->int_enums_.data();
367   data->string_enums = storage->string_enums_.data();
368   data->validation_schema_root_index = -1;
369   return storage;
370 }
371 
CompileRegex(const std::string & pattern) const372 re2::RE2* Schema::InternalStorage::CompileRegex(
373     const std::string& pattern) const {
374   auto it = regex_cache_.find(pattern);
375   if (it == regex_cache_.end()) {
376     std::unique_ptr<re2::RE2> compiled(new re2::RE2(pattern));
377     re2::RE2* compiled_ptr = compiled.get();
378     regex_cache_.insert(std::make_pair(pattern, std::move(compiled)));
379     return compiled_ptr;
380   }
381   return it->second.get();
382 }
383 
384 // static
DetermineStorageSizes(const base::DictionaryValue & schema,StorageSizes * sizes)385 void Schema::InternalStorage::DetermineStorageSizes(
386     const base::DictionaryValue& schema,
387     StorageSizes* sizes) {
388   std::string ref_string;
389   if (schema.GetString(schema::kRef, &ref_string)) {
390     // Schemas with a "$ref" attribute don't take additional storage.
391     return;
392   }
393 
394   std::string type_string;
395   base::Value::Type type = base::Value::Type::NONE;
396   if (!schema.GetString(schema::kType, &type_string) ||
397       !SchemaTypeToValueType(type_string, &type)) {
398     // This schema is invalid.
399     return;
400   }
401 
402   sizes->schema_nodes++;
403 
404   if (type == base::Value::Type::LIST) {
405     const base::DictionaryValue* items = nullptr;
406     if (schema.GetDictionary(schema::kItems, &items))
407       DetermineStorageSizes(*items, sizes);
408   } else if (type == base::Value::Type::DICTIONARY) {
409     sizes->properties_nodes++;
410 
411     const base::DictionaryValue* dict = nullptr;
412     if (schema.GetDictionary(schema::kAdditionalProperties, &dict))
413       DetermineStorageSizes(*dict, sizes);
414 
415     const base::DictionaryValue* properties = nullptr;
416     if (schema.GetDictionary(schema::kProperties, &properties)) {
417       for (base::DictionaryValue::Iterator it(*properties);
418            !it.IsAtEnd(); it.Advance()) {
419         // This should have been verified by the JSONSchemaValidator.
420         CHECK(it.value().GetAsDictionary(&dict));
421         DetermineStorageSizes(*dict, sizes);
422         sizes->strings++;
423         sizes->property_nodes++;
424       }
425     }
426 
427     const base::DictionaryValue* pattern_properties = nullptr;
428     if (schema.GetDictionary(schema::kPatternProperties, &pattern_properties)) {
429       for (base::DictionaryValue::Iterator it(*pattern_properties);
430            !it.IsAtEnd(); it.Advance()) {
431         CHECK(it.value().GetAsDictionary(&dict));
432         DetermineStorageSizes(*dict, sizes);
433         sizes->strings++;
434         sizes->property_nodes++;
435       }
436     }
437 
438     const base::Value* required_properties = schema.FindKey(schema::kRequired);
439     if (required_properties) {
440       // This should have been verified by the JSONSchemaValidator.
441       CHECK(required_properties->is_list());
442       sizes->strings += required_properties->GetList().size();
443       sizes->required_properties += required_properties->GetList().size();
444     }
445   } else if (schema.HasKey(schema::kEnum)) {
446     const base::ListValue* possible_values = nullptr;
447     if (schema.GetList(schema::kEnum, &possible_values)) {
448       if (type == base::Value::Type::INTEGER) {
449         sizes->int_enums += possible_values->GetSize();
450       } else if (type == base::Value::Type::STRING) {
451         sizes->string_enums += possible_values->GetSize();
452         sizes->strings += possible_values->GetSize();
453       }
454       sizes->restriction_nodes++;
455     }
456   } else if (type == base::Value::Type::INTEGER) {
457     if (schema.HasKey(schema::kMinimum) || schema.HasKey(schema::kMaximum))
458       sizes->restriction_nodes++;
459   } else if (type == base::Value::Type::STRING) {
460     if (schema.HasKey(schema::kPattern)) {
461       sizes->strings++;
462       sizes->string_enums++;
463       sizes->restriction_nodes++;
464     }
465   }
466 }
467 
Parse(const base::DictionaryValue & schema,int * index,IdMap * id_map,ReferenceList * reference_list,std::string * error)468 bool Schema::InternalStorage::Parse(const base::DictionaryValue& schema,
469                                     int* index,
470                                     IdMap* id_map,
471                                     ReferenceList* reference_list,
472                                     std::string* error) {
473   std::string ref_string;
474   if (schema.GetString(schema::kRef, &ref_string)) {
475     std::string id_string;
476     if (schema.GetString(schema::kId, &id_string)) {
477       *error = "Schemas with a $ref can't have an id";
478       return false;
479     }
480     reference_list->push_back(std::make_pair(ref_string, index));
481     return true;
482   }
483 
484   std::string type_string;
485   if (!schema.GetString(schema::kType, &type_string)) {
486     *error = "The schema type must be declared.";
487     return false;
488   }
489 
490   base::Value::Type type = base::Value::Type::NONE;
491   if (!SchemaTypeToValueType(type_string, &type)) {
492     *error = "Type not supported: " + type_string;
493     return false;
494   }
495 
496   *index = static_cast<int>(schema_nodes_.size());
497   schema_nodes_.push_back(SchemaNode());
498   SchemaNode* schema_node = &schema_nodes_.back();
499   schema_node->type = type;
500   schema_node->extra = kInvalid;
501 
502   if (type == base::Value::Type::DICTIONARY) {
503     if (!ParseDictionary(schema, schema_node, id_map, reference_list, error))
504       return false;
505   } else if (type == base::Value::Type::LIST) {
506     if (!ParseList(schema, schema_node, id_map, reference_list, error))
507       return false;
508   } else if (schema.HasKey(schema::kEnum)) {
509     if (!ParseEnum(schema, type, schema_node, error))
510       return false;
511   } else if (schema.HasKey(schema::kPattern)) {
512     if (!ParseStringPattern(schema, schema_node, error))
513       return false;
514   } else if (schema.HasKey(schema::kMinimum) ||
515              schema.HasKey(schema::kMaximum)) {
516     if (type != base::Value::Type::INTEGER) {
517       *error = "Only integers can have minimum and maximum";
518       return false;
519     }
520     if (!ParseRangedInt(schema, schema_node, error))
521       return false;
522   }
523   std::string id_string;
524   if (schema.GetString(schema::kId, &id_string)) {
525     if (base::ContainsKey(*id_map, id_string)) {
526       *error = "Duplicated id: " + id_string;
527       return false;
528     }
529     (*id_map)[id_string] = *index;
530   }
531 
532   return true;
533 }
534 
ParseDictionary(const base::DictionaryValue & schema,SchemaNode * schema_node,IdMap * id_map,ReferenceList * reference_list,std::string * error)535 bool Schema::InternalStorage::ParseDictionary(
536     const base::DictionaryValue& schema,
537     SchemaNode* schema_node,
538     IdMap* id_map,
539     ReferenceList* reference_list,
540     std::string* error) {
541   int extra = static_cast<int>(properties_nodes_.size());
542   properties_nodes_.push_back(PropertiesNode());
543   properties_nodes_[extra].additional = kInvalid;
544   schema_node->extra = extra;
545 
546   const base::DictionaryValue* dict = nullptr;
547   if (schema.GetDictionary(schema::kAdditionalProperties, &dict)) {
548     if (!Parse(*dict, &properties_nodes_[extra].additional,
549                id_map, reference_list, error)) {
550       return false;
551     }
552   }
553 
554   properties_nodes_[extra].begin = static_cast<int>(property_nodes_.size());
555 
556   const base::DictionaryValue* properties = nullptr;
557   if (schema.GetDictionary(schema::kProperties, &properties)) {
558     // This and below reserves nodes for all of the |properties|, and makes sure
559     // they are contiguous. Recursive calls to Parse() will append after these
560     // elements.
561     property_nodes_.resize(property_nodes_.size() + properties->size());
562   }
563 
564   properties_nodes_[extra].end = static_cast<int>(property_nodes_.size());
565 
566   const base::DictionaryValue* pattern_properties = nullptr;
567   if (schema.GetDictionary(schema::kPatternProperties, &pattern_properties))
568     property_nodes_.resize(property_nodes_.size() + pattern_properties->size());
569 
570   properties_nodes_[extra].pattern_end =
571       static_cast<int>(property_nodes_.size());
572 
573   if (properties != nullptr) {
574     int base_index = properties_nodes_[extra].begin;
575     int index = base_index;
576 
577     for (base::DictionaryValue::Iterator it(*properties);
578          !it.IsAtEnd(); it.Advance(), ++index) {
579       // This should have been verified by the JSONSchemaValidator.
580       CHECK(it.value().GetAsDictionary(&dict));
581       strings_.push_back(it.key());
582       property_nodes_[index].key = strings_.back().c_str();
583       if (!Parse(*dict, &property_nodes_[index].schema,
584                  id_map, reference_list, error)) {
585         return false;
586       }
587     }
588     CHECK_EQ(static_cast<int>(properties->size()), index - base_index);
589   }
590 
591   if (pattern_properties != nullptr) {
592     int base_index = properties_nodes_[extra].end;
593     int index = base_index;
594 
595     for (base::DictionaryValue::Iterator it(*pattern_properties);
596          !it.IsAtEnd(); it.Advance(), ++index) {
597       CHECK(it.value().GetAsDictionary(&dict));
598       re2::RE2* compiled_regex = CompileRegex(it.key());
599       if (!compiled_regex->ok()) {
600         *error =
601             "/" + it.key() + "/ is a invalid regex: " + compiled_regex->error();
602         return false;
603       }
604       strings_.push_back(it.key());
605       property_nodes_[index].key = strings_.back().c_str();
606       if (!Parse(*dict, &property_nodes_[index].schema,
607                  id_map, reference_list, error)) {
608         return false;
609       }
610     }
611     CHECK_EQ(static_cast<int>(pattern_properties->size()), index - base_index);
612   }
613 
614   properties_nodes_[extra].required_begin = required_properties_.size();
615   const base::Value* required_properties = schema.FindKey(schema::kRequired);
616   if (required_properties) {
617     for (const base::Value& val : required_properties->GetList()) {
618       strings_.push_back(val.GetString());
619       required_properties_.push_back(strings_.back().c_str());
620     }
621   }
622   properties_nodes_[extra].required_end = required_properties_.size();
623 
624   if (properties_nodes_[extra].begin == properties_nodes_[extra].pattern_end) {
625     properties_nodes_[extra].begin = kInvalid;
626     properties_nodes_[extra].end = kInvalid;
627     properties_nodes_[extra].pattern_end = kInvalid;
628     properties_nodes_[extra].required_begin = kInvalid;
629     properties_nodes_[extra].required_end = kInvalid;
630   }
631 
632   return true;
633 }
634 
ParseList(const base::DictionaryValue & schema,SchemaNode * schema_node,IdMap * id_map,ReferenceList * reference_list,std::string * error)635 bool Schema::InternalStorage::ParseList(const base::DictionaryValue& schema,
636                                         SchemaNode* schema_node,
637                                         IdMap* id_map,
638                                         ReferenceList* reference_list,
639                                         std::string* error) {
640   const base::DictionaryValue* dict = nullptr;
641   if (!schema.GetDictionary(schema::kItems, &dict)) {
642     *error = "Arrays must declare a single schema for their items.";
643     return false;
644   }
645   return Parse(*dict, &schema_node->extra, id_map, reference_list, error);
646 }
647 
ParseEnum(const base::DictionaryValue & schema,base::Value::Type type,SchemaNode * schema_node,std::string * error)648 bool Schema::InternalStorage::ParseEnum(const base::DictionaryValue& schema,
649                                         base::Value::Type type,
650                                         SchemaNode* schema_node,
651                                         std::string* error) {
652   const base::ListValue* possible_values = nullptr;
653   if (!schema.GetList(schema::kEnum, &possible_values)) {
654     *error = "Enum attribute must be a list value";
655     return false;
656   }
657   if (possible_values->empty()) {
658     *error = "Enum attribute must be non-empty";
659     return false;
660   }
661   int offset_begin;
662   int offset_end;
663   if (type == base::Value::Type::INTEGER) {
664     offset_begin = static_cast<int>(int_enums_.size());
665     int value;
666     for (base::ListValue::const_iterator it = possible_values->begin();
667          it != possible_values->end(); ++it) {
668       if (!it->GetAsInteger(&value)) {
669         *error = "Invalid enumeration member type";
670         return false;
671       }
672       int_enums_.push_back(value);
673     }
674     offset_end = static_cast<int>(int_enums_.size());
675   } else if (type == base::Value::Type::STRING) {
676     offset_begin = static_cast<int>(string_enums_.size());
677     std::string value;
678     for (base::ListValue::const_iterator it = possible_values->begin();
679          it != possible_values->end(); ++it) {
680       if (!it->GetAsString(&value)) {
681         *error = "Invalid enumeration member type";
682         return false;
683       }
684       strings_.push_back(value);
685       string_enums_.push_back(strings_.back().c_str());
686     }
687     offset_end = static_cast<int>(string_enums_.size());
688   } else {
689     *error = "Enumeration is only supported for integer and string.";
690     return false;
691   }
692   schema_node->extra = static_cast<int>(restriction_nodes_.size());
693   restriction_nodes_.push_back(RestrictionNode());
694   restriction_nodes_.back().enumeration_restriction.offset_begin = offset_begin;
695   restriction_nodes_.back().enumeration_restriction.offset_end = offset_end;
696   return true;
697 }
698 
ParseRangedInt(const base::DictionaryValue & schema,SchemaNode * schema_node,std::string * error)699 bool Schema::InternalStorage::ParseRangedInt(
700     const base::DictionaryValue& schema,
701     SchemaNode* schema_node,
702     std::string* error) {
703   int min_value = INT_MIN;
704   int max_value = INT_MAX;
705   int value;
706   if (schema.GetInteger(schema::kMinimum, &value))
707     min_value = value;
708   if (schema.GetInteger(schema::kMaximum, &value))
709     max_value = value;
710   if (min_value > max_value) {
711     *error = "Invalid range restriction for int type.";
712     return false;
713   }
714   schema_node->extra = static_cast<int>(restriction_nodes_.size());
715   restriction_nodes_.push_back(RestrictionNode());
716   restriction_nodes_.back().ranged_restriction.max_value = max_value;
717   restriction_nodes_.back().ranged_restriction.min_value = min_value;
718   return true;
719 }
720 
ParseStringPattern(const base::DictionaryValue & schema,SchemaNode * schema_node,std::string * error)721 bool Schema::InternalStorage::ParseStringPattern(
722     const base::DictionaryValue& schema,
723     SchemaNode* schema_node,
724     std::string* error) {
725   std::string pattern;
726   if (!schema.GetString(schema::kPattern, &pattern)) {
727     *error = "Schema pattern must be a string.";
728     return false;
729   }
730   re2::RE2* compiled_regex = CompileRegex(pattern);
731   if (!compiled_regex->ok()) {
732     *error = "/" + pattern + "/ is invalid regex: " + compiled_regex->error();
733     return false;
734   }
735   int index = static_cast<int>(string_enums_.size());
736   strings_.push_back(pattern);
737   string_enums_.push_back(strings_.back().c_str());
738   schema_node->extra = static_cast<int>(restriction_nodes_.size());
739   restriction_nodes_.push_back(RestrictionNode());
740   restriction_nodes_.back().string_pattern_restriction.pattern_index = index;
741   restriction_nodes_.back().string_pattern_restriction.pattern_index_backup =
742       index;
743   return true;
744 }
745 
746 // static
ResolveReferences(const IdMap & id_map,const ReferenceList & reference_list,std::string * error)747 bool Schema::InternalStorage::ResolveReferences(
748     const IdMap& id_map,
749     const ReferenceList& reference_list,
750     std::string* error) {
751   for (ReferenceList::const_iterator ref = reference_list.begin();
752        ref != reference_list.end(); ++ref) {
753     IdMap::const_iterator id = id_map.find(ref->first);
754     if (id == id_map.end()) {
755       *error = "Invalid $ref: " + ref->first;
756       return false;
757     }
758     *ref->second = id->second;
759   }
760   return true;
761 }
762 
Iterator(const scoped_refptr<const InternalStorage> & storage,const PropertiesNode * node)763 Schema::Iterator::Iterator(const scoped_refptr<const InternalStorage>& storage,
764                            const PropertiesNode* node)
765     : storage_(storage),
766       it_(storage->property(node->begin)),
767       end_(storage->property(node->end)) {}
768 
Iterator(const Iterator & iterator)769 Schema::Iterator::Iterator(const Iterator& iterator)
770     : storage_(iterator.storage_),
771       it_(iterator.it_),
772       end_(iterator.end_) {}
773 
~Iterator()774 Schema::Iterator::~Iterator() {}
775 
operator =(const Iterator & iterator)776 Schema::Iterator& Schema::Iterator::operator=(const Iterator& iterator) {
777   storage_ = iterator.storage_;
778   it_ = iterator.it_;
779   end_ = iterator.end_;
780   return *this;
781 }
782 
IsAtEnd() const783 bool Schema::Iterator::IsAtEnd() const {
784   return it_ == end_;
785 }
786 
Advance()787 void Schema::Iterator::Advance() {
788   ++it_;
789 }
790 
key() const791 const char* Schema::Iterator::key() const {
792   return it_->key;
793 }
794 
schema() const795 Schema Schema::Iterator::schema() const {
796   return Schema(storage_, storage_->schema(it_->schema));
797 }
798 
Schema()799 Schema::Schema() : node_(nullptr) {}
800 
Schema(const scoped_refptr<const InternalStorage> & storage,const SchemaNode * node)801 Schema::Schema(const scoped_refptr<const InternalStorage>& storage,
802                const SchemaNode* node)
803     : storage_(storage), node_(node) {}
804 
// Copy constructor: memberwise copy of |storage_| and |node_|.
Schema::Schema(const Schema& schema) = default;
807 
~Schema()808 Schema::~Schema() {}
809 
// Copy assignment: memberwise assignment of |storage_| and |node_|.
Schema& Schema::operator=(const Schema& schema) = default;
815 
816 // static
Wrap(const SchemaData * data)817 Schema Schema::Wrap(const SchemaData* data) {
818   scoped_refptr<const InternalStorage> storage = InternalStorage::Wrap(data);
819   return Schema(storage, storage->root_node());
820 }
821 
Validate(const base::Value & value,SchemaOnErrorStrategy strategy,std::string * error_path,std::string * error) const822 bool Schema::Validate(const base::Value& value,
823                       SchemaOnErrorStrategy strategy,
824                       std::string* error_path,
825                       std::string* error) const {
826   if (!valid()) {
827     SchemaErrorFound(error_path, error, "The schema is invalid.");
828     return false;
829   }
830 
831   if (value.type() != type()) {
832     // Allow the integer to double promotion. Note that range restriction on
833     // double is not supported now.
834     if (value.is_int() && type() == base::Value::Type::DOUBLE) {
835       return true;
836     }
837 
838     SchemaErrorFound(
839         error_path, error, "The value type doesn't match the schema type.");
840     return false;
841   }
842 
843   const base::DictionaryValue* dict = nullptr;
844   const base::ListValue* list = nullptr;
845   int int_value;
846   std::string str_value;
847   if (value.GetAsDictionary(&dict)) {
848     base::flat_set<std::string> present_properties;
849     for (base::DictionaryValue::Iterator it(*dict); !it.IsAtEnd();
850          it.Advance()) {
851       SchemaList schema_list = GetMatchingProperties(it.key());
852       if (schema_list.empty()) {
853         // Unknown property was detected.
854         SchemaErrorFound(error_path, error, "Unknown property: " + it.key());
855         if (!StrategyAllowUnknownOnTopLevel(strategy))
856           return false;
857       } else {
858         bool all_subschemas_are_valid = true;
859         for (SchemaList::iterator subschema = schema_list.begin();
860              subschema != schema_list.end(); ++subschema) {
861           if (!subschema->Validate(it.value(),
862                                    StrategyForNextLevel(strategy),
863                                    error_path,
864                                    error)) {
865             // Invalid property was detected.
866             all_subschemas_are_valid = false;
867             AddDictKeyPrefixToPath(it.key(), error_path);
868             if (!StrategyAllowInvalidOnTopLevel(strategy))
869               return false;
870           }
871         }
872         if (all_subschemas_are_valid)
873           present_properties.insert(it.key());
874       }
875     }
876 
877     for (const auto& required_property : GetRequiredProperties()) {
878       if (base::ContainsKey(present_properties, required_property))
879         continue;
880 
881       SchemaErrorFound(
882           error_path, error,
883           "Missing or invalid required property: " + required_property);
884       return false;
885     }
886   } else if (value.GetAsList(&list)) {
887     for (base::ListValue::const_iterator it = list->begin(); it != list->end();
888          ++it) {
889       if (!GetItems().Validate(*it, StrategyForNextLevel(strategy), error_path,
890                                error)) {
891         // Invalid list item was detected.
892         AddListIndexPrefixToPath(it - list->begin(), error_path);
893         if (!StrategyAllowInvalidOnTopLevel(strategy))
894           return false;
895       }
896     }
897   } else if (value.GetAsInteger(&int_value)) {
898     if (node_->extra != kInvalid &&
899         !ValidateIntegerRestriction(node_->extra, int_value)) {
900       SchemaErrorFound(error_path, error, "Invalid value for integer");
901       return false;
902     }
903   } else if (value.GetAsString(&str_value)) {
904     if (node_->extra != kInvalid &&
905         !ValidateStringRestriction(node_->extra, str_value.c_str())) {
906       SchemaErrorFound(error_path, error, "Invalid value for string");
907       return false;
908     }
909   }
910 
911   return true;
912 }
913 
Normalize(base::Value * value,SchemaOnErrorStrategy strategy,std::string * error_path,std::string * error,bool * changed) const914 bool Schema::Normalize(base::Value* value,
915                        SchemaOnErrorStrategy strategy,
916                        std::string* error_path,
917                        std::string* error,
918                        bool* changed) const {
919   if (!valid()) {
920     SchemaErrorFound(error_path, error, "The schema is invalid.");
921     return false;
922   }
923 
924   if (value->type() != type()) {
925     // Allow the integer to double promotion. Note that range restriction on
926     // double is not supported now.
927     if (value->is_int() && type() == base::Value::Type::DOUBLE) {
928       return true;
929     }
930 
931     SchemaErrorFound(
932         error_path, error, "The value type doesn't match the schema type.");
933     return false;
934   }
935 
936   base::DictionaryValue* dict = nullptr;
937   base::ListValue* list = nullptr;
938   if (value->GetAsDictionary(&dict)) {
939     base::flat_set<std::string> present_properties;
940     std::vector<std::string> drop_list;  // Contains the keys to drop.
941     for (base::DictionaryValue::Iterator it(*dict); !it.IsAtEnd();
942          it.Advance()) {
943       SchemaList schema_list = GetMatchingProperties(it.key());
944       if (schema_list.empty()) {
945         // Unknown property was detected.
946         SchemaErrorFound(error_path, error, "Unknown property: " + it.key());
947         if (StrategyAllowUnknownOnTopLevel(strategy))
948           drop_list.push_back(it.key());
949         else
950           return false;
951       } else {
952         bool all_subschemas_are_valid = true;
953         for (SchemaList::iterator subschema = schema_list.begin();
954              subschema != schema_list.end(); ++subschema) {
955           base::Value* sub_value = nullptr;
956           dict->GetWithoutPathExpansion(it.key(), &sub_value);
957           if (!subschema->Normalize(sub_value,
958                                     StrategyForNextLevel(strategy),
959                                     error_path,
960                                     error,
961                                     changed)) {
962             // Invalid property was detected.
963             all_subschemas_are_valid = false;
964             AddDictKeyPrefixToPath(it.key(), error_path);
965             if (StrategyAllowInvalidOnTopLevel(strategy)) {
966               drop_list.push_back(it.key());
967               break;
968             } else {
969               return false;
970             }
971           }
972         }
973         if (all_subschemas_are_valid)
974           present_properties.insert(it.key());
975       }
976     }
977 
978     for (const auto& required_property : GetRequiredProperties()) {
979       if (base::ContainsKey(present_properties, required_property))
980         continue;
981 
982       SchemaErrorFound(
983           error_path, error,
984           "Missing or invalid required property: " + required_property);
985       return false;
986     }
987 
988     if (changed && !drop_list.empty())
989       *changed = true;
990     for (std::vector<std::string>::const_iterator it = drop_list.begin();
991          it != drop_list.end();
992          ++it) {
993       dict->RemoveWithoutPathExpansion(*it, nullptr);
994     }
995     return true;
996   } else if (value->GetAsList(&list)) {
997     std::vector<size_t> drop_list;  // Contains the indexes to drop.
998     for (size_t index = 0; index < list->GetSize(); index++) {
999       base::Value* sub_value = nullptr;
1000       list->Get(index, &sub_value);
1001       if (!sub_value || !GetItems().Normalize(sub_value,
1002                                               StrategyForNextLevel(strategy),
1003                                               error_path,
1004                                               error,
1005                                               changed)) {
1006         // Invalid list item was detected.
1007         AddListIndexPrefixToPath(index, error_path);
1008         if (StrategyAllowInvalidOnTopLevel(strategy))
1009           drop_list.push_back(index);
1010         else
1011           return false;
1012       }
1013     }
1014     if (changed && !drop_list.empty())
1015       *changed = true;
1016     for (std::vector<size_t>::reverse_iterator it = drop_list.rbegin();
1017          it != drop_list.rend(); ++it) {
1018       list->Remove(*it, nullptr);
1019     }
1020     return true;
1021   }
1022 
1023   return Validate(*value, strategy, error_path, error);
1024 }
1025 
1026 // static
Parse(const std::string & content,std::string * error)1027 Schema Schema::Parse(const std::string& content, std::string* error) {
1028   // Validate as a generic JSON schema, and ignore unknown attributes; they
1029   // may become used in a future version of the schema format.
1030   std::unique_ptr<base::DictionaryValue> dict =
1031       JSONSchemaValidator::IsValidSchema(
1032           content, JSONSchemaValidator::OPTIONS_IGNORE_UNKNOWN_ATTRIBUTES,
1033           error);
1034   if (!dict)
1035     return Schema();
1036 
1037   // Validate the main type.
1038   std::string string_value;
1039   if (!dict->GetString(schema::kType, &string_value) ||
1040       string_value != schema::kObject) {
1041     *error =
1042         "The main schema must have a type attribute with \"object\" value.";
1043     return Schema();
1044   }
1045 
1046   // Checks for invalid attributes at the top-level.
1047   if (dict->HasKey(schema::kAdditionalProperties) ||
1048       dict->HasKey(schema::kPatternProperties)) {
1049     *error = "\"additionalProperties\" and \"patternProperties\" are not "
1050              "supported at the main schema.";
1051     return Schema();
1052   }
1053 
1054   scoped_refptr<const InternalStorage> storage =
1055       InternalStorage::ParseSchema(*dict, error);
1056   if (!storage)
1057     return Schema();
1058   return Schema(storage, storage->root_node());
1059 }
1060 
type() const1061 base::Value::Type Schema::type() const {
1062   CHECK(valid());
1063   return node_->type;
1064 }
1065 
GetPropertiesIterator() const1066 Schema::Iterator Schema::GetPropertiesIterator() const {
1067   CHECK(valid());
1068   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1069   return Iterator(storage_, storage_->properties(node_->extra));
1070 }
1071 
1072 namespace {
1073 
CompareKeys(const PropertyNode & node,const std::string & key)1074 bool CompareKeys(const PropertyNode& node, const std::string& key) {
1075   return node.key < key;
1076 }
1077 
1078 }  // namespace
1079 
GetKnownProperty(const std::string & key) const1080 Schema Schema::GetKnownProperty(const std::string& key) const {
1081   CHECK(valid());
1082   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1083   const PropertiesNode* node = storage_->properties(node_->extra);
1084   const PropertyNode* begin = storage_->property(node->begin);
1085   const PropertyNode* end = storage_->property(node->end);
1086   const PropertyNode* it = std::lower_bound(begin, end, key, CompareKeys);
1087   if (it != end && it->key == key)
1088     return Schema(storage_, storage_->schema(it->schema));
1089   return Schema();
1090 }
1091 
GetAdditionalProperties() const1092 Schema Schema::GetAdditionalProperties() const {
1093   CHECK(valid());
1094   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1095   const PropertiesNode* node = storage_->properties(node_->extra);
1096   if (node->additional == kInvalid)
1097     return Schema();
1098   return Schema(storage_, storage_->schema(node->additional));
1099 }
1100 
GetPatternProperties(const std::string & key) const1101 SchemaList Schema::GetPatternProperties(const std::string& key) const {
1102   CHECK(valid());
1103   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1104   const PropertiesNode* node = storage_->properties(node_->extra);
1105   const PropertyNode* begin = storage_->property(node->end);
1106   const PropertyNode* end = storage_->property(node->pattern_end);
1107   SchemaList matching_properties;
1108   for (const PropertyNode* it = begin; it != end; ++it) {
1109     if (re2::RE2::PartialMatch(key, *storage_->CompileRegex(it->key))) {
1110       matching_properties.push_back(
1111           Schema(storage_, storage_->schema(it->schema)));
1112     }
1113   }
1114   return matching_properties;
1115 }
1116 
GetRequiredProperties() const1117 std::vector<std::string> Schema::GetRequiredProperties() const {
1118   CHECK(valid());
1119   CHECK_EQ(base::Value::Type::DICTIONARY, type());
1120   const PropertiesNode* node = storage_->properties(node_->extra);
1121   const size_t begin = node->required_begin;
1122   const size_t end = node->required_end;
1123 
1124   return std::vector<std::string>(storage_->required_property(begin),
1125                                   storage_->required_property(end));
1126 }
1127 
GetProperty(const std::string & key) const1128 Schema Schema::GetProperty(const std::string& key) const {
1129   Schema schema = GetKnownProperty(key);
1130   if (schema.valid())
1131     return schema;
1132   return GetAdditionalProperties();
1133 }
1134 
GetMatchingProperties(const std::string & key) const1135 SchemaList Schema::GetMatchingProperties(const std::string& key) const {
1136   SchemaList schema_list;
1137 
1138   Schema known_property = GetKnownProperty(key);
1139   if (known_property.valid())
1140     schema_list.push_back(known_property);
1141 
1142   SchemaList pattern_properties = GetPatternProperties(key);
1143   schema_list.insert(
1144       schema_list.end(), pattern_properties.begin(), pattern_properties.end());
1145 
1146   if (schema_list.empty()) {
1147     Schema additional_property = GetAdditionalProperties();
1148     if (additional_property.valid())
1149       schema_list.push_back(additional_property);
1150   }
1151 
1152   return schema_list;
1153 }
1154 
GetItems() const1155 Schema Schema::GetItems() const {
1156   CHECK(valid());
1157   CHECK_EQ(base::Value::Type::LIST, type());
1158   if (node_->extra == kInvalid)
1159     return Schema();
1160   return Schema(storage_, storage_->schema(node_->extra));
1161 }
1162 
ValidateIntegerRestriction(int index,int value) const1163 bool Schema::ValidateIntegerRestriction(int index, int value) const {
1164   const RestrictionNode* rnode = storage_->restriction(index);
1165   if (rnode->ranged_restriction.min_value <=
1166       rnode->ranged_restriction.max_value) {
1167     return rnode->ranged_restriction.min_value <= value &&
1168            rnode->ranged_restriction.max_value >= value;
1169   } else {
1170     for (int i = rnode->enumeration_restriction.offset_begin;
1171          i < rnode->enumeration_restriction.offset_end; ++i) {
1172       if (*storage_->int_enums(i) == value)
1173         return true;
1174     }
1175     return false;
1176   }
1177 }
1178 
ValidateStringRestriction(int index,const char * str) const1179 bool Schema::ValidateStringRestriction(int index, const char* str) const {
1180   const RestrictionNode* rnode = storage_->restriction(index);
1181   if (rnode->enumeration_restriction.offset_begin <
1182       rnode->enumeration_restriction.offset_end) {
1183     for (int i = rnode->enumeration_restriction.offset_begin;
1184          i < rnode->enumeration_restriction.offset_end; ++i) {
1185       if (strcmp(*storage_->string_enums(i), str) == 0)
1186         return true;
1187     }
1188     return false;
1189   } else {
1190     int index = rnode->string_pattern_restriction.pattern_index;
1191     DCHECK(index == rnode->string_pattern_restriction.pattern_index_backup);
1192     re2::RE2* regex = storage_->CompileRegex(*storage_->string_enums(index));
1193     return re2::RE2::PartialMatch(str, *regex);
1194   }
1195 }
1196 
GetValidationSchema() const1197 Schema Schema::GetValidationSchema() const {
1198   CHECK(valid());
1199   const SchemaNode* validation_schema_root_node =
1200       storage_->validation_schema_root_node();
1201   if (!validation_schema_root_node)
1202     return Schema();
1203   return Schema(storage_, validation_schema_root_node);
1204 }
1205 
1206 }  // namespace policy
1207