1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_SECTION_MANAGER_H_
16 #define ICING_SCHEMA_SECTION_MANAGER_H_
17 
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <vector>
22 
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/proto/document.pb.h"
25 #include "icing/schema/schema-util.h"
26 #include "icing/schema/section.h"
27 #include "icing/store/document-filter-data.h"
28 #include "icing/store/key-mapper.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 inline constexpr std::string_view kPropertySeparator = ".";  // Separates property names in a section path, e.g. property1.property2
34 inline constexpr std::string_view kLBracket = "[";  // NOTE(review): usage not visible in this header - confirm
35 inline constexpr std::string_view kRBracket = "]";  // NOTE(review): usage not visible in this header - confirm
36 
37 // Provides section-related operations: assigns sections according to type
38 // configs and extracts one or all sections from documents. Not copyable.
39 class SectionManager {
40  public:
41   SectionManager(const SectionManager&) = delete;
42   SectionManager& operator=(const SectionManager&) = delete;
43 
44   // Factory function to create a SectionManager. The manager does not take
45   // ownership of any input component; every pointer must refer to a valid
46   // object that outlives the created SectionManager instance.
47   //
48   // Returns:
49   //   A SectionManager on success
50   //   FAILED_PRECONDITION on any null pointer input
51   //   INVALID_ARGUMENT if an infinite loop is detected in the type configs
52   //   OUT_OF_RANGE if the number of properties to index exceeds the maximum
53   //   NOT_FOUND if any type config name is not found in the map
54   static libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>> Create(
55       const SchemaUtil::TypeConfigMap& type_config_map,
56       const KeyMapper<SchemaTypeId>* schema_type_mapper);
57 
58   // Finds the content of a section by section path (e.g. property1.property2).
59   // NOTE(review): returned views presumably point into `document` - confirm.
60   // Returns:
61   //   A vector of string_views holding the section's content on success
62   //   NOT_FOUND if:
63   //     1. Property is optional and not found in the document
64   //     2. section_path is invalid
65   //     3. Content is empty
66   libtextclassifier3::StatusOr<std::vector<std::string_view>>
67   GetStringSectionContent(const DocumentProto& document,
68                           std::string_view section_path) const;
69 
70   // Finds the content of a section by section id.
71   // NOTE(review): returned views presumably point into `document` - confirm.
72   // Returns:
73   //   A vector of string_views holding the section's content on success
74   //   INVALID_ARGUMENT if section id is invalid
75   //   NOT_FOUND if the document's type config name is not found
76   libtextclassifier3::StatusOr<std::vector<std::string_view>>
77   GetStringSectionContent(const DocumentProto& document,
78                           SectionId section_id) const;
79 
80   // Returns the SectionMetadata associated with the SectionId that's in the
81   // SchemaTypeId.
82   //
83   // Returns:
84   //   pointer to SectionMetadata on success
85   //   INVALID_ARGUMENT if schema type id or section id is invalid
86   libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
87       SchemaTypeId schema_type_id, SectionId section_id) const;
88 
89   // Extracts all sections from the given document. Sections are sorted by
90   // section id in increasing order; section ids start from 0. Sections with
91   // empty content won't be returned.
92   //
93   // Returns:
94   //   A list of sections on success
95   //   NOT_FOUND if the document's type config name is not found
96   libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
97       const DocumentProto& document) const;
98 
99   // Looks up the section metadata list by type config name. Returns:
100   //   - On success, the section metadatas for the specified type
101   //   - NOT_FOUND if the type config name is not present in the schema
102   libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
103   GetMetadataList(const std::string& type_config_name) const;
104 
105  private:
106   // Private constructor; use SectionManager::Create() to instantiate.
107   explicit SectionManager(
108       const KeyMapper<SchemaTypeId>* schema_type_mapper,
109       std::vector<std::vector<SectionMetadata>>&& section_metadata_cache);
110 
111   // Maps schema types to a densely-assigned unique id. Not owned.
112   const KeyMapper<SchemaTypeId>& schema_type_mapper_;
113 
114   // The index of section_metadata_cache_ corresponds to a schema type's
115   // SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The
116   // inner vector's index corresponds to a section's SectionId. At the SectionId
117   // index, we store the SectionMetadata of that section.
118   //
119   // For example, pretend "email" had a SchemaTypeId of 0 and it had a section
120   // called "subject" with a SectionId of 1. Then there would exist a vector
121   // that holds the "subject" property's SectionMetadata at index 1. This vector
122   // would be stored at index 0 of the section_metadata_cache_ vector.
123   const std::vector<std::vector<SectionMetadata>> section_metadata_cache_;
124 };
125 
126 }  // namespace lib
127 }  // namespace icing
128 
129 #endif  // ICING_SCHEMA_SECTION_MANAGER_H_
130