1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_SCHEMA_SECTION_MANAGER_H_ 16 #define ICING_SCHEMA_SECTION_MANAGER_H_ 17 18 #include <memory> 19 #include <string> 20 #include <string_view> 21 #include <vector> 22 23 #include "icing/text_classifier/lib3/utils/base/statusor.h" 24 #include "icing/proto/document.pb.h" 25 #include "icing/schema/schema-util.h" 26 #include "icing/schema/section.h" 27 #include "icing/store/document-filter-data.h" 28 #include "icing/store/key-mapper.h" 29 30 namespace icing { 31 namespace lib { 32 33 inline constexpr std::string_view kPropertySeparator = "."; 34 inline constexpr std::string_view kLBracket = "["; 35 inline constexpr std::string_view kRBracket = "]"; 36 37 // This class provides section-related operations. It assigns sections according 38 // to type configs and extracts section / sections from documents. 39 class SectionManager { 40 public: 41 SectionManager(const SectionManager&) = delete; 42 SectionManager& operator=(const SectionManager&) = delete; 43 44 // Factory function to create a SectionManager which does not take ownership 45 // of any input components, and all pointers must refer to valid objects that 46 // outlive the created SectionManager instance. 47 // 48 // Returns: 49 // A SectionManager on success 50 // FAILED_PRECONDITION on any null pointer input 51 // INVALID_ARGUMENT if infinite loop detected in the type configs 52 // OUT_OF_RANGE if number of properties need indexing exceeds the max number 53 // NOT_FOUND if any type config name not found in the map 54 static libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>> Create( 55 const SchemaUtil::TypeConfigMap& type_config_map, 56 const KeyMapper<SchemaTypeId>* schema_type_mapper); 57 58 // Finds content of a section by section path (e.g. property1.property2) 59 // 60 // Returns: 61 // A string of content on success 62 // NOT_FOUND if: 63 // 1. Property is optional and not found in the document 64 // 2. section_path is invalid 65 // 3. Content is empty 66 libtextclassifier3::StatusOr<std::vector<std::string_view>> 67 GetStringSectionContent(const DocumentProto& document, 68 std::string_view section_path) const; 69 70 // Finds content of a section by id 71 // 72 // Returns: 73 // A string of content on success 74 // INVALID_ARGUMENT if section id is invalid 75 // NOT_FOUND if type config name of document not found 76 libtextclassifier3::StatusOr<std::vector<std::string_view>> 77 GetStringSectionContent(const DocumentProto& document, 78 SectionId section_id) const; 79 80 // Returns the SectionMetadata associated with the SectionId that's in the 81 // SchemaTypeId. 82 // 83 // Returns: 84 // pointer to SectionMetadata on success 85 // INVALID_ARGUMENT if schema type id or section is invalid 86 libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata( 87 SchemaTypeId schema_type_id, SectionId section_id) const; 88 89 // Extracts all sections from the given document, sections are sorted by 90 // section id in increasing order. Section ids start from 0. Sections with 91 // empty content won't be returned. 92 // 93 // Returns: 94 // A list of sections on success 95 // NOT_FOUND if type config name of document not found 96 libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections( 97 const DocumentProto& document) const; 98 99 // Returns: 100 // - On success, the section metadatas for the specified type 101 // - NOT_FOUND if the type config name is not present in the schema 102 libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*> 103 GetMetadataList(const std::string& type_config_name) const; 104 105 private: 106 // Use SectionManager::Create() to instantiate 107 explicit SectionManager( 108 const KeyMapper<SchemaTypeId>* schema_type_mapper, 109 std::vector<std::vector<SectionMetadata>>&& section_metadata_cache); 110 111 // Maps schema types to a densely-assigned unique id. 112 const KeyMapper<SchemaTypeId>& schema_type_mapper_; 113 114 // The index of section_metadata_cache_ corresponds to a schema type's 115 // SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The 116 // inner vector's index corresponds to a section's SectionId. At the SectionId 117 // index, we store the SectionMetadata of that section. 118 // 119 // For example, pretend "email" had a SchemaTypeId of 0 and it had a section 120 // called "subject" with a SectionId of 1. Then there would exist a vector 121 // that holds the "subject" property's SectionMetadata at index 1. This vector 122 // would be stored at index 0 of the section_metadata_cache_ vector. 123 const std::vector<std::vector<SectionMetadata>> section_metadata_cache_; 124 }; 125 126 } // namespace lib 127 } // namespace icing 128 129 #endif // ICING_SCHEMA_SECTION_MANAGER_H_ 130