1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_SECTION_H_
16 #define ICING_SCHEMA_SECTION_H_
17 
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/proto/schema.pb.h"
25 #include "icing/proto/term.pb.h"
26 
27 namespace icing {
28 namespace lib {
29 
// Integer type used to identify a section.
using SectionId = int8_t;

// Number of bits used to encode a SectionId: 4 bits for 16 values.
// NOTE: Increasing this value means that SectionIdMask must increase from an
// int16_t to an int32_t.
inline constexpr int kSectionIdBits = 4;

// One past the largest valid section id. Brace-initialized on purpose: if
// kSectionIdBits ever grows beyond what SectionId can hold, the narrowing
// conversion is a compile error instead of a silent wrap-around.
inline constexpr SectionId kInvalidSectionId{1 << kSectionIdBits};
inline constexpr SectionId kMaxSectionId = kInvalidSectionId - 1;
inline constexpr SectionId kMinSectionId = 0;

// Returns true iff section_id lies within [kMinSectionId, kMaxSectionId].
constexpr bool IsSectionIdValid(SectionId section_id) {
  return section_id >= kMinSectionId && section_id <= kMaxSectionId;
}

// Mask type that must be wide enough to hold one bit for every valid section
// id (enforced by the static_assert below).
using SectionIdMask = int16_t;
// Mask with every bit set.
inline constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
// Mask with no bit set.
inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};

static_assert(
    kMaxSectionId < 8 * sizeof(SectionIdMask),
    "SectionIdMask is not large enough to represent all section values!");
48 
49 struct SectionMetadata {
50   // Dot-joined property names, representing the location of section inside an
51   // document. E.g. "property1.property2"
52   std::string path;
53 
54   // A unique id of property within a type config
55   SectionId id;
56 
57   // How strings should be tokenized. It is invalid for a section to have
58   // tokenizer == 'NONE'.
59   StringIndexingConfig::TokenizerType::Code tokenizer;
60 
61   // How tokens in this section should be matched.
62   //
63   // TermMatchType::UNKNOWN:
64   //   Terms will not match anything
65   //
66   // TermMatchType::PREFIX:
67   //   Terms will be stored as a prefix match, "fool" matches "foo" and "fool"
68   //
69   // TermMatchType::EXACT_ONLY:
70   //   Terms will be only stored as an exact match, "fool" only matches "fool"
71   TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
72 
SectionMetadataSectionMetadata73   SectionMetadata(SectionId id_in, TermMatchType::Code term_match_type_in,
74                   StringIndexingConfig::TokenizerType::Code tokenizer,
75                   std::string&& path_in)
76       : path(std::move(path_in)),
77         id(id_in),
78         tokenizer(tokenizer),
79         term_match_type(term_match_type_in) {}
80 };
81 
82 // Section is an icing internal concept similar to document property but with
83 // extra metadata. The content can be a value or the combination of repeated
84 // values of a property.
85 struct Section {
86   SectionMetadata metadata;
87   std::vector<std::string_view> content;
88 
SectionSection89   Section(SectionMetadata&& metadata_in,
90           std::vector<std::string_view>&& content_in)
91       : metadata(std::move(metadata_in)), content(std::move(content_in)) {}
92 };
93 
94 }  // namespace lib
95 }  // namespace icing
96 
97 #endif  // ICING_SCHEMA_SECTION_H_
98