1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_SCHEMA_SECTION_H_
16 #define ICING_SCHEMA_SECTION_H_
17
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 #include <utility>
22 #include <vector>
23
24 #include "icing/proto/schema.pb.h"
25 #include "icing/proto/term.pb.h"
26
27 namespace icing {
28 namespace lib {
29
// Id of a section within a type config. Stored in a signed 8-bit integer.
using SectionId = int8_t;

// 4 bits for 16 values. NOTE: Increasing this value means that SectionIdMask
// must increase from an int16_t to an int32_t
inline constexpr int kSectionIdBits = 4;

// First value past the valid id range [kMinSectionId, kMaxSectionId].
inline constexpr SectionId kInvalidSectionId = (1 << kSectionIdBits);

// Compile-time guard: if kSectionIdBits is ever raised beyond what SectionId
// can hold, the initializer above would be silently truncated. Comparing the
// stored value against the untruncated int expression catches that.
static_assert(kInvalidSectionId == (1 << kSectionIdBits),
              "SectionId is not large enough to hold kInvalidSectionId!");

// Inclusive upper and lower bounds of the valid section id range.
inline constexpr SectionId kMaxSectionId = kInvalidSectionId - 1;
inline constexpr SectionId kMinSectionId = 0;
IsSectionIdValid(SectionId section_id)37 constexpr bool IsSectionIdValid(SectionId section_id) {
38 return section_id >= kMinSectionId && section_id <= kMaxSectionId;
39 }
40
41 using SectionIdMask = int16_t;
42 inline constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
43 inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};
44
45 static_assert(
46 kMaxSectionId < 8 * sizeof(SectionIdMask),
47 "SectionIdMask is not large enough to represent all section values!");
48
49 struct SectionMetadata {
50 // Dot-joined property names, representing the location of section inside an
51 // document. E.g. "property1.property2"
52 std::string path;
53
54 // A unique id of property within a type config
55 SectionId id;
56
57 // How strings should be tokenized. It is invalid for a section to have
58 // tokenizer == 'NONE'.
59 StringIndexingConfig::TokenizerType::Code tokenizer;
60
61 // How tokens in this section should be matched.
62 //
63 // TermMatchType::UNKNOWN:
64 // Terms will not match anything
65 //
66 // TermMatchType::PREFIX:
67 // Terms will be stored as a prefix match, "fool" matches "foo" and "fool"
68 //
69 // TermMatchType::EXACT_ONLY:
70 // Terms will be only stored as an exact match, "fool" only matches "fool"
71 TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
72
SectionMetadataSectionMetadata73 SectionMetadata(SectionId id_in, TermMatchType::Code term_match_type_in,
74 StringIndexingConfig::TokenizerType::Code tokenizer,
75 std::string&& path_in)
76 : path(std::move(path_in)),
77 id(id_in),
78 tokenizer(tokenizer),
79 term_match_type(term_match_type_in) {}
80 };
81
82 // Section is an icing internal concept similar to document property but with
83 // extra metadata. The content can be a value or the combination of repeated
84 // values of a property.
85 struct Section {
86 SectionMetadata metadata;
87 std::vector<std::string_view> content;
88
SectionSection89 Section(SectionMetadata&& metadata_in,
90 std::vector<std::string_view>&& content_in)
91 : metadata(std::move(metadata_in)), content(std::move(content_in)) {}
92 };
93
94 } // namespace lib
95 } // namespace icing
96
97 #endif // ICING_SCHEMA_SECTION_H_
98