// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/document.proto";
import "icing/proto/logging.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Client-supplied specifications on what documents to retrieve.
// Next tag: 5
message SearchSpecProto {
  // REQUIRED: The "raw" query string that users may type. For example, "cat"
  // will search for documents with the term cat in it.
  optional string query = 1;

  // Indicates how the query terms should match terms in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Should never purposely be set and may lead to undefined behavior. This is
  // used for backwards compatibility reasons.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Query terms will only match exact tokens in the index.
  // Ex. A query term "foo" will only match indexed token "foo", and not "foot"
  // or "football"
  //
  // TermMatchType.Code=PREFIX
  // Query terms will match indexed tokens when the query term is a prefix of
  // the token.
  // Ex. A query term "foo" will match indexed tokens like "foo", "foot", and
  // "football".
  optional TermMatchType.Code term_match_type = 2;

  // OPTIONAL: Only search for documents that have the specified namespaces. If
  // unset, the query will search over all namespaces. Note that this applies to
  // the entire 'query'. To issue different queries for different namespaces,
  // separate Search()'s will need to be made.
  repeated string namespace_filters = 3;

  // OPTIONAL: Only search for documents that have the specified schema types.
  // If unset, the query will search over all schema types. Note that this
  // applies to the entire 'query'. To issue different queries for different
  // schema types, separate Search()'s will need to be made.
  repeated string schema_type_filters = 4;
}

// Client-supplied specifications on what to include/how to format the search
// results.
// Next tag: 6
message ResultSpecProto {
  // The results will be returned in pages, and num_per_page specifies the
  // number of documents in one page.
  optional int32 num_per_page = 1 [default = 10];

  // Whether to collect and return debug_info in the SearchResultProto.
  optional bool debug_info = 2;

  // How to provide snippeting information in the SearchResultProto.
  // Next tag: 5
  // NOTE(review): only tags 1-3 are declared below and tag 4 is not reserved;
  // confirm whether "Next tag: 5" is intentional or a `reserved 4;` is missing.
  message SnippetSpecProto {
    // Only the first num_to_snippet documents will have snippet information
    // provided. If set to 0, snippeting is disabled.
    optional int32 num_to_snippet = 1;

    // Only the first num_matches_per_property matches for a single section will
    // have snippet information provided. If set to 0, snippeting is disabled.
    optional int32 num_matches_per_property = 2;

    // How large of a window to provide. Windows start at max_window_bytes / 2
    // bytes before the middle of the matching token and end at max_window_bytes
    // / 2 bytes after the middle of the matching token. Windowing respects
    // token boundaries.
    // Therefore, the returned window may be smaller than requested. Setting
    // max_window_bytes to 0 will disable windowing information. If matches
    // enabled is also set to false, then snippeting is disabled.
    // Ex. max_window_bytes = 16. "foo bar baz bat rat" with a query of "baz"
    // will return a window of "bar baz bat" which is only 11 bytes long.
    optional int32 max_window_bytes = 3;
  }
  optional SnippetSpecProto snippet_spec = 3;

  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 4;

  // Groupings of namespaces whose total returned results should be
  // limited together.
  // Next tag: 3
  message ResultGrouping {
    // The namespaces in this grouping.
    repeated string namespaces = 1;

    // The maximum number of results in this grouping that should be returned.
    optional int32 max_results = 2;
  }

  // How to limit the number of results returned per set of namespaces. If
  // results match for a namespace that is not present in any result groupings,
  // then those results will be returned without limit.
  //
  // Non-existent namespaces will be ignored.
  //
  // Example: Suppose that there are four namespaces each with three results
  // matching the query for "foo". Without any result groupings, Icing would
  // return the following results:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
  //  "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc2"].
  //
  // With the following result groupings:
  // [ { ["namespace0"], 2 }, { ["namespace1", "namespace2"], 2} ]
  //
  // the following results will be returned instead:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
  //  "ns3doc2"].
  repeated ResultGrouping result_groupings = 5;
}

// The representation of a single match within a DocumentProto property.
// Next tag: 10
message SnippetMatchProto {
  // The index of the byte in the string at which the match begins and the
  // length in bytes of the match.
  optional int32 exact_match_byte_position = 2;
  optional int32 exact_match_byte_length = 3;

  // The index of the UTF-16 code unit in the string at which the match begins
  // and the length in UTF-16 code units of the match. This is for use with
  // UTF-16 encoded strings like Java.lang.String.
  optional int32 exact_match_utf16_position = 6;
  optional int32 exact_match_utf16_length = 7;

  // The index of the byte in the string at which the suggested snippet window
  // begins and the length in bytes of the window.
  optional int32 window_byte_position = 4;
  optional int32 window_byte_length = 5;

  // The index of the UTF-16 code unit in the string at which the suggested
  // snippet window begins and the length in UTF-16 code units of the window.
  // This is for use with UTF-16 encoded strings like Java.lang.String.
  optional int32 window_utf16_position = 8;
  optional int32 window_utf16_length = 9;

  reserved 1;
}

// A Proto representing all snippets for a single DocumentProto.
// Next tag: 2
message SnippetProto {
  // A pair of property name and all snippet matches that correspond to the
  // property values in the corresponding DocumentProto.
  // Next tag: 3
  message EntryProto {
    // A property path indicating which property in the DocumentProto these
    // snippets correspond to. Property paths will contain 1) property names,
    // 2) the property separator character '.' used to represent nested property
    // and 3) indices surrounded by brackets to represent a specific value in
    // that property.
    //
    // Example properties:
    // - 'body'               : the first and only string value of a top-level
    //                          property called 'body'.
    // - 'sender.name'        : the first and only string value of a property
    //                          called 'name' that is a subproperty of a
    //                          property called 'sender'.
    // - 'bcc[1].emailaddress': the first and only string value of a property
    //                          called 'emailaddress' that is a subproperty of
    //                          the second document value of a property called
    //                          'bcc'.
    // - 'attachments[0]'     : the first (of more than one) string value of a
    //                          property called 'attachments'.
    // NOTE: If there is only a single value for a property (like
    // 'sender.name'), then no value index will be added to the property path.
    // An index of [0] is implied. If there is more than one value for a
    // property, then the value index will be added to the property path (like
    // 'attachments[0]').
    optional string property_name = 1;

    // The matches found within the property value identified by
    // property_name.
    repeated SnippetMatchProto snippet_matches = 2;
  }
  // Properties that do not appear in entries do not contain any matches.
  repeated EntryProto entries = 1;
}

// Icing lib-supplied results from a search.
// Next tag: 6
message SearchResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INVALID_ARGUMENT
  //   ABORTED
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED.
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // The Results that matched the query. Empty if there was an error.
  // Next tag: 4
  message ResultProto {
    // Document that matches the SearchSpecProto.
    optional DocumentProto document = 1;

    // Snippeting information for the document if requested in the
    // ResultSpecProto. A default instance, if not requested.
    optional SnippetProto snippet = 2;

    // The score that the document was ranked by. The meaning of this score is
    // determined by ScoringSpecProto.rank_by.
    optional double score = 3;
  }
  repeated ResultProto results = 2;

  // Various debug fields. Not populated if ResultSpecProto.debug_info = false.
  // Next tag: 4
  message DebugInfoProto {
    // The internal representation of the actual query string that was executed.
    // This may be different from the SearchSpecProto.query if the original
    // query was malformed.
    optional string executed_query = 3;

    reserved 1, 2;
  }
  optional DebugInfoProto debug_info = 3;

  // An opaque token used internally to keep track of information needed for
  // pagination. A valid pagination token is required to fetch other pages of
  // results. A value of 0 means that there are no more pages.
  // LINT.IfChange(next_page_token)
  optional uint64 next_page_token = 4;
  // LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)

  // Stats for query execution performance.
  optional QueryStatsProto query_stats = 5;
}

// Selects which properties should be retrieved for documents of a given
// schema type.
// Next tag: 3
message TypePropertyMask {
  // The schema type to which these property masks should apply.
  // If the schema type is the wildcard ("*"), then the type property masks
  // will apply to all results of types that don't have their own, specific
  // type property mask entry.
  optional string schema_type = 1;

  // The property masks specifying the property to be retrieved. Property
  // masks must be composed only of property names and property separators
  // (the '.' character). For example, "subject", "recipients.name". Specifying
  // no property masks will result in *no* properties being retrieved.
  repeated string paths = 2;
}

// Specifies which subset of document properties should be retrieved.
// Next tag: 2
message GetResultSpecProto {
  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 1;
}