// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Search request specifications (SearchSpecProto, ResultSpecProto), the search
// response (SearchResultProto and its snippet messages), and the property-mask
// messages (TypePropertyMask, GetResultSpecProto).
syntax = "proto2";

package icing.lib;

import "icing/proto/document.proto";
import "icing/proto/logging.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Client-supplied specifications on what documents to retrieve.
// Next tag: 5
message SearchSpecProto {
  // REQUIRED: The "raw" query string that users may type. For example, "cat"
  // will search for documents with the term cat in it.
  //
  // The field is declared `optional` on the wire; "REQUIRED" is a
  // documentation-level requirement on callers.
  // NOTE(review): confirm how an unset or empty query is handled.
  optional string query = 1;

  // Indicates how the query terms should match terms in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Should never purposely be set and may lead to undefined behavior. This is
  // used for backwards compatibility reasons.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Query terms will only match exact tokens in the index.
  // Ex. A query term "foo" will only match indexed token "foo", and not "foot"
  // or "football".
  //
  // TermMatchType.Code=PREFIX
  // Query terms will match indexed tokens when the query term is a prefix of
  // the token.
  // Ex. A query term "foo" will match indexed tokens like "foo", "foot", and
  // "football".
  optional TermMatchType.Code term_match_type = 2;

  // OPTIONAL: Only search for documents that have the specified namespaces. If
  // unset, the query will search over all namespaces. Note that this applies to
  // the entire 'query'. To issue different queries for different namespaces,
  // separate Search() calls will need to be made.
  repeated string namespace_filters = 3;

  // OPTIONAL: Only search for documents that have the specified schema types.
  // If unset, the query will search over all schema types. Note that this
  // applies to the entire 'query'. To issue different queries for different
  // schema types, separate Search() calls will need to be made.
  repeated string schema_type_filters = 4;
}

// Client-supplied specifications on what to include/how to format the search
// results.
// Next tag: 6
message ResultSpecProto {
  // The results will be returned in pages, and num_per_page specifies the
  // number of documents in one page. Defaults to 10 when unset.
  optional int32 num_per_page = 1 [default = 10];

  // Whether to collect and return debug_info in the SearchResultProto.
  optional bool debug_info = 2;

  // How to provide snippeting information in the SearchResultProto.
  // Next tag: 5
  // NOTE(review): only tags 1-3 are declared below and nothing is reserved.
  // Confirm whether tag 4 belonged to a since-removed field (and should be
  // reserved) or whether "Next tag" should read 4.
  message SnippetSpecProto {
    // Only the first num_to_snippet documents will have snippet information
    // provided. If set to 0, snippeting is disabled.
    optional int32 num_to_snippet = 1;

    // Only the first num_matches_per_property matches for a single section will
    // have snippet information provided. If set to 0, snippeting is disabled.
    optional int32 num_matches_per_property = 2;

    // How large of a window to provide. Windows start at max_window_bytes / 2
    // bytes before the middle of the matching token and end at
    // max_window_bytes / 2 bytes after the middle of the matching token.
    // Windowing respects token boundaries, therefore the returned window may be
    // smaller than requested. Setting max_window_bytes to 0 will disable
    // windowing information. If matches enabled is also set to false, then
    // snippeting is disabled.
    // NOTE(review): no "matches enabled" setting is declared in this message;
    // the previous sentence may be stale - confirm.
    //
    // Ex. max_window_bytes = 16. "foo bar baz bat rat" with a query of "baz"
    // will return a window of "bar baz bat" which is only 11 bytes long.
    optional int32 max_window_bytes = 3;
  }
  optional SnippetSpecProto snippet_spec = 3;

  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 4;

  // Groupings of namespaces whose total returned results should be
  // limited together.
  // Next tag: 3
  message ResultGrouping {
    // The namespaces in this grouping.
    repeated string namespaces = 1;

    // The maximum number of results in this grouping that should be returned.
    // The limit is shared by all namespaces listed in `namespaces`.
    optional int32 max_results = 2;
  }

  // How to limit the number of results returned per set of namespaces. If
  // results match for a namespace that is not present in any result groupings,
  // then those results will be returned without limit.
  //
  // Non-existent namespaces will be ignored.
  //
  // Example: Suppose that there are four namespaces ("namespace0" through
  // "namespace3"), each with three results matching the query for "foo", where
  // "nsXdocY" denotes document Y of namespaceX. Without any result groupings,
  // Icing would return the following results:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
  //  "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc2"].
  //
  // With the following result groupings:
  // [ { ["namespace0"], 2 }, { ["namespace1", "namespace2"], 2 } ]
  //
  // the following results will be returned:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
  //  "ns3doc2"].
  //
  // "ns0doc2" is dropped because namespace0 is capped at 2 results;
  // namespace1 and namespace2 share a single cap of 2, so only "ns1doc0" and
  // "ns2doc1" are kept; namespace3 is in no grouping and is not limited.
  repeated ResultGrouping result_groupings = 5;
}

// The representation of a single match within a DocumentProto property.
// Positions and lengths are provided in two units: bytes and UTF-16 code
// units.
// Next tag: 10
message SnippetMatchProto {
  // The index of the byte in the string at which the match begins and the
  // length in bytes of the match.
  optional int32 exact_match_byte_position = 2;
  optional int32 exact_match_byte_length = 3;

  // The index of the UTF-16 code unit in the string at which the match begins
  // and the length in UTF-16 code units of the match. This is for use with
  // UTF-16 encoded strings like java.lang.String.
  optional int32 exact_match_utf16_position = 6;
  optional int32 exact_match_utf16_length = 7;

  // The index of the byte in the string at which the suggested snippet window
  // begins and the length in bytes of the window.
  optional int32 window_byte_position = 4;
  optional int32 window_byte_length = 5;

  // The index of the UTF-16 code unit in the string at which the suggested
  // snippet window begins and the length in UTF-16 code units of the window.
  // This is for use with UTF-16 encoded strings like java.lang.String.
  optional int32 window_utf16_position = 8;
  optional int32 window_utf16_length = 9;

  // Tag 1 is reserved and must not be assigned to any new field.
  reserved 1;
}

// A Proto representing all snippets for a single DocumentProto.
// Next tag: 2
message SnippetProto {
  // A pair of property name and all snippet matches that correspond to the
  // property values in the corresponding DocumentProto.
  // Next tag: 3
  message EntryProto {
    // A property path indicating which property in the DocumentProto these
    // snippets correspond to. Property paths will contain 1) property names,
    // 2) the property separator character '.' used to represent nested
    // properties and 3) indices surrounded by brackets to represent a specific
    // value in that property.
    //
    // Example properties:
    // - 'body'               : the first and only string value of a top-level
    //                          property called 'body'.
    // - 'sender.name'        : the first and only string value of a property
    //                          called 'name' that is a subproperty of a
    //                          property called 'sender'.
    // - 'bcc[1].emailaddress': the first and only string value of a property
    //                          called 'emailaddress' that is a subproperty of
    //                          the second document value of a property called
    //                          'bcc'.
    // - 'attachments[0]'     : the first (of more than one) string value of a
    //                          property called 'attachments'.
    // NOTE: If there is only a single value for a property (like
    // 'sender.name'), then no value index will be added to the property path.
    // An index of [0] is implied. If there is more than one value for a
    // property, then the value index will be added to the property path (like
    // 'attachments[0]').
    optional string property_name = 1;

    // All matches found within the property identified by property_name.
    repeated SnippetMatchProto snippet_matches = 2;
  }
  // Properties that do not appear in entries do not contain any matches.
  repeated EntryProto entries = 1;
}

// Icing lib-supplied results from a search.
// Next tag: 6
message SearchResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INVALID_ARGUMENT
  //   ABORTED
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED.
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // The Results that matched the query. Empty if there was an error.
  // Next tag: 4
  message ResultProto {
    // Document that matches the SearchSpecProto.
    optional DocumentProto document = 1;

    // Snippeting information for the document if requested in the
    // ResultSpecProto. A default instance, if not requested.
    optional SnippetProto snippet = 2;

    // The score that the document was ranked by. The meaning of this score is
    // determined by ScoringSpecProto.rank_by.
    optional double score = 3;
  }
  repeated ResultProto results = 2;

  // Various debug fields. Not populated if ResultSpecProto.debug_info = false.
  // Next tag: 4
  message DebugInfoProto {
    // The internal representation of the actual query string that was executed.
    // This may be different from the SearchSpecProto.query if the original
    // query was malformed.
    optional string executed_query = 3;

    // Tags 1 and 2 are reserved and must not be assigned to any new field.
    reserved 1, 2;
  }
  optional DebugInfoProto debug_info = 3;

  // An opaque token used internally to keep track of information needed for
  // pagination. A valid pagination token is required to fetch other pages of
  // results. A value of 0 means that there are no more pages.
  // LINT.IfChange(next_page_token)
  optional uint64 next_page_token = 4;
  // LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)

  // Stats for query execution performance.
  optional QueryStatsProto query_stats = 5;
}

// Restricts which properties are retrieved for documents of one schema type.
// Next tag: 3
message TypePropertyMask {
  // The schema type to which these property masks should apply.
  // If the schema type is the wildcard ("*"), then the type property masks
  // will apply to all results of types that don't have their own, specific
  // type property mask entry.
  optional string schema_type = 1;

  // The property masks specifying the properties to be retrieved. Property
  // masks must be composed only of property names and property separators (the
  // '.' character). For example, "subject", "recipients.name". Specifying no
  // property masks will result in *no* properties being retrieved.
  repeated string paths = 2;
}

// Client-supplied specification of which document properties to retrieve.
// NOTE(review): presumably passed to Get-style document lookups, as opposed to
// ResultSpecProto which carries the same masks for searches - confirm against
// the API that consumes this message.
// Next tag: 2
message GetResultSpecProto {
  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 1;
}