1 /*
2  * Copyright 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server.appsearch.external.localstorage.usagereporting;
18 
19 import android.annotation.NonNull;
20 import android.annotation.Nullable;
21 import android.app.appsearch.GenericDocument;
22 import android.app.appsearch.usagereporting.ActionConstants;
23 
24 import com.android.server.appsearch.external.localstorage.stats.ClickStats;
25 import com.android.server.appsearch.external.localstorage.stats.SearchIntentStats;
26 import com.android.server.appsearch.external.localstorage.stats.SearchSessionStats;
27 
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.List;
31 import java.util.Objects;
32 
33 /**
34  * Extractor class for analyzing a list of taken action {@link GenericDocument} and creating a list
35  * of {@link SearchSessionStats}.
36  *
37  * @hide
38  */
39 public final class SearchSessionStatsExtractor {
40     // TODO(b/319285816): make thresholds configurable.
41     /**
42      * Threshold for noise search intent detection, in millisecond. A search action will be
43      * considered as a noise (and skipped) if all of the following conditions are satisfied:
44      *
45      * <ul>
46      *   <li>The action timestamp (action document creation timestamp) difference between it and its
47      *       previous search action is below this threshold.
48      *   <li>There is no click action associated with it.
49      *   <li>Its raw query string is a prefix of the previous search action's raw query string (or
50      *       the other way around).
51      * </ul>
52      */
53     private static final long NOISE_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS = 2000L;
54 
55     /**
56      * Threshold for independent search intent detection, in millisecond. If the action timestamp
57      * (action document creation timestamp) difference between the previous and the current search
58      * action exceeds this threshold, then the current search action will be considered as a
59      * completely independent search intent (i.e. belonging to a new search session), and there will
60      * be no correlation analysis between the previous and the current search action.
61      */
62     private static final long INDEPENDENT_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS =
63             10L * 60 * 1000;
64 
65     /**
66      * Threshold for marking good click (compared with {@code timeStayOnResultMillis}), in
67      * millisecond. A good click means the user spent decent amount of time on the clicked result
68      * document.
69      */
70     private static final long GOOD_CLICK_TIME_STAY_ON_RESULT_THRESHOLD_MILLIS = 2000L;
71 
72     /**
73      * Threshold for backspace count to become query abandonment. If the user hits backspace for at
74      * least QUERY_ABANDONMENT_BACKSPACE_COUNT times, then the query correction type will be
75      * determined as abandonment.
76      */
77     private static final int QUERY_ABANDONMENT_BACKSPACE_COUNT = 2;
78 
79     /**
80      * Returns the query correction type between the previous and current search actions.
81      *
82      * @param currSearchAction the current search action {@link SearchActionGenericDocument}.
83      * @param prevSearchAction the previous search action {@link SearchActionGenericDocument}.
84      */
getQueryCorrectionType( @onNull SearchActionGenericDocument currSearchAction, @Nullable SearchActionGenericDocument prevSearchAction)85     public static @SearchIntentStats.QueryCorrectionType int getQueryCorrectionType(
86             @NonNull SearchActionGenericDocument currSearchAction,
87             @Nullable SearchActionGenericDocument prevSearchAction) {
88         Objects.requireNonNull(currSearchAction);
89 
90         if (currSearchAction.getQuery() == null) {
91             // Query correction type cannot be determined if the client didn't provide the raw query
92             // string.
93             return SearchIntentStats.QUERY_CORRECTION_TYPE_UNKNOWN;
94         }
95         if (prevSearchAction == null) {
96             // If the previous search action is missing, then it is the first query.
97             return SearchIntentStats.QUERY_CORRECTION_TYPE_FIRST_QUERY;
98         } else if (prevSearchAction.getQuery() == null) {
99             // Query correction type cannot be determined if the client didn't provide the raw query
100             // string.
101             return SearchIntentStats.QUERY_CORRECTION_TYPE_UNKNOWN;
102         }
103 
104         // Determine the query correction type by comparing the current and previous raw query
105         // strings.
106         String prevQuery = prevSearchAction.getQuery();
107         String currQuery = currSearchAction.getQuery();
108         int commonPrefixLength = getCommonPrefixLength(prevQuery, currQuery);
109         // If the user hits backspace >= QUERY_ABANDONMENT_BACKSPACE_COUNT times, then it is query
110         // abandonment. Otherwise, it is query refinement.
111         if (commonPrefixLength <= prevQuery.length() - QUERY_ABANDONMENT_BACKSPACE_COUNT) {
112             return SearchIntentStats.QUERY_CORRECTION_TYPE_ABANDONMENT;
113         } else {
114             return SearchIntentStats.QUERY_CORRECTION_TYPE_REFINEMENT;
115         }
116     }
117 
118     /**
119      * Returns a list of {@link SearchSessionStats} extracted from the given list of taken action
120      * {@link GenericDocument}.
121      *
122      * <p>A search session consists of several related search intents.
123      *
124      * <p>A search intent consists of a valid search action with 0 or more click actions. To extract
125      * search intent metrics, this function will try to group the given taken actions into several
126      * search intents, and yield a {@link SearchIntentStats} for each search intent. Finally related
127      * {@link SearchIntentStats} will be wrapped into {@link SearchSessionStats}.
128      *
129      * @param packageName The package name of the caller.
130      * @param database The database name of the caller.
131      * @param genericDocuments a list of taken actions in generic document form.
132      */
133     @NonNull
extract( @onNull String packageName, @Nullable String database, @NonNull List<GenericDocument> genericDocuments)134     public List<SearchSessionStats> extract(
135             @NonNull String packageName,
136             @Nullable String database,
137             @NonNull List<GenericDocument> genericDocuments) {
138         Objects.requireNonNull(genericDocuments);
139 
140         // Convert GenericDocument list to TakenActionGenericDocument list and sort them by document
141         // creation timestamp.
142         List<TakenActionGenericDocument> takenActionGenericDocuments =
143                 new ArrayList<>(genericDocuments.size());
144         for (int i = 0; i < genericDocuments.size(); ++i) {
145             try {
146                 takenActionGenericDocuments.add(
147                         TakenActionGenericDocument.create(genericDocuments.get(i)));
148             } catch (IllegalArgumentException e) {
149                 // Skip generic documents with unknown action type.
150             }
151         }
152         Collections.sort(
153                 takenActionGenericDocuments,
154                 (TakenActionGenericDocument doc1, TakenActionGenericDocument doc2) ->
155                         Long.compare(
156                                 doc1.getCreationTimestampMillis(),
157                                 doc2.getCreationTimestampMillis()));
158 
159         List<SearchSessionStats> result = new ArrayList<>();
160         SearchSessionStats.Builder searchSessionStatsBuilder = null;
161         SearchActionGenericDocument prevSearchAction = null;
162         // Clients are expected to report search action followed by its associated click actions.
163         // For example, [searchAction1, clickAction1, searchAction2, searchAction3, clickAction2,
164         // clickAction3]:
165         // - There are 3 search actions and 3 click actions.
166         // - clickAction1 is associated with searchAction1.
167         // - There is no click action associated with searchAction2.
168         // - clickAction2 and clickAction3 are associated with searchAction3.
169         // Here we're going to break down the list into segments. Each segment starts with a search
170         // action followed by 0 or more associated click actions, and they form a single search
171         // intent. We will analyze and extract metrics from the taken actions for the search intent.
172         //
173         // If a search intent is considered independent from the previous one, then we will start a
174         // new search session analysis.
175         for (int i = 0; i < takenActionGenericDocuments.size(); ++i) {
176             if (takenActionGenericDocuments.get(i).getActionType()
177                     != ActionConstants.ACTION_TYPE_SEARCH) {
178                 continue;
179             }
180 
181             SearchActionGenericDocument currSearchAction =
182                     (SearchActionGenericDocument) takenActionGenericDocuments.get(i);
183             List<ClickActionGenericDocument> clickActions = new ArrayList<>();
184             // Get all click actions associated with the current search action by advancing until
185             // the next search action.
186             while (i + 1 < takenActionGenericDocuments.size()
187                     && takenActionGenericDocuments.get(i + 1).getActionType()
188                             != ActionConstants.ACTION_TYPE_SEARCH) {
189                 if (takenActionGenericDocuments.get(i + 1).getActionType()
190                         == ActionConstants.ACTION_TYPE_CLICK) {
191                     clickActions.add(
192                             (ClickActionGenericDocument) takenActionGenericDocuments.get(i + 1));
193                 }
194                 ++i;
195             }
196 
197             // Get the reference of the next search action if it exists.
198             SearchActionGenericDocument nextSearchAction = null;
199             if (i + 1 < takenActionGenericDocuments.size()
200                     && takenActionGenericDocuments.get(i + 1).getActionType()
201                             == ActionConstants.ACTION_TYPE_SEARCH) {
202                 nextSearchAction =
203                         (SearchActionGenericDocument) takenActionGenericDocuments.get(i + 1);
204             }
205 
206             if (prevSearchAction != null
207                     && isIndependentSearchAction(currSearchAction, prevSearchAction)) {
208                 // If the current search action is independent from the previous one, then:
209                 // - Build and append the previous search session stats.
210                 // - Start a new search session analysis.
211                 // - Ignore the previous search action when extracting stats.
212                 if (searchSessionStatsBuilder != null) {
213                     result.add(searchSessionStatsBuilder.build());
214                     searchSessionStatsBuilder = null;
215                 }
216                 prevSearchAction = null;
217             } else if (clickActions.isEmpty()
218                     && isIntermediateSearchAction(
219                             currSearchAction, prevSearchAction, nextSearchAction)) {
220                 // If the current search action is an intermediate search action with no click
221                 // actions, then we consider it as a noise and skip it.
222                 continue;
223             }
224 
225             // Now we get a valid search intent (the current search action + a list of click actions
226             // associated with it). Extract metrics and add SearchIntentStats into this search
227             // session.
228             if (searchSessionStatsBuilder == null) {
229                 searchSessionStatsBuilder =
230                         new SearchSessionStats.Builder(packageName).setDatabase(database);
231             }
232             searchSessionStatsBuilder.addSearchIntentsStats(
233                     createSearchIntentStats(
234                             packageName,
235                             database,
236                             currSearchAction,
237                             clickActions,
238                             prevSearchAction));
239             prevSearchAction = currSearchAction;
240         }
241         if (searchSessionStatsBuilder != null) {
242             result.add(searchSessionStatsBuilder.build());
243         }
244         return result;
245     }
246 
247     /**
248      * Creates a {@link SearchIntentStats} object from the current search action + its associated
249      * click actions, and the previous search action (in generic document form).
250      */
createSearchIntentStats( @onNull String packageName, @Nullable String database, @NonNull SearchActionGenericDocument currSearchAction, @NonNull List<ClickActionGenericDocument> clickActions, @Nullable SearchActionGenericDocument prevSearchAction)251     private SearchIntentStats createSearchIntentStats(
252             @NonNull String packageName,
253             @Nullable String database,
254             @NonNull SearchActionGenericDocument currSearchAction,
255             @NonNull List<ClickActionGenericDocument> clickActions,
256             @Nullable SearchActionGenericDocument prevSearchAction) {
257         SearchIntentStats.Builder builder =
258                 new SearchIntentStats.Builder(packageName)
259                         .setDatabase(database)
260                         .setTimestampMillis(currSearchAction.getCreationTimestampMillis())
261                         .setCurrQuery(currSearchAction.getQuery())
262                         .setNumResultsFetched(currSearchAction.getFetchedResultCount())
263                         .setQueryCorrectionType(
264                                 getQueryCorrectionType(currSearchAction, prevSearchAction));
265         if (prevSearchAction != null) {
266             builder.setPrevQuery(prevSearchAction.getQuery());
267         }
268         for (int i = 0; i < clickActions.size(); ++i) {
269             builder.addClicksStats(createClickStats(clickActions.get(i)));
270         }
271         return builder.build();
272     }
273 
274     /**
275      * Creates a {@link ClickStats} object from the given click action (in generic document form).
276      */
createClickStats(ClickActionGenericDocument clickAction)277     private ClickStats createClickStats(ClickActionGenericDocument clickAction) {
278         // A click is considered good if:
279         // - The user spent decent amount of time on the clicked document.
280         // - OR the client didn't provide timeStayOnResultMillis. In this case, the value will be 0.
281         boolean isGoodClick =
282                 clickAction.getTimeStayOnResultMillis() <= 0
283                         || clickAction.getTimeStayOnResultMillis()
284                                 >= GOOD_CLICK_TIME_STAY_ON_RESULT_THRESHOLD_MILLIS;
285         return new ClickStats.Builder()
286                 .setTimestampMillis(clickAction.getCreationTimestampMillis())
287                 .setResultRankInBlock(clickAction.getResultRankInBlock())
288                 .setResultRankGlobal(clickAction.getResultRankGlobal())
289                 .setTimeStayOnResultMillis(clickAction.getTimeStayOnResultMillis())
290                 .setIsGoodClick(isGoodClick)
291                 .build();
292     }
293 
294     /**
295      * Returns if the current search action is an intermediate search action.
296      *
297      * <p>An intermediate search action is used for detecting the situation when the user adds or
298      * deletes characters from the query (e.g. "a" -> "app" -> "apple" or "apple" -> "app" -> "a")
299      * within a short period of time. More precisely, it has to satisfy all of the following
300      * conditions:
301      *
302      * <ul>
303      *   <li>There are related (non-independent) search actions before and after it.
304      *   <li>It occurs within the threshold after its previous search action.
305      *   <li>Its raw query string is a prefix of its previous search action's raw query string, or
306      *       the opposite direction.
307      * </ul>
308      */
isIntermediateSearchAction( @onNull SearchActionGenericDocument currSearchAction, @Nullable SearchActionGenericDocument prevSearchAction, @Nullable SearchActionGenericDocument nextSearchAction)309     private static boolean isIntermediateSearchAction(
310             @NonNull SearchActionGenericDocument currSearchAction,
311             @Nullable SearchActionGenericDocument prevSearchAction,
312             @Nullable SearchActionGenericDocument nextSearchAction) {
313         Objects.requireNonNull(currSearchAction);
314 
315         if (prevSearchAction == null || nextSearchAction == null) {
316             return false;
317         }
318 
319         // Whether the next search action is independent from the current search action. If true,
320         // then the current search action will not be considered as an intermediate search action
321         // since it is the last search action of the search session.
322         boolean isNextSearchActionIndependent =
323                 isIndependentSearchAction(nextSearchAction, currSearchAction);
324 
325         // Whether the current search action occurs within the threshold after the previous search
326         // action.
327         boolean occursWithinTimeThreshold =
328                 currSearchAction.getCreationTimestampMillis()
329                                 - prevSearchAction.getCreationTimestampMillis()
330                         <= NOISE_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS;
331 
332         // Whether the previous search action's raw query string is a prefix of the current search
333         // action's, or the opposite direction (e.g. "app" -> "apple" and "apple" -> "app").
334         String prevQuery = prevSearchAction.getQuery();
335         String currQuery = currSearchAction.getQuery();
336         boolean isPrefix =
337                 prevQuery != null
338                         && currQuery != null
339                         && (currQuery.startsWith(prevQuery) || prevQuery.startsWith(currQuery));
340 
341         return !isNextSearchActionIndependent && occursWithinTimeThreshold && isPrefix;
342     }
343 
344     /**
345      * Returns if the current search action is independent from the previous search action.
346      *
347      * <p>If the current search action occurs later than the threshold after the previous search
348      * action, then they are considered independent.
349      */
isIndependentSearchAction( @onNull SearchActionGenericDocument currSearchAction, @NonNull SearchActionGenericDocument prevSearchAction)350     private static boolean isIndependentSearchAction(
351             @NonNull SearchActionGenericDocument currSearchAction,
352             @NonNull SearchActionGenericDocument prevSearchAction) {
353         Objects.requireNonNull(currSearchAction);
354         Objects.requireNonNull(prevSearchAction);
355 
356         long searchTimeDiffMillis =
357                 currSearchAction.getCreationTimestampMillis()
358                         - prevSearchAction.getCreationTimestampMillis();
359         return searchTimeDiffMillis > INDEPENDENT_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS;
360     }
361 
362     /** Returns the common prefix length of the given 2 strings. */
getCommonPrefixLength(@onNull String s1, @NonNull String s2)363     private static int getCommonPrefixLength(@NonNull String s1, @NonNull String s2) {
364         Objects.requireNonNull(s1);
365         Objects.requireNonNull(s2);
366 
367         int minLength = Math.min(s1.length(), s2.length());
368         for (int i = 0; i < minLength; ++i) {
369             if (s1.charAt(i) != s2.charAt(i)) {
370                 return i;
371             }
372         }
373         return minLength;
374     }
375 }
376