1 /* 2 * Copyright 2024 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server.appsearch.external.localstorage.usagereporting; 18 19 import android.annotation.NonNull; 20 import android.annotation.Nullable; 21 import android.app.appsearch.GenericDocument; 22 import android.app.appsearch.usagereporting.ActionConstants; 23 24 import com.android.server.appsearch.external.localstorage.stats.ClickStats; 25 import com.android.server.appsearch.external.localstorage.stats.SearchIntentStats; 26 import com.android.server.appsearch.external.localstorage.stats.SearchSessionStats; 27 28 import java.util.ArrayList; 29 import java.util.Collections; 30 import java.util.List; 31 import java.util.Objects; 32 33 /** 34 * Extractor class for analyzing a list of taken action {@link GenericDocument} and creating a list 35 * of {@link SearchSessionStats}. 36 * 37 * @hide 38 */ 39 public final class SearchSessionStatsExtractor { 40 // TODO(b/319285816): make thresholds configurable. 41 /** 42 * Threshold for noise search intent detection, in millisecond. A search action will be 43 * considered as a noise (and skipped) if all of the following conditions are satisfied: 44 * 45 * <ul> 46 * <li>The action timestamp (action document creation timestamp) difference between it and its 47 * previous search action is below this threshold. 48 * <li>There is no click action associated with it. 49 * <li>Its raw query string is a prefix of the previous search action's raw query string (or 50 * the other way around). 51 * </ul> 52 */ 53 private static final long NOISE_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS = 2000L; 54 55 /** 56 * Threshold for independent search intent detection, in millisecond. If the action timestamp 57 * (action document creation timestamp) difference between the previous and the current search 58 * action exceeds this threshold, then the current search action will be considered as a 59 * completely independent search intent (i.e. belonging to a new search session), and there will 60 * be no correlation analysis between the previous and the current search action. 61 */ 62 private static final long INDEPENDENT_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS = 63 10L * 60 * 1000; 64 65 /** 66 * Threshold for marking good click (compared with {@code timeStayOnResultMillis}), in 67 * millisecond. A good click means the user spent decent amount of time on the clicked result 68 * document. 69 */ 70 private static final long GOOD_CLICK_TIME_STAY_ON_RESULT_THRESHOLD_MILLIS = 2000L; 71 72 /** 73 * Threshold for backspace count to become query abandonment. If the user hits backspace for at 74 * least QUERY_ABANDONMENT_BACKSPACE_COUNT times, then the query correction type will be 75 * determined as abandonment. 76 */ 77 private static final int QUERY_ABANDONMENT_BACKSPACE_COUNT = 2; 78 79 /** 80 * Returns the query correction type between the previous and current search actions. 81 * 82 * @param currSearchAction the current search action {@link SearchActionGenericDocument}. 83 * @param prevSearchAction the previous search action {@link SearchActionGenericDocument}. 84 */ getQueryCorrectionType( @onNull SearchActionGenericDocument currSearchAction, @Nullable SearchActionGenericDocument prevSearchAction)85 public static @SearchIntentStats.QueryCorrectionType int getQueryCorrectionType( 86 @NonNull SearchActionGenericDocument currSearchAction, 87 @Nullable SearchActionGenericDocument prevSearchAction) { 88 Objects.requireNonNull(currSearchAction); 89 90 if (currSearchAction.getQuery() == null) { 91 // Query correction type cannot be determined if the client didn't provide the raw query 92 // string. 93 return SearchIntentStats.QUERY_CORRECTION_TYPE_UNKNOWN; 94 } 95 if (prevSearchAction == null) { 96 // If the previous search action is missing, then it is the first query. 97 return SearchIntentStats.QUERY_CORRECTION_TYPE_FIRST_QUERY; 98 } else if (prevSearchAction.getQuery() == null) { 99 // Query correction type cannot be determined if the client didn't provide the raw query 100 // string. 101 return SearchIntentStats.QUERY_CORRECTION_TYPE_UNKNOWN; 102 } 103 104 // Determine the query correction type by comparing the current and previous raw query 105 // strings. 106 String prevQuery = prevSearchAction.getQuery(); 107 String currQuery = currSearchAction.getQuery(); 108 int commonPrefixLength = getCommonPrefixLength(prevQuery, currQuery); 109 // If the user hits backspace >= QUERY_ABANDONMENT_BACKSPACE_COUNT times, then it is query 110 // abandonment. Otherwise, it is query refinement. 111 if (commonPrefixLength <= prevQuery.length() - QUERY_ABANDONMENT_BACKSPACE_COUNT) { 112 return SearchIntentStats.QUERY_CORRECTION_TYPE_ABANDONMENT; 113 } else { 114 return SearchIntentStats.QUERY_CORRECTION_TYPE_REFINEMENT; 115 } 116 } 117 118 /** 119 * Returns a list of {@link SearchSessionStats} extracted from the given list of taken action 120 * {@link GenericDocument}. 121 * 122 * <p>A search session consists of several related search intents. 123 * 124 * <p>A search intent consists of a valid search action with 0 or more click actions. To extract 125 * search intent metrics, this function will try to group the given taken actions into several 126 * search intents, and yield a {@link SearchIntentStats} for each search intent. Finally related 127 * {@link SearchIntentStats} will be wrapped into {@link SearchSessionStats}. 128 * 129 * @param packageName The package name of the caller. 130 * @param database The database name of the caller. 131 * @param genericDocuments a list of taken actions in generic document form. 132 */ 133 @NonNull extract( @onNull String packageName, @Nullable String database, @NonNull List<GenericDocument> genericDocuments)134 public List<SearchSessionStats> extract( 135 @NonNull String packageName, 136 @Nullable String database, 137 @NonNull List<GenericDocument> genericDocuments) { 138 Objects.requireNonNull(genericDocuments); 139 140 // Convert GenericDocument list to TakenActionGenericDocument list and sort them by document 141 // creation timestamp. 142 List<TakenActionGenericDocument> takenActionGenericDocuments = 143 new ArrayList<>(genericDocuments.size()); 144 for (int i = 0; i < genericDocuments.size(); ++i) { 145 try { 146 takenActionGenericDocuments.add( 147 TakenActionGenericDocument.create(genericDocuments.get(i))); 148 } catch (IllegalArgumentException e) { 149 // Skip generic documents with unknown action type. 150 } 151 } 152 Collections.sort( 153 takenActionGenericDocuments, 154 (TakenActionGenericDocument doc1, TakenActionGenericDocument doc2) -> 155 Long.compare( 156 doc1.getCreationTimestampMillis(), 157 doc2.getCreationTimestampMillis())); 158 159 List<SearchSessionStats> result = new ArrayList<>(); 160 SearchSessionStats.Builder searchSessionStatsBuilder = null; 161 SearchActionGenericDocument prevSearchAction = null; 162 // Clients are expected to report search action followed by its associated click actions. 163 // For example, [searchAction1, clickAction1, searchAction2, searchAction3, clickAction2, 164 // clickAction3]: 165 // - There are 3 search actions and 3 click actions. 166 // - clickAction1 is associated with searchAction1. 167 // - There is no click action associated with searchAction2. 168 // - clickAction2 and clickAction3 are associated with searchAction3. 169 // Here we're going to break down the list into segments. Each segment starts with a search 170 // action followed by 0 or more associated click actions, and they form a single search 171 // intent. We will analyze and extract metrics from the taken actions for the search intent. 172 // 173 // If a search intent is considered independent from the previous one, then we will start a 174 // new search session analysis. 175 for (int i = 0; i < takenActionGenericDocuments.size(); ++i) { 176 if (takenActionGenericDocuments.get(i).getActionType() 177 != ActionConstants.ACTION_TYPE_SEARCH) { 178 continue; 179 } 180 181 SearchActionGenericDocument currSearchAction = 182 (SearchActionGenericDocument) takenActionGenericDocuments.get(i); 183 List<ClickActionGenericDocument> clickActions = new ArrayList<>(); 184 // Get all click actions associated with the current search action by advancing until 185 // the next search action. 186 while (i + 1 < takenActionGenericDocuments.size() 187 && takenActionGenericDocuments.get(i + 1).getActionType() 188 != ActionConstants.ACTION_TYPE_SEARCH) { 189 if (takenActionGenericDocuments.get(i + 1).getActionType() 190 == ActionConstants.ACTION_TYPE_CLICK) { 191 clickActions.add( 192 (ClickActionGenericDocument) takenActionGenericDocuments.get(i + 1)); 193 } 194 ++i; 195 } 196 197 // Get the reference of the next search action if it exists. 198 SearchActionGenericDocument nextSearchAction = null; 199 if (i + 1 < takenActionGenericDocuments.size() 200 && takenActionGenericDocuments.get(i + 1).getActionType() 201 == ActionConstants.ACTION_TYPE_SEARCH) { 202 nextSearchAction = 203 (SearchActionGenericDocument) takenActionGenericDocuments.get(i + 1); 204 } 205 206 if (prevSearchAction != null 207 && isIndependentSearchAction(currSearchAction, prevSearchAction)) { 208 // If the current search action is independent from the previous one, then: 209 // - Build and append the previous search session stats. 210 // - Start a new search session analysis. 211 // - Ignore the previous search action when extracting stats. 212 if (searchSessionStatsBuilder != null) { 213 result.add(searchSessionStatsBuilder.build()); 214 searchSessionStatsBuilder = null; 215 } 216 prevSearchAction = null; 217 } else if (clickActions.isEmpty() 218 && isIntermediateSearchAction( 219 currSearchAction, prevSearchAction, nextSearchAction)) { 220 // If the current search action is an intermediate search action with no click 221 // actions, then we consider it as a noise and skip it. 222 continue; 223 } 224 225 // Now we get a valid search intent (the current search action + a list of click actions 226 // associated with it). Extract metrics and add SearchIntentStats into this search 227 // session. 228 if (searchSessionStatsBuilder == null) { 229 searchSessionStatsBuilder = 230 new SearchSessionStats.Builder(packageName).setDatabase(database); 231 } 232 searchSessionStatsBuilder.addSearchIntentsStats( 233 createSearchIntentStats( 234 packageName, 235 database, 236 currSearchAction, 237 clickActions, 238 prevSearchAction)); 239 prevSearchAction = currSearchAction; 240 } 241 if (searchSessionStatsBuilder != null) { 242 result.add(searchSessionStatsBuilder.build()); 243 } 244 return result; 245 } 246 247 /** 248 * Creates a {@link SearchIntentStats} object from the current search action + its associated 249 * click actions, and the previous search action (in generic document form). 250 */ createSearchIntentStats( @onNull String packageName, @Nullable String database, @NonNull SearchActionGenericDocument currSearchAction, @NonNull List<ClickActionGenericDocument> clickActions, @Nullable SearchActionGenericDocument prevSearchAction)251 private SearchIntentStats createSearchIntentStats( 252 @NonNull String packageName, 253 @Nullable String database, 254 @NonNull SearchActionGenericDocument currSearchAction, 255 @NonNull List<ClickActionGenericDocument> clickActions, 256 @Nullable SearchActionGenericDocument prevSearchAction) { 257 SearchIntentStats.Builder builder = 258 new SearchIntentStats.Builder(packageName) 259 .setDatabase(database) 260 .setTimestampMillis(currSearchAction.getCreationTimestampMillis()) 261 .setCurrQuery(currSearchAction.getQuery()) 262 .setNumResultsFetched(currSearchAction.getFetchedResultCount()) 263 .setQueryCorrectionType( 264 getQueryCorrectionType(currSearchAction, prevSearchAction)); 265 if (prevSearchAction != null) { 266 builder.setPrevQuery(prevSearchAction.getQuery()); 267 } 268 for (int i = 0; i < clickActions.size(); ++i) { 269 builder.addClicksStats(createClickStats(clickActions.get(i))); 270 } 271 return builder.build(); 272 } 273 274 /** 275 * Creates a {@link ClickStats} object from the given click action (in generic document form). 276 */ createClickStats(ClickActionGenericDocument clickAction)277 private ClickStats createClickStats(ClickActionGenericDocument clickAction) { 278 // A click is considered good if: 279 // - The user spent decent amount of time on the clicked document. 280 // - OR the client didn't provide timeStayOnResultMillis. In this case, the value will be 0. 281 boolean isGoodClick = 282 clickAction.getTimeStayOnResultMillis() <= 0 283 || clickAction.getTimeStayOnResultMillis() 284 >= GOOD_CLICK_TIME_STAY_ON_RESULT_THRESHOLD_MILLIS; 285 return new ClickStats.Builder() 286 .setTimestampMillis(clickAction.getCreationTimestampMillis()) 287 .setResultRankInBlock(clickAction.getResultRankInBlock()) 288 .setResultRankGlobal(clickAction.getResultRankGlobal()) 289 .setTimeStayOnResultMillis(clickAction.getTimeStayOnResultMillis()) 290 .setIsGoodClick(isGoodClick) 291 .build(); 292 } 293 294 /** 295 * Returns if the current search action is an intermediate search action. 296 * 297 * <p>An intermediate search action is used for detecting the situation when the user adds or 298 * deletes characters from the query (e.g. "a" -> "app" -> "apple" or "apple" -> "app" -> "a") 299 * within a short period of time. More precisely, it has to satisfy all of the following 300 * conditions: 301 * 302 * <ul> 303 * <li>There are related (non-independent) search actions before and after it. 304 * <li>It occurs within the threshold after its previous search action. 305 * <li>Its raw query string is a prefix of its previous search action's raw query string, or 306 * the opposite direction. 307 * </ul> 308 */ isIntermediateSearchAction( @onNull SearchActionGenericDocument currSearchAction, @Nullable SearchActionGenericDocument prevSearchAction, @Nullable SearchActionGenericDocument nextSearchAction)309 private static boolean isIntermediateSearchAction( 310 @NonNull SearchActionGenericDocument currSearchAction, 311 @Nullable SearchActionGenericDocument prevSearchAction, 312 @Nullable SearchActionGenericDocument nextSearchAction) { 313 Objects.requireNonNull(currSearchAction); 314 315 if (prevSearchAction == null || nextSearchAction == null) { 316 return false; 317 } 318 319 // Whether the next search action is independent from the current search action. If true, 320 // then the current search action will not be considered as an intermediate search action 321 // since it is the last search action of the search session. 322 boolean isNextSearchActionIndependent = 323 isIndependentSearchAction(nextSearchAction, currSearchAction); 324 325 // Whether the current search action occurs within the threshold after the previous search 326 // action. 327 boolean occursWithinTimeThreshold = 328 currSearchAction.getCreationTimestampMillis() 329 - prevSearchAction.getCreationTimestampMillis() 330 <= NOISE_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS; 331 332 // Whether the previous search action's raw query string is a prefix of the current search 333 // action's, or the opposite direction (e.g. "app" -> "apple" and "apple" -> "app"). 334 String prevQuery = prevSearchAction.getQuery(); 335 String currQuery = currSearchAction.getQuery(); 336 boolean isPrefix = 337 prevQuery != null 338 && currQuery != null 339 && (currQuery.startsWith(prevQuery) || prevQuery.startsWith(currQuery)); 340 341 return !isNextSearchActionIndependent && occursWithinTimeThreshold && isPrefix; 342 } 343 344 /** 345 * Returns if the current search action is independent from the previous search action. 346 * 347 * <p>If the current search action occurs later than the threshold after the previous search 348 * action, then they are considered independent. 349 */ isIndependentSearchAction( @onNull SearchActionGenericDocument currSearchAction, @NonNull SearchActionGenericDocument prevSearchAction)350 private static boolean isIndependentSearchAction( 351 @NonNull SearchActionGenericDocument currSearchAction, 352 @NonNull SearchActionGenericDocument prevSearchAction) { 353 Objects.requireNonNull(currSearchAction); 354 Objects.requireNonNull(prevSearchAction); 355 356 long searchTimeDiffMillis = 357 currSearchAction.getCreationTimestampMillis() 358 - prevSearchAction.getCreationTimestampMillis(); 359 return searchTimeDiffMillis > INDEPENDENT_SEARCH_INTENT_TIMESTAMP_DIFF_THRESHOLD_MILLIS; 360 } 361 362 /** Returns the common prefix length of the given 2 strings. */ getCommonPrefixLength(@onNull String s1, @NonNull String s2)363 private static int getCommonPrefixLength(@NonNull String s1, @NonNull String s2) { 364 Objects.requireNonNull(s1); 365 Objects.requireNonNull(s2); 366 367 int minLength = Math.min(s1.length(), s2.length()); 368 for (int i = 0; i < minLength; ++i) { 369 if (s1.charAt(i) != s2.charAt(i)) { 370 return i; 371 } 372 } 373 return minLength; 374 } 375 } 376