1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.adservices.service.topics.classifier;
18 
19 import static com.android.adservices.service.stats.AdServicesStatsLog.AD_SERVICES_ERROR_REPORTED__ERROR_CODE__TOPICS_MESSAGE_DIGEST_ALGORITHM_NOT_FOUND;
20 import static com.android.adservices.service.stats.AdServicesStatsLog.AD_SERVICES_ERROR_REPORTED__ERROR_CODE__TOPICS_READ_CLASSIFIER_ASSET_FILE_FAILURE;
21 import static com.android.adservices.service.stats.AdServicesStatsLog.AD_SERVICES_ERROR_REPORTED__PPAPI_NAME__TOPICS;
22 
23 import android.annotation.NonNull;
24 import android.content.Context;
25 import android.content.res.AssetManager;
26 import android.util.JsonReader;
27 
28 import com.android.adservices.LoggerFactory;
29 import com.android.adservices.data.topics.Topic;
30 import com.android.adservices.errorlogging.ErrorLogUtil;
31 import com.android.adservices.service.stats.AdServicesLogger;
32 import com.android.adservices.service.stats.EpochComputationGetTopTopicsStats;
33 import com.android.internal.util.Preconditions;
34 
35 import java.io.IOException;
36 import java.io.InputStream;
37 import java.io.InputStreamReader;
38 import java.security.MessageDigest;
39 import java.security.NoSuchAlgorithmException;
40 import java.util.ArrayList;
41 import java.util.Collections;
42 import java.util.HashMap;
43 import java.util.List;
44 import java.util.Map;
45 import java.util.Random;
46 import java.util.stream.Collectors;
47 
48 /** Helper methods for shared implementations of {@link Classifier}. */
49 public class CommonClassifierHelper {
50     private static final LoggerFactory.Logger sLogger = LoggerFactory.getTopicsLogger();
51     // The key name of asset metadata property in classifier_assets_metadata.json
52     private static final String ASSET_PROPERTY_NAME = "property";
53     // The key name of asset element in classifier_assets_metadata.json
54     private static final String ASSET_ELEMENT_NAME = "asset_name";
55     // The algorithm name of checksum
56     private static final String SHA256_DIGEST_ALGORITHM_NAME = "SHA-256";
57     private static final String BUILD_ID_FIELD = "build_id";
58 
59     // Defined constants for error codes which have very long names.
60     private static final int TOPICS_READ_CLASSIFIER_ASSET_FILE_FAILURE =
61             AD_SERVICES_ERROR_REPORTED__ERROR_CODE__TOPICS_READ_CLASSIFIER_ASSET_FILE_FAILURE;
62     private static final int TOPICS_MESSAGE_DIGEST_ALGORITHM_NOT_FOUND =
63             AD_SERVICES_ERROR_REPORTED__ERROR_CODE__TOPICS_MESSAGE_DIGEST_ALGORITHM_NOT_FOUND;
64 
65     /**
66      * Compute the SHA256 checksum of classifier asset.
67      *
68      * @return A string of classifier asset's SHA256 checksum.
69      */
computeClassifierAssetChecksum( @onNull AssetManager assetManager, @NonNull String classifierAssetsMetadataPath)70     static String computeClassifierAssetChecksum(
71             @NonNull AssetManager assetManager, @NonNull String classifierAssetsMetadataPath) {
72         StringBuilder assetSha256CheckSum = new StringBuilder();
73         try {
74             MessageDigest sha256Digest = MessageDigest.getInstance(SHA256_DIGEST_ALGORITHM_NAME);
75 
76             try (InputStream inputStream = assetManager.open(classifierAssetsMetadataPath)) {
77 
78                 // Create byte array to read data in chunks
79                 byte[] byteArray = new byte[8192];
80                 int byteCount = 0;
81 
82                 // Read file data and update in message digest
83                 while ((byteCount = inputStream.read(byteArray)) != -1) {
84                     sha256Digest.update(byteArray, 0, byteCount);
85                 }
86 
87                 // Get the hash's bytes
88                 byte[] bytes = sha256Digest.digest();
89 
90                 // This bytes[] has bytes in decimal format;
91                 // Convert it to hexadecimal format
92                 for (int i = 0; i < bytes.length; i++) {
93                     assetSha256CheckSum.append(
94                             Integer.toString((bytes[i] & 0xff) + 0x100, 16).substring(1));
95                 }
96             } catch (IOException e) {
97                 sLogger.e(e, "Unable to read classifier asset file");
98                 ErrorLogUtil.e(
99                         e,
100                         TOPICS_READ_CLASSIFIER_ASSET_FILE_FAILURE,
101                         AD_SERVICES_ERROR_REPORTED__PPAPI_NAME__TOPICS);
102                 // When catching IOException -> return empty string.
103                 return "";
104             }
105         } catch (NoSuchAlgorithmException e) {
106             sLogger.e(e, "Unable to find correct message digest algorithm.");
107             // When catching NoSuchAlgorithmException -> return empty string.
108             ErrorLogUtil.e(
109                     e,
110                     TOPICS_MESSAGE_DIGEST_ALGORITHM_NOT_FOUND,
111                     AD_SERVICES_ERROR_REPORTED__PPAPI_NAME__TOPICS);
112             return "";
113         }
114 
115         return assetSha256CheckSum.toString();
116     }
117 
118     /**
119      * Create a list of top topicIds with numberOfTopTopics + numberOfRandomTopics topicIds.
120      *
121      * @param appTopics appPackageName to topics map.
122      * @param labelIds all topicIds from the labels file.
123      * @param random to fetch random elements from the labelIds.
124      * @param numberOfTopTopics number of top topics to be added at the start of the list.
125      * @param numberOfRandomTopics number of random topics to be added at the end of the list.
126      * @return a list of topics with numberOfTopTopics top predicted topics and numberOfRandomTopics
127      *     random topics.
128      */
129     @NonNull
getTopTopics( @onNull Map<String, List<Topic>> appTopics, @NonNull List<Integer> labelIds, @NonNull Random random, @NonNull int numberOfTopTopics, @NonNull int numberOfRandomTopics, @NonNull AdServicesLogger logger)130     static List<Topic> getTopTopics(
131             @NonNull Map<String, List<Topic>> appTopics,
132             @NonNull List<Integer> labelIds,
133             @NonNull Random random,
134             @NonNull int numberOfTopTopics,
135             @NonNull int numberOfRandomTopics,
136             @NonNull AdServicesLogger logger) {
137         Preconditions.checkArgument(
138                 numberOfTopTopics > 0, "numberOfTopTopics should larger than 0");
139         Preconditions.checkArgument(
140                 numberOfRandomTopics > 0, "numberOfRandomTopics should larger than 0");
141 
142         // A map from Topics to the count of its occurrences.
143         Map<Topic, Integer> topicsToAppTopicCount = new HashMap<>();
144         for (List<Topic> appTopic : appTopics.values()) {
145             for (Topic topic : appTopic) {
146                 topicsToAppTopicCount.put(topic, topicsToAppTopicCount.getOrDefault(topic, 0) + 1);
147             }
148         }
149 
150         // If there are no topic in the appTopics list, an empty topic list will be returned.
151         if (topicsToAppTopicCount.isEmpty()) {
152             sLogger.w("Unable to retrieve any topics from device.");
153             // Log atom for getTopTopics call.
154             logger.logEpochComputationGetTopTopicsStats(
155                     EpochComputationGetTopTopicsStats.builder()
156                             .setTopTopicCount(0)
157                             .setPaddedRandomTopicsCount(0)
158                             .setAppsConsideredCount(appTopics.size())
159                             .setSdksConsideredCount(-1)
160                             .build());
161             return new ArrayList<>();
162         }
163 
164         // Sort the topics by their count.
165         List<Topic> allSortedTopics =
166                 topicsToAppTopicCount.entrySet().stream()
167                         .sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
168                         .map(Map.Entry::getKey)
169                         .collect(Collectors.toList());
170 
171         // The number of topics to pad in top topics.
172         int numberOfRandomPaddingTopics = Math.max(0, numberOfTopTopics - allSortedTopics.size());
173         List<Topic> topTopics =
174                 allSortedTopics.subList(0, Math.min(numberOfTopTopics, allSortedTopics.size()));
175 
176         // Log atom for getTopTopics call.
177         // TODO(b/256638889): Log apps and sdk considered count.
178         logger.logEpochComputationGetTopTopicsStats(
179                 EpochComputationGetTopTopicsStats.builder()
180                         .setTopTopicCount(numberOfTopTopics)
181                         .setPaddedRandomTopicsCount(numberOfRandomPaddingTopics)
182                         .setAppsConsideredCount(appTopics.size())
183                         .setSdksConsideredCount(-1)
184                         .build());
185 
186         // If the size of topTopics smaller than numberOfTopTopics,
187         // the top topics list will be padded by numberOfRandomPaddingTopics random topics.
188         return getRandomTopics(
189                 labelIds, random, topTopics, numberOfRandomTopics + numberOfRandomPaddingTopics);
190     }
191 
192     // This helper function will populate numOfRandomTopics random topics in the topTopics list.
193     @NonNull
getRandomTopics( @onNull List<Integer> labelIds, @NonNull Random random, @NonNull List<Topic> topTopics, @NonNull int numberOfRandomTopics)194     private static List<Topic> getRandomTopics(
195             @NonNull List<Integer> labelIds,
196             @NonNull Random random,
197             @NonNull List<Topic> topTopics,
198             @NonNull int numberOfRandomTopics) {
199         if (numberOfRandomTopics <= 0) {
200             return topTopics;
201         }
202 
203         // Get version information from the first top topic if present
204         // (all topics' versions are identical in a given classification).
205         long taxonomyVersion = 0L;
206         long modelVersion = 0L;
207         if (!topTopics.isEmpty()) {
208             Topic firstTopic = topTopics.get(0);
209             taxonomyVersion = firstTopic.getTaxonomyVersion();
210             modelVersion = firstTopic.getModelVersion();
211         }
212 
213         List<Topic> returnedTopics = new ArrayList<>();
214 
215         // First add all the topTopics.
216         returnedTopics.addAll(topTopics);
217 
218         // Counter of how many random topics need to be added.
219         int topicsCounter = numberOfRandomTopics;
220 
221         // Then add random topics.
222         while (topicsCounter > 0 && returnedTopics.size() < labelIds.size()) {
223             // Pick up a random topic from labels list and check if it is a duplicate.
224             int randTopicId = labelIds.get(random.nextInt(labelIds.size()));
225             Topic randTopic = Topic.create(randTopicId, taxonomyVersion, modelVersion);
226             if (returnedTopics.contains(randTopic)) {
227                 continue;
228             }
229 
230             returnedTopics.add(randTopic);
231             topicsCounter--;
232         }
233 
234         return returnedTopics;
235     }
236 
237     /**
238      * Gets bundled model build_id from classifierAssetsMetadata file. Returns the default value of
239      * -1 if there is no build_id available.
240      *
241      * @return bundled model build_id
242      */
getBundledModelBuildId( @onNull Context context, @NonNull String classifierAssetsMetadataPath)243     public static long getBundledModelBuildId(
244             @NonNull Context context, @NonNull String classifierAssetsMetadataPath) {
245         InputStream inputStream = null; // InputStream.nullInputStream() is not available on S-.
246         try {
247             inputStream = context.getAssets().open(classifierAssetsMetadataPath);
248         } catch (IOException e) {
249             throw new RuntimeException("Failed to read bundled metadata file", e);
250         }
251         JsonReader reader = new JsonReader(new InputStreamReader(inputStream));
252         try {
253             reader.beginArray();
254             while (reader.hasNext()) {
255                 // Read through each JSONObject.
256                 reader.beginObject();
257                 while (reader.hasNext()) {
258                     // Read through version info object and find build_id.
259                     String elementKeyName = reader.nextName();
260                     if (BUILD_ID_FIELD.equals(elementKeyName)) {
261                         return reader.nextLong();
262                     } else {
263                         reader.skipValue();
264                     }
265                 }
266                 reader.endObject();
267             }
268             reader.endArray();
269         } catch (IOException e) {
270             throw new RuntimeException("Failed to parse classifier assets metadata file", e);
271         }
272         return -1;
273     }
274 }
275