1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.timezone.location.validation;
17 
18 import static java.util.stream.Collectors.toList;
19 
20 import com.android.timezone.location.lookup.GeoTimeZonesFinder;
21 import com.android.timezone.location.validation.CitiesFile.City;
22 import com.android.timezone.location.validation.Types.KnownDifference;
23 import com.android.timezone.location.validation.Types.KnownDifferenceMismatch;
24 import com.android.timezone.location.validation.Types.KnownDifferences;
25 import com.android.timezone.location.validation.Types.Result;
26 import com.android.timezone.location.validation.Types.TestCaseId;
27 import com.android.timezone.tzids.TimeZoneIds;
28 
29 import com.google.common.geometry.S2CellId;
30 import com.google.common.geometry.S2LatLng;
31 
32 import java.io.File;
33 import java.io.FileOutputStream;
34 import java.io.IOException;
35 import java.io.OutputStreamWriter;
36 import java.io.Writer;
37 import java.net.URI;
38 import java.nio.charset.StandardCharsets;
39 import java.time.Instant;
40 import java.util.ArrayList;
41 import java.util.Collections;
42 import java.util.HashMap;
43 import java.util.Iterator;
44 import java.util.List;
45 import java.util.Map;
46 import java.util.stream.Stream;
47 
48 /**
49  * A tool that compares the Android location time zone data against geonames data, recording
50  * differences and (optionally) comparing them with an historic set of known differences.
51  *
52  * <p>This can be used to understand what has changed when assessing a new tzs2.dat file or geonames
53  * data: we don't expect them to 100% agree, but we are interested in changes to the way they
54  * disagree.
55  */
56 public class GeonamesComparison {
57 
    // The S2 cell level used when building a test case ID: each city's S2 cell is widened to its
    // parent cell at this level before being combined with the city name.
    private static final int KNOWN_DIFFERENCE_S2_LEVEL = 16;
    // When true, logDebug() messages are printed to stdout.
    private static final boolean DBG = false;
    // When true, known differences that were confirmed by this run are written to their own
    // output file.
    private static final boolean OUTPUT_CONFIRMED_KNOWN_DIFFERENCES = false;
    // When true, a per-result population summary of the lookups that returned multiple zone IDs
    // is written to the output directory.
    private static final boolean OUTPUT_AMBIGUOUS_RESULT_INFO = false;
62 
63     /**
64      * Runs the comparison.
65      *
66      * <p>Args:
67      * {tzs2.dat file} {tzids.prototxt file} {ISO 8601 date/time for tz ID replacement}
68      * {geonames cities file} {output dir} {knowndifferences.prototxt files comma separated}
69      */
main(String[] args)70     public static void main(String[] args) throws Exception {
71         File tzS2File = new File(args[0]);
72         File tzIdsFile = new File(args[1]);
73         Instant replacementThreshold = Instant.parse(args[2]);
74         File geoNamesCitiesFile = new File(args[3]);
75         File outputDir = new File(args[4]);
76         String knownDifferencesFiles = "";
77         if (args.length == 6) {
78             knownDifferencesFiles = args[5];
79         }
80 
81         // Load Android's time zone aliases so we can account for cases where Android may us a
82         // different ID than the upstream data.
83         TimeZoneIds androidTimeZoneIds = TimeZoneIds.load(tzIdsFile);
84 
85         // Load known differences, if any, so we can ignore them and only highlight new differences.
86         KnownDifferencesRegistry knownDifferencesRegistry = new KnownDifferencesRegistry();
87         if (!knownDifferencesFiles.isEmpty()) {
88             String[] knownDifferencesFileNames = knownDifferencesFiles.split(",");
89             for (String knownDifferencesFileName : knownDifferencesFileNames) {
90                 knownDifferencesRegistry.addKnownDifferences(new File(knownDifferencesFileName));
91             }
92         }
93 
94         try (GeoTimeZonesFinder geoTzFinder = GeoTimeZonesFinder.create(tzS2File);
95                 Stream<City> cityStream = CitiesFile.read(geoNamesCitiesFile)) {
96             Iterator<City> cityIterator = cityStream.iterator();
97             int locationCheckCount = 0;
98             while (cityIterator.hasNext()) {
99                 locationCheckCount++;
100                 City city = cityIterator.next();
101 
102                 String geonamesTimeZoneId = city.getTimeZoneId();
103                 String geonamesCountryCode = city.getIsoCountryCode();
104                 String androidCountryCodeForZoneId =
105                         androidTimeZoneIds.getCountryCodeForZoneId(geonamesTimeZoneId);
106 
107                 if (androidCountryCodeForZoneId == null) {
108                     logWarn("Geonames data=" + city + " is using an unexpected zoneId:"
109                             + " Android cannot map geonamesTimeZoneId=" + geonamesTimeZoneId
110                             + " to a country");
111                     continue;
112                 }
113 
114                 // Modify the geonames expectation to account for Android's preferred time zone IDs.
115                 Map<String, String> countryIdMap = androidTimeZoneIds.getCountryIdMap(
116                         androidCountryCodeForZoneId, replacementThreshold);
117                 String adjustedGeonamesTimeZoneId = countryIdMap.get(geonamesTimeZoneId);
118                 if (!adjustedGeonamesTimeZoneId.equals(geonamesTimeZoneId)) {
119                     logDebug("Mapped geonames tz=" + geonamesTimeZoneId
120                             + " to " + adjustedGeonamesTimeZoneId);
121                 }
122 
123                 Result adjustedGeonamesResult = new Result(
124                         Collections.singletonList(geonamesCountryCode),
125                         Collections.singletonList(adjustedGeonamesTimeZoneId));
126 
127                 // Perform the Android data lookup.
128                 S2LatLng latLng = city.getLatLng();
129                 List<String> cityTimeZoneIds = geoTzFinder.findTimeZonesForLatLng(
130                         latLng.latDegrees(), latLng.lngDegrees());
131                 Result actualResult = createResult(androidTimeZoneIds, cityTimeZoneIds);
132 
133                 // Remove any matching known difference so we can easily detect old known
134                 // differences that are no longer being tested.
135                 S2CellId s2CellId = S2CellId.fromLatLng(latLng);
136                 S2CellId knownDifferencesCellId = s2CellId.parent(KNOWN_DIFFERENCE_S2_LEVEL);
137                 TestCaseId testCaseId = new TestCaseId(city.getName(), knownDifferencesCellId);
138                 knownDifferencesRegistry.recordResult(
139                         testCaseId, city, adjustedGeonamesResult, actualResult);
140             }
141 
142             knownDifferencesRegistry.outputKnownDifferencesProtos(outputDir);
143 
144             System.out.println("Checked locations: " + locationCheckCount);
145             knownDifferencesRegistry.printStatsReport();
146         }
147     }
148 
outputKnownDifferences(File outputFile, List<KnownDifference> knownDifferencesList)149     private static void outputKnownDifferences(File outputFile,
150             List<KnownDifference> knownDifferencesList) throws IOException {
151         KnownDifferences knownDifferences = KnownDifferences.create(knownDifferencesList);
152         try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile),
153                 StandardCharsets.UTF_8)) {
154             writer.write(knownDifferences.toProtoText());
155         }
156     }
157 
outputKnownDifferenceMismatches( File outputFile, List<KnownDifferenceMismatch> knownDifferenceMismatches)158     private static void outputKnownDifferenceMismatches(
159             File outputFile, List<KnownDifferenceMismatch> knownDifferenceMismatches)
160             throws IOException {
161 
162         try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile),
163                 StandardCharsets.UTF_8)) {
164             for (KnownDifferenceMismatch knownDifferenceMismatch : knownDifferenceMismatches) {
165                 writer.append("Recorded known difference:\n");
166                 KnownDifference referenceDataKnownDifference =
167                         knownDifferenceMismatch.getReferenceDataKnownDifference();
168                 writer.append(referenceDataKnownDifference.toProtoText());
169                 writer.append("\n\n");
170                 writer.append("Actual:\n");
171                 writer.append(knownDifferenceMismatch.getActualKnownDifference().toProtoText());
172                 writer.append("\n\n===========================\n");
173             }
174         }
175     }
176 
createNewKnownDifference( TestCaseId testCaseId, Result referenceDataResult, Result actualResult)177     private static Types.KnownDifference createNewKnownDifference(
178             TestCaseId testCaseId, Result referenceDataResult, Result actualResult) {
179         return new Types.KnownDifference(
180                 testCaseId, referenceDataResult, actualResult, Types.KnownDifference.Type.UNKNOWN,
181                 "Add a comment here", URI.create("http://b/bugId"));
182     }
183 
createResult( TimeZoneIds androidTimeZoneIds, List<String> cityTimeZoneIds)184     private static Result createResult(
185             TimeZoneIds androidTimeZoneIds, List<String> cityTimeZoneIds) {
186         List<String> uniqueIsoCountryCodes = cityTimeZoneIds.stream()
187                 .map(androidTimeZoneIds::getCountryCodeForZoneId)
188                 .distinct()
189                 .collect(toList());
190         return new Result(uniqueIsoCountryCodes, cityTimeZoneIds);
191     }
192 
193     static class KnownDifferencesRegistry {
194 
195         public Map<TestCaseId, KnownDifference> knownDifferences = new HashMap<>();
196         public Map<TestCaseId, Types.KnownDifference> mutableKnownDifferencesMap = new HashMap<>();
197         public List<Types.KnownDifference> newKnownDifferences = new ArrayList<>();
198         public List<Types.KnownDifference> confirmedKnownDifferences = new ArrayList<>();
199         public List<KnownDifferenceMismatch> incorrectKnownDifferences = new ArrayList<>();
200         public List<CityResult> multipleIdAndroidResults = new ArrayList<>();
201 
addKnownDifferences(File knownDifferencesFile)202         void addKnownDifferences(File knownDifferencesFile) throws IOException {
203             Map<TestCaseId, KnownDifference> knownDifferencesMap =
204                     KnownDifferences.load(knownDifferencesFile).buildIdMap();
205             Map<TestCaseId, KnownDifference> dupes =
206                     putAllNoDupes(knownDifferences, knownDifferencesMap);
207             for (TestCaseId dupeKey : dupes.keySet()) {
208                 logWarn("Duplicated key in file " + knownDifferencesFile + ": " + dupeKey);
209             }
210             putAllNoDupes(mutableKnownDifferencesMap, knownDifferencesMap);
211 
212         }
213 
putAllNoDupes(Map<K, V> target, Map<K, V> source)214         private static <K, V> Map<K, V> putAllNoDupes(Map<K, V> target, Map<K, V> source) {
215             Map<K, V> dupes = new HashMap<>();
216             for (Map.Entry<K, V> entry : source.entrySet()) {
217                 if (target.putIfAbsent(entry.getKey(), entry.getValue()) != null) {
218                     dupes.put(entry.getKey(), entry.getValue());
219                 }
220             }
221             return dupes;
222         }
223 
recordResult(TestCaseId testCaseId, City city, Result geonamesResult, Result androidResult)224         void recordResult(TestCaseId testCaseId,
225                 City city,
226                 Result geonamesResult,
227                 Result androidResult) {
228 
229             Types.KnownDifference preexistingKnownDifference =
230                     mutableKnownDifferencesMap.remove(testCaseId);
231             // A null known difference is the common case.
232 
233             if (androidResult.hasMultipleZoneIds()) {
234                 multipleIdAndroidResults.add(new CityResult(city, androidResult));
235             }
236 
237             // Check the actual meets expectations.
238             if (preexistingKnownDifference == null) {
239                 if (!geonamesResult.intersects(androidResult)) {
240                     // Not a known difference: Report it!
241                     Types.KnownDifference newKnownDifference = createNewKnownDifference(
242                             testCaseId, geonamesResult, androidResult);
243                     newKnownDifferences.add(newKnownDifference);
244                 }
245             } else {
246                 Result knownDifferenceActualResult =
247                         preexistingKnownDifference.getActualResult();
248                 Result knownDifferenceReferenceDataResult =
249                         preexistingKnownDifference.getReferenceDataResult();
250                 if (knownDifferenceActualResult.equals(androidResult)
251                         && knownDifferenceReferenceDataResult.equals(geonamesResult)) {
252                     confirmedKnownDifferences.add(preexistingKnownDifference);
253                 } else {
254                     // There is a known difference, but it doesn't match: Report it!
255                     KnownDifferenceMismatch mismatch = new KnownDifferenceMismatch(
256                             preexistingKnownDifference, geonamesResult, androidResult);
257                     incorrectKnownDifferences.add(mismatch);
258                 }
259             }
260         }
261 
outputKnownDifferencesProtos(File outputDir)262         void outputKnownDifferencesProtos(File outputDir) throws IOException {
263             System.out.println();
264             System.out.println("=====================");
265             System.out.println("Outputting diff info:");
266             System.out.println();
267 
268             if (newKnownDifferences.size() > 0) {
269                 File outputFile = new File(outputDir, "new_diffs.prototxt");
270                 outputKnownDifferences(outputFile, newKnownDifferences);
271                 System.out.println("New differences: " + outputFile + " (investigate these)");
272                 System.out.println();
273             }
274 
275             if (incorrectKnownDifferences.size() > 0) {
276                 File outputFile = new File(outputDir, "incorrect_diffs.prototxt");
277                 outputKnownDifferenceMismatches(outputFile, incorrectKnownDifferences);
278                 System.out.println(
279                         "Incorrect known differences: " + outputFile + " (investigate these)");
280                 System.out.println();
281             }
282 
283             if (OUTPUT_CONFIRMED_KNOWN_DIFFERENCES && confirmedKnownDifferences.size() > 0) {
284                 File outputFile = new File(outputDir, "confirmed_diffs.prototxt");
285                 outputKnownDifferences(outputFile, confirmedKnownDifferences);
286                 System.out.println(
287                         "Correct known differences: " + outputFile + " (don't touch these)");
288                 System.out.println();
289             }
290 
291             // Whatever is left in the mutableKnownDifferencesMap are test cases that no longer
292             // exist.
293             if (mutableKnownDifferencesMap.size() > 0) {
294                 File outputFile = new File(outputDir, "missing_diffs.prototxt");
295                 outputKnownDifferences(outputFile,
296                         new ArrayList<>(mutableKnownDifferencesMap.values()));
297                 System.out.println(
298                         "No longer tested known differences: " + outputFile + " (remove these)");
299                 System.out.println();
300             }
301 
302             if (OUTPUT_AMBIGUOUS_RESULT_INFO) {
303                 // Calculate the population of lookups that resulted in ambiguous (multi-zone)
304                 // answers, which should help us prioritize the parts of the world to look at for
305                 // resolution improvements.
306                 Map<Result, Long> ambiguousResultStats = new HashMap<>();
307                 for (CityResult cityResult : multipleIdAndroidResults) {
308                     Result result = cityResult.getResult();
309                     ambiguousResultStats.merge(
310                             result, (long) cityResult.getCity().getPopulation(), Long::sum);
311                 }
312 
313                 if (ambiguousResultStats.size() > 0) {
314                     File outputFile = new File(outputDir, "ambiguous_lookups.txt");
315                     outputResultCounts(outputFile, ambiguousResultStats);
316                     System.out.println("Ambiguous lookups file: " + outputFile);
317                 }
318             }
319         }
320 
outputResultCounts( File outputFile, Map<Result, Long> ambiguousResultStats)321         private static void outputResultCounts(
322                 File outputFile, Map<Result, Long> ambiguousResultStats)
323                 throws IOException {
324             try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile),
325                     StandardCharsets.UTF_8)) {
326                 writer.write("Result, Population affected\n");
327                 for (Map.Entry<Result, Long> resultEntry : ambiguousResultStats.entrySet()) {
328                     writer.write(resultEntry.getKey() + ": " + resultEntry.getValue());
329                     writer.write("\n");
330                 }
331             }
332         }
333 
printStatsReport()334         void printStatsReport() {
335             System.out.println("New known differences:" + newKnownDifferences.size());
336             System.out.println("Incorrect known differences:" + incorrectKnownDifferences.size());
337             System.out.println("Confirmed known differences:" + confirmedKnownDifferences.size());
338             System.out.println();
339             System.out.println("No longer tested locations: " + mutableKnownDifferencesMap.size());
340             System.out.println();
341             System.out.println("Number of cities with multiple TZ ID results: "
342                     + multipleIdAndroidResults.size());
343         }
344     }
345 
346     private static class CityResult {
347         private final CitiesFile.City mCity;
348         private final Result mResult;
349 
CityResult(CitiesFile.City city, Result result)350         CityResult(CitiesFile.City city,
351                 Result result) {
352             this.mCity = city;
353             this.mResult = result;
354         }
355 
getCity()356         CitiesFile.City getCity() {
357             return mCity;
358         }
359 
getResult()360         Result getResult() {
361             return mResult;
362         }
363     }
364 
    /** Prints {@code message} as a line on stderr. */
    private static void logWarn(String message) {
        System.err.println(message);
    }
368 
logDebug(String message)369     private static void logDebug(String message) {
370         if (DBG) {
371             System.out.println(message);
372         }
373     }
374 }
375