1 /* 2 * Copyright (C) 2020 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.timezone.location.validation; 17 18 import static java.util.stream.Collectors.toList; 19 20 import com.android.timezone.location.lookup.GeoTimeZonesFinder; 21 import com.android.timezone.location.validation.CitiesFile.City; 22 import com.android.timezone.location.validation.Types.KnownDifference; 23 import com.android.timezone.location.validation.Types.KnownDifferenceMismatch; 24 import com.android.timezone.location.validation.Types.KnownDifferences; 25 import com.android.timezone.location.validation.Types.Result; 26 import com.android.timezone.location.validation.Types.TestCaseId; 27 import com.android.timezone.tzids.TimeZoneIds; 28 29 import com.google.common.geometry.S2CellId; 30 import com.google.common.geometry.S2LatLng; 31 32 import java.io.File; 33 import java.io.FileOutputStream; 34 import java.io.IOException; 35 import java.io.OutputStreamWriter; 36 import java.io.Writer; 37 import java.net.URI; 38 import java.nio.charset.StandardCharsets; 39 import java.time.Instant; 40 import java.util.ArrayList; 41 import java.util.Collections; 42 import java.util.HashMap; 43 import java.util.Iterator; 44 import java.util.List; 45 import java.util.Map; 46 import java.util.stream.Stream; 47 48 /** 49 * A tool that compares the Android location time zone data against geonames data, recording 50 * differences and (optionally) comparing them with an historic set of known differences. 51 * 52 * <p>This can be used to understand what has changed when assessing a new tzs2.dat file or geonames 53 * data: we don't expect them to 100% agree, but we are interested in changes to the way they 54 * disagree. 55 */ 56 public class GeonamesComparison { 57 58 private static final int KNOWN_DIFFERENCE_S2_LEVEL = 16; 59 private static final boolean DBG = false; 60 private static final boolean OUTPUT_CONFIRMED_KNOWN_DIFFERENCES = false; 61 private static final boolean OUTPUT_AMBIGUOUS_RESULT_INFO = false; 62 63 /** 64 * Runs the comparison. 65 * 66 * <p>Args: 67 * {tzs2.dat file} {tzids.prototxt file} {ISO 8601 date/time for tz ID replacement} 68 * {geonames cities file} {output dir} {knowndifferences.prototxt files comma separated} 69 */ main(String[] args)70 public static void main(String[] args) throws Exception { 71 File tzS2File = new File(args[0]); 72 File tzIdsFile = new File(args[1]); 73 Instant replacementThreshold = Instant.parse(args[2]); 74 File geoNamesCitiesFile = new File(args[3]); 75 File outputDir = new File(args[4]); 76 String knownDifferencesFiles = ""; 77 if (args.length == 6) { 78 knownDifferencesFiles = args[5]; 79 } 80 81 // Load Android's time zone aliases so we can account for cases where Android may us a 82 // different ID than the upstream data. 83 TimeZoneIds androidTimeZoneIds = TimeZoneIds.load(tzIdsFile); 84 85 // Load known differences, if any, so we can ignore them and only highlight new differences. 86 KnownDifferencesRegistry knownDifferencesRegistry = new KnownDifferencesRegistry(); 87 if (!knownDifferencesFiles.isEmpty()) { 88 String[] knownDifferencesFileNames = knownDifferencesFiles.split(","); 89 for (String knownDifferencesFileName : knownDifferencesFileNames) { 90 knownDifferencesRegistry.addKnownDifferences(new File(knownDifferencesFileName)); 91 } 92 } 93 94 try (GeoTimeZonesFinder geoTzFinder = GeoTimeZonesFinder.create(tzS2File); 95 Stream<City> cityStream = CitiesFile.read(geoNamesCitiesFile)) { 96 Iterator<City> cityIterator = cityStream.iterator(); 97 int locationCheckCount = 0; 98 while (cityIterator.hasNext()) { 99 locationCheckCount++; 100 City city = cityIterator.next(); 101 102 String geonamesTimeZoneId = city.getTimeZoneId(); 103 String geonamesCountryCode = city.getIsoCountryCode(); 104 String androidCountryCodeForZoneId = 105 androidTimeZoneIds.getCountryCodeForZoneId(geonamesTimeZoneId); 106 107 if (androidCountryCodeForZoneId == null) { 108 logWarn("Geonames data=" + city + " is using an unexpected zoneId:" 109 + " Android cannot map geonamesTimeZoneId=" + geonamesTimeZoneId 110 + " to a country"); 111 continue; 112 } 113 114 // Modify the geonames expectation to account for Android's preferred time zone IDs. 115 Map<String, String> countryIdMap = androidTimeZoneIds.getCountryIdMap( 116 androidCountryCodeForZoneId, replacementThreshold); 117 String adjustedGeonamesTimeZoneId = countryIdMap.get(geonamesTimeZoneId); 118 if (!adjustedGeonamesTimeZoneId.equals(geonamesTimeZoneId)) { 119 logDebug("Mapped geonames tz=" + geonamesTimeZoneId 120 + " to " + adjustedGeonamesTimeZoneId); 121 } 122 123 Result adjustedGeonamesResult = new Result( 124 Collections.singletonList(geonamesCountryCode), 125 Collections.singletonList(adjustedGeonamesTimeZoneId)); 126 127 // Perform the Android data lookup. 128 S2LatLng latLng = city.getLatLng(); 129 List<String> cityTimeZoneIds = geoTzFinder.findTimeZonesForLatLng( 130 latLng.latDegrees(), latLng.lngDegrees()); 131 Result actualResult = createResult(androidTimeZoneIds, cityTimeZoneIds); 132 133 // Remove any matching known difference so we can easily detect old known 134 // differences that are no longer being tested. 135 S2CellId s2CellId = S2CellId.fromLatLng(latLng); 136 S2CellId knownDifferencesCellId = s2CellId.parent(KNOWN_DIFFERENCE_S2_LEVEL); 137 TestCaseId testCaseId = new TestCaseId(city.getName(), knownDifferencesCellId); 138 knownDifferencesRegistry.recordResult( 139 testCaseId, city, adjustedGeonamesResult, actualResult); 140 } 141 142 knownDifferencesRegistry.outputKnownDifferencesProtos(outputDir); 143 144 System.out.println("Checked locations: " + locationCheckCount); 145 knownDifferencesRegistry.printStatsReport(); 146 } 147 } 148 outputKnownDifferences(File outputFile, List<KnownDifference> knownDifferencesList)149 private static void outputKnownDifferences(File outputFile, 150 List<KnownDifference> knownDifferencesList) throws IOException { 151 KnownDifferences knownDifferences = KnownDifferences.create(knownDifferencesList); 152 try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile), 153 StandardCharsets.UTF_8)) { 154 writer.write(knownDifferences.toProtoText()); 155 } 156 } 157 outputKnownDifferenceMismatches( File outputFile, List<KnownDifferenceMismatch> knownDifferenceMismatches)158 private static void outputKnownDifferenceMismatches( 159 File outputFile, List<KnownDifferenceMismatch> knownDifferenceMismatches) 160 throws IOException { 161 162 try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile), 163 StandardCharsets.UTF_8)) { 164 for (KnownDifferenceMismatch knownDifferenceMismatch : knownDifferenceMismatches) { 165 writer.append("Recorded known difference:\n"); 166 KnownDifference referenceDataKnownDifference = 167 knownDifferenceMismatch.getReferenceDataKnownDifference(); 168 writer.append(referenceDataKnownDifference.toProtoText()); 169 writer.append("\n\n"); 170 writer.append("Actual:\n"); 171 writer.append(knownDifferenceMismatch.getActualKnownDifference().toProtoText()); 172 writer.append("\n\n===========================\n"); 173 } 174 } 175 } 176 createNewKnownDifference( TestCaseId testCaseId, Result referenceDataResult, Result actualResult)177 private static Types.KnownDifference createNewKnownDifference( 178 TestCaseId testCaseId, Result referenceDataResult, Result actualResult) { 179 return new Types.KnownDifference( 180 testCaseId, referenceDataResult, actualResult, Types.KnownDifference.Type.UNKNOWN, 181 "Add a comment here", URI.create("http://b/bugId")); 182 } 183 createResult( TimeZoneIds androidTimeZoneIds, List<String> cityTimeZoneIds)184 private static Result createResult( 185 TimeZoneIds androidTimeZoneIds, List<String> cityTimeZoneIds) { 186 List<String> uniqueIsoCountryCodes = cityTimeZoneIds.stream() 187 .map(androidTimeZoneIds::getCountryCodeForZoneId) 188 .distinct() 189 .collect(toList()); 190 return new Result(uniqueIsoCountryCodes, cityTimeZoneIds); 191 } 192 193 static class KnownDifferencesRegistry { 194 195 public Map<TestCaseId, KnownDifference> knownDifferences = new HashMap<>(); 196 public Map<TestCaseId, Types.KnownDifference> mutableKnownDifferencesMap = new HashMap<>(); 197 public List<Types.KnownDifference> newKnownDifferences = new ArrayList<>(); 198 public List<Types.KnownDifference> confirmedKnownDifferences = new ArrayList<>(); 199 public List<KnownDifferenceMismatch> incorrectKnownDifferences = new ArrayList<>(); 200 public List<CityResult> multipleIdAndroidResults = new ArrayList<>(); 201 addKnownDifferences(File knownDifferencesFile)202 void addKnownDifferences(File knownDifferencesFile) throws IOException { 203 Map<TestCaseId, KnownDifference> knownDifferencesMap = 204 KnownDifferences.load(knownDifferencesFile).buildIdMap(); 205 Map<TestCaseId, KnownDifference> dupes = 206 putAllNoDupes(knownDifferences, knownDifferencesMap); 207 for (TestCaseId dupeKey : dupes.keySet()) { 208 logWarn("Duplicated key in file " + knownDifferencesFile + ": " + dupeKey); 209 } 210 putAllNoDupes(mutableKnownDifferencesMap, knownDifferencesMap); 211 212 } 213 putAllNoDupes(Map<K, V> target, Map<K, V> source)214 private static <K, V> Map<K, V> putAllNoDupes(Map<K, V> target, Map<K, V> source) { 215 Map<K, V> dupes = new HashMap<>(); 216 for (Map.Entry<K, V> entry : source.entrySet()) { 217 if (target.putIfAbsent(entry.getKey(), entry.getValue()) != null) { 218 dupes.put(entry.getKey(), entry.getValue()); 219 } 220 } 221 return dupes; 222 } 223 recordResult(TestCaseId testCaseId, City city, Result geonamesResult, Result androidResult)224 void recordResult(TestCaseId testCaseId, 225 City city, 226 Result geonamesResult, 227 Result androidResult) { 228 229 Types.KnownDifference preexistingKnownDifference = 230 mutableKnownDifferencesMap.remove(testCaseId); 231 // A null known difference is the common case. 232 233 if (androidResult.hasMultipleZoneIds()) { 234 multipleIdAndroidResults.add(new CityResult(city, androidResult)); 235 } 236 237 // Check the actual meets expectations. 238 if (preexistingKnownDifference == null) { 239 if (!geonamesResult.intersects(androidResult)) { 240 // Not a known difference: Report it! 241 Types.KnownDifference newKnownDifference = createNewKnownDifference( 242 testCaseId, geonamesResult, androidResult); 243 newKnownDifferences.add(newKnownDifference); 244 } 245 } else { 246 Result knownDifferenceActualResult = 247 preexistingKnownDifference.getActualResult(); 248 Result knownDifferenceReferenceDataResult = 249 preexistingKnownDifference.getReferenceDataResult(); 250 if (knownDifferenceActualResult.equals(androidResult) 251 && knownDifferenceReferenceDataResult.equals(geonamesResult)) { 252 confirmedKnownDifferences.add(preexistingKnownDifference); 253 } else { 254 // There is a known difference, but it doesn't match: Report it! 255 KnownDifferenceMismatch mismatch = new KnownDifferenceMismatch( 256 preexistingKnownDifference, geonamesResult, androidResult); 257 incorrectKnownDifferences.add(mismatch); 258 } 259 } 260 } 261 outputKnownDifferencesProtos(File outputDir)262 void outputKnownDifferencesProtos(File outputDir) throws IOException { 263 System.out.println(); 264 System.out.println("====================="); 265 System.out.println("Outputting diff info:"); 266 System.out.println(); 267 268 if (newKnownDifferences.size() > 0) { 269 File outputFile = new File(outputDir, "new_diffs.prototxt"); 270 outputKnownDifferences(outputFile, newKnownDifferences); 271 System.out.println("New differences: " + outputFile + " (investigate these)"); 272 System.out.println(); 273 } 274 275 if (incorrectKnownDifferences.size() > 0) { 276 File outputFile = new File(outputDir, "incorrect_diffs.prototxt"); 277 outputKnownDifferenceMismatches(outputFile, incorrectKnownDifferences); 278 System.out.println( 279 "Incorrect known differences: " + outputFile + " (investigate these)"); 280 System.out.println(); 281 } 282 283 if (OUTPUT_CONFIRMED_KNOWN_DIFFERENCES && confirmedKnownDifferences.size() > 0) { 284 File outputFile = new File(outputDir, "confirmed_diffs.prototxt"); 285 outputKnownDifferences(outputFile, confirmedKnownDifferences); 286 System.out.println( 287 "Correct known differences: " + outputFile + " (don't touch these)"); 288 System.out.println(); 289 } 290 291 // Whatever is left in the mutableKnownDifferencesMap are test cases that no longer 292 // exist. 293 if (mutableKnownDifferencesMap.size() > 0) { 294 File outputFile = new File(outputDir, "missing_diffs.prototxt"); 295 outputKnownDifferences(outputFile, 296 new ArrayList<>(mutableKnownDifferencesMap.values())); 297 System.out.println( 298 "No longer tested known differences: " + outputFile + " (remove these)"); 299 System.out.println(); 300 } 301 302 if (OUTPUT_AMBIGUOUS_RESULT_INFO) { 303 // Calculate the population of lookups that resulted in ambiguous (multi-zone) 304 // answers, which should help us prioritize the parts of the world to look at for 305 // resolution improvements. 306 Map<Result, Long> ambiguousResultStats = new HashMap<>(); 307 for (CityResult cityResult : multipleIdAndroidResults) { 308 Result result = cityResult.getResult(); 309 ambiguousResultStats.merge( 310 result, (long) cityResult.getCity().getPopulation(), Long::sum); 311 } 312 313 if (ambiguousResultStats.size() > 0) { 314 File outputFile = new File(outputDir, "ambiguous_lookups.txt"); 315 outputResultCounts(outputFile, ambiguousResultStats); 316 System.out.println("Ambiguous lookups file: " + outputFile); 317 } 318 } 319 } 320 outputResultCounts( File outputFile, Map<Result, Long> ambiguousResultStats)321 private static void outputResultCounts( 322 File outputFile, Map<Result, Long> ambiguousResultStats) 323 throws IOException { 324 try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile), 325 StandardCharsets.UTF_8)) { 326 writer.write("Result, Population affected\n"); 327 for (Map.Entry<Result, Long> resultEntry : ambiguousResultStats.entrySet()) { 328 writer.write(resultEntry.getKey() + ": " + resultEntry.getValue()); 329 writer.write("\n"); 330 } 331 } 332 } 333 printStatsReport()334 void printStatsReport() { 335 System.out.println("New known differences:" + newKnownDifferences.size()); 336 System.out.println("Incorrect known differences:" + incorrectKnownDifferences.size()); 337 System.out.println("Confirmed known differences:" + confirmedKnownDifferences.size()); 338 System.out.println(); 339 System.out.println("No longer tested locations: " + mutableKnownDifferencesMap.size()); 340 System.out.println(); 341 System.out.println("Number of cities with multiple TZ ID results: " 342 + multipleIdAndroidResults.size()); 343 } 344 } 345 346 private static class CityResult { 347 private final CitiesFile.City mCity; 348 private final Result mResult; 349 CityResult(CitiesFile.City city, Result result)350 CityResult(CitiesFile.City city, 351 Result result) { 352 this.mCity = city; 353 this.mResult = result; 354 } 355 getCity()356 CitiesFile.City getCity() { 357 return mCity; 358 } 359 getResult()360 Result getResult() { 361 return mResult; 362 } 363 } 364 logWarn(String message)365 private static void logWarn(String message) { 366 System.err.println(message); 367 } 368 logDebug(String message)369 private static void logDebug(String message) { 370 if (DBG) { 371 System.out.println(message); 372 } 373 } 374 } 375