1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2016, Google, Inc.; International Business Machines * 6 * Corporation and others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.util; 11 12 import java.util.Collections; 13 import java.util.Comparator; 14 import java.util.Iterator; 15 import java.util.LinkedHashMap; 16 import java.util.LinkedHashSet; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.regex.Matcher; 22 import java.util.regex.Pattern; 23 24 /** 25 * Provides an immutable list of languages (locales) in priority order. 26 * The string format is based on the Accept-Language format 27 * <a href="http://www.ietf.org/rfc/rfc2616.txt">http://www.ietf.org/rfc/rfc2616.txt</a>, such as 28 * "af, en, fr;q=0.9". Syntactically it is slightly 29 * more lenient, in allowing extra whitespace between elements, extra commas, 30 * and more than 3 decimals (on input), and pins between 0 and 1. 31 * <p>In theory, Accept-Language indicates the relative 'quality' of each item, 32 * but in practice, all of the browsers just take an ordered list, like 33 * "en, fr, de", and synthesize arbitrary quality values that put these in the 34 * right order, like: "en, fr;q=0.7, de;q=0.3". The quality values in these de facto 35 * semantics thus have <b>nothing</b> to do with the relative qualities of the 36 * original. Accept-Language also doesn't 37 * specify the interpretation of multiple instances, eg what "en, fr, en;q=.5" 38 * means. 39 * <p>There are various ways to build a LanguagePriorityList, such 40 * as using the following equivalent patterns: 41 * 42 * <pre> 43 * list = LanguagePriorityList.add("af, en, fr;q=0.9").build(); 44 * 45 * list2 = LanguagePriorityList 46 * .add(ULocale.forString("af")) 47 * .add(ULocale.ENGLISH) 48 * .add(ULocale.FRENCH, 0.9d) 49 * .build(); 50 * </pre> 51 * When the list is built, the internal values are sorted in descending order by 52 * weight, and then by input order. That is, if two languages have the same weight, the first one in the original order 53 * comes first. If exactly the same language tag appears multiple times, 54 * the last one wins. 55 * 56 * There are two options when building. If preserveWeights are on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following: 57 * <pre> en;q=1.0 58 * de;q=1.0 59 * fr;q=0.7 60 * ja;q=0.3</pre> 61 * If it is off (the default), then all weights are reset to 1.0 after reordering. 62 * This is to match the effect of the Accept-Language semantics as used in browsers, and results in the following: 63 * * <pre> en;q=1.0 64 * de;q=1.0 65 * fr;q=1.0 66 * ja;q=1.0</pre> 67 * @author markdavis@google.com 68 * @stable ICU 4.4 69 */ 70 public class LocalePriorityList implements Iterable<ULocale> { 71 private static final double D0 = 0.0d; 72 private static final Double D1 = 1.0d; 73 74 private static final Pattern languageSplitter = Pattern.compile("\\s*,\\s*"); 75 private static final Pattern weightSplitter = Pattern 76 .compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)"); 77 private final Map<ULocale, Double> languagesAndWeights; 78 79 /** 80 * Add a language code to the list being built, with weight 1.0. 81 * 82 * @param languageCode locale/language to be added 83 * @return internal builder, for chaining 84 * @stable ICU 4.4 85 */ add(ULocale... languageCode)86 public static Builder add(ULocale... languageCode) { 87 return new Builder().add(languageCode); 88 } 89 90 /** 91 * Add a language code to the list being built, with specified weight. 92 * 93 * @param languageCode locale/language to be added 94 * @param weight value from 0.0 to 1.0 95 * @return internal builder, for chaining 96 * @stable ICU 4.4 97 */ add(ULocale languageCode, final double weight)98 public static Builder add(ULocale languageCode, final double weight) { 99 return new Builder().add(languageCode, weight); 100 } 101 102 /** 103 * Add a language priority list. 104 * 105 * @param languagePriorityList list to add all the members of 106 * @return internal builder, for chaining 107 * @stable ICU 4.4 108 */ add(LocalePriorityList languagePriorityList)109 public static Builder add(LocalePriorityList languagePriorityList) { 110 return new Builder().add(languagePriorityList); 111 } 112 113 /** 114 * Add language codes to the list being built, using a string in rfc2616 115 * (lenient) format, where each language is a valid {@link ULocale}. 116 * 117 * @param acceptLanguageString String in rfc2616 format (but leniently parsed) 118 * @return internal builder, for chaining 119 * @stable ICU 4.4 120 */ add(String acceptLanguageString)121 public static Builder add(String acceptLanguageString) { 122 return new Builder().add(acceptLanguageString); 123 } 124 125 /** 126 * Return the weight for a given language, or null if there is none. Note that 127 * the weights may be adjusted from those used to build the list. 128 * 129 * @param language to get weight of 130 * @return weight 131 * @stable ICU 4.4 132 */ getWeight(ULocale language)133 public Double getWeight(ULocale language) { 134 return languagesAndWeights.get(language); 135 } 136 137 /** 138 * {@inheritDoc} 139 * @stable ICU 4.4 140 */ 141 @Override toString()142 public String toString() { 143 final StringBuilder result = new StringBuilder(); 144 for (final ULocale language : languagesAndWeights.keySet()) { 145 if (result.length() != 0) { 146 result.append(", "); 147 } 148 result.append(language); 149 double weight = languagesAndWeights.get(language); 150 if (weight != D1) { 151 result.append(";q=").append(weight); 152 } 153 } 154 return result.toString(); 155 } 156 157 /** 158 * {@inheritDoc} 159 * @stable ICU 4.4 160 */ iterator()161 public Iterator<ULocale> iterator() { 162 return languagesAndWeights.keySet().iterator(); 163 } 164 165 /** 166 * {@inheritDoc} 167 * @stable ICU 4.4 168 */ 169 @Override equals(final Object o)170 public boolean equals(final Object o) { 171 if (o == null) { 172 return false; 173 } 174 if (this == o) { 175 return true; 176 } 177 try { 178 final LocalePriorityList that = (LocalePriorityList) o; 179 return languagesAndWeights.equals(that.languagesAndWeights); 180 } catch (final RuntimeException e) { 181 return false; 182 } 183 } 184 185 /** 186 * {@inheritDoc} 187 * @stable ICU 4.4 188 */ 189 @Override hashCode()190 public int hashCode() { 191 return languagesAndWeights.hashCode(); 192 } 193 194 // ==================== Privates ==================== 195 196 LocalePriorityList(final Map<ULocale, Double> languageToWeight)197 private LocalePriorityList(final Map<ULocale, Double> languageToWeight) { 198 this.languagesAndWeights = languageToWeight; 199 } 200 201 /** 202 * Class used for building LanguagePriorityLists 203 * @stable ICU 4.4 204 */ 205 public static class Builder { 206 /** 207 * These store the input languages and weights, in chronological order, 208 * where later additions override previous ones. 209 */ 210 private final Map<ULocale, Double> languageToWeight 211 = new LinkedHashMap<ULocale, Double>(); 212 213 /* 214 * Private constructor, only used by LocalePriorityList 215 */ Builder()216 private Builder() { 217 } 218 219 /** 220 * Creates a LocalePriorityList. This is equivalent to 221 * {@link Builder#build(boolean) Builder.build(false)}. 222 * 223 * @return A LocalePriorityList 224 * @stable ICU 4.4 225 */ build()226 public LocalePriorityList build() { 227 return build(false); 228 } 229 230 /** 231 * Creates a LocalePriorityList. 232 * 233 * @param preserveWeights when true, the weights originally came 234 * from a language priority list specified by add() are preserved. 235 * @return A LocalePriorityList 236 * @stable ICU 4.4 237 */ build(boolean preserveWeights)238 public LocalePriorityList build(boolean preserveWeights) { 239 // Walk through the input list, collecting the items with the same weights. 240 final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<Double, Set<ULocale>>( 241 myDescendingDouble); 242 for (final ULocale lang : languageToWeight.keySet()) { 243 Double weight = languageToWeight.get(lang); 244 Set<ULocale> s = doubleCheck.get(weight); 245 if (s == null) { 246 doubleCheck.put(weight, s = new LinkedHashSet<ULocale>()); 247 } 248 s.add(lang); 249 } 250 // We now have a bunch of items sorted by weight, then chronologically. 251 // We can now create a list in the right order 252 final Map<ULocale, Double> temp = new LinkedHashMap<ULocale, Double>(); 253 for (Entry<Double, Set<ULocale>> langEntry : doubleCheck.entrySet()) { 254 final Double weight = langEntry.getKey(); 255 for (final ULocale lang : langEntry.getValue()) { 256 temp.put(lang, preserveWeights ? weight : D1); 257 } 258 } 259 return new LocalePriorityList(Collections.unmodifiableMap(temp)); 260 } 261 262 /** 263 * Adds a LocalePriorityList 264 * 265 * @param languagePriorityList a LocalePriorityList 266 * @return this, for chaining 267 * @stable ICU 4.4 268 */ add( final LocalePriorityList languagePriorityList)269 public Builder add( 270 final LocalePriorityList languagePriorityList) { 271 for (final ULocale language : languagePriorityList.languagesAndWeights 272 .keySet()) { 273 add(language, languagePriorityList.languagesAndWeights.get(language)); 274 } 275 return this; 276 } 277 278 /** 279 * Adds a new language code, with weight = 1.0. 280 * 281 * @param languageCode to add with weight 1.0 282 * @return this, for chaining 283 * @stable ICU 4.4 284 */ add(final ULocale languageCode)285 public Builder add(final ULocale languageCode) { 286 return add(languageCode, D1); 287 } 288 289 /** 290 * Adds language codes, with each having weight = 1.0. 291 * 292 * @param languageCodes List of language codes. 293 * @return this, for chaining. 294 * @stable ICU 4.4 295 */ add(ULocale... languageCodes)296 public Builder add(ULocale... languageCodes) { 297 for (final ULocale languageCode : languageCodes) { 298 add(languageCode, D1); 299 } 300 return this; 301 } 302 303 /** 304 * Adds a new supported languageCode, with specified weight. Overrides any 305 * previous weight for the language. 306 * 307 * @param languageCode language/locale to add 308 * @param weight value between 0.0 and 1.1 309 * @return this, for chaining. 310 * @stable ICU 4.4 311 */ add(final ULocale languageCode, double weight)312 public Builder add(final ULocale languageCode, 313 double weight) { 314 if (languageToWeight.containsKey(languageCode)) { 315 languageToWeight.remove(languageCode); 316 } 317 if (weight <= D0) { 318 return this; // skip zeros 319 } else if (weight > D1) { 320 weight = D1; 321 } 322 languageToWeight.put(languageCode, weight); 323 return this; 324 } 325 326 /** 327 * Adds rfc2616 list. 328 * 329 * @param acceptLanguageList in rfc2616 format 330 * @return this, for chaining. 331 * @stable ICU 4.4 332 */ add(final String acceptLanguageList)333 public Builder add(final String acceptLanguageList) { 334 final String[] items = languageSplitter.split(acceptLanguageList.trim()); 335 final Matcher itemMatcher = weightSplitter.matcher(""); 336 for (final String item : items) { 337 if (itemMatcher.reset(item).matches()) { 338 final ULocale language = new ULocale(itemMatcher.group(1)); 339 final double weight = Double.parseDouble(itemMatcher.group(2)); 340 if (!(weight >= D0 && weight <= D1)) { // do ! for NaN 341 throw new IllegalArgumentException("Illegal weight, must be 0..1: " 342 + weight); 343 } 344 add(language, weight); 345 } else if (item.length() != 0) { 346 add(new ULocale(item)); 347 } 348 } 349 return this; 350 } 351 } 352 353 private static Comparator<Double> myDescendingDouble = new Comparator<Double>() { 354 public int compare(Double o1, Double o2) { 355 int result = o1.compareTo(o2); 356 return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order. 357 } 358 }; 359 } 360