1 package org.unicode.cldr.util;
2 
3 import java.nio.charset.Charset;
4 import java.security.MessageDigest;
5 import java.util.Map;
6 import java.util.concurrent.ConcurrentHashMap;
7 
/**
 * Produces a numeric ID for a string, based on a 64-bit hash. When used
 * properly, the odds of a collision are so low that the ID can be used as a
 * proxy for the original string. The ID is always non-negative. The algorithm
 * takes the SHA-1 digest of the string's UTF-8 bytes and folds its first eight
 * bytes into a long.
 * <p>
 * The class also provides the reverse lookup, from an ID back to the string it
 * was generated from. That lookup is backed by in-memory maps, so it only knows
 * strings that have already been passed to {@link #getId(CharSequence)} since
 * this class was loaded.
 *
 * @author markdavis
 */
public final class StringId {
    /** Cache of IDs that have already been computed, keyed by source string. */
    private static final Map<String, Long> STRING_TO_ID = new ConcurrentHashMap<String, Long>();
    /** Reverse of {@link #STRING_TO_ID}; on a hash collision the most recent string wins. */
    private static final Map<Long, String> ID_TO_STRING = new ConcurrentHashMap<Long, String>();
    /** Shared SHA-1 instance. MessageDigest is stateful, so every use must hold its monitor. */
    private static final MessageDigest digest;
    private static final Charset UTF_8 = Charset.forName("UTF-8");
    /** Number of additional attempts made after a failed hash computation. */
    private static final int RETRY_LIMIT = 9;
    static {
        try {
            digest = MessageDigest.getInstance("SHA-1");
        } catch (Exception e) {
            // getInstance declares a checked exception; rethrow it unchecked with the cause.
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Gets the ID for a string, computing and caching it on first use.
     *
     * @param charSequence
     *            input text; its {@code toString()} value is what gets hashed.
     * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL.
     * @throws NullPointerException
     *             if {@code charSequence} is null.
     */
    public static long getId(CharSequence charSequence) {
        String string = charSequence.toString();
        Long cached = STRING_TO_ID.get(string);
        if (cached != null) {
            return cached;
        }
        int retriesLeft = RETRY_LIMIT;
        while (true) {
            try {
                long result = computeId(string);
                // Publish the reverse mapping first: any thread that later finds the ID in
                // STRING_TO_ID is then guaranteed to be able to map it back to its string.
                ID_TO_STRING.put(result, string);
                STRING_TO_ID.put(string, result);
                return result;
            } catch (RuntimeException e) {
                if (--retriesLeft < 0) {
                    throw e;
                }
            }
        }
    }

    /**
     * Hashes the string and folds the first eight digest bytes into a non-negative long.
     */
    private static long computeId(String string) {
        byte[] utf8 = string.getBytes(UTF_8);
        byte[] hash;
        synchronized (digest) {
            // Discard anything a previously failed attempt may have left in the shared
            // digest; otherwise a retry would hash stale bytes plus the input.
            digest.reset();
            hash = digest.digest(utf8);
        }
        long result = 0;
        for (int i = 0; i < 8; ++i) {
            result <<= 8;
            // hash[i] is deliberately NOT masked with 0xFF: a negative byte sign-extends
            // and flips every bit above it. Masking would change every generated ID.
            result ^= hash[i];
        }
        // mash the top bit so the ID is never negative
        return result & 0x7FFFFFFFFFFFFFFFL;
    }

    /**
     * Gets the ID for a string, formatted as hexadecimal.
     *
     * @param string
     *            input string.
     * @return the ID in lowercase hex, without leading zeros or a prefix.
     */
    public static String getHexId(CharSequence string) {
        return Long.toHexString(getId(string));
    }

    /**
     * Gets the string that a hex ID was generated from.
     *
     * @param string
     *            a hex ID, as returned by {@link #getHexId(CharSequence)}.
     * @return the string previously used to generate that ID, or null if no
     *         such string has been passed to {@link #getId(CharSequence)}.
     * @throws NumberFormatException
     *             if {@code string} is not a parsable hexadecimal long.
     */
    public static String getStringFromHexId(String string) {
        return getStringFromId(Long.parseLong(string, 16));
    }

    /**
     * Returns the string previously used to generate the longValue with getId.
     *
     * @param longValue
     *            an ID, as returned by {@link #getId(CharSequence)}.
     * @return the string previously used to generate the longValue with getId,
     *         or null if there is none.
     */
    public static String getStringFromId(long longValue) {
        return ID_TO_STRING.get(longValue);
    }
}