1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 ******************************************************************************
5 * Copyright (C) 2007-2012, International Business Machines Corporation and   *
6 * others. All Rights Reserved.                                               *
7 ******************************************************************************
8 */
9 
10 package com.ibm.icu.impl.duration.impl;
11 
12 import java.util.Locale;
13 
/**
 * Static helpers for the duration formatting implementation: parsing a
 * locale identifier string and spelling out a number with Chinese-style
 * (or Korean-style) digit, unit and level symbols.
 */
public class Utils {
  /**
   * Builds a {@link Locale} from an identifier of the form
   * {@code language}, {@code language_region} or
   * {@code language_region_variant}.
   *
   * <p>The string is split at the first underscore into language and
   * remainder, and the remainder is split at its first underscore into
   * region and variant. Any further underscores stay inside the variant.
   * Missing parts are passed to the {@code Locale} constructor as empty
   * strings.
   *
   * @param s the locale identifier; must not be null
   * @return the locale built from the parsed parts
   */
  public static final Locale localeFromString(String s) {
    String language = s;
    String region = "";
    String variant = "";

    int x = language.indexOf('_');
    if (x != -1) {
      region = language.substring(x + 1);
      language = language.substring(0, x);
    }
    x = region.indexOf('_');
    if (x != -1) {
      variant = region.substring(x + 1);
      region = region.substring(0, x);
    }
    return new Locale(language, region, variant);
  }
    /*
  public static <T> T[] arraycopy(T[] src) {
    T[] result = (T[])Array.newInstance(src.getClass().getComponentType(), src.length); // can we do this without casting?
    for (int i = 0; i < src.length; ++i) {
      result[i] = src[i];
    }
    return result;
  }
    */

  /**
   * Spells out the magnitude of {@code n} using the symbols in {@code zh}.
   *
   * <p>Interesting features of chinese numbers:
   * <ul>
   * <li>Each digit is followed by a unit symbol (10's, 100's, 1000's).
   * <li>Units repeat in levels of 10,000, there are symbols for each level too (except 1's).
   * <li>The digit 2 has a special form before the 10 symbol and at the end of the number.
   * <li>If the first digit in the number is 1 and its unit is 10, the 1 is omitted.
   * <li>Sequences of 0 digits and their units are replaced by a single 0 and no unit.
   * <li>If there are two such sequences of 0 digits in a level (1000's and 10's), the 1000's 0
   *     is also omitted.
   * <li>The 1000's 0 is also omitted in alternating levels, such that it is omitted in the
   *     rightmost level with a 10's 0, or if none, in the rightmost level.
   * <li>Level symbols are omitted if all of their units are omitted
   * </ul>
   *
   * <p>Behavior at the edges, as implemented:
   * <ul>
   * <li>A negative {@code n} is negated first, so only its magnitude is spelled out.
   *     {@code Long.MIN_VALUE} cannot be negated; it stays negative, takes the
   *     small-number path and yields the zero digit.
   * <li>Values 0 through 10 return a single symbol: {@code zh.liang} for 2, otherwise
   *     {@code zh.digits[n]}.
   * <li>A number with 17 or more decimal digits needs a fourth level symbol. The digit
   *     sets declared in {@link ChineseDigits} provide only three, so such input fails
   *     with an {@code ArrayIndexOutOfBoundsException}.
   * <li>When {@code zh.ko} is set, a leading 1 is dropped regardless of which symbol
   *     follows it.
   * </ul>
   *
   * @param n the number to spell out
   * @param zh the symbol set to use
   * @return the spelled-out number
   */
  public static String chineseNumber(long n, ChineseDigits zh) {
    if (n < 0) {
      n = -n;
    }
    if (n <= 10) {
      if (n == 2) {
        return String.valueOf(zh.liang);
      }
      return String.valueOf(zh.digits[(int)n]);
    }

    // 9223372036854775807
    char[] buf = new char[40]; // as long as we get, and actually we can't get this high, no units past zhao
    char[] digits = String.valueOf(n).toCharArray();

    // first, generate all the digits in place
    // convert runs of zeros into a single zero, but keep places
    //
    // buf is filled from the right. Every decimal place occupies a fixed
    // slot (digit, then unit or level symbol to its right); a slot that must
    // not appear in the output holds '*' and is squeezed out at the end.
    // Within the loop, u is the index of the next unit to emit (-1 at the
    // start of a four-digit block) and l is the index of the next level.
    boolean inZero = true; // true if we should zap zeros in this block, resets at start of block
    boolean forcedZero = false; // true if we have a 0 in tens's place
    int x = buf.length;
    for (int i = digits.length, u = -1, l = -1; --i >= 0;) {
      if (u == -1) {
        if (l != -1) {
          buf[--x] = zh.levels[l];
          inZero = true;
          forcedZero = false;
        }
        ++u;
      } else {
        buf[--x] = zh.units[u++];
        if (u == 3) {
          u = -1;
          ++l;
        }
      }
      int d = digits[i] - '0';
      if (d == 0) {
        // a zero digit never keeps its unit; level symbols (u == 0) are
        // left alone here and handled after the loop
        if (x < buf.length-1 && u != 0) {
          buf[x] = '*';
        }
        if (inZero || forcedZero) {
          buf[--x] = '*';
        } else {
          buf[--x] = zh.digits[0];
          inZero = true;
          forcedZero = u == 1;
        }
      } else {
        inZero = false;
        buf[--x] = zh.digits[d];
      }
    }

    // scanning from right, find first required 'ling'
    // we only care if n > 101,0000 as this is the first case where
    // it might shift.  remove optional lings in alternating blocks.
    if (n > 1000000) {
      boolean last = true;
      int i = buf.length - 3; // ten's digit of the rightmost block
      do {
        // Compare against the zero symbol of the active digit set. A literal
        // '0' only matches the DEBUG digits, which would make the result for
        // every other digit set depend on the number of blocks instead.
        if (buf[i] == zh.digits[0]) {
          break;
        }
        i -= 8;
        last = !last;
      } while (i > x);

      i = buf.length - 7; // thousand's digit of the rightmost block
      do {
        if (buf[i] == zh.digits[0] && !last) {
          buf[i] = '*';
        }
        i -= 8;
        last = !last;
      } while (i > x);

      // remove levels for empty blocks
      if (n >= 100000000) {
        i = buf.length - 8; // level symbol between the two rightmost blocks
        do {
          // the block is empty when every slot to the left of its level
          // symbol (up to seven of them, bounded by the start of the
          // number) has been zapped
          boolean empty = true;
          for (int j = i-1, e = Math.max(x-1, i-8); j > e; --j) {
            if (buf[j] != '*') {
              empty = false;
              break;
            }
          }
          if (empty) {
            // put a zero in place of the level symbol when the slot to its
            // right holds a real non-zero symbol, otherwise drop it
            if (buf[i+1] != '*' && buf[i+1] != zh.digits[0]) {
              buf[i] = zh.digits[0];
            } else {
              buf[i] = '*';
            }
          }
          i -= 8;
        } while (i > x);
      }
    }

    // replace er by liang except before or after shi or after ling
    // (a zapped slot to the left counts like ling here)
    for (int i = x; i < buf.length; ++i) {
      if (buf[i] != zh.digits[2]) continue;
      if (i < buf.length - 1 && buf[i+1] == zh.units[0]) continue;
      if (i > x && (buf[i-1] == zh.units[0] || buf[i-1] == zh.digits[0] || buf[i-1] == '*')) continue;

      buf[i] = zh.liang;
    }

    // eliminate leading 1 if following unit is shi
    // (for ko digit sets the leading 1 goes whatever follows it)
    if (buf[x] == zh.digits[1] && (zh.ko || buf[x+1] == zh.units[0])) {
      ++x;
    }

    // now, compress out the '*'
    int w = x;
    for (int r = x; r < buf.length; ++r) {
      if (buf[r] != '*') {
        buf[w++] = buf[r];
      }
    }
    return new String(buf, x, w-x);
  }

//  public static void main(String[] args) {
//    for (int i = 0; i < args.length; ++i) {
//      String arg = args[i];
//      System.out.print(arg);
//      System.out.print(" > ");
//      long n = Long.parseLong(arg);
//      System.out.println(chineseNumber(n, ChineseDigits.DEBUG));
//    }
//  }

  /**
   * A set of symbols used by {@link #chineseNumber(long, ChineseDigits)}.
   */
  public static class ChineseDigits {
    /** Symbols for the values 0 through 10, indexed by value. */
    final char[] digits;
    /** Symbols for the ten's, hundred's and thousand's places, in that order. */
    final char[] units;
    /** Symbols for successive groups of four digits above the lowest group. */
    final char[] levels;
    /** Alternate symbol for the digit 2. */
    final char liang;
    /** When true, a leading 1 is dropped whatever symbol follows it. */
    final boolean ko;

    /**
     * Creates a symbol set. Each string is split into its individual chars.
     *
     * @param digits symbols for 0 through 10, in order
     * @param units symbols for the ten's, hundred's and thousand's places
     * @param levels symbols for the groups of four digits above the lowest
     * @param liang alternate symbol for the digit 2
     * @param ko whether a leading 1 is always dropped
     */
    ChineseDigits(String digits, String units, String levels, char liang, boolean ko) {
      this.digits = digits.toCharArray();
      this.units = units.toCharArray();
      this.levels = levels.toCharArray();
      this.liang = liang;
      this.ko = ko;
    }

    /** ASCII stand-ins for every symbol, for inspecting the output structure. */
    public static final ChineseDigits DEBUG =
      new ChineseDigits("0123456789s", "sbq", "WYZ", 'L', false);

    public static final ChineseDigits TRADITIONAL =
      new ChineseDigits("\u96f6\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341", // to shi
                        "\u5341\u767e\u5343", // shi, bai, qian
                        "\u842c\u5104\u5146", // wan, yi, zhao
                        '\u5169', false); // liang

    public static final ChineseDigits SIMPLIFIED =
      new ChineseDigits("\u96f6\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341", // to shi
                        "\u5341\u767e\u5343", // shi, bai, qian
                        "\u4e07\u4ebf\u5146", // wan, yi, zhao
                        '\u4e24', false); // liang

    // no 1 before first unit no matter what it is
    // not sure if there are 'ling' units
    // the third level symbol is a literal '?' placeholder, and the alternate
    // symbol for 2 is the same char as the regular digit 2
    public static final ChineseDigits KOREAN =
      new ChineseDigits("\uc601\uc77c\uc774\uc0bc\uc0ac\uc624\uc721\uce60\ud314\uad6c\uc2ed", // to ten
                        "\uc2ed\ubc31\ucc9c", // 10, 100, 1000
                        "\ub9cc\uc5b5?", // 10^4, 10^8, 10^12
                        '\uc774', true);
  }
}
227