1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2001-2009, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 package com.ibm.icu.samples.shaping;
11 
12 import com.ibm.icu.text.ArabicShaping;
13 import com.ibm.icu.text.ArabicShapingException;
14 
15 /**
16  * Interactive test for Arabic shaping.
17  * Invoke from a command line passing args and strings.  Use '-help' to see description of arguments.
18  */
19 public class ArabicShapingSample{
20     private static final int COPY = 0;
21     private static final int INPLACE = 1;
22     private static final int STRING = 2;
23 
main(String[] args)24     public static final void main(String[] args) {
25         int testtype = COPY;
26         int options = 0;
27         int ss = 0;
28         int sl = -1;
29         int ds = 0;
30         int dl = -1;
31         String text = "$22.4 test 123 \ufef6\u0644\u0622 456 \u0664\u0665\u0666!";
32 
33         for (int i = 0; i < args.length; ++i) {
34             String arg = args[i];
35             if (arg.charAt(0) == '-') {
36                 String opt = arg.substring(1);
37                 String val = opt;
38                 int index = arg.indexOf(':');
39                 if (index != -1) {
40                     opt = opt.substring(0, Math.min(index, 3));
41                     val = arg.substring(index + 1);
42                 }
43 
44                 if (opt.equalsIgnoreCase("len")) {
45                     options &= ~ArabicShaping.LENGTH_MASK;
46                     if (val.equalsIgnoreCase("gs")) {
47                         options |= ArabicShaping.LENGTH_GROW_SHRINK;
48                     } else if (val.equalsIgnoreCase("sn")) {
49                         options |= ArabicShaping.LENGTH_FIXED_SPACES_NEAR;
50                     } else if (val.equalsIgnoreCase("se")) {
51                         options |= ArabicShaping.LENGTH_FIXED_SPACES_AT_END;
52                     } else if (val.equalsIgnoreCase("sb")) {
53                         options |= ArabicShaping.LENGTH_FIXED_SPACES_AT_BEGINNING;
54                     } else {
55                         throwValError(opt, val);
56                     }
57                 } else if (opt.equalsIgnoreCase("dir")) {
58                     options &= ~ArabicShaping.TEXT_DIRECTION_MASK;
59                     if (val.equalsIgnoreCase("log")) {
60                         options |= ArabicShaping.TEXT_DIRECTION_LOGICAL;
61                     } else if (val.equalsIgnoreCase("vis")) {
62                         options |= ArabicShaping.TEXT_DIRECTION_VISUAL_LTR;
63                     } else {
64                         throwValError(opt, val);
65                     }
66                 } else if (opt.equalsIgnoreCase("let")) {
67                     options &= ~ArabicShaping.LETTERS_MASK;
68                     if (val.equalsIgnoreCase("no")) {
69                         options |= ArabicShaping.LETTERS_NOOP;
70                     } else if (val.equalsIgnoreCase("sh")) {
71                         options |= ArabicShaping.LETTERS_SHAPE;
72                     } else if (val.equalsIgnoreCase("un")) {
73                         options |= ArabicShaping.LETTERS_UNSHAPE;
74                     } else if (val.equalsIgnoreCase("ta")) {
75                         options |= ArabicShaping.LETTERS_SHAPE_TASHKEEL_ISOLATED;
76                     } else {
77                         throwValError(opt, val);
78                     }
79                 } else if (opt.equalsIgnoreCase("dig")) {
80                     options &= ~ArabicShaping.DIGITS_MASK;
81                     if (val.equalsIgnoreCase("no")) {
82                         options |= ArabicShaping.DIGITS_NOOP;
83                     } else if (val.equalsIgnoreCase("ea")) {
84                         options |= ArabicShaping.DIGITS_EN2AN;
85                     } else if (val.equalsIgnoreCase("ae")) {
86                         options |= ArabicShaping.DIGITS_AN2EN;
87                     } else if (val.equalsIgnoreCase("lr")) {
88                         options |= ArabicShaping.DIGITS_EN2AN_INIT_LR;
89                     } else if (val.equalsIgnoreCase("al")) {
90                         options |= ArabicShaping.DIGITS_EN2AN_INIT_AL;
91                     } else {
92                         throwValError(opt, val);
93                     }
94                 } else if (opt.equalsIgnoreCase("typ")) {
95                     options &= ~ArabicShaping.DIGIT_TYPE_MASK;
96                     if (val.equalsIgnoreCase("an")) {
97                         options |= ArabicShaping.DIGIT_TYPE_AN;
98                     } else if (val.equalsIgnoreCase("ex")) {
99                         options |= ArabicShaping.DIGIT_TYPE_AN_EXTENDED;
100                     } else {
101                         throwValError(opt, val);
102                     }
103                 } else if (opt.equalsIgnoreCase("dst")) {
104                     try {
105                         ds = Integer.parseInt(val);
106                     }
107                     catch (Exception e) {
108                         throwValError(opt, val);
109                     }
110                 } else if (opt.equalsIgnoreCase("dln")) {
111                     try {
112                         dl = Integer.parseInt(val);
113                     }
114                     catch (Exception e) {
115                         throwValError(opt, val);
116                     }
117                 } else if (opt.equalsIgnoreCase("sst")) {
118                     try {
119                         ss = Integer.parseInt(val);
120                     }
121                     catch (Exception e) {
122                         throwValError(opt, val);
123                     }
124                 } else if (opt.equalsIgnoreCase("sln")) {
125                     try {
126                         sl = Integer.parseInt(val);
127                     }
128                     catch (Exception e) {
129                         throwValError(opt, val);
130                     }
131                 } else if (opt.equalsIgnoreCase("tes")) {
132                     if (val.equalsIgnoreCase("cp")) {
133                         testtype = COPY;
134                     } else if (val.equalsIgnoreCase("ip")) {
135                         testtype = INPLACE;
136                     } else if (val.equalsIgnoreCase("st")) {
137                         testtype = STRING;
138                     } else {
139                         throwValError(opt, val);
140                     }
141                 } else if (opt.equalsIgnoreCase("help")) {
142                     System.out.println(usage);
143                 } else {
144                     throwOptError(opt);
145                 }
146             } else {
147                 // assume text
148                 text = parseText(arg);
149             }
150         }
151 
152         if (sl < 0) {
153             sl = text.length() - ss;
154             System.out.println("sl defaulting to " + sl);
155         }
156         if (dl < 0) {
157             dl = 2 * sl;
158             System.out.println("dl defaulting to " + dl);
159         }
160 
161         ArabicShaping shaper = new ArabicShaping(options);
162         System.out.println("shaper: " + shaper);
163 
164         char[] src = text.toCharArray();
165         System.out.println(" input: '" + escapedText(src, ss, sl) + "'");
166         if (testtype != STRING) {
167             System.out.println("start: " + ss + " length: " + sl + " total length: " + src.length);
168         }
169 
170         int result = -1;
171         char[] dest = null;
172 
173         try {
174             switch (testtype) {
175             case COPY:
176                 dest = new char[ds + dl];
177                 result = shaper.shape(src, ss, sl, dest, ds, dl);
178                 break;
179 
180             case INPLACE:
181                 shaper.shape(src, ss, sl);
182                 ds = ss;
183                 result = sl;
184                 dest = src;
185                 break;
186 
187             case STRING:
188                 dest = shaper.shape(text).toCharArray();
189                 ds = 0;
190                 result = dest.length;
191                 break;
192             }
193 
194             System.out.println("output: '" + escapedText(dest, ds, result) + "'");
195             System.out.println("length: " + result);
196             if (ds != 0 || result != dest.length) {
197                 System.out.println("full output: '" + escapedText(dest, 0, dest.length) + "'");
198             }
199         }
200         catch (ArabicShapingException e) {
201             System.out.println("Caught ArabicShapingException");
202             System.out.println(e);
203         }
204         catch (Exception e) {
205             System.out.println("Caught Exception");
206             System.out.println(e);
207         }
208     }
209 
throwOptError(String opt)210     private static void throwOptError(String opt) {
211         throwUsageError("unknown option: " + opt);
212     }
213 
throwValError(String opt, String val)214     private static void throwValError(String opt, String val) {
215         throwUsageError("unknown value: " + val + " for option: " + opt);
216     }
217 
throwUsageError(String message)218     private static void throwUsageError(String message) {
219         StringBuffer buf = new StringBuffer("*** usage error ***\n");
220         buf.append(message);
221         buf.append("\n");
222         buf.append(usage);
223         throw new Error(buf.toString());
224     }
225 
226     private static final String usage =
227         "Usage: [option]* [text]\n" +
228         "  where option is in the format '-opt[:val]'\n" +
229         "  options are:\n" +
230         "    -len:[gs|sn|se|sb]    (length: grow/shrink, spaces near, spaces end, spaces beginning)\n" +
231         "    -dir:[log|vis]        (direction: logical, visual)\n" +
232         "    -let:[no|sh|un|ta]    (letters: noop, shape, unshape, tashkeel)\n" +
233         // "    -let:[no|sh|un]       (letters: noop, shape, unshape)\n" +
234         "    -dig:[no|ea|ae|lr|al] (digits: noop, en2an, an2en, en2an_lr, en2an_al)\n" +
235         "    -typ:[an|ex]          (digit type: arabic, arabic extended)\n" +
236         "    -dst:#                (dest start: [integer])\n" +
237         "    -dln:#                (dest length (max size): [integer])\n" +
238         "    -sst:#                (source start: [integer])\n" +
239         "    -sln:#                (source length: [integer])\n" +
240         "    -tes:[cp|ip|st]       (test type: copy, in place, string)\n" +
241         "    -help                 (print this help message)\n" +
242         "  text can contain unicode escape values in the format '\\uXXXX' only\n";
243 
escapedText(char[] text, int start, int length)244     private static String escapedText(char[] text, int start, int length) {
245         StringBuffer buf = new StringBuffer();
246         for (int i = start, e = start + length; i < e; ++i) {
247             char ch = text[i];
248             if (ch < 0x20 || ch > 0x7e) {
249                 buf.append("\\u");
250                 if (ch < 0x1000) {
251                     buf.append('0');
252                 }
253                 if (ch < 0x100) {
254                     buf.append('0');
255                 }
256                 if (ch < 0x10) {
257                     buf.append('0');
258                 }
259                 buf.append(Integer.toHexString(ch));
260             } else {
261                 buf.append(ch);
262             }
263         }
264         return buf.toString();
265     }
266 
parseText(String text)267     private static String parseText(String text) {
268         // process unicode escapes (only)
269         StringBuffer buf = new StringBuffer();
270         char[] chars = text.toCharArray();
271         for (int i = 0; i < chars.length; ++i) {
272             char ch = chars[i];
273             if (ch == '\\') {
274                 if ((i < chars.length - 1) &&
275                     (chars[i+1] == 'u')) {
276                     int val = Integer.parseInt(text.substring(i+2, i+6), 16);
277                     buf.append((char)val);
278                     i += 5;
279                 } else {
280                     buf.append('\\');
281                 }
282             } else {
283                 buf.append(ch);
284             }
285         }
286         return buf.toString();
287     }
288 }
289