1 package org.unicode.cldr.test;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.List;
7 import java.util.regex.Matcher;
8 import java.util.regex.Pattern;
9 
10 import org.unicode.cldr.util.CldrUtility;
11 import org.unicode.cldr.util.PatternCache;
12 import org.unicode.cldr.util.SimpleHtmlParser;
13 import org.unicode.cldr.util.SimpleHtmlParser.Type;
14 import org.unicode.cldr.util.TransliteratorUtilities;
15 
16 import com.ibm.icu.text.MessageFormat;
17 
18 /**
19  * Private class to get the messages from a help file.
20  */
21 public class HelpMessages {
22     private static final Matcher CLEANUP_BOOKMARK = PatternCache.get("[^a-zA-Z0-9]").matcher("");
23 
24     private static final MessageFormat DEFAULT_HEADER_PATTERN = new MessageFormat("<p>{0}</p>"
25         + CldrUtility.LINE_SEPARATOR);
26 
27     private static final Matcher HEADER_HTML = PatternCache.get("<h[0-9]>(.*)</h[0-9]>").matcher("");
28 
29     List<Matcher> keys = new ArrayList<>();
30 
31     List<String> values = new ArrayList<>();
32 
33     enum Status {
34         BASE, BEFORE_CELL, IN_CELL, IN_INSIDE_TABLE
35     }
36 
37     StringBuilder[] currentColumn = new StringBuilder[2];
38 
39     int column = 0;
40 
41     private static HelpMessages helpMessages;
42 
43     /**
44      * Create a HelpMessages object from a filename.
45      * The file has to be in the format of a table of <keyRegex,htmlText> pairs,
46      * where the key is a keyRegex expression and htmlText is arbitrary HTML text. For example:
47      * <p>
48      * {@link http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/chart_messages.html} is used for
49      * chart messages, where the key is the name of the chart.
50      * <p>
51      * {@link http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/test_help_messages.html} is used
52      * for help messages in the survey tool, where the key is an xpath.
53      *
54      * @param filename
55      */
HelpMessages(String filename)56     public HelpMessages(String filename) {
57         currentColumn[0] = new StringBuilder();
58         currentColumn[1] = new StringBuilder();
59         BufferedReader in;
60         try {
61             in = CldrUtility.getUTF8Data(filename);
62             int tableCount = 0;
63 
64             boolean inContent = false;
65             // if the table level is 1 (we are in the main table), then we look for <td>...</td><td>...</td>. That
66             // means that we have column 1 and column 2.
67 
68             SimpleHtmlParser simple = new SimpleHtmlParser().setReader(in);
69             StringBuilder result = new StringBuilder();
70             boolean hadPop = false;
71             main: while (true) {
72                 Type x = simple.next(result);
73                 switch (x) {
74                 case ELEMENT: // with /table we pop the count
75                     if (SimpleHtmlParser.equals("table", result)) {
76                         if (hadPop) {
77                             --tableCount;
78                         } else {
79                             ++tableCount;
80                         }
81                     } else if (tableCount == 1) {
82                         if (SimpleHtmlParser.equals("tr", result)) {
83                             if (hadPop) {
84                                 addHelpMessages();
85                             }
86                             column = 0;
87                         } else if (SimpleHtmlParser.equals("td", result)) {
88                             if (hadPop) {
89                                 inContent = false;
90                                 ++column;
91                             } else {
92                                 inContent = true;
93                                 continue main; // skip adding
94                             }
95                         }
96                     }
97                     break;
98                 case ELEMENT_POP:
99                     hadPop = true;
100                     break;
101                 case ELEMENT_END:
102                     hadPop = false;
103                     break;
104                 case DONE:
105                     break main;
106                 }
107                 if (inContent) {
108                     SimpleHtmlParser.writeResult(x, result, currentColumn[column]);
109                 }
110             }
111 
112             in.close();
113         } catch (IOException e) {
114             System.err.println("Can't initialize help text");
115         }
116     }
117 
118     /**
119      * Get message corresponding to a key out of the file set on this object.
120      * For many files, the key will be an xpath, but it doesn't have to be.
121      * Note that <i>all</i> of pairs of <keyRegex,htmlText> where the key matches keyRegex
122      * will be concatenated together in order to get the result.
123      *
124      * @param key
125      * @return
126      */
find(String key)127     public String find(String key) {
128         return find(key, DEFAULT_HEADER_PATTERN);
129     }
130 
131     /**
132      * Get message corresponding to a key out of the file set on this object.
133      * For many files, the key will be an xpath, but it doesn't have to be.
134      * Note that <i>all</i> of pairs of <keyRegex,htmlText> where the key matches keyRegex
135      * will be concatenated together in order to get the result.
136      *
137      * @param key
138      * @param addHeader
139      *            true if you want a header formed by looking at all the hN elements.
140      * @return
141      */
find(String key, MessageFormat headerPattern)142     public String find(String key, MessageFormat headerPattern) {
143         StringBuilder header = new StringBuilder();
144         StringBuilder result = new StringBuilder();
145         int keyCount = 0;
146         for (int i = 0; i < keys.size(); ++i) {
147             if (keys.get(i).reset(key).matches()) {
148                 if (result.length() != 0) {
149                     result.append(CldrUtility.LINE_SEPARATOR);
150                 }
151                 String value = values.get(i);
152                 if (headerPattern != null) {
153                     HEADER_HTML.reset(value);
154                     int lastEnd = 0;
155                     StringBuilder newValue = new StringBuilder();
156                     while (HEADER_HTML.find()) {
157                         String contents = HEADER_HTML.group(1);
158                         if (contents.contains("<")) {
159                             continue; // disallow other formatting
160                         }
161                         String bookmark = "HM_" + CLEANUP_BOOKMARK.reset(contents).replaceAll("_");
162                         keyCount++;
163                         if (header.length() > 0) {
164                             header.append(" | ");
165                         }
166                         header.append("<a href='#").append(bookmark).append("'>").append(contents).append("</a>");
167                         newValue.append(value.substring(lastEnd, HEADER_HTML.start(1)));
168                         newValue.append("<a name='").append(bookmark).append("'>").append(contents).append("</a>");
169                         lastEnd = HEADER_HTML.end(1);
170                     }
171                     newValue.append(value.substring(lastEnd));
172                     value = newValue.toString();
173                 }
174                 result.append(value);
175             }
176         }
177         if (result.length() != 0) {
178             if (keyCount > 1) {
179                 result.insert(0, headerPattern.format(new Object[] { header.toString() }));
180             }
181             return result.toString();
182         }
183         return null;
184     }
185 
addHelpMessages()186     private void addHelpMessages() {
187         if (column == 2) { // must have two columns
188             try {
189                 // remove the first character and the last two characters, since the are >....</
190                 String key = currentColumn[0].substring(1, currentColumn[0].length() - 2).trim();
191                 String value = currentColumn[1].substring(1, currentColumn[1].length() - 2).trim();
192                 if (ExampleGenerator.DEBUG_SHOW_HELP) {
193                     System.out.println("{" + key + "} => {" + value + "}");
194                 }
195                 Matcher m = Pattern.compile(TransliteratorUtilities.fromHTML.transliterate(key), Pattern.COMMENTS)
196                     .matcher("");
197                 keys.add(m);
198                 values.add(value);
199             } catch (RuntimeException e) {
200                 System.err.println("Help file has illegal regex: " + currentColumn[0]);
201             }
202         }
203         currentColumn[0].setLength(0);
204         currentColumn[1].setLength(0);
205         column = 0;
206     }
207 
getChartMessages(String xpath)208     public static String getChartMessages(String xpath) {
209         synchronized (HelpMessages.class) {
210             if (HelpMessages.helpMessages == null) {
211                 HelpMessages.helpMessages = new HelpMessages("chart_messages.html");
212             }
213         }
214         return HelpMessages.helpMessages.find(xpath);
215         // if (xpath.contains("/exemplarCharacters")) {
216         // result = "The standard exemplar characters are those used in customary writing ([a-z] for English; "
217         // + "the auxiliary characters are used in foreign words found in typical magazines, newspapers, &c.; "
218         // + "currency auxilliary characters are those used in currency symbols, like 'US$ 1,234'. ";
219         // }
220         // return result == null ? null : TransliteratorUtilities.toHTML.transliterate(result);
221     }
222 }