1 #include "pseudolocalize.h"
2 
3 using namespace std;
4 
5 // String basis to generate expansion
6 static const String16 k_expansion_string = String16("one two three "
7     "four five six seven eight nine ten eleven twelve thirteen "
8     "fourteen fiveteen sixteen seventeen nineteen twenty");
9 
10 // Special unicode characters to override directionality of the words
11 static const String16 k_rlm = String16("\xe2\x80\x8f");
12 static const String16 k_rlo = String16("\xE2\x80\xae");
13 static const String16 k_pdf = String16("\xE2\x80\xac");
14 
15 // Placeholder marks
16 static const String16 k_placeholder_open = String16("\xc2\xbb");
17 static const String16 k_placeholder_close = String16("\xc2\xab");
18 
/**
 * Returns the UTF-8 encoded replacement for a single character, or a null
 * pointer when the character has no replacement and must be copied as-is.
 *
 * ASCII letters map to look-alike glyphs ('q'/'v' map to their upper-case
 * forms, 'Q' to 'q'), and '!', '?' and '$' map to other symbols. Upper-case
 * 'F' has no entry.
 */
static const char*
pseudolocalize_char(const char16_t c)
{
    // Replacements for 'a'..'z', indexed by (c - 'a').
    static const char* const lower_case[26] = {
        "\xc3\xa5",     // a
        "\xc9\x93",     // b
        "\xc3\xa7",     // c
        "\xc3\xb0",     // d
        "\xc3\xa9",     // e
        "\xc6\x92",     // f
        "\xc4\x9d",     // g
        "\xc4\xa5",     // h
        "\xc3\xae",     // i
        "\xc4\xb5",     // j
        "\xc4\xb7",     // k
        "\xc4\xbc",     // l
        "\xe1\xb8\xbf", // m
        "\xc3\xb1",     // n
        "\xc3\xb6",     // o
        "\xc3\xbe",     // p
        "\x51",         // q
        "\xc5\x95",     // r
        "\xc5\xa1",     // s
        "\xc5\xa3",     // t
        "\xc3\xbb",     // u
        "\x56",         // v
        "\xc5\xb5",     // w
        "\xd1\x85",     // x
        "\xc3\xbd",     // y
        "\xc5\xbe",     // z
    };

    // Replacements for 'A'..'Z', indexed by (c - 'A').
    static const char* const upper_case[26] = {
        "\xc3\x85",     // A
        "\xce\xb2",     // B
        "\xc3\x87",     // C
        "\xc3\x90",     // D
        "\xc3\x89",     // E
        nullptr,        // F (no replacement)
        "\xc4\x9c",     // G
        "\xc4\xa4",     // H
        "\xc3\x8e",     // I
        "\xc4\xb4",     // J
        "\xc4\xb6",     // K
        "\xc4\xbb",     // L
        "\xe1\xb8\xbe", // M
        "\xc3\x91",     // N
        "\xc3\x96",     // O
        "\xc3\x9e",     // P
        "\x71",         // Q
        "\xc5\x94",     // R
        "\xc5\xa0",     // S
        "\xc5\xa2",     // T
        "\xc3\x9b",     // U
        "\xce\xbd",     // V
        "\xc5\xb4",     // W
        "\xc3\x97",     // X
        "\xc3\x9d",     // Y
        "\xc5\xbd",     // Z
    };

    if (c >= 'a' && c <= 'z') {
        return lower_case[c - 'a'];
    }
    if (c >= 'A' && c <= 'Z') {
        return upper_case[c - 'A'];
    }

    // The few punctuation marks that also get a stand-in.
    switch (c) {
        case '!':   return "\xc2\xa1";
        case '?':   return "\xc2\xbf";
        case '$':   return "\xe2\x82\xac";
        default:    return nullptr;
    }
}
80 
/**
 * Returns true when c is one of the characters that terminates a format
 * placeholder: a conversion letter, or '%' for the literal "%%" form.
 */
static bool
is_possible_normal_placeholder_end(const char16_t c) {
    // Every accepted terminator; the scan stops before the trailing NUL, so a
    // NUL input is never reported as a terminator.
    static const char terminators[] = "sScCdoxXfeEgGaAbBhH%n";
    for (const char* candidate = terminators; *candidate != '\0'; ++candidate) {
        if (c == *candidate) {
            return true;
        }
    }
    return false;
}
108 
109 String16
pseudo_generate_expansion(const unsigned int length)110 pseudo_generate_expansion(const unsigned int length) {
111     String16 result = k_expansion_string;
112     const char16_t* s = result.string();
113     if (result.size() < length) {
114         result += String16(" ");
115         result += pseudo_generate_expansion(length - result.size());
116     } else {
117         int ext = 0;
118         // Should contain only whole words, so looking for a space
119         for (unsigned int i = length + 1; i < result.size(); ++i) {
120           ++ext;
121           if (s[i] == ' ') {
122             break;
123           }
124         }
125         result.remove(length + ext, 0);
126     }
127     return result;
128 }
129 
130 /**
131  * Converts characters so they look like they've been localized.
132  *
133  * Note: This leaves escape sequences untouched so they can later be
134  * processed by ResTable::collectString in the normal way.
135  */
136 String16
pseudolocalize_string(const String16 & source)137 pseudolocalize_string(const String16& source)
138 {
139     const char16_t* s = source.string();
140     String16 result;
141     const size_t I = source.size();
142     for (size_t i=0; i<I; i++) {
143         char16_t c = s[i];
144         if (c == '\\') {
145             // Escape syntax, no need to pseudolocalize
146             if (i<I-1) {
147                 result += String16("\\");
148                 i++;
149                 c = s[i];
150                 switch (c) {
151                     case 'u':
152                         // this one takes up 5 chars
153                         result += String16(s+i, 5);
154                         i += 4;
155                         break;
156                     case 't':
157                     case 'n':
158                     case '#':
159                     case '@':
160                     case '?':
161                     case '"':
162                     case '\'':
163                     case '\\':
164                     default:
165                         result.append(&c, 1);
166                         break;
167                 }
168             } else {
169                 result.append(&c, 1);
170             }
171         } else if (c == '%') {
172             // Placeholder syntax, no need to pseudolocalize
173             result += k_placeholder_open;
174             bool end = false;
175             result.append(&c, 1);
176             while (!end && i < I) {
177                 ++i;
178                 c = s[i];
179                 result.append(&c, 1);
180                 if (is_possible_normal_placeholder_end(c)) {
181                     end = true;
182                 } else if (c == 't') {
183                     ++i;
184                     c = s[i];
185                     result.append(&c, 1);
186                     end = true;
187                 }
188             }
189             result += k_placeholder_close;
190         } else if (c == '<' || c == '&') {
191             // html syntax, no need to pseudolocalize
192             bool tag_closed = false;
193             while (!tag_closed && i < I) {
194                 if (c == '&') {
195                     String16 escape_text;
196                     escape_text.append(&c, 1);
197                     bool end = false;
198                     size_t htmlCodePos = i;
199                     while (!end && htmlCodePos < I) {
200                         ++htmlCodePos;
201                         c = s[htmlCodePos];
202                         escape_text.append(&c, 1);
203                         // Valid html code
204                         if (c == ';') {
205                             end = true;
206                             i = htmlCodePos;
207                         }
208                         // Wrong html code
209                         else if (!((c == '#' ||
210                                  (c >= 'a' && c <= 'z') ||
211                                  (c >= 'A' && c <= 'Z') ||
212                                  (c >= '0' && c <= '9')))) {
213                             end = true;
214                         }
215                     }
216                     result += escape_text;
217                     if (escape_text != String16("&lt;")) {
218                         tag_closed = true;
219                     }
220                     continue;
221                 }
222                 if (c == '>') {
223                     tag_closed = true;
224                     result.append(&c, 1);
225                     continue;
226                 }
227                 result.append(&c, 1);
228                 i++;
229                 c = s[i];
230             }
231         } else {
232             // This is a pure text that should be pseudolocalized
233             const char* p = pseudolocalize_char(c);
234             if (p != NULL) {
235                 result += String16(p);
236             } else {
237                 result.append(&c, 1);
238             }
239         }
240     }
241     return result;
242 }
243 
244 String16
pseudobidi_string(const String16 & source)245 pseudobidi_string(const String16& source)
246 {
247     const char16_t* s = source.string();
248     String16 result;
249     result += k_rlm;
250     result += k_rlo;
251     for (size_t i=0; i<source.size(); i++) {
252         char16_t c = s[i];
253         switch(c) {
254             case ' ': result += k_pdf;
255                       result += k_rlm;
256                       result.append(&c, 1);
257                       result += k_rlm;
258                       result += k_rlo;
259                       break;
260             default: result.append(&c, 1);
261                      break;
262         }
263     }
264     result += k_pdf;
265     result += k_rlm;
266     return result;
267 }
268 
269