1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "util/BigBuffer.h"
18 #include "util/Maybe.h"
19 #include "util/StringPiece.h"
20 #include "util/Util.h"
21 
22 #include <algorithm>
23 #include <ostream>
24 #include <string>
25 #include <utils/Unicode.h>
26 #include <vector>
27 
28 namespace aapt {
29 namespace util {
30 
splitAndTransform(const StringPiece & str,char sep,const std::function<char (char)> & f)31 static std::vector<std::string> splitAndTransform(const StringPiece& str, char sep,
32         const std::function<char(char)>& f) {
33     std::vector<std::string> parts;
34     const StringPiece::const_iterator end = std::end(str);
35     StringPiece::const_iterator start = std::begin(str);
36     StringPiece::const_iterator current;
37     do {
38         current = std::find(start, end, sep);
39         parts.emplace_back(str.substr(start, current).toString());
40         if (f) {
41             std::string& part = parts.back();
42             std::transform(part.begin(), part.end(), part.begin(), f);
43         }
44         start = current + 1;
45     } while (current != end);
46     return parts;
47 }
48 
split(const StringPiece & str,char sep)49 std::vector<std::string> split(const StringPiece& str, char sep) {
50     return splitAndTransform(str, sep, nullptr);
51 }
52 
splitAndLowercase(const StringPiece & str,char sep)53 std::vector<std::string> splitAndLowercase(const StringPiece& str, char sep) {
54     return splitAndTransform(str, sep, ::tolower);
55 }
56 
/**
 * Returns the sub-range of `str` that remains after dropping every leading and trailing
 * character for which util::isspace16() is true.
 *
 * An empty piece, or one whose data pointer is null, is returned unchanged. The result
 * points into the same storage as `str`.
 */
StringPiece16 trimWhitespace(const StringPiece16& str) {
    const char16_t* first = str.data();
    if (first == nullptr || str.size() == 0) {
        return str;
    }

    const char16_t* last = first + str.length();

    // Advance past leading whitespace.
    for (; first != last && util::isspace16(*first); ++first) {
    }

    // Back up over trailing whitespace; `last` stays one past the final kept character.
    for (; last != first && util::isspace16(last[-1]); --last) {
    }

    return StringPiece16(first, last - first);
}
75 
/**
 * Returns the sub-range of `str` that remains after dropping every leading and trailing
 * character for which isspace() is true.
 *
 * An empty piece, or one whose data pointer is null, is returned unchanged. The result
 * points into the same storage as `str`.
 */
StringPiece trimWhitespace(const StringPiece& str) {
    if (str.size() == 0 || str.data() == nullptr) {
        return str;
    }

    const char* start = str.data();
    const char* end = str.data() + str.length();

    // isspace() is only defined for values representable as unsigned char (or EOF). A
    // plain char holding a byte >= 0x80 is negative where char is signed, so convert
    // through unsigned char before classifying.
    while (start != end && isspace(static_cast<unsigned char>(*start))) {
        start++;
    }

    while (end != start && isspace(static_cast<unsigned char>(*(end - 1)))) {
        end--;
    }

    return StringPiece(start, end - start);
}
94 
/**
 * Finds the first character of `str` that is neither an ASCII letter or digit
 * (a-z, A-Z, 0-9) nor one of the characters listed in `allowedChars`.
 *
 * @param str          the text to scan.
 * @param allowedChars extra characters that are accepted in addition to ASCII alphanumerics.
 * @return an iterator to the first offending character, or `str.end()` if there is none.
 */
StringPiece16::const_iterator findNonAlphaNumericAndNotInSet(const StringPiece16& str,
        const StringPiece16& allowedChars) {
    const auto isAsciiAlphaNumeric = [](char16_t ch) -> bool {
        return (ch >= u'a' && ch <= u'z') ||
                (ch >= u'A' && ch <= u'Z') ||
                (ch >= u'0' && ch <= u'9');
    };

    const auto strEnd = str.end();
    auto pos = str.begin();
    while (pos != strEnd) {
        const char16_t ch = *pos;
        if (!isAsciiAlphaNumeric(ch)) {
            // Not alphanumeric: it is only acceptable if explicitly allowed.
            const auto allowedEnd = allowedChars.end();
            if (std::find(allowedChars.begin(), allowedEnd, ch) == allowedEnd) {
                return pos;
            }
        }
        ++pos;
    }
    return strEnd;
}
120 
isJavaClassName(const StringPiece16 & str)121 bool isJavaClassName(const StringPiece16& str) {
122     size_t pieces = 0;
123     for (const StringPiece16& piece : tokenize(str, u'.')) {
124         pieces++;
125         if (piece.empty()) {
126             return false;
127         }
128 
129         // Can't have starting or trailing $ character.
130         if (piece.data()[0] == u'$' || piece.data()[piece.size() - 1] == u'$') {
131             return false;
132         }
133 
134         if (findNonAlphaNumericAndNotInSet(piece, u"$_") != piece.end()) {
135             return false;
136         }
137     }
138     return pieces >= 2;
139 }
140 
isJavaPackageName(const StringPiece16 & str)141 bool isJavaPackageName(const StringPiece16& str) {
142     if (str.empty()) {
143         return false;
144     }
145 
146     size_t pieces = 0;
147     for (const StringPiece16& piece : tokenize(str, u'.')) {
148         pieces++;
149         if (piece.empty()) {
150             return false;
151         }
152 
153         if (piece.data()[0] == u'_' || piece.data()[piece.size() - 1] == u'_') {
154             return false;
155         }
156 
157         if (findNonAlphaNumericAndNotInSet(piece, u"_") != piece.end()) {
158             return false;
159         }
160     }
161     return pieces >= 1;
162 }
163 
getFullyQualifiedClassName(const StringPiece16 & package,const StringPiece16 & className)164 Maybe<std::u16string> getFullyQualifiedClassName(const StringPiece16& package,
165                                                  const StringPiece16& className) {
166     if (className.empty()) {
167         return {};
168     }
169 
170     if (util::isJavaClassName(className)) {
171         return className.toString();
172     }
173 
174     if (package.empty()) {
175         return {};
176     }
177 
178     if (className.data()[0] != u'.') {
179         return {};
180     }
181 
182     std::u16string result(package.data(), package.size());
183     result.append(className.data(), className.size());
184     if (!isJavaClassName(result)) {
185         return {};
186     }
187     return result;
188 }
189 
/**
 * Counts the run of ASCII decimal digits ('0'-'9') at the front of [start, end).
 *
 * @return the number of leading digits; 0 if the range is empty or does not begin with
 *         a digit.
 */
static size_t consumeDigits(const char16_t* start, const char16_t* end) {
    const char16_t* firstNonDigit = std::find_if(start, end, [](char16_t ch) -> bool {
        return ch < u'0' || ch > u'9';
    });
    return static_cast<size_t>(firstNonDigit - start);
}
195 
verifyJavaStringFormat(const StringPiece16 & str)196 bool verifyJavaStringFormat(const StringPiece16& str) {
197     const char16_t* c = str.begin();
198     const char16_t* const end = str.end();
199 
200     size_t argCount = 0;
201     bool nonpositional = false;
202     while (c != end) {
203         if (*c == u'%' && c + 1 < end) {
204             c++;
205 
206             if (*c == u'%') {
207                 c++;
208                 continue;
209             }
210 
211             argCount++;
212 
213             size_t numDigits = consumeDigits(c, end);
214             if (numDigits > 0) {
215                 c += numDigits;
216                 if (c != end && *c != u'$') {
217                     // The digits were a size, but not a positional argument.
218                     nonpositional = true;
219                 }
220             } else if (*c == u'<') {
221                 // Reusing last argument, bad idea since positions can be moved around
222                 // during translation.
223                 nonpositional = true;
224 
225                 c++;
226 
227                 // Optionally we can have a $ after
228                 if (c != end && *c == u'$') {
229                     c++;
230                 }
231             } else {
232                 nonpositional = true;
233             }
234 
235             // Ignore size, width, flags, etc.
236             while (c != end && (*c == u'-' ||
237                     *c == u'#' ||
238                     *c == u'+' ||
239                     *c == u' ' ||
240                     *c == u',' ||
241                     *c == u'(' ||
242                     (*c >= u'0' && *c <= '9'))) {
243                 c++;
244             }
245 
246             /*
247              * This is a shortcut to detect strings that are going to Time.format()
248              * instead of String.format()
249              *
250              * Comparison of String.format() and Time.format() args:
251              *
252              * String: ABC E GH  ST X abcdefgh  nost x
253              *   Time:    DEFGHKMS W Za  d   hkm  s w yz
254              *
255              * Therefore we know it's definitely Time if we have:
256              *     DFKMWZkmwyz
257              */
258             if (c != end) {
259                 switch (*c) {
260                 case 'D':
261                 case 'F':
262                 case 'K':
263                 case 'M':
264                 case 'W':
265                 case 'Z':
266                 case 'k':
267                 case 'm':
268                 case 'w':
269                 case 'y':
270                 case 'z':
271                     return true;
272                 }
273             }
274         }
275 
276         if (c != end) {
277             c++;
278         }
279     }
280 
281     if (argCount > 1 && nonpositional) {
282         // Multiple arguments were specified, but some or all were non positional. Translated
283         // strings may rearrange the order of the arguments, which will break the string.
284         return false;
285     }
286     return true;
287 }
288 
parseUnicodeCodepoint(const char16_t ** start,const char16_t * end)289 static Maybe<char16_t> parseUnicodeCodepoint(const char16_t** start, const char16_t* end) {
290     char16_t code = 0;
291     for (size_t i = 0; i < 4 && *start != end; i++, (*start)++) {
292         char16_t c = **start;
293         int a;
294         if (c >= '0' && c <= '9') {
295             a = c - '0';
296         } else if (c >= 'a' && c <= 'f') {
297             a = c - 'a' + 10;
298         } else if (c >= 'A' && c <= 'F') {
299             a = c - 'A' + 10;
300         } else {
301             return make_nothing<char16_t>();
302         }
303         code = (code << 4) | a;
304     }
305     return make_value(code);
306 }
307 
append(const StringPiece16 & str)308 StringBuilder& StringBuilder::append(const StringPiece16& str) {
309     if (!mError.empty()) {
310         return *this;
311     }
312 
313     const char16_t* const end = str.end();
314     const char16_t* start = str.begin();
315     const char16_t* current = start;
316     while (current != end) {
317         if (mLastCharWasEscape) {
318             switch (*current) {
319                 case u't':
320                     mStr += u'\t';
321                     break;
322                 case u'n':
323                     mStr += u'\n';
324                     break;
325                 case u'#':
326                     mStr += u'#';
327                     break;
328                 case u'@':
329                     mStr += u'@';
330                     break;
331                 case u'?':
332                     mStr += u'?';
333                     break;
334                 case u'"':
335                     mStr += u'"';
336                     break;
337                 case u'\'':
338                     mStr += u'\'';
339                     break;
340                 case u'\\':
341                     mStr += u'\\';
342                     break;
343                 case u'u': {
344                     current++;
345                     Maybe<char16_t> c = parseUnicodeCodepoint(&current, end);
346                     if (!c) {
347                         mError = "invalid unicode escape sequence";
348                         return *this;
349                     }
350                     mStr += c.value();
351                     current -= 1;
352                     break;
353                 }
354 
355                 default:
356                     // Ignore.
357                     break;
358             }
359             mLastCharWasEscape = false;
360             start = current + 1;
361         } else if (*current == u'"') {
362             if (!mQuote && mTrailingSpace) {
363                 // We found an opening quote, and we have
364                 // trailing space, so we should append that
365                 // space now.
366                 if (mTrailingSpace) {
367                     // We had trailing whitespace, so
368                     // replace with a single space.
369                     if (!mStr.empty()) {
370                         mStr += u' ';
371                     }
372                     mTrailingSpace = false;
373                 }
374             }
375             mQuote = !mQuote;
376             mStr.append(start, current - start);
377             start = current + 1;
378         } else if (*current == u'\'' && !mQuote) {
379             // This should be escaped.
380             mError = "unescaped apostrophe";
381             return *this;
382         } else if (*current == u'\\') {
383             // This is an escape sequence, convert to the real value.
384             if (!mQuote && mTrailingSpace) {
385                 // We had trailing whitespace, so
386                 // replace with a single space.
387                 if (!mStr.empty()) {
388                     mStr += u' ';
389                 }
390                 mTrailingSpace = false;
391             }
392             mStr.append(start, current - start);
393             start = current + 1;
394             mLastCharWasEscape = true;
395         } else if (!mQuote) {
396             // This is not quoted text, so look for whitespace.
397             if (isspace16(*current)) {
398                 // We found whitespace, see if we have seen some
399                 // before.
400                 if (!mTrailingSpace) {
401                     // We didn't see a previous adjacent space,
402                     // so mark that we did.
403                     mTrailingSpace = true;
404                     mStr.append(start, current - start);
405                 }
406 
407                 // Keep skipping whitespace.
408                 start = current + 1;
409             } else if (mTrailingSpace) {
410                 // We saw trailing space before, so replace all
411                 // that trailing space with one space.
412                 if (!mStr.empty()) {
413                     mStr += u' ';
414                 }
415                 mTrailingSpace = false;
416             }
417         }
418         current++;
419     }
420     mStr.append(start, end - start);
421     return *this;
422 }
423 
utf8ToUtf16(const StringPiece & utf8)424 std::u16string utf8ToUtf16(const StringPiece& utf8) {
425     ssize_t utf16Length = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(utf8.data()),
426             utf8.length());
427     if (utf16Length <= 0) {
428         return {};
429     }
430 
431     std::u16string utf16;
432     utf16.resize(utf16Length);
433     utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(), &*utf16.begin());
434     return utf16;
435 }
436 
utf16ToUtf8(const StringPiece16 & utf16)437 std::string utf16ToUtf8(const StringPiece16& utf16) {
438     ssize_t utf8Length = utf16_to_utf8_length(utf16.data(), utf16.length());
439     if (utf8Length <= 0) {
440         return {};
441     }
442 
443     std::string utf8;
444     utf8.resize(utf8Length);
445     utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin());
446     return utf8;
447 }
448 
writeAll(std::ostream & out,const BigBuffer & buffer)449 bool writeAll(std::ostream& out, const BigBuffer& buffer) {
450     for (const auto& b : buffer) {
451         if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
452             return false;
453         }
454     }
455     return true;
456 }
457 
copy(const BigBuffer & buffer)458 std::unique_ptr<uint8_t[]> copy(const BigBuffer& buffer) {
459     std::unique_ptr<uint8_t[]> data = std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
460     uint8_t* p = data.get();
461     for (const auto& block : buffer) {
462         memcpy(p, block.buffer.get(), block.size);
463         p += block.size;
464     }
465     return data;
466 }
467 
extractResFilePathParts(const StringPiece16 & path,StringPiece16 * outPrefix,StringPiece16 * outEntry,StringPiece16 * outSuffix)468 bool extractResFilePathParts(const StringPiece16& path, StringPiece16* outPrefix,
469                              StringPiece16* outEntry, StringPiece16* outSuffix) {
470     if (!stringStartsWith<char16_t>(path, u"res/")) {
471         return false;
472     }
473 
474     StringPiece16::const_iterator lastOccurence = path.end();
475     for (auto iter = path.begin() + StringPiece16(u"res/").size(); iter != path.end(); ++iter) {
476         if (*iter == u'/') {
477             lastOccurence = iter;
478         }
479     }
480 
481     if (lastOccurence == path.end()) {
482         return false;
483     }
484 
485     auto iter = std::find(lastOccurence, path.end(), u'.');
486     *outSuffix = StringPiece16(iter, path.end() - iter);
487     *outEntry = StringPiece16(lastOccurence + 1, iter - lastOccurence - 1);
488     *outPrefix = StringPiece16(path.begin(), lastOccurence - path.begin() + 1);
489     return true;
490 }
491 
492 } // namespace util
493 } // namespace aapt
494