1 // Copyright 2019 Google LLC.
2 // Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3 
4 #include "include/core/SkTypes.h"
5 #include "include/private/SkTemplates.h"
6 #include "modules/skplaintexteditor/src/word_boundaries.h"
7 
8 #include <unicode/ubrk.h>
9 #include <unicode/utext.h>
10 #include <unicode/utypes.h>
11 #include <memory>
12 
13 
14 namespace {
15 template <typename T,typename P,P* p> using resource = std::unique_ptr<T, SkFunctionWrapper<P, p>>;
16 using ICUBrk   = resource<UBreakIterator, decltype(ubrk_close)       , ubrk_close       >;
17 using ICUUText = resource<UText         , decltype(utext_close)      , utext_close      >;
18 }  // namespace
19 
GetUtf8WordBoundaries(const char * begin,size_t byteCount,const char * locale)20 std::vector<bool> GetUtf8WordBoundaries(const char* begin, size_t byteCount, const char* locale) {
21     std::vector<bool> result;
22     if (0 == byteCount) {
23         return result;
24     }
25     result.resize(byteCount);
26 
27     UErrorCode status = U_ZERO_ERROR;
28     UText sUtf8UText = UTEXT_INITIALIZER;
29     ICUUText utf8UText(utext_openUTF8(&sUtf8UText, begin, byteCount, &status));
30     if (U_FAILURE(status)) {
31         SkDebugf("Could not create utf8UText: %s", u_errorName(status));
32         return result;
33     }
34 
35     ICUBrk wordBreakIterator(ubrk_open(UBRK_WORD, locale, nullptr, 0, &status));
36     if (!wordBreakIterator || U_FAILURE(status)) {
37         SkDEBUGF("Could not create line break iterator: %s", u_errorName(status));
38         return result;
39     }
40 
41     ubrk_setUText(&*wordBreakIterator, utf8UText.get(), &status);
42     if (U_FAILURE(status)) {
43         SkDebugf("Could not setText on break iterator: %s", u_errorName(status));
44         return result;
45     }
46 
47     int32_t pos = ubrk_first(&*wordBreakIterator);
48     while (pos != UBRK_DONE) {
49         if ((size_t)pos < byteCount) {
50             result[pos] = true;
51         }
52         pos = ubrk_next(&*wordBreakIterator);
53     }
54     return result;
55 }
56