1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
18 #define LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
19
#include <cstddef>
#include <cstring>
#include <string>

#include "utils/base/logging.h"
24
25 namespace libtextclassifier3 {
26
27 // Read-only "view" of a piece of data. Does not own the underlying data.
28 class StringPiece {
29 public:
30 static constexpr size_t npos = static_cast<size_t>(-1);
31
StringPiece()32 StringPiece() : StringPiece(nullptr, 0) {}
33
StringPiece(const char * str)34 StringPiece(const char* str) // NOLINT(runtime/explicit)
35 : start_(str), size_(str == nullptr ? 0 : strlen(str)) {}
36
StringPiece(const char * start,size_t size)37 StringPiece(const char* start, size_t size) : start_(start), size_(size) {}
38
39 // Intentionally no "explicit" keyword: in function calls, we want strings to
40 // be converted to StringPiece implicitly.
StringPiece(const std::string & s)41 StringPiece(const std::string& s) // NOLINT(runtime/explicit)
42 : StringPiece(s.data(), s.size()) {}
43
StringPiece(const std::string & s,int offset,int len)44 StringPiece(const std::string& s, int offset, int len)
45 : StringPiece(s.data() + offset, len) {}
46
47 char operator[](size_t i) const { return start_[i]; }
48
49 // Returns start address of underlying data.
data()50 const char* data() const { return start_; }
51
52 // Returns number of bytes of underlying data.
size()53 size_t size() const { return size_; }
length()54 size_t length() const { return size_; }
55
empty()56 bool empty() const { return size_ == 0; }
57
58 // Returns a std::string containing a copy of the underlying data.
ToString()59 std::string ToString() const { return std::string(data(), size()); }
60
61 // Returns whether string ends with a given suffix.
EndsWith(StringPiece suffix)62 bool EndsWith(StringPiece suffix) const {
63 return suffix.empty() || (size_ >= suffix.size() &&
64 memcmp(start_ + (size_ - suffix.size()),
65 suffix.data(), suffix.size()) == 0);
66 }
67
68 // Returns whether the string begins with a given prefix.
StartsWith(StringPiece prefix)69 bool StartsWith(StringPiece prefix) const {
70 return prefix.empty() ||
71 (size_ >= prefix.size() &&
72 memcmp(start_, prefix.data(), prefix.size()) == 0);
73 }
74
Equals(StringPiece other)75 bool Equals(StringPiece other) const {
76 return size() == other.size() && memcmp(start_, other.data(), size_) == 0;
77 }
78
79 // Removes the first `n` characters from the string piece. Note that the
80 // underlying string is not changed, only the view.
RemovePrefix(int n)81 void RemovePrefix(int n) {
82 TC3_CHECK_LE(n, size_);
83 start_ += n;
84 size_ -= n;
85 }
86
87 // Removes the last `n` characters from the string piece. Note that the
88 // underlying string is not changed, only the view.
RemoveSuffix(int n)89 void RemoveSuffix(int n) {
90 TC3_CHECK_LE(n, size_);
91 size_ -= n;
92 }
93
94 // Finds the first occurrence of the substring `s` within the `StringPiece`,
95 // returning the position of the first character's match, or `npos` if no
96 // match was found.
97 // Here
98 // - c is the char to search for in the StringPiece
99 // - pos is the position at which to start the search.
100 size_t find(char c, size_t pos = 0) const noexcept {
101 if (empty() || pos >= size_) {
102 return npos;
103 }
104 const char* result =
105 static_cast<const char*>(memchr(start_ + pos, c, size_ - pos));
106 return result != nullptr ? result - start_ : npos;
107 }
108
109 size_t find(StringPiece s, size_t pos = 0) const noexcept {
110 if (empty() || pos >= size_) {
111 if (empty() && pos == 0 && s.empty()) {
112 return 0;
113 }
114 return npos;
115 }
116 const char* result = memmatch(start_ + pos, size_ - pos, s.start_, s.size_);
117 return result ? result - start_ : npos;
118 }
119
120 private:
memmatch(const char * phaystack,size_t haylen,const char * pneedle,size_t neelen)121 const char* memmatch(const char* phaystack, size_t haylen,
122 const char* pneedle, size_t neelen) const {
123 if (0 == neelen) {
124 return phaystack; // Even if haylen is 0.
125 }
126 if (haylen < neelen) {
127 return nullptr;
128 }
129
130 const char* match;
131 const char* hayend = phaystack + haylen - neelen + 1;
132 while ((match = static_cast<const char*>(
133 memchr(phaystack, pneedle[0], hayend - phaystack)))) {
134 if (memcmp(match, pneedle, neelen) == 0) {
135 return match;
136 } else {
137 phaystack = match + 1;
138 }
139 }
140 return nullptr;
141 }
142
143 const char* start_; // Not owned.
144 size_t size_;
145 };
146
EndsWith(StringPiece text,StringPiece suffix)147 inline bool EndsWith(StringPiece text, StringPiece suffix) {
148 return text.EndsWith(suffix);
149 }
150
StartsWith(StringPiece text,StringPiece prefix)151 inline bool StartsWith(StringPiece text, StringPiece prefix) {
152 return text.StartsWith(prefix);
153 }
154
ConsumePrefix(StringPiece * text,StringPiece prefix)155 inline bool ConsumePrefix(StringPiece* text, StringPiece prefix) {
156 if (!text->StartsWith(prefix)) {
157 return false;
158 }
159 text->RemovePrefix(prefix.size());
160 return true;
161 }
162
ConsumeSuffix(StringPiece * text,StringPiece suffix)163 inline bool ConsumeSuffix(StringPiece* text, StringPiece suffix) {
164 if (!text->EndsWith(suffix)) {
165 return false;
166 }
167 text->RemoveSuffix(suffix.size());
168 return true;
169 }
170
171 inline logging::LoggingStringStream& operator<<(
172 logging::LoggingStringStream& stream, StringPiece message) {
173 stream.message.append(message.data(), message.size());
174 return stream;
175 }
176
177 } // namespace libtextclassifier3
178
179 #endif // LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
180