1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
18 #define LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
19 
#include <cstddef>
#include <cstring>
#include <string>

#include "utils/base/logging.h"
24 
25 namespace libtextclassifier3 {
26 
27 // Read-only "view" of a piece of data.  Does not own the underlying data.
28 class StringPiece {
29  public:
30   static constexpr size_t npos = static_cast<size_t>(-1);
31 
StringPiece()32   StringPiece() : StringPiece(nullptr, 0) {}
33 
StringPiece(const char * str)34   StringPiece(const char* str)  // NOLINT(runtime/explicit)
35       : start_(str), size_(str == nullptr ? 0 : strlen(str)) {}
36 
StringPiece(const char * start,size_t size)37   StringPiece(const char* start, size_t size) : start_(start), size_(size) {}
38 
39   // Intentionally no "explicit" keyword: in function calls, we want strings to
40   // be converted to StringPiece implicitly.
StringPiece(const std::string & s)41   StringPiece(const std::string& s)  // NOLINT(runtime/explicit)
42       : StringPiece(s.data(), s.size()) {}
43 
StringPiece(const std::string & s,int offset,int len)44   StringPiece(const std::string& s, int offset, int len)
45       : StringPiece(s.data() + offset, len) {}
46 
47   char operator[](size_t i) const { return start_[i]; }
48 
49   // Returns start address of underlying data.
data()50   const char* data() const { return start_; }
51 
52   // Returns number of bytes of underlying data.
size()53   size_t size() const { return size_; }
length()54   size_t length() const { return size_; }
55 
empty()56   bool empty() const { return size_ == 0; }
57 
58   // Returns a std::string containing a copy of the underlying data.
ToString()59   std::string ToString() const { return std::string(data(), size()); }
60 
61   // Returns whether string ends with a given suffix.
EndsWith(StringPiece suffix)62   bool EndsWith(StringPiece suffix) const {
63     return suffix.empty() || (size_ >= suffix.size() &&
64                               memcmp(start_ + (size_ - suffix.size()),
65                                      suffix.data(), suffix.size()) == 0);
66   }
67 
68   // Returns whether the string begins with a given prefix.
StartsWith(StringPiece prefix)69   bool StartsWith(StringPiece prefix) const {
70     return prefix.empty() ||
71            (size_ >= prefix.size() &&
72             memcmp(start_, prefix.data(), prefix.size()) == 0);
73   }
74 
Equals(StringPiece other)75   bool Equals(StringPiece other) const {
76     return size() == other.size() && memcmp(start_, other.data(), size_) == 0;
77   }
78 
79   // Removes the first `n` characters from the string piece. Note that the
80   // underlying string is not changed, only the view.
RemovePrefix(int n)81   void RemovePrefix(int n) {
82     TC3_CHECK_LE(n, size_);
83     start_ += n;
84     size_ -= n;
85   }
86 
87   // Removes the last `n` characters from the string piece. Note that the
88   // underlying string is not changed, only the view.
RemoveSuffix(int n)89   void RemoveSuffix(int n) {
90     TC3_CHECK_LE(n, size_);
91     size_ -= n;
92   }
93 
94   // Finds the first occurrence of the substring `s` within the `StringPiece`,
95   // returning the position of the first character's match, or `npos` if no
96   // match was found.
97   // Here
98   // - c is the char to search for in the StringPiece
99   // - pos is the position at which to start the search.
100   size_t find(char c, size_t pos = 0) const noexcept {
101     if (empty() || pos >= size_) {
102       return npos;
103     }
104     const char* result =
105         static_cast<const char*>(memchr(start_ + pos, c, size_ - pos));
106     return result != nullptr ? result - start_ : npos;
107   }
108 
109   size_t find(StringPiece s, size_t pos = 0) const noexcept {
110     if (empty() || pos >= size_) {
111       if (empty() && pos == 0 && s.empty()) {
112         return 0;
113       }
114       return npos;
115     }
116     const char* result = memmatch(start_ + pos, size_ - pos, s.start_, s.size_);
117     return result ? result - start_ : npos;
118   }
119 
120  private:
memmatch(const char * phaystack,size_t haylen,const char * pneedle,size_t neelen)121   const char* memmatch(const char* phaystack, size_t haylen,
122                        const char* pneedle, size_t neelen) const {
123     if (0 == neelen) {
124       return phaystack;  // Even if haylen is 0.
125     }
126     if (haylen < neelen) {
127       return nullptr;
128     }
129 
130     const char* match;
131     const char* hayend = phaystack + haylen - neelen + 1;
132     while ((match = static_cast<const char*>(
133                 memchr(phaystack, pneedle[0], hayend - phaystack)))) {
134       if (memcmp(match, pneedle, neelen) == 0) {
135         return match;
136       } else {
137         phaystack = match + 1;
138       }
139     }
140     return nullptr;
141   }
142 
143   const char* start_;  // Not owned.
144   size_t size_;
145 };
146 
EndsWith(StringPiece text,StringPiece suffix)147 inline bool EndsWith(StringPiece text, StringPiece suffix) {
148   return text.EndsWith(suffix);
149 }
150 
StartsWith(StringPiece text,StringPiece prefix)151 inline bool StartsWith(StringPiece text, StringPiece prefix) {
152   return text.StartsWith(prefix);
153 }
154 
ConsumePrefix(StringPiece * text,StringPiece prefix)155 inline bool ConsumePrefix(StringPiece* text, StringPiece prefix) {
156   if (!text->StartsWith(prefix)) {
157     return false;
158   }
159   text->RemovePrefix(prefix.size());
160   return true;
161 }
162 
ConsumeSuffix(StringPiece * text,StringPiece suffix)163 inline bool ConsumeSuffix(StringPiece* text, StringPiece suffix) {
164   if (!text->EndsWith(suffix)) {
165     return false;
166   }
167   text->RemoveSuffix(suffix.size());
168   return true;
169 }
170 
171 inline logging::LoggingStringStream& operator<<(
172     logging::LoggingStringStream& stream, StringPiece message) {
173   stream.message.append(message.data(), message.size());
174   return stream;
175 }
176 
177 }  // namespace libtextclassifier3
178 
179 #endif  // LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
180