1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PLATFORM_TSTRING_H_
17 #define TENSORFLOW_CORE_PLATFORM_TSTRING_H_
18 
#include <assert.h>

#include <algorithm>
#include <ostream>
#include <string>

#include "tensorflow/core/platform/cord.h"
#include "tensorflow/core/platform/ctstring.h"
26 
27 // TODO(dero): This include is temporary, and will be superfluous once
28 // absl::string_view is aliased to std::string_view.
namespace absl {
// Forward-declares AlphaNum so that the type can be named in this header
// without pulling in its full definition.  The ABSL_NAMESPACE_BEGIN/END macros
// are expanded only when they are defined.
#ifdef ABSL_NAMESPACE_BEGIN
ABSL_NAMESPACE_BEGIN
#endif  // ABSL_NAMESPACE_BEGIN
class AlphaNum;
#ifdef ABSL_NAMESPACE_END
ABSL_NAMESPACE_END
#endif  // ABSL_NAMESPACE_END
}  // namespace absl
38 
39 namespace tensorflow {
40 
41 // tensorflow::tstring is the scalar type for DT_STRING tensors.
42 //
43 // tstrings are meant to be used when interfacing with string tensors, and
44 // should not be considered as a general replacement for std::string in
45 // tensorflow.  The primary purpose of tstring is to provide a unified and
46 // stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating
47 // unnecessary conversions across language boundaries, and allowing for compiler
48 // agnostic interoperability across dynamically loaded modules.
49 //
// In addition to ABI stability, tstring features two string subtypes, VIEW and
// OFFSET.
52 //
53 // VIEW tstrings are views into unowned character buffers; they can be used to
54 // pass around existing character strings without incurring a per object heap
55 // allocation.  Note that, like std::string_view, it is the user's
56 // responsibility to ensure that the underlying buffer of a VIEW tstring exceeds
57 // the lifetime of the associated tstring object.
58 //
59 // TODO(dero): Methods for creating OFFSET tensors are not currently
60 // implemented.
61 //
62 // OFFSET tstrings are platform independent offset defined strings which can be
63 // directly mmaped or copied into a tensor buffer without the need for
64 // deserialization or processing.  For security reasons, it is imperative that
65 // OFFSET based string tensors are validated before use, or are from a trusted
66 // source.
67 //
68 // Underlying VIEW and OFFSET buffers are considered immutable, so l-value
69 // assignment, mutation, or non-const access to data() of tstrings will result
70 // in the conversion to an owned SMALL/LARGE type.
71 //
72 // The interface for tstring largely overlaps with std::string. Except where
73 // noted, expect equivalent semantics with synonymous std::string methods.
74 class tstring {
75   TF_TString tstr_;
76 
77  public:
78   enum Type {
79     // See cstring.h
80     SMALL = TF_TSTR_SMALL,
81     LARGE = TF_TSTR_LARGE,
82     OFFSET = TF_TSTR_OFFSET,
83     VIEW = TF_TSTR_VIEW,
84   };
85 
86   // Assignment to a tstring object with a tstring::view type will create a VIEW
87   // type tstring.
88   class view {
89     const char* data_;
90     size_t size_;
91 
92    public:
view(const char * data,size_t size)93     explicit view(const char* data, size_t size) : data_(data), size_(size) {}
view(const char * data)94     explicit view(const char* data) : data_(data), size_(::strlen(data)) {}
95 
data()96     const char* data() const { return data_; }
97 
size()98     size_t size() const { return size_; }
99 
100     view() = delete;
101     view(const view&) = delete;
102     view& operator=(const view&) = delete;
103   };
104 
105   typedef const char* const_iterator;
106 
107   // Ctor
108   tstring();
109   tstring(const std::string& str);  // NOLINT TODO(b/147740521): Make explicit.
110   tstring(const char* str, size_t len);
111   tstring(const char* str);  // NOLINT TODO(b/147740521): Make explicit.
112   tstring(size_t n, char c);
113   explicit tstring(const std::string_view str);
114 #ifdef PLATFORM_GOOGLE
115   explicit tstring(const std::Cord& cord);
116 #endif  // PLATFORM_GOOGLE
117 
118   // Copy
119   tstring(const tstring& str);
120 
121   // Move
122   tstring(tstring&& str) noexcept;
123 
124   // Dtor
125   ~tstring();
126 
127   // Copy Assignment
128   tstring& operator=(const tstring& str);
129   tstring& operator=(const std::string& str);
130   tstring& operator=(const char* str);
131   tstring& operator=(char ch);
132   tstring& operator=(const std::string_view str);
133 #ifdef PLATFORM_GOOGLE
134   tstring& operator=(const std::Cord& cord);
135 #endif  // PLATFORM_GOOGLE
136 
137   // View Assignment
138   tstring& operator=(const view& tsv);
139 
140   // Move Assignment
141   tstring& operator=(tstring&& str);
142 
143   // Comparison
144   int compare(const char* str, size_t len) const;
145   bool operator<(const tstring& o) const;
146   bool operator>(const tstring& o) const;
147   bool operator==(const char* str) const;
148   bool operator==(const tstring& o) const;
149   bool operator!=(const char* str) const;
150   bool operator!=(const tstring& o) const;
151 
152   // Conversion Operators
153   // TODO(b/147740521): Make explicit.
154   operator std::string() const;  // NOLINT
155   // TODO(b/147740521): Make explicit.
156   operator std::string_view() const;  // NOLINT
157 #ifdef PLATFORM_GOOGLE
158   template <typename T,
159             typename std::enable_if<std::is_same<T, std::AlphaNum>::value,
160                                     T>::type* = nullptr>
161   operator T() const;  // NOLINT TODO(b/147740521): Remove.
162 #endif  // PLATFORM_GOOGLE
163 
164   // Attributes
165   size_t size() const;
166   size_t length() const;
167   size_t capacity() const;
168   bool empty() const;
169   Type type() const;
170 
171   // Allocation
172   void resize(size_t new_size, char c = 0);
173   // Similar to resize, but will leave the newly grown region uninitialized.
174   void resize_uninitialized(size_t new_size);
175   void clear() noexcept;
176   void reserve(size_t n);
177 
178   // Iterators
179   const_iterator begin() const;
180   const_iterator end() const;
181 
182   // Const Element Access
183   const char* c_str() const;
184   const char* data() const;
185   const char& operator[](size_t i) const;
186   const char& back() const;
187 
188   // Mutable Element Access
189   // NOTE: For VIEW/OFFSET types, calling these methods will result in the
190   // conversion to a SMALL or heap allocated LARGE type.  As a result,
191   // previously obtained pointers, references, or iterators to the underlying
192   // buffer will point to the original VIEW/OFFSET and not the new allocation.
193   char* mdata();
194   char* data();  // DEPRECATED: Use mdata().
195   char& operator[](size_t i);
196 
197   // Assignment
198   tstring& assign(const char* str, size_t len);
199   tstring& assign(const char* str);
200 
201   // View Assignment
202   tstring& assign_as_view(const tstring& str);
203   tstring& assign_as_view(const std::string& str);
204   tstring& assign_as_view(const std::string_view str);
205   tstring& assign_as_view(const char* str, size_t len);
206   tstring& assign_as_view(const char* str);
207 
208   // Modifiers
209   // NOTE: Invalid input will result in undefined behavior.
210   tstring& append(const tstring& str);
211   tstring& append(const char* str, size_t len);
212   tstring& append(const char* str);
213   tstring& append(size_t n, char c);
214 
215   tstring& erase(size_t pos, size_t len);
216 
217   tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen);
218   tstring& insert(size_t pos, size_t n, char c);
219   void swap(tstring& str);
220   void push_back(char ch);
221 
222   // Friends
223   friend bool operator==(const char* a, const tstring& b);
224   friend bool operator==(const std::string& a, const tstring& b);
225   friend tstring operator+(const tstring& a, const tstring& b);
226   friend std::ostream& operator<<(std::ostream& o, const tstring& str);
227   friend std::hash<tstring>;
228 };
229 
230 // Non-member function overloads
231 
// Equality with a C string or std::string on the left-hand side.
bool operator==(const char* a, const tstring& b);
bool operator==(const std::string& a, const tstring& b);
// Concatenation: returns a new tstring built by appending `a`, then `b`.
tstring operator+(const tstring& a, const tstring& b);
// Writes the size() bytes of `str` to `o`.
std::ostream& operator<<(std::ostream& o, const tstring& str);
236 
237 // Implementations
238 
239 // Ctor
240 
tstring()241 inline tstring::tstring() { TF_TString_Init(&tstr_); }
242 
tstring(const char * str,size_t len)243 inline tstring::tstring(const char* str, size_t len) {
244   TF_TString_Init(&tstr_);
245   TF_TString_Copy(&tstr_, str, len);
246 }
247 
tstring(const char * str)248 inline tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {}
249 
tstring(size_t n,char c)250 inline tstring::tstring(size_t n, char c) {
251   TF_TString_Init(&tstr_);
252   TF_TString_Resize(&tstr_, n, c);
253 }
254 
tstring(const std::string & str)255 inline tstring::tstring(const std::string& str)
256     : tstring(str.data(), str.size()) {}
257 
tstring(const std::string_view str)258 inline tstring::tstring(const std::string_view str)
259     : tstring(str.data(), str.size()) {}
260 
261 #ifdef PLATFORM_GOOGLE
tstring(const std::Cord & cord)262 inline tstring::tstring(const std::Cord& cord) {
263   TF_TString_Init(&tstr_);
264   TF_TString_ResizeUninitialized(&tstr_, cord.size());
265 
266   cord.CopyToArray(data());
267 }
268 #endif  // PLATFORM_GOOGLE
269 
270 // Copy
271 
// Copy constructor: initializes this string and then assigns `str`'s
// underlying TF_TString to it with TF_TString_Assign.
inline tstring::tstring(const tstring& str) {
  TF_TString* const self = &tstr_;
  TF_TString_Init(self);
  TF_TString_Assign(self, &str.tstr_);
}
276 
277 // Move
278 
tstring(tstring && str)279 inline tstring::tstring(tstring&& str) noexcept {
280   TF_TString_Init(&tstr_);
281   TF_TString_Move(&tstr_, &str.tstr_);
282 }
283 
284 // Dtor
285 
~tstring()286 inline tstring::~tstring() { TF_TString_Dealloc(&tstr_); }
287 
288 // Copy Assignment
289 
290 inline tstring& tstring::operator=(const tstring& str) {
291   TF_TString_Assign(&tstr_, &str.tstr_);
292 
293   return *this;
294 }
295 
296 inline tstring& tstring::operator=(const std::string& str) {
297   TF_TString_Copy(&tstr_, str.data(), str.size());
298   return *this;
299 }
300 
301 inline tstring& tstring::operator=(const char* str) {
302   TF_TString_Copy(&tstr_, str, ::strlen(str));
303 
304   return *this;
305 }
306 
307 inline tstring& tstring::operator=(char c) {
308   resize_uninitialized(1);
309   (*this)[0] = c;
310 
311   return *this;
312 }
313 
314 inline tstring& tstring::operator=(const std::string_view str) {
315   TF_TString_Copy(&tstr_, str.data(), str.size());
316 
317   return *this;
318 }
319 
320 #ifdef PLATFORM_GOOGLE
321 inline tstring& tstring::operator=(const std::Cord& cord) {
322   TF_TString_ResizeUninitialized(&tstr_, cord.size());
323 
324   cord.CopyToArray(data());
325 
326   return *this;
327 }
328 #endif  // PLATFORM_GOOGLE
329 
330 // View Assignment
331 
332 inline tstring& tstring::operator=(const tstring::view& tsv) {
333   assign_as_view(tsv.data(), tsv.size());
334 
335   return *this;
336 }
337 
338 // Move Assignment
339 
340 inline tstring& tstring::operator=(tstring&& str) {
341   TF_TString_Move(&tstr_, &str.tstr_);
342 
343   return *this;
344 }
345 
346 // Comparison
347 
compare(const char * str,size_t len)348 inline int tstring::compare(const char* str, size_t len) const {
349   int ret = ::memcmp(data(), str, std::min(len, size()));
350 
351   if (ret < 0) return -1;
352   if (ret > 0) return +1;
353 
354   if (size() < len) return -1;
355   if (size() > len) return +1;
356 
357   return 0;
358 }
359 
360 inline bool tstring::operator<(const tstring& o) const {
361   return compare(o.data(), o.size()) < 0;
362 }
363 
364 inline bool tstring::operator>(const tstring& o) const {
365   return compare(o.data(), o.size()) > 0;
366 }
367 
368 inline bool tstring::operator==(const char* str) const {
369   return ::strlen(str) == size() && ::memcmp(data(), str, size()) == 0;
370 }
371 
372 inline bool tstring::operator==(const tstring& o) const {
373   return o.size() == size() && ::memcmp(data(), o.data(), size()) == 0;
374 }
375 
376 inline bool tstring::operator!=(const char* str) const {
377   return !(*this == str);
378 }
379 
380 inline bool tstring::operator!=(const tstring& o) const {
381   return !(*this == o);
382 }
383 
384 // Conversion Operators
385 
string()386 inline tstring::operator std::string() const {
387   return std::string(data(), size());
388 }
389 
string_view()390 inline tstring::operator std::string_view() const {
391   return std::string_view(data(), size());
392 }
393 
394 #ifdef PLATFORM_GOOGLE
395 template <typename T, typename std::enable_if<
396                           std::is_same<T, std::AlphaNum>::value, T>::type*>
T()397 inline tstring::operator T() const {
398   return T(std::string_view(*this));
399 }
400 #endif  // PLATFORM_GOOGLE
401 
402 // Attributes
403 
size()404 inline size_t tstring::size() const { return TF_TString_GetSize(&tstr_); }
405 
length()406 inline size_t tstring::length() const { return size(); }
407 
capacity()408 inline size_t tstring::capacity() const {
409   return TF_TString_GetCapacity(&tstr_);
410 }
411 
empty()412 inline bool tstring::empty() const { return size() == 0; }
413 
type()414 inline tstring::Type tstring::type() const {
415   return static_cast<tstring::Type>(TF_TString_GetType(&tstr_));
416 }
417 
418 // Allocation
419 
resize(size_t new_size,char c)420 inline void tstring::resize(size_t new_size, char c) {
421   TF_TString_Resize(&tstr_, new_size, c);
422 }
423 
resize_uninitialized(size_t new_size)424 inline void tstring::resize_uninitialized(size_t new_size) {
425   TF_TString_ResizeUninitialized(&tstr_, new_size);
426 }
427 
clear()428 inline void tstring::clear() noexcept {
429   TF_TString_ResizeUninitialized(&tstr_, 0);
430 }
431 
reserve(size_t n)432 inline void tstring::reserve(size_t n) { TF_TString_Reserve(&tstr_, n); }
433 
434 // Iterators
435 
begin()436 inline tstring::const_iterator tstring::begin() const { return &(*this)[0]; }
end()437 inline tstring::const_iterator tstring::end() const { return &(*this)[size()]; }
438 
439 // Element Access
440 
c_str()441 inline const char* tstring::c_str() const { return data(); }
442 
data()443 inline const char* tstring::data() const {
444   return TF_TString_GetDataPointer(&tstr_);
445 }
446 
447 inline const char& tstring::operator[](size_t i) const { return data()[i]; }
448 
back()449 inline const char& tstring::back() const { return (*this)[size() - 1]; }
450 
mdata()451 inline char* tstring::mdata() {
452   return TF_TString_GetMutableDataPointer(&tstr_);
453 }
454 
data()455 inline char* tstring::data() {
456   // Deprecated
457   return mdata();
458 }
459 
460 inline char& tstring::operator[](size_t i) { return mdata()[i]; }
461 
462 // Assignment
463 
assign(const char * str,size_t len)464 inline tstring& tstring::assign(const char* str, size_t len) {
465   TF_TString_Copy(&tstr_, str, len);
466 
467   return *this;
468 }
469 
assign(const char * str)470 inline tstring& tstring::assign(const char* str) {
471   assign(str, ::strlen(str));
472 
473   return *this;
474 }
475 
476 // View Assignment
477 
// Makes this string a view of `str`'s bytes by forwarding its data pointer
// and size to the (pointer, length) overload.
inline tstring& tstring::assign_as_view(const tstring& str) {
  return assign_as_view(str.data(), str.size());
}
483 
assign_as_view(const std::string & str)484 inline tstring& tstring::assign_as_view(const std::string& str) {
485   assign_as_view(str.data(), str.size());
486 
487   return *this;
488 }
489 
assign_as_view(const std::string_view str)490 inline tstring& tstring::assign_as_view(const std::string_view str) {
491   assign_as_view(str.data(), str.size());
492 
493   return *this;
494 }
495 
assign_as_view(const char * str,size_t len)496 inline tstring& tstring::assign_as_view(const char* str, size_t len) {
497   TF_TString_AssignView(&tstr_, str, len);
498 
499   return *this;
500 }
501 
assign_as_view(const char * str)502 inline tstring& tstring::assign_as_view(const char* str) {
503   assign_as_view(str, ::strlen(str));
504 
505   return *this;
506 }
507 
508 // Modifiers
509 
// Appends `str` by passing both underlying TF_TStrings to TF_TString_Append;
// returns *this.
inline tstring& tstring::append(const tstring& str) {
  const TF_TString* const source = &str.tstr_;
  TF_TString_Append(&tstr_, source);
  return *this;
}
515 
append(const char * str,size_t len)516 inline tstring& tstring::append(const char* str, size_t len) {
517   TF_TString_AppendN(&tstr_, str, len);
518 
519   return *this;
520 }
521 
append(const char * str)522 inline tstring& tstring::append(const char* str) {
523   append(str, ::strlen(str));
524 
525   return *this;
526 }
527 
append(size_t n,char c)528 inline tstring& tstring::append(size_t n, char c) {
529   resize(size() + n, c);
530 
531   return *this;
532 }
533 
erase(size_t pos,size_t len)534 inline tstring& tstring::erase(size_t pos, size_t len) {
535   memmove(mdata() + pos, data() + pos + len, size() - len - pos);
536 
537   resize(size() - len);
538 
539   return *this;
540 }
541 
// Inserts the `sublen` bytes of `str` starting at `subpos` before position
// `pos` of this string and returns *this.
//
// `pos` must not exceed size(), and [subpos, subpos + sublen) must lie inside
// `str`; other input is undefined behavior.
inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos,
                                size_t sublen) {
  if (&str == this) {
    // Self-insertion: shifting the tail below would move (and partly
    // overwrite) the source bytes before they are read.  Snapshot the
    // substring first and insert from the copy.
    const tstring snapshot(str.data() + subpos, sublen);
    return insert(pos, snapshot, 0, sublen);
  }

  const size_t orig_size = size();
  TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen);

  // Open a gap of `sublen` bytes at `pos`, then fill it from `str`.
  char* const buf = mdata();
  memmove(buf + pos + sublen, buf + pos, orig_size - pos);
  memmove(buf + pos, str.data() + subpos, sublen);

  return *this;
}
552 
insert(size_t pos,size_t n,char c)553 inline tstring& tstring::insert(size_t pos, size_t n, char c) {
554   size_t size_ = size();
555   TF_TString_ResizeUninitialized(&tstr_, size_ + n);
556 
557   memmove(mdata() + pos + n, data() + pos, size_ - pos);
558   memset(mdata() + pos, c, n);
559 
560   return *this;
561 }
562 
swap(tstring & str)563 inline void tstring::swap(tstring& str) {
564   // TODO(dero): Invalid for OFFSET (unimplemented).
565   std::swap(tstr_, str.tstr_);
566 }
567 
push_back(char ch)568 inline void tstring::push_back(char ch) { append(1, ch); }
569 
570 // Friends
571 
572 inline bool operator==(const char* a, const tstring& b) {
573   return ::strlen(a) == b.size() && ::memcmp(a, b.data(), b.size()) == 0;
574 }
575 
576 inline bool operator==(const std::string& a, const tstring& b) {
577   return a.size() == b.size() && ::memcmp(a.data(), b.data(), b.size()) == 0;
578 }
579 
580 inline tstring operator+(const tstring& a, const tstring& b) {
581   tstring r;
582   r.reserve(a.size() + b.size());
583   r.append(a);
584   r.append(b);
585 
586   return r;
587 }
588 
589 inline std::ostream& operator<<(std::ostream& o, const tstring& str) {
590   return o.write(str.data(), str.size());
591 }
592 
593 }  // namespace tensorflow
594 
595 #endif  // TENSORFLOW_CORE_PLATFORM_TSTRING_H_
596