1 //===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_ADT_STRINGREF_H 11 #define LLVM_ADT_STRINGREF_H 12 13 #include "llvm/Support/Compiler.h" 14 #include <algorithm> 15 #include <cassert> 16 #include <cstring> 17 #include <limits> 18 #include <string> 19 #include <utility> 20 21 namespace llvm { 22 template <typename T> 23 class SmallVectorImpl; 24 class APInt; 25 class hash_code; 26 class StringRef; 27 28 /// Helper functions for StringRef::getAsInteger. 29 bool getAsUnsignedInteger(StringRef Str, unsigned Radix, 30 unsigned long long &Result); 31 32 bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); 33 34 /// StringRef - Represent a constant reference to a string, i.e. a character 35 /// array and a length, which need not be null terminated. 36 /// 37 /// This class does not own the string data, it is expected to be used in 38 /// situations where the character data resides in some other buffer, whose 39 /// lifetime extends past that of the StringRef. For this reason, it is not in 40 /// general safe to store a StringRef. 41 class StringRef { 42 public: 43 typedef const char *iterator; 44 typedef const char *const_iterator; 45 static const size_t npos = ~size_t(0); 46 typedef size_t size_type; 47 48 private: 49 /// The start of the string, in an external buffer. 50 const char *Data; 51 52 /// The length of the string. 53 size_t Length; 54 55 // Workaround memcmp issue with null pointers (undefined behavior) 56 // by providing a specialized version 57 LLVM_ATTRIBUTE_ALWAYS_INLINE compareMemory(const char * Lhs,const char * Rhs,size_t Length)58 static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { 59 if (Length == 0) { return 0; } 60 return ::memcmp(Lhs,Rhs,Length); 61 } 62 63 public: 64 /// @name Constructors 65 /// @{ 66 67 /// Construct an empty string ref. StringRef()68 /*implicit*/ StringRef() : Data(nullptr), Length(0) {} 69 70 /// Construct a string ref from a cstring. StringRef(const char * Str)71 /*implicit*/ StringRef(const char *Str) 72 : Data(Str) { 73 assert(Str && "StringRef cannot be built from a NULL argument"); 74 Length = ::strlen(Str); // invoking strlen(NULL) is undefined behavior 75 } 76 77 /// Construct a string ref from a pointer and length. 78 LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef(const char * data,size_t length)79 /*implicit*/ StringRef(const char *data, size_t length) 80 : Data(data), Length(length) { 81 assert((data || length == 0) && 82 "StringRef cannot be built from a NULL argument with non-null length"); 83 } 84 85 /// Construct a string ref from an std::string. 86 LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef(const std::string & Str)87 /*implicit*/ StringRef(const std::string &Str) 88 : Data(Str.data()), Length(Str.length()) {} 89 90 /// @} 91 /// @name Iterators 92 /// @{ 93 begin()94 iterator begin() const { return Data; } 95 end()96 iterator end() const { return Data + Length; } 97 bytes_begin()98 const unsigned char *bytes_begin() const { 99 return reinterpret_cast<const unsigned char *>(begin()); 100 } bytes_end()101 const unsigned char *bytes_end() const { 102 return reinterpret_cast<const unsigned char *>(end()); 103 } 104 105 /// @} 106 /// @name String Operations 107 /// @{ 108 109 /// data - Get a pointer to the start of the string (which may not be null 110 /// terminated). 111 LLVM_ATTRIBUTE_ALWAYS_INLINE data()112 const char *data() const { return Data; } 113 114 /// empty - Check if the string is empty. 115 LLVM_ATTRIBUTE_ALWAYS_INLINE empty()116 bool empty() const { return Length == 0; } 117 118 /// size - Get the string size. 119 LLVM_ATTRIBUTE_ALWAYS_INLINE size()120 size_t size() const { return Length; } 121 122 /// front - Get the first character in the string. front()123 char front() const { 124 assert(!empty()); 125 return Data[0]; 126 } 127 128 /// back - Get the last character in the string. back()129 char back() const { 130 assert(!empty()); 131 return Data[Length-1]; 132 } 133 134 // copy - Allocate copy in Allocator and return StringRef to it. copy(Allocator & A)135 template <typename Allocator> StringRef copy(Allocator &A) const { 136 char *S = A.template Allocate<char>(Length); 137 std::copy(begin(), end(), S); 138 return StringRef(S, Length); 139 } 140 141 /// equals - Check for string equality, this is more efficient than 142 /// compare() when the relative ordering of inequal strings isn't needed. 143 LLVM_ATTRIBUTE_ALWAYS_INLINE equals(StringRef RHS)144 bool equals(StringRef RHS) const { 145 return (Length == RHS.Length && 146 compareMemory(Data, RHS.Data, RHS.Length) == 0); 147 } 148 149 /// equals_lower - Check for string equality, ignoring case. equals_lower(StringRef RHS)150 bool equals_lower(StringRef RHS) const { 151 return Length == RHS.Length && compare_lower(RHS) == 0; 152 } 153 154 /// compare - Compare two strings; the result is -1, 0, or 1 if this string 155 /// is lexicographically less than, equal to, or greater than the \p RHS. 156 LLVM_ATTRIBUTE_ALWAYS_INLINE compare(StringRef RHS)157 int compare(StringRef RHS) const { 158 // Check the prefix for a mismatch. 159 if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) 160 return Res < 0 ? -1 : 1; 161 162 // Otherwise the prefixes match, so we only need to check the lengths. 163 if (Length == RHS.Length) 164 return 0; 165 return Length < RHS.Length ? -1 : 1; 166 } 167 168 /// compare_lower - Compare two strings, ignoring case. 169 int compare_lower(StringRef RHS) const; 170 171 /// compare_numeric - Compare two strings, treating sequences of digits as 172 /// numbers. 173 int compare_numeric(StringRef RHS) const; 174 175 /// \brief Determine the edit distance between this string and another 176 /// string. 177 /// 178 /// \param Other the string to compare this string against. 179 /// 180 /// \param AllowReplacements whether to allow character 181 /// replacements (change one character into another) as a single 182 /// operation, rather than as two operations (an insertion and a 183 /// removal). 184 /// 185 /// \param MaxEditDistance If non-zero, the maximum edit distance that 186 /// this routine is allowed to compute. If the edit distance will exceed 187 /// that maximum, returns \c MaxEditDistance+1. 188 /// 189 /// \returns the minimum number of character insertions, removals, 190 /// or (if \p AllowReplacements is \c true) replacements needed to 191 /// transform one of the given strings into the other. If zero, 192 /// the strings are identical. 193 unsigned edit_distance(StringRef Other, bool AllowReplacements = true, 194 unsigned MaxEditDistance = 0) const; 195 196 /// str - Get the contents as an std::string. str()197 std::string str() const { 198 if (!Data) return std::string(); 199 return std::string(Data, Length); 200 } 201 202 /// @} 203 /// @name Operator Overloads 204 /// @{ 205 206 char operator[](size_t Index) const { 207 assert(Index < Length && "Invalid index!"); 208 return Data[Index]; 209 } 210 211 /// @} 212 /// @name Type Conversions 213 /// @{ 214 string()215 operator std::string() const { 216 return str(); 217 } 218 219 /// @} 220 /// @name String Predicates 221 /// @{ 222 223 /// Check if this string starts with the given \p Prefix. 224 LLVM_ATTRIBUTE_ALWAYS_INLINE startswith(StringRef Prefix)225 bool startswith(StringRef Prefix) const { 226 return Length >= Prefix.Length && 227 compareMemory(Data, Prefix.Data, Prefix.Length) == 0; 228 } 229 230 /// Check if this string starts with the given \p Prefix, ignoring case. 231 bool startswith_lower(StringRef Prefix) const; 232 233 /// Check if this string ends with the given \p Suffix. 234 LLVM_ATTRIBUTE_ALWAYS_INLINE endswith(StringRef Suffix)235 bool endswith(StringRef Suffix) const { 236 return Length >= Suffix.Length && 237 compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; 238 } 239 240 /// Check if this string ends with the given \p Suffix, ignoring case. 241 bool endswith_lower(StringRef Suffix) const; 242 243 /// @} 244 /// @name String Searching 245 /// @{ 246 247 /// Search for the first character \p C in the string. 248 /// 249 /// \returns The index of the first occurrence of \p C, or npos if not 250 /// found. 251 LLVM_ATTRIBUTE_ALWAYS_INLINE 252 size_t find(char C, size_t From = 0) const { 253 size_t FindBegin = std::min(From, Length); 254 if (FindBegin < Length) { // Avoid calling memchr with nullptr. 255 // Just forward to memchr, which is faster than a hand-rolled loop. 256 if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin)) 257 return static_cast<const char *>(P) - Data; 258 } 259 return npos; 260 } 261 262 /// Search for the first string \p Str in the string. 263 /// 264 /// \returns The index of the first occurrence of \p Str, or npos if not 265 /// found. 266 size_t find(StringRef Str, size_t From = 0) const; 267 268 /// Search for the last character \p C in the string. 269 /// 270 /// \returns The index of the last occurrence of \p C, or npos if not 271 /// found. 272 size_t rfind(char C, size_t From = npos) const { 273 From = std::min(From, Length); 274 size_t i = From; 275 while (i != 0) { 276 --i; 277 if (Data[i] == C) 278 return i; 279 } 280 return npos; 281 } 282 283 /// Search for the last string \p Str in the string. 284 /// 285 /// \returns The index of the last occurrence of \p Str, or npos if not 286 /// found. 287 size_t rfind(StringRef Str) const; 288 289 /// Find the first character in the string that is \p C, or npos if not 290 /// found. Same as find. 291 size_t find_first_of(char C, size_t From = 0) const { 292 return find(C, From); 293 } 294 295 /// Find the first character in the string that is in \p Chars, or npos if 296 /// not found. 297 /// 298 /// Complexity: O(size() + Chars.size()) 299 size_t find_first_of(StringRef Chars, size_t From = 0) const; 300 301 /// Find the first character in the string that is not \p C or npos if not 302 /// found. 303 size_t find_first_not_of(char C, size_t From = 0) const; 304 305 /// Find the first character in the string that is not in the string 306 /// \p Chars, or npos if not found. 307 /// 308 /// Complexity: O(size() + Chars.size()) 309 size_t find_first_not_of(StringRef Chars, size_t From = 0) const; 310 311 /// Find the last character in the string that is \p C, or npos if not 312 /// found. 313 size_t find_last_of(char C, size_t From = npos) const { 314 return rfind(C, From); 315 } 316 317 /// Find the last character in the string that is in \p C, or npos if not 318 /// found. 319 /// 320 /// Complexity: O(size() + Chars.size()) 321 size_t find_last_of(StringRef Chars, size_t From = npos) const; 322 323 /// Find the last character in the string that is not \p C, or npos if not 324 /// found. 325 size_t find_last_not_of(char C, size_t From = npos) const; 326 327 /// Find the last character in the string that is not in \p Chars, or 328 /// npos if not found. 329 /// 330 /// Complexity: O(size() + Chars.size()) 331 size_t find_last_not_of(StringRef Chars, size_t From = npos) const; 332 333 /// @} 334 /// @name Helpful Algorithms 335 /// @{ 336 337 /// Return the number of occurrences of \p C in the string. count(char C)338 size_t count(char C) const { 339 size_t Count = 0; 340 for (size_t i = 0, e = Length; i != e; ++i) 341 if (Data[i] == C) 342 ++Count; 343 return Count; 344 } 345 346 /// Return the number of non-overlapped occurrences of \p Str in 347 /// the string. 348 size_t count(StringRef Str) const; 349 350 /// Parse the current string as an integer of the specified radix. If 351 /// \p Radix is specified as zero, this does radix autosensing using 352 /// extended C rules: 0 is octal, 0x is hex, 0b is binary. 353 /// 354 /// If the string is invalid or if only a subset of the string is valid, 355 /// this returns true to signify the error. The string is considered 356 /// erroneous if empty or if it overflows T. 357 template <typename T> 358 typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type getAsInteger(unsigned Radix,T & Result)359 getAsInteger(unsigned Radix, T &Result) const { 360 long long LLVal; 361 if (getAsSignedInteger(*this, Radix, LLVal) || 362 static_cast<T>(LLVal) != LLVal) 363 return true; 364 Result = LLVal; 365 return false; 366 } 367 368 template <typename T> 369 typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type getAsInteger(unsigned Radix,T & Result)370 getAsInteger(unsigned Radix, T &Result) const { 371 unsigned long long ULLVal; 372 // The additional cast to unsigned long long is required to avoid the 373 // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type 374 // 'unsigned __int64' when instantiating getAsInteger with T = bool. 375 if (getAsUnsignedInteger(*this, Radix, ULLVal) || 376 static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) 377 return true; 378 Result = ULLVal; 379 return false; 380 } 381 382 /// Parse the current string as an integer of the specified \p Radix, or of 383 /// an autosensed radix if the \p Radix given is 0. The current value in 384 /// \p Result is discarded, and the storage is changed to be wide enough to 385 /// store the parsed integer. 386 /// 387 /// \returns true if the string does not solely consist of a valid 388 /// non-empty number in the appropriate base. 389 /// 390 /// APInt::fromString is superficially similar but assumes the 391 /// string is well-formed in the given radix. 392 bool getAsInteger(unsigned Radix, APInt &Result) const; 393 394 /// @} 395 /// @name String Operations 396 /// @{ 397 398 // Convert the given ASCII string to lowercase. 399 std::string lower() const; 400 401 /// Convert the given ASCII string to uppercase. 402 std::string upper() const; 403 404 /// @} 405 /// @name Substring Operations 406 /// @{ 407 408 /// Return a reference to the substring from [Start, Start + N). 409 /// 410 /// \param Start The index of the starting character in the substring; if 411 /// the index is npos or greater than the length of the string then the 412 /// empty substring will be returned. 413 /// 414 /// \param N The number of characters to included in the substring. If N 415 /// exceeds the number of characters remaining in the string, the string 416 /// suffix (starting with \p Start) will be returned. 417 LLVM_ATTRIBUTE_ALWAYS_INLINE 418 StringRef substr(size_t Start, size_t N = npos) const { 419 Start = std::min(Start, Length); 420 return StringRef(Data + Start, std::min(N, Length - Start)); 421 } 422 423 /// Return a StringRef equal to 'this' but with the first \p N elements 424 /// dropped. 425 LLVM_ATTRIBUTE_ALWAYS_INLINE 426 StringRef drop_front(size_t N = 1) const { 427 assert(size() >= N && "Dropping more elements than exist"); 428 return substr(N); 429 } 430 431 /// Return a StringRef equal to 'this' but with the last \p N elements 432 /// dropped. 433 LLVM_ATTRIBUTE_ALWAYS_INLINE 434 StringRef drop_back(size_t N = 1) const { 435 assert(size() >= N && "Dropping more elements than exist"); 436 return substr(0, size()-N); 437 } 438 439 /// Return a reference to the substring from [Start, End). 440 /// 441 /// \param Start The index of the starting character in the substring; if 442 /// the index is npos or greater than the length of the string then the 443 /// empty substring will be returned. 444 /// 445 /// \param End The index following the last character to include in the 446 /// substring. If this is npos, or less than \p Start, or exceeds the 447 /// number of characters remaining in the string, the string suffix 448 /// (starting with \p Start) will be returned. 449 LLVM_ATTRIBUTE_ALWAYS_INLINE slice(size_t Start,size_t End)450 StringRef slice(size_t Start, size_t End) const { 451 Start = std::min(Start, Length); 452 End = std::min(std::max(Start, End), Length); 453 return StringRef(Data + Start, End - Start); 454 } 455 456 /// Split into two substrings around the first occurrence of a separator 457 /// character. 458 /// 459 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 460 /// such that (*this == LHS + Separator + RHS) is true and RHS is 461 /// maximal. If \p Separator is not in the string, then the result is a 462 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 463 /// 464 /// \param Separator The character to split on. 465 /// \returns The split substrings. split(char Separator)466 std::pair<StringRef, StringRef> split(char Separator) const { 467 size_t Idx = find(Separator); 468 if (Idx == npos) 469 return std::make_pair(*this, StringRef()); 470 return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); 471 } 472 473 /// Split into two substrings around the first occurrence of a separator 474 /// string. 475 /// 476 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 477 /// such that (*this == LHS + Separator + RHS) is true and RHS is 478 /// maximal. If \p Separator is not in the string, then the result is a 479 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 480 /// 481 /// \param Separator - The string to split on. 482 /// \return - The split substrings. split(StringRef Separator)483 std::pair<StringRef, StringRef> split(StringRef Separator) const { 484 size_t Idx = find(Separator); 485 if (Idx == npos) 486 return std::make_pair(*this, StringRef()); 487 return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); 488 } 489 490 /// Split into substrings around the occurrences of a separator string. 491 /// 492 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most 493 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 494 /// elements are added to A. 495 /// If \p KeepEmpty is false, empty strings are not added to \p A. They 496 /// still count when considering \p MaxSplit 497 /// An useful invariant is that 498 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 499 /// 500 /// \param A - Where to put the substrings. 501 /// \param Separator - The string to split on. 502 /// \param MaxSplit - The maximum number of times the string is split. 503 /// \param KeepEmpty - True if empty substring should be added. 504 void split(SmallVectorImpl<StringRef> &A, 505 StringRef Separator, int MaxSplit = -1, 506 bool KeepEmpty = true) const; 507 508 /// Split into substrings around the occurrences of a separator character. 509 /// 510 /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most 511 /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 512 /// elements are added to A. 513 /// If \p KeepEmpty is false, empty strings are not added to \p A. They 514 /// still count when considering \p MaxSplit 515 /// An useful invariant is that 516 /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true 517 /// 518 /// \param A - Where to put the substrings. 519 /// \param Separator - The string to split on. 520 /// \param MaxSplit - The maximum number of times the string is split. 521 /// \param KeepEmpty - True if empty substring should be added. 522 void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, 523 bool KeepEmpty = true) const; 524 525 /// Split into two substrings around the last occurrence of a separator 526 /// character. 527 /// 528 /// If \p Separator is in the string, then the result is a pair (LHS, RHS) 529 /// such that (*this == LHS + Separator + RHS) is true and RHS is 530 /// minimal. If \p Separator is not in the string, then the result is a 531 /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). 532 /// 533 /// \param Separator - The character to split on. 534 /// \return - The split substrings. rsplit(char Separator)535 std::pair<StringRef, StringRef> rsplit(char Separator) const { 536 size_t Idx = rfind(Separator); 537 if (Idx == npos) 538 return std::make_pair(*this, StringRef()); 539 return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); 540 } 541 542 /// Return string with consecutive characters in \p Chars starting from 543 /// the left removed. 544 StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { 545 return drop_front(std::min(Length, find_first_not_of(Chars))); 546 } 547 548 /// Return string with consecutive characters in \p Chars starting from 549 /// the right removed. 550 StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { 551 return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); 552 } 553 554 /// Return string with consecutive characters in \p Chars starting from 555 /// the left and right removed. 556 StringRef trim(StringRef Chars = " \t\n\v\f\r") const { 557 return ltrim(Chars).rtrim(Chars); 558 } 559 560 /// @} 561 }; 562 563 /// @name StringRef Comparison Operators 564 /// @{ 565 566 LLVM_ATTRIBUTE_ALWAYS_INLINE 567 inline bool operator==(StringRef LHS, StringRef RHS) { 568 return LHS.equals(RHS); 569 } 570 571 LLVM_ATTRIBUTE_ALWAYS_INLINE 572 inline bool operator!=(StringRef LHS, StringRef RHS) { 573 return !(LHS == RHS); 574 } 575 576 inline bool operator<(StringRef LHS, StringRef RHS) { 577 return LHS.compare(RHS) == -1; 578 } 579 580 inline bool operator<=(StringRef LHS, StringRef RHS) { 581 return LHS.compare(RHS) != 1; 582 } 583 584 inline bool operator>(StringRef LHS, StringRef RHS) { 585 return LHS.compare(RHS) == 1; 586 } 587 588 inline bool operator>=(StringRef LHS, StringRef RHS) { 589 return LHS.compare(RHS) != -1; 590 } 591 592 inline std::string &operator+=(std::string &buffer, StringRef string) { 593 return buffer.append(string.data(), string.size()); 594 } 595 596 /// @} 597 598 /// \brief Compute a hash_code for a StringRef. 599 hash_code hash_value(StringRef S); 600 601 // StringRefs can be treated like a POD type. 602 template <typename T> struct isPodLike; 603 template <> struct isPodLike<StringRef> { static const bool value = true; }; 604 } 605 606 #endif 607