1 //===-- String utils --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LIBC_SRC_STRING_STRING_UTILS_H
10 #define LIBC_SRC_STRING_STRING_UTILS_H
11 
12 #include "utils/CPP/Bitset.h"
13 #include <stddef.h> // size_t
14 
15 namespace __llvm_libc {
16 namespace internal {
17 
// Counts the characters of 'src' that come before its first null terminator
// and returns that count. The terminator itself is not included. 'src' is
// read until a null character is seen, so it must be null-terminated.
static inline size_t string_length(const char *src) {
  size_t count = 0;
  while (src[count] != '\0')
    ++count;
  return count;
}
26 
// Scans at most 'n' bytes starting at 'src' for the byte 'ch'. Returns a
// (non-const) pointer to the first matching byte, or nullptr when none of the
// first 'n' bytes equals 'ch'. No byte is read when 'n' is zero.
static inline void *find_first_character(const unsigned char *src,
                                         unsigned char ch, size_t n) {
  for (size_t offset = 0; offset < n; ++offset) {
    if (src[offset] == ch)
      return const_cast<unsigned char *>(src + offset);
  }
  return nullptr;
}
35 
36 // Returns the maximum length span that contains only characters not found in
37 // 'segment'. If no characters are found, returns the length of 'src'.
complementary_span(const char * src,const char * segment)38 static inline size_t complementary_span(const char *src, const char *segment) {
39   const char *initial = src;
40   cpp::Bitset<256> bitset;
41 
42   for (; *segment; ++segment)
43     bitset.set(*segment);
44   for (; *src && !bitset.test(*src); ++src)
45     ;
46   return src - initial;
47 }
48 
49 // Given the similarities between strtok and strtok_r, we can implement both
50 // using a utility function. On the first call, 'src' is scanned for the
51 // first character not found in 'delimiter_string'. Once found, it scans until
52 // the first character in the 'delimiter_string' or the null terminator is
53 // found. We define this span as a token. The end of the token is appended with
54 // a null terminator, and the token is returned. The point where the last token
55 // is found is then stored within 'context' for subsequent calls. Subsequent
56 // calls will use 'context' when a nullptr is passed in for 'src'. Once the null
57 // terminating character is reached, returns a nullptr.
string_token(char * __restrict src,const char * __restrict delimiter_string,char ** __restrict saveptr)58 static inline char *string_token(char *__restrict src,
59                                  const char *__restrict delimiter_string,
60                                  char **__restrict saveptr) {
61   cpp::Bitset<256> delimiter_set;
62   for (; *delimiter_string; ++delimiter_string)
63     delimiter_set.set(*delimiter_string);
64 
65   src = src ? src : *saveptr;
66   for (; *src && delimiter_set.test(*src); ++src)
67     ;
68   if (!*src) {
69     *saveptr = src;
70     return nullptr;
71   }
72   char *token = src;
73   for (; *src && !delimiter_set.test(*src); ++src)
74     ;
75   if (*src) {
76     *src = '\0';
77     ++src;
78   }
79   *saveptr = src;
80   return token;
81 }
82 
83 } // namespace internal
84 } // namespace __llvm_libc
85 
86 #endif //  LIBC_SRC_STRING_STRING_UTILS_H
87