1 // Copyright 2015 Google Inc. All rights reserved
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // +build ignore
16 
17 #include "strutil.h"
18 
19 #include <ctype.h>
20 #include <limits.h>
21 #include <unistd.h>
22 
23 #include <algorithm>
24 #include <stack>
25 #include <utility>
26 
27 #ifdef __SSE4_2__
28 #include <smmintrin.h>
29 #endif
30 
31 #include "log.h"
32 
// Returns true if |c| is one of the ASCII whitespace characters: horizontal
// tab, line feed, vertical tab, form feed, carriage return or space.
static bool isSpace(char c) {
  if (c == ' ')
    return true;
  return c >= '\t' && c <= '\r';
}
36 
37 #ifdef __SSE4_2__
// Scans s[0, len) 16 bytes at a time and returns the index of the first byte
// that lies in one of the inclusive byte ranges described by |ranges|, or
// |len| if no such byte exists.
//
// |ranges| is a sequence of (low, high) byte pairs and |ranges_size| is the
// number of bytes of it that are significant.
//
// NOTE(review): both loads below fetch a full 16 bytes regardless of |len|
// and |ranges_size|, so this presumably relies on callers passing buffers
// that are readable past their logical end -- confirm.
static int SkipUntilSSE42(const char* s, int len,
                          const char* ranges, int ranges_size) {
  __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
  int i = 0;
  do {
    __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i));
    // Index of the first of the remaining |len - i| bytes of this chunk that
    // falls in a range; 16 means this chunk contains no match.
    int r = _mm_cmpestri(
        ranges16, ranges_size, b16, len - i,
        _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
    if (r != 16) {
      return i + r;
    }
    i += 16;
  } while (i < len);
  return len;
}
54 #endif
55 
operator ++()56 WordScanner::Iterator& WordScanner::Iterator::operator++() {
57   int len = static_cast<int>(in->size());
58   for (s = i + 1; s < len; s++) {
59     if (!isSpace((*in)[s]))
60       break;
61   }
62   if (s >= len) {
63     in = NULL;
64     s = 0;
65     i = 0;
66     return *this;
67   }
68 
69 #ifdef __SSE4_2__
70   static const char ranges[] = "\x09\x0d  ";
71   i = s;
72   i += SkipUntilSSE42(in->data() + s, len - s, ranges, 4);
73 #else
74   for (i = s; i < len; i++) {
75     if (isSpace((*in)[i]))
76       break;
77   }
78 #endif
79 
80   return *this;
81 }
82 
operator *() const83 StringPiece WordScanner::Iterator::operator*() const {
84   return in->substr(s, i - s);
85 }
86 
WordScanner(StringPiece in)87 WordScanner::WordScanner(StringPiece in)
88     : in_(in) {
89 }
90 
begin() const91 WordScanner::Iterator WordScanner::begin() const {
92   Iterator iter;
93   iter.in = &in_;
94   iter.s = 0;
95   iter.i = -1;
96   ++iter;
97   return iter;
98 }
99 
end() const100 WordScanner::Iterator WordScanner::end() const {
101   Iterator iter;
102   iter.in = NULL;
103   iter.s = 0;
104   iter.i = 0;
105   return iter;
106 }
107 
Split(vector<StringPiece> * o)108 void WordScanner::Split(vector<StringPiece>* o) {
109   for (StringPiece t : *this)
110     o->push_back(t);
111 }
112 
WordWriter(string * o)113 WordWriter::WordWriter(string* o)
114     : out_(o),
115       needs_space_(false) {
116 }
117 
MaybeAddWhitespace()118 void WordWriter::MaybeAddWhitespace() {
119   if (needs_space_) {
120     out_->push_back(' ');
121   } else {
122     needs_space_ = true;
123   }
124 }
125 
Write(StringPiece s)126 void WordWriter::Write(StringPiece s) {
127   MaybeAddWhitespace();
128   AppendString(s, out_);
129 }
130 
ScopedTerminator(StringPiece s)131 ScopedTerminator::ScopedTerminator(StringPiece s)
132     : s_(s), c_(s[s.size()]) {
133   const_cast<char*>(s_.data())[s_.size()] = '\0';
134 }
135 
~ScopedTerminator()136 ScopedTerminator::~ScopedTerminator() {
137   const_cast<char*>(s_.data())[s_.size()] = c_;
138 }
139 
AppendString(StringPiece str,string * out)140 void AppendString(StringPiece str, string* out) {
141   out->append(str.begin(), str.end());
142 }
143 
HasPrefix(StringPiece str,StringPiece prefix)144 bool HasPrefix(StringPiece str, StringPiece prefix) {
145   ssize_t size_diff = str.size() - prefix.size();
146   return size_diff >= 0 && str.substr(0, prefix.size()) == prefix;
147 }
148 
HasSuffix(StringPiece str,StringPiece suffix)149 bool HasSuffix(StringPiece str, StringPiece suffix) {
150   ssize_t size_diff = str.size() - suffix.size();
151   return size_diff >= 0 && str.substr(size_diff) == suffix;
152 }
153 
HasWord(StringPiece str,StringPiece w)154 bool HasWord(StringPiece str, StringPiece w) {
155   size_t found = str.find(w);
156   if (found == string::npos)
157     return false;
158   if (found != 0 && !isSpace(str[found-1]))
159     return false;
160   size_t end = found + w.size();
161   if (end != str.size() && !isSpace(str[end]))
162     return false;
163   return true;
164 }
165 
TrimSuffix(StringPiece str,StringPiece suffix)166 StringPiece TrimSuffix(StringPiece str, StringPiece suffix) {
167   ssize_t size_diff = str.size() - suffix.size();
168   if (size_diff < 0 || str.substr(size_diff) != suffix)
169     return str;
170   return str.substr(0, size_diff);
171 }
172 
Pattern(StringPiece pat)173 Pattern::Pattern(StringPiece pat)
174     : pat_(pat), percent_index_(pat.find('%')) {
175 }
176 
Match(StringPiece str) const177 bool Pattern::Match(StringPiece str) const {
178   if (percent_index_ == string::npos)
179     return str == pat_;
180   return MatchImpl(str);
181 }
182 
MatchImpl(StringPiece str) const183 bool Pattern::MatchImpl(StringPiece str) const {
184   return (HasPrefix(str, pat_.substr(0, percent_index_)) &&
185           HasSuffix(str, pat_.substr(percent_index_ + 1)));
186 }
187 
Stem(StringPiece str) const188 StringPiece Pattern::Stem(StringPiece str) const {
189   if (!Match(str))
190     return "";
191   return str.substr(percent_index_,
192                     str.size() - (pat_.size() - percent_index_ - 1));
193 }
194 
AppendSubst(StringPiece str,StringPiece subst,string * out) const195 void Pattern::AppendSubst(StringPiece str, StringPiece subst,
196                           string* out) const {
197   if (percent_index_ == string::npos) {
198     if (str == pat_) {
199       AppendString(subst, out);
200       return;
201     } else {
202       AppendString(str, out);
203       return;
204     }
205   }
206 
207   if (MatchImpl(str)) {
208     size_t subst_percent_index = subst.find('%');
209     if (subst_percent_index == string::npos) {
210       AppendString(subst, out);
211       return;
212     } else {
213       AppendString(subst.substr(0, subst_percent_index), out);
214       AppendString(str.substr(percent_index_,
215                               str.size() - pat_.size() + 1), out);
216       AppendString(subst.substr(subst_percent_index + 1), out);
217       return;
218     }
219   }
220   AppendString(str, out);
221 }
222 
AppendSubstRef(StringPiece str,StringPiece subst,string * out) const223 void Pattern::AppendSubstRef(StringPiece str, StringPiece subst,
224                              string* out) const {
225   if (percent_index_ != string::npos && subst.find('%') != string::npos) {
226     AppendSubst(str, subst, out);
227     return;
228   }
229   StringPiece s = TrimSuffix(str, pat_);
230   out->append(s.begin(), s.end());
231   out->append(subst.begin(), subst.end());
232 }
233 
// Returns a copy of |s| in which every line feed character is replaced by the
// two characters backslash and 'n'.
//
// The result is assembled in a single left-to-right pass, so the cost is
// linear in the size of |s| rather than re-copying the string once per
// newline.
string NoLineBreak(const string& s) {
  size_t index = s.find('\n');
  if (index == string::npos)
    return s;
  string r;
  r.reserve(s.size() + 1);
  size_t prev = 0;  // Start of the text not yet copied into |r|.
  while (index != string::npos) {
    r.append(s, prev, index - prev);
    r += "\\n";
    prev = index + 1;
    index = s.find('\n', prev);
  }
  r.append(s, prev, string::npos);
  return r;
}
245 
TrimLeftSpace(StringPiece s)246 StringPiece TrimLeftSpace(StringPiece s) {
247   size_t i = 0;
248   for (; i < s.size(); i++) {
249     if (isSpace(s[i]))
250       continue;
251     char n = s.get(i+1);
252     if (s[i] == '\\' && (n == '\r' || n == '\n')) {
253       i++;
254       continue;
255     }
256     break;
257   }
258   return s.substr(i, s.size() - i);
259 }
260 
TrimRightSpace(StringPiece s)261 StringPiece TrimRightSpace(StringPiece s) {
262   size_t i = 0;
263   for (; i < s.size(); i++) {
264     char c = s[s.size() - 1 - i];
265     if (isSpace(c)) {
266       if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\')
267         i++;
268       continue;
269     }
270     break;
271   }
272   return s.substr(0, s.size() - i);
273 }
274 
TrimSpace(StringPiece s)275 StringPiece TrimSpace(StringPiece s) {
276   return TrimRightSpace(TrimLeftSpace(s));
277 }
278 
Dirname(StringPiece s)279 StringPiece Dirname(StringPiece s) {
280   size_t found = s.rfind('/');
281   if (found == string::npos)
282     return StringPiece(".");
283   if (found == 0)
284     return StringPiece("");
285   return s.substr(0, found);
286 }
287 
Basename(StringPiece s)288 StringPiece Basename(StringPiece s) {
289   size_t found = s.rfind('/');
290   if (found == string::npos || found == 0)
291     return s;
292   return s.substr(found + 1);
293 }
294 
GetExt(StringPiece s)295 StringPiece GetExt(StringPiece s) {
296   size_t found = s.rfind('.');
297   if (found == string::npos)
298     return StringPiece("");
299   return s.substr(found);
300 }
301 
StripExt(StringPiece s)302 StringPiece StripExt(StringPiece s) {
303   size_t slash_index = s.rfind('/');
304   size_t found = s.rfind('.');
305   if (found == string::npos ||
306       (slash_index != string::npos && found < slash_index))
307     return s;
308   return s.substr(0, found);
309 }
310 
// Normalizes the path in |*o| in place:
//  - runs of '/' collapse into one and a trailing '/' is removed (a lone "/"
//    is kept);
//  - "." components are dropped;
//  - a ".." component cancels the component before it; at the root of an
//    absolute path it is dropped, and at the start of a relative path (or
//    after another kept "..") it is kept.
// An empty string stays empty and "." becomes "".
//
// The string is rewritten through a write index |j| that never runs ahead of
// the read index |i|. The terminating NUL at index size() is processed like a
// separator so that the last component is handled by the same code.
void NormalizePath(string* o) {
  if (o->empty())
    return;
  size_t start_index = 0;
  if ((*o)[0] == '/')
    start_index++;
  size_t j = start_index;
  // Start of the component currently being written.
  size_t prev_start = start_index;
  for (size_t i = start_index; i <= o->size(); i++) {
    char c = (*o)[i];
    if (c != '/' && c != 0) {
      (*o)[j] = c;
      j++;
      continue;
    }

    // A component just ended; it occupies [prev_start, j).
    const size_t prev_len = j - prev_start;
    const bool is_dot = o->compare(prev_start, prev_len, ".") == 0;
    const bool is_dotdot = o->compare(prev_start, prev_len, "..") == 0;
    if (is_dot) {
      j--;
    } else if (is_dotdot && j != 2 /* .. */) {
      if (j == 3) {
        // /..
        j = start_index;
      } else {
        size_t orig_j = j;
        // Step back over "/.." and find where the preceding component starts.
        j -= 4;
        j = o->rfind('/', j);
        if (j == string::npos) {
          j = start_index;
        } else {
          j++;
        }
        if (o->compare(j, 3, "../") == 0) {
          // The preceding component is itself "..", so nothing can be
          // cancelled: keep this ".." as well.
          j = orig_j;
          // Only a real separator is copied. Storing and counting the
          // terminating NUL would leave a stray '\0' at the end of the result
          // (e.g. "../.." would grow to six bytes).
          if (c) {
            (*o)[j] = c;
            j++;
          }
        }
      }
    } else if (prev_len != 0) {
      if (c) {
        (*o)[j] = c;
        j++;
      }
    }
    prev_start = j;
  }
  if (j > 1 && (*o)[j-1] == '/')
    j--;
  o->resize(j);
}
361 
AbsPath(StringPiece s,string * o)362 void AbsPath(StringPiece s, string* o) {
363   if (s.get(0) == '/') {
364     o->clear();
365   } else {
366     char buf[PATH_MAX];
367     if (!getcwd(buf, PATH_MAX)) {
368       fprintf(stderr, "getcwd failed\n");
369       CHECK(false);
370     }
371 
372     CHECK(buf[0] == '/');
373     *o = buf;
374     *o += '/';
375   }
376   AppendString(s, o);
377   NormalizePath(o);
378 }
379 
380 template<typename Cond>
FindOutsideParenImpl(StringPiece s,Cond cond)381 size_t FindOutsideParenImpl(StringPiece s, Cond cond) {
382   bool prev_backslash = false;
383   stack<char> paren_stack;
384   for (size_t i = 0; i < s.size(); i++) {
385     char c = s[i];
386     if (cond(c) && paren_stack.empty() && !prev_backslash) {
387       return i;
388     }
389     switch (c) {
390       case '(':
391         paren_stack.push(')');
392         break;
393       case '{':
394         paren_stack.push('}');
395         break;
396 
397       case ')':
398       case '}':
399         if (!paren_stack.empty() && c == paren_stack.top()) {
400           paren_stack.pop();
401         }
402         break;
403     }
404     prev_backslash = c == '\\' && !prev_backslash;
405   }
406   return string::npos;
407 }
408 
FindOutsideParen(StringPiece s,char c)409 size_t FindOutsideParen(StringPiece s, char c) {
410   return FindOutsideParenImpl(s, [&c](char d){return c == d;});
411 }
412 
FindTwoOutsideParen(StringPiece s,char c1,char c2)413 size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) {
414   return FindOutsideParenImpl(s, [&c1, &c2](char d){
415       return d == c1 || d == c2;
416     });
417 }
418 
FindThreeOutsideParen(StringPiece s,char c1,char c2,char c3)419 size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) {
420   return FindOutsideParenImpl(s, [&c1, &c2, &c3](char d){
421       return d == c1 || d == c2 || d == c3;
422     });
423 }
424 
// Starting at index |e| of |s|, finds the end of the current logical line and
// returns the index of the '\n' that terminates it. A '\n' preceded by a
// backslash (optionally with a '\r' in between) continues the line. If no
// terminating '\n' is found, the index at which scanning stopped is returned.
// |*lf_cnt| is incremented once for every '\n' passed, including the escaped
// ones and the terminating one.
//
// There are two implementations. The SSE4.2 one additionally stops at a NUL
// byte; the portable one has no such case.
size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) {
#ifdef __SSE4_2__
  // Byte ranges [\0, \0], [\n, \n] and [\\, \\]: the only characters that
  // need individual attention.
  static const char ranges[] = "\0\0\n\n\\\\";
  while (e < s.size()) {
    e += SkipUntilSSE42(s.data() + e, s.size() - e, ranges, 6);
    if (e >= s.size()) {
      CHECK(s.size() == e);
      break;
    }
    char c = s[e];
    if (c == '\0')
      break;
    if (c == '\\') {
      // NOTE(review): s[e+1] and s[e+2] can be at or past s.size() when the
      // backslash is near the end; this presumably relies on the underlying
      // buffer being readable (e.g. NUL-terminated) there -- confirm.
      if (s[e+1] == '\n') {
        // Escaped newline: the line continues.
        e += 2;
        ++*lf_cnt;
      } else if (s[e+1] == '\r' && s[e+2] == '\n') {
        // Escaped CR LF: the line continues.
        e += 3;
        ++*lf_cnt;
      } else if (s[e+1] == '\\') {
        // Escaped backslash: skip both so the second one escapes nothing.
        e += 2;
      } else {
        e++;
      }
    } else if (c == '\n') {
      ++*lf_cnt;
      return e;
    }
  }
  return e;
#else
  // True while the characters seen since the last reset form an odd run of
  // backslashes (possibly followed by '\r', which does not reset the flag).
  bool prev_backslash = false;
  for (; e < s.size(); e++) {
    char c = s[e];
    if (c == '\\') {
      prev_backslash = !prev_backslash;
    } else if (c == '\n') {
      ++*lf_cnt;
      if (!prev_backslash) {
        return e;
      }
      prev_backslash = false;
    } else if (c != '\r') {
      prev_backslash = false;
    }
  }
  return e;
#endif
}
474 
TrimLeadingCurdir(StringPiece s)475 StringPiece TrimLeadingCurdir(StringPiece s) {
476   while (s.substr(0, 2) == "./")
477     s = s.substr(2);
478   return s;
479 }
480 
// Rewrites |*s| in place: all trailing line feeds are removed and every
// remaining line feed is replaced by a single space.
//
// The emptiness test guards the access to the last character, so an empty
// string, or one that consists of line feeds only, yields an empty string
// instead of indexing before the start of the buffer.
void FormatForCommandSubstitution(string* s) {
  while (!s->empty() && s->back() == '\n')
    s->pop_back();
  std::replace(s->begin(), s->end(), '\n', ' ');
}
489 
SortWordsInString(StringPiece s)490 string SortWordsInString(StringPiece s) {
491   vector<string> toks;
492   for (StringPiece tok : WordScanner(s)) {
493     toks.push_back(tok.as_string());
494   }
495   sort(toks.begin(), toks.end());
496   return JoinStrings(toks, " ");
497 }
498 
ConcatDir(StringPiece b,StringPiece n)499 string ConcatDir(StringPiece b, StringPiece n) {
500   string r;
501   if (!b.empty()) {
502     b.AppendToString(&r);
503     r += '/';
504   }
505   n.AppendToString(&r);
506   NormalizePath(&r);
507   return r;
508 }
509 
// Returns a copy of |str| in which each backslash becomes four backslashes,
// each line feed becomes backslash + 'n', and each double quote is preceded
// by a backslash. The input is read as a C string, so processing stops at the
// first NUL byte.
string EchoEscape(const string str) {
  string escaped;
  for (const char* p = str.c_str(); *p != '\0'; ++p) {
    const char c = *p;
    if (c == '\\') {
      escaped.append("\\\\\\\\");
    } else if (c == '\n') {
      escaped.append("\\n");
    } else if (c == '"') {
      escaped.append("\\\"");
    } else {
      escaped.push_back(c);
    }
  }
  return escaped;
}
530 
// Backslash-escapes |*s| in place: every '"', '`' and '\\' gets a backslash in
// front of it. A '$' also gets one, except that in a "$$" pair only the first
// '$' is escaped (the pair becomes "\$$"). The string is left untouched when
// it contains none of these characters.
//
// There are two implementations. The SSE4.2 one also lists the NUL byte in
// its search ranges and therefore prefixes embedded NUL bytes with a
// backslash; the portable one does not.
void EscapeShell(string* s) {
#ifdef __SSE4_2__
  // Byte ranges for NUL, '"', '$', '\\' and '`'.
  static const char ranges[] = "\0\0\"\"$$\\\\``";
  // Start of the text that has not been copied into |r| yet.
  size_t prev = 0;
  size_t i = SkipUntilSSE42(s->c_str(), s->size(), ranges, 10);
  if (i == s->size())
    return;

  string r;
  for (; i < s->size();) {
    // Copy the unescaped run in front of the special character at |i|.
    StringPiece(*s).substr(prev, i - prev).AppendToString(&r);
    char c = (*s)[i];
    r += '\\';
    if (c == '$') {
      if ((*s)[i+1] == '$') {
        // "$$": emit the first '$' here; the second is emitted via |c| below.
        r += '$';
        i++;
      }
    }
    r += c;
    i++;
    prev = i;
    i += SkipUntilSSE42(s->c_str() + i, s->size() - i, ranges, 10);
  }
  // Copy whatever follows the last special character.
  StringPiece(*s).substr(prev).AppendToString(&r);
  s->swap(r);
#else
  if (s->find_first_of("$`\\\"") == string::npos)
    return;
  string r;
  // True right after a '$' that was escaped, so that a directly following
  // '$' is copied without a backslash.
  bool last_dollar = false;
  for (char c : *s) {
    switch (c) {
      case '$':
        if (last_dollar) {
          r += c;
          last_dollar = false;
        } else {
          r += '\\';
          r += c;
          last_dollar = true;
        }
        break;
      case '`':
      case '"':
      case '\\':
        r += '\\';
        // fall through.
      default:
        r += c;
        last_dollar = false;
    }
  }
  s->swap(r);
#endif
}
587