1 // Copyright 2015 Google Inc. All rights reserved
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // +build ignore
16
17 #include "strutil.h"
18
19 #include <ctype.h>
20 #include <limits.h>
21 #include <unistd.h>
22
23 #include <algorithm>
24 #include <stack>
25 #include <utility>
26
27 #ifdef __SSE4_2__
28 #include <smmintrin.h>
29 #endif
30
31 #include "log.h"
32
// Returns true when |c| is one of the six ASCII whitespace bytes: horizontal
// tab, line feed, vertical tab, form feed, carriage return or space.
static bool isSpace(char c) {
  switch (c) {
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case ' ':
      return true;
    default:
      return false;
  }
}
36
#ifdef __SSE4_2__
// Returns the offset of the first byte of s[0, len) that lies inside one of
// the inclusive [lo, hi] byte ranges stored pairwise in |ranges|, or |len|
// when no byte matches.
//
// |ranges_size| is the number of bytes in |ranges| (two per range, at most
// 16). Both the range table and the final partial chunk of |s| are staged in
// zero-padded 16-byte blocks, because a 128-bit load always touches 16 bytes
// and would otherwise read past the end of the caller's buffers.
static int SkipUntilSSE42(const char* s, int len,
                          const char* ranges, int ranges_size) {
  constexpr int kChunk = 16;

  char range_block[kChunk] = {};
  for (int k = 0; k < ranges_size && k < kChunk; k++)
    range_block[k] = ranges[k];
  const __m128i ranges16 =
      _mm_loadu_si128(reinterpret_cast<const __m128i*>(range_block));

  int i = 0;
  // Whole 16-byte chunks can be loaded straight from the input.
  for (; len - i >= kChunk; i += kChunk) {
    const __m128i b16 =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(s + i));
    const int r = _mm_cmpestri(
        ranges16, ranges_size, b16, kChunk,
        _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
    if (r != kChunk) {
      return i + r;
    }
  }

  // Fewer than 16 bytes remain: copy them so the load stays in bounds. The
  // explicit length keeps the padding bytes out of the comparison.
  if (i < len) {
    const int rest = len - i;
    char tail[kChunk] = {};
    for (int k = 0; k < rest; k++)
      tail[k] = s[i + k];
    const __m128i b16 =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(tail));
    const int r = _mm_cmpestri(
        ranges16, ranges_size, b16, rest,
        _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
    if (r != kChunk) {
      return i + r;
    }
  }
  return len;
}
#endif
55
operator ++()56 WordScanner::Iterator& WordScanner::Iterator::operator++() {
57 int len = static_cast<int>(in->size());
58 for (s = i + 1; s < len; s++) {
59 if (!isSpace((*in)[s]))
60 break;
61 }
62 if (s >= len) {
63 in = NULL;
64 s = 0;
65 i = 0;
66 return *this;
67 }
68
69 #ifdef __SSE4_2__
70 static const char ranges[] = "\x09\x0d ";
71 i = s;
72 i += SkipUntilSSE42(in->data() + s, len - s, ranges, 4);
73 #else
74 for (i = s; i < len; i++) {
75 if (isSpace((*in)[i]))
76 break;
77 }
78 #endif
79
80 return *this;
81 }
82
operator *() const83 StringPiece WordScanner::Iterator::operator*() const {
84 return in->substr(s, i - s);
85 }
86
WordScanner(StringPiece in)87 WordScanner::WordScanner(StringPiece in)
88 : in_(in) {
89 }
90
begin() const91 WordScanner::Iterator WordScanner::begin() const {
92 Iterator iter;
93 iter.in = &in_;
94 iter.s = 0;
95 iter.i = -1;
96 ++iter;
97 return iter;
98 }
99
end() const100 WordScanner::Iterator WordScanner::end() const {
101 Iterator iter;
102 iter.in = NULL;
103 iter.s = 0;
104 iter.i = 0;
105 return iter;
106 }
107
Split(vector<StringPiece> * o)108 void WordScanner::Split(vector<StringPiece>* o) {
109 for (StringPiece t : *this)
110 o->push_back(t);
111 }
112
WordWriter(string * o)113 WordWriter::WordWriter(string* o)
114 : out_(o),
115 needs_space_(false) {
116 }
117
MaybeAddWhitespace()118 void WordWriter::MaybeAddWhitespace() {
119 if (needs_space_) {
120 out_->push_back(' ');
121 } else {
122 needs_space_ = true;
123 }
124 }
125
Write(StringPiece s)126 void WordWriter::Write(StringPiece s) {
127 MaybeAddWhitespace();
128 AppendString(s, out_);
129 }
130
ScopedTerminator(StringPiece s)131 ScopedTerminator::ScopedTerminator(StringPiece s)
132 : s_(s), c_(s[s.size()]) {
133 const_cast<char*>(s_.data())[s_.size()] = '\0';
134 }
135
~ScopedTerminator()136 ScopedTerminator::~ScopedTerminator() {
137 const_cast<char*>(s_.data())[s_.size()] = c_;
138 }
139
AppendString(StringPiece str,string * out)140 void AppendString(StringPiece str, string* out) {
141 out->append(str.begin(), str.end());
142 }
143
HasPrefix(StringPiece str,StringPiece prefix)144 bool HasPrefix(StringPiece str, StringPiece prefix) {
145 ssize_t size_diff = str.size() - prefix.size();
146 return size_diff >= 0 && str.substr(0, prefix.size()) == prefix;
147 }
148
HasSuffix(StringPiece str,StringPiece suffix)149 bool HasSuffix(StringPiece str, StringPiece suffix) {
150 ssize_t size_diff = str.size() - suffix.size();
151 return size_diff >= 0 && str.substr(size_diff) == suffix;
152 }
153
HasWord(StringPiece str,StringPiece w)154 bool HasWord(StringPiece str, StringPiece w) {
155 size_t found = str.find(w);
156 if (found == string::npos)
157 return false;
158 if (found != 0 && !isSpace(str[found-1]))
159 return false;
160 size_t end = found + w.size();
161 if (end != str.size() && !isSpace(str[end]))
162 return false;
163 return true;
164 }
165
TrimSuffix(StringPiece str,StringPiece suffix)166 StringPiece TrimSuffix(StringPiece str, StringPiece suffix) {
167 ssize_t size_diff = str.size() - suffix.size();
168 if (size_diff < 0 || str.substr(size_diff) != suffix)
169 return str;
170 return str.substr(0, size_diff);
171 }
172
Pattern(StringPiece pat)173 Pattern::Pattern(StringPiece pat)
174 : pat_(pat), percent_index_(pat.find('%')) {
175 }
176
Match(StringPiece str) const177 bool Pattern::Match(StringPiece str) const {
178 if (percent_index_ == string::npos)
179 return str == pat_;
180 return MatchImpl(str);
181 }
182
MatchImpl(StringPiece str) const183 bool Pattern::MatchImpl(StringPiece str) const {
184 return (HasPrefix(str, pat_.substr(0, percent_index_)) &&
185 HasSuffix(str, pat_.substr(percent_index_ + 1)));
186 }
187
Stem(StringPiece str) const188 StringPiece Pattern::Stem(StringPiece str) const {
189 if (!Match(str))
190 return "";
191 return str.substr(percent_index_,
192 str.size() - (pat_.size() - percent_index_ - 1));
193 }
194
AppendSubst(StringPiece str,StringPiece subst,string * out) const195 void Pattern::AppendSubst(StringPiece str, StringPiece subst,
196 string* out) const {
197 if (percent_index_ == string::npos) {
198 if (str == pat_) {
199 AppendString(subst, out);
200 return;
201 } else {
202 AppendString(str, out);
203 return;
204 }
205 }
206
207 if (MatchImpl(str)) {
208 size_t subst_percent_index = subst.find('%');
209 if (subst_percent_index == string::npos) {
210 AppendString(subst, out);
211 return;
212 } else {
213 AppendString(subst.substr(0, subst_percent_index), out);
214 AppendString(str.substr(percent_index_,
215 str.size() - pat_.size() + 1), out);
216 AppendString(subst.substr(subst_percent_index + 1), out);
217 return;
218 }
219 }
220 AppendString(str, out);
221 }
222
AppendSubstRef(StringPiece str,StringPiece subst,string * out) const223 void Pattern::AppendSubstRef(StringPiece str, StringPiece subst,
224 string* out) const {
225 if (percent_index_ != string::npos && subst.find('%') != string::npos) {
226 AppendSubst(str, subst, out);
227 return;
228 }
229 StringPiece s = TrimSuffix(str, pat_);
230 out->append(s.begin(), s.end());
231 out->append(subst.begin(), subst.end());
232 }
233
// Returns a copy of |s| in which every line feed is replaced by the two
// characters backslash and 'n'. Built in a single pass instead of re-creating
// the whole string once per newline.
string NoLineBreak(const string& s) {
  if (s.find('\n') == string::npos)
    return s;
  string r;
  // At least one replacement will grow the result by one byte.
  r.reserve(s.size() + 1);
  for (char c : s) {
    if (c == '\n') {
      r += "\\n";
    } else {
      r += c;
    }
  }
  return r;
}
245
TrimLeftSpace(StringPiece s)246 StringPiece TrimLeftSpace(StringPiece s) {
247 size_t i = 0;
248 for (; i < s.size(); i++) {
249 if (isSpace(s[i]))
250 continue;
251 char n = s.get(i+1);
252 if (s[i] == '\\' && (n == '\r' || n == '\n')) {
253 i++;
254 continue;
255 }
256 break;
257 }
258 return s.substr(i, s.size() - i);
259 }
260
TrimRightSpace(StringPiece s)261 StringPiece TrimRightSpace(StringPiece s) {
262 size_t i = 0;
263 for (; i < s.size(); i++) {
264 char c = s[s.size() - 1 - i];
265 if (isSpace(c)) {
266 if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\')
267 i++;
268 continue;
269 }
270 break;
271 }
272 return s.substr(0, s.size() - i);
273 }
274
TrimSpace(StringPiece s)275 StringPiece TrimSpace(StringPiece s) {
276 return TrimRightSpace(TrimLeftSpace(s));
277 }
278
Dirname(StringPiece s)279 StringPiece Dirname(StringPiece s) {
280 size_t found = s.rfind('/');
281 if (found == string::npos)
282 return StringPiece(".");
283 if (found == 0)
284 return StringPiece("");
285 return s.substr(0, found);
286 }
287
Basename(StringPiece s)288 StringPiece Basename(StringPiece s) {
289 size_t found = s.rfind('/');
290 if (found == string::npos || found == 0)
291 return s;
292 return s.substr(found + 1);
293 }
294
GetExt(StringPiece s)295 StringPiece GetExt(StringPiece s) {
296 size_t found = s.rfind('.');
297 if (found == string::npos)
298 return StringPiece("");
299 return s.substr(found);
300 }
301
StripExt(StringPiece s)302 StringPiece StripExt(StringPiece s) {
303 size_t slash_index = s.rfind('/');
304 size_t found = s.rfind('.');
305 if (found == string::npos ||
306 (slash_index != string::npos && found < slash_index))
307 return s;
308 return s.substr(0, found);
309 }
310
// Normalizes the path in |*o| in place and shrinks the string to the result.
//
// The string is compacted with a read index |i| and a write index |j|
// (j <= i). Each time a '/' or the terminating NUL is reached, the component
// that was just copied is inspected: "." is dropped, ".." removes the
// component before it when there is one to remove, an empty component
// (repeated '/') is skipped, and anything else is kept followed by the
// separator. A leading '/' is preserved and a trailing '/' is removed.
void NormalizePath(string* o) {
  if (o->empty())
    return;
  // For an absolute path, leave the leading '/' where it is and start after
  // it.
  size_t start_index = 0;
  if ((*o)[0] == '/')
    start_index++;
  size_t j = start_index;
  // Offset in the output at which the component being copied begins.
  size_t prev_start = start_index;
  // i deliberately reaches o->size(): the NUL read there ends the last
  // component the same way a '/' would.
  for (size_t i = start_index; i <= o->size(); i++) {
    char c = (*o)[i];
    if (c != '/' && c != 0) {
      // Ordinary byte: copy it down to the write position.
      (*o)[j] = c;
      j++;
      continue;
    }

    // A component just ended; it occupies [prev_start, j) of the output.
    StringPiece prev_dir = StringPiece(o->data() + prev_start, j - prev_start);
    if (prev_dir == ".") {
      // Drop the "." that was just copied.
      j--;
    } else if (prev_dir == ".." && j != 2 /* .. */) {
      if (j == 3) {
        // /..
        j = start_index;
      } else {
        size_t orig_j = j;
        // Step back over the ".." and the separator in front of it, then
        // search backwards for the '/' that precedes the previous component.
        j -= 4;
        j = o->rfind('/', j);
        if (j == string::npos) {
          j = start_index;
        } else {
          j++;
        }
        // j is now where the previous component starts. If that component is
        // itself "..", it cannot be cancelled: restore the write position and
        // keep this ".." followed by the byte that ended it.
        if (StringPiece(o->data() + j, 3) == "../") {
          j = orig_j;
          (*o)[j] = c;
          j++;
        }
      }
    } else if (!prev_dir.empty()) {
      // Regular component (or an output consisting of just ".."): keep it and
      // emit the separator, unless the end of the string was reached.
      if (c) {
        (*o)[j] = c;
        j++;
      }
    }
    prev_start = j;
  }
  // Remove a trailing '/', except when the '/' is the only byte left.
  if (j > 1 && (*o)[j-1] == '/')
    j--;
  o->resize(j);
}
361
AbsPath(StringPiece s,string * o)362 void AbsPath(StringPiece s, string* o) {
363 if (s.get(0) == '/') {
364 o->clear();
365 } else {
366 char buf[PATH_MAX];
367 if (!getcwd(buf, PATH_MAX)) {
368 fprintf(stderr, "getcwd failed\n");
369 CHECK(false);
370 }
371
372 CHECK(buf[0] == '/');
373 *o = buf;
374 *o += '/';
375 }
376 AppendString(s, o);
377 NormalizePath(o);
378 }
379
380 template<typename Cond>
FindOutsideParenImpl(StringPiece s,Cond cond)381 size_t FindOutsideParenImpl(StringPiece s, Cond cond) {
382 bool prev_backslash = false;
383 stack<char> paren_stack;
384 for (size_t i = 0; i < s.size(); i++) {
385 char c = s[i];
386 if (cond(c) && paren_stack.empty() && !prev_backslash) {
387 return i;
388 }
389 switch (c) {
390 case '(':
391 paren_stack.push(')');
392 break;
393 case '{':
394 paren_stack.push('}');
395 break;
396
397 case ')':
398 case '}':
399 if (!paren_stack.empty() && c == paren_stack.top()) {
400 paren_stack.pop();
401 }
402 break;
403 }
404 prev_backslash = c == '\\' && !prev_backslash;
405 }
406 return string::npos;
407 }
408
FindOutsideParen(StringPiece s,char c)409 size_t FindOutsideParen(StringPiece s, char c) {
410 return FindOutsideParenImpl(s, [&c](char d){return c == d;});
411 }
412
FindTwoOutsideParen(StringPiece s,char c1,char c2)413 size_t FindTwoOutsideParen(StringPiece s, char c1, char c2) {
414 return FindOutsideParenImpl(s, [&c1, &c2](char d){
415 return d == c1 || d == c2;
416 });
417 }
418
FindThreeOutsideParen(StringPiece s,char c1,char c2,char c3)419 size_t FindThreeOutsideParen(StringPiece s, char c1, char c2, char c3) {
420 return FindOutsideParenImpl(s, [&c1, &c2, &c3](char d){
421 return d == c1 || d == c2 || d == c3;
422 });
423 }
424
FindEndOfLine(StringPiece s,size_t e,size_t * lf_cnt)425 size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) {
426 #ifdef __SSE4_2__
427 static const char ranges[] = "\0\0\n\n\\\\";
428 while (e < s.size()) {
429 e += SkipUntilSSE42(s.data() + e, s.size() - e, ranges, 6);
430 if (e >= s.size()) {
431 CHECK(s.size() == e);
432 break;
433 }
434 char c = s[e];
435 if (c == '\0')
436 break;
437 if (c == '\\') {
438 if (s[e+1] == '\n') {
439 e += 2;
440 ++*lf_cnt;
441 } else if (s[e+1] == '\r' && s[e+2] == '\n') {
442 e += 3;
443 ++*lf_cnt;
444 } else if (s[e+1] == '\\') {
445 e += 2;
446 } else {
447 e++;
448 }
449 } else if (c == '\n') {
450 ++*lf_cnt;
451 return e;
452 }
453 }
454 return e;
455 #else
456 bool prev_backslash = false;
457 for (; e < s.size(); e++) {
458 char c = s[e];
459 if (c == '\\') {
460 prev_backslash = !prev_backslash;
461 } else if (c == '\n') {
462 ++*lf_cnt;
463 if (!prev_backslash) {
464 return e;
465 }
466 prev_backslash = false;
467 } else if (c != '\r') {
468 prev_backslash = false;
469 }
470 }
471 return e;
472 #endif
473 }
474
TrimLeadingCurdir(StringPiece s)475 StringPiece TrimLeadingCurdir(StringPiece s) {
476 while (s.substr(0, 2) == "./")
477 s = s.substr(2);
478 return s;
479 }
480
// Rewrites |*s| in place: all trailing line feeds are removed and every
// remaining line feed becomes a space.
void FormatForCommandSubstitution(string* s) {
  // The emptiness check matters: the input may be empty, or consist only of
  // line feeds and therefore become empty while being trimmed.
  while (!s->empty() && s->back() == '\n')
    s->pop_back();
  for (char& c : *s) {
    if (c == '\n')
      c = ' ';
  }
}
489
SortWordsInString(StringPiece s)490 string SortWordsInString(StringPiece s) {
491 vector<string> toks;
492 for (StringPiece tok : WordScanner(s)) {
493 toks.push_back(tok.as_string());
494 }
495 sort(toks.begin(), toks.end());
496 return JoinStrings(toks, " ");
497 }
498
ConcatDir(StringPiece b,StringPiece n)499 string ConcatDir(StringPiece b, StringPiece n) {
500 string r;
501 if (!b.empty()) {
502 b.AppendToString(&r);
503 r += '/';
504 }
505 n.AppendToString(&r);
506 NormalizePath(&r);
507 return r;
508 }
509
// Returns |str| with each backslash expanded to four backslashes, each line
// feed replaced by the two characters backslash and 'n', and each double
// quote prefixed with a backslash. Processing stops at the first NUL byte.
string EchoEscape(const string str) {
  string escaped;
  for (const char* p = str.c_str(); *p != '\0'; ++p) {
    const char ch = *p;
    if (ch == '\\') {
      escaped.append("\\\\\\\\");
    } else if (ch == '\n') {
      escaped.append("\\n");
    } else if (ch == '"') {
      escaped.append("\\\"");
    } else {
      escaped.push_back(ch);
    }
  }
  return escaped;
}
530
// Escapes |*s| in place by putting a backslash in front of '"', '`', '\\'
// and '$'. For two adjacent dollars only one backslash is emitted, in front
// of the pair. The string is left untouched when nothing needs escaping.
void EscapeShell(string* s) {
#ifdef __SSE4_2__
  // Single-byte ranges: NUL, '"', '$', '\\' and '`'.
  static const char ranges[] = "\0\0\"\"$$\\\\``";
  size_t pos = SkipUntilSSE42(s->c_str(), s->size(), ranges, 10);
  if (pos == s->size())
    return;

  string escaped;
  size_t copied = 0;  // Everything before this offset is already emitted.
  while (pos < s->size()) {
    // Flush the run of ordinary bytes in front of the special one.
    escaped.append(*s, copied, pos - copied);
    const char ch = (*s)[pos];
    escaped += '\\';
    if (ch == '$' && (*s)[pos + 1] == '$') {
      // "$$": emit both dollars behind the single backslash.
      escaped += '$';
      pos++;
    }
    escaped += ch;
    pos++;
    copied = pos;
    pos += SkipUntilSSE42(s->c_str() + pos, s->size() - pos, ranges, 10);
  }
  escaped.append(*s, copied, string::npos);
  s->swap(escaped);
#else
  if (s->find_first_of("$`\\\"") == string::npos)
    return;

  string escaped;
  // True right after a backslash-escaped '$', so that a directly following
  // '$' is emitted without a backslash of its own.
  bool after_escaped_dollar = false;
  for (size_t k = 0; k < s->size(); ++k) {
    const char ch = (*s)[k];
    if (ch == '$') {
      if (!after_escaped_dollar)
        escaped += '\\';
      escaped += ch;
      after_escaped_dollar = !after_escaped_dollar;
      continue;
    }
    if (ch == '`' || ch == '"' || ch == '\\')
      escaped += '\\';
    escaped += ch;
    after_escaped_dollar = false;
  }
  s->swap(escaped);
#endif
}
587