1 /*
2  *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/base/stringencode.h"
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 
16 #include "webrtc/base/basictypes.h"
17 #include "webrtc/base/checks.h"
18 #include "webrtc/base/stringutils.h"
19 
20 namespace rtc {
21 
22 /////////////////////////////////////////////////////////////////////////////
23 // String Encoding Utilities
24 /////////////////////////////////////////////////////////////////////////////
25 
escape(char * buffer,size_t buflen,const char * source,size_t srclen,const char * illegal,char escape)26 size_t escape(char * buffer, size_t buflen,
27               const char * source, size_t srclen,
28               const char * illegal, char escape) {
29   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
30   if (buflen <= 0)
31     return 0;
32 
33   size_t srcpos = 0, bufpos = 0;
34   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
35     char ch = source[srcpos++];
36     if ((ch == escape) || ::strchr(illegal, ch)) {
37       if (bufpos + 2 >= buflen)
38         break;
39       buffer[bufpos++] = escape;
40     }
41     buffer[bufpos++] = ch;
42   }
43 
44   buffer[bufpos] = '\0';
45   return bufpos;
46 }
47 
unescape(char * buffer,size_t buflen,const char * source,size_t srclen,char escape)48 size_t unescape(char * buffer, size_t buflen,
49                 const char * source, size_t srclen,
50                 char escape) {
51   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
52   if (buflen <= 0)
53     return 0;
54 
55   size_t srcpos = 0, bufpos = 0;
56   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
57     char ch = source[srcpos++];
58     if ((ch == escape) && (srcpos < srclen)) {
59       ch = source[srcpos++];
60     }
61     buffer[bufpos++] = ch;
62   }
63   buffer[bufpos] = '\0';
64   return bufpos;
65 }
66 
encode(char * buffer,size_t buflen,const char * source,size_t srclen,const char * illegal,char escape)67 size_t encode(char * buffer, size_t buflen,
68               const char * source, size_t srclen,
69               const char * illegal, char escape) {
70   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
71   if (buflen <= 0)
72     return 0;
73 
74   size_t srcpos = 0, bufpos = 0;
75   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
76     char ch = source[srcpos++];
77     if ((ch != escape) && !::strchr(illegal, ch)) {
78       buffer[bufpos++] = ch;
79     } else if (bufpos + 3 >= buflen) {
80       break;
81     } else {
82       buffer[bufpos+0] = escape;
83       buffer[bufpos+1] = hex_encode((static_cast<unsigned char>(ch) >> 4) & 0xF);
84       buffer[bufpos+2] = hex_encode((static_cast<unsigned char>(ch)     ) & 0xF);
85       bufpos += 3;
86     }
87   }
88   buffer[bufpos] = '\0';
89   return bufpos;
90 }
91 
decode(char * buffer,size_t buflen,const char * source,size_t srclen,char escape)92 size_t decode(char * buffer, size_t buflen,
93               const char * source, size_t srclen,
94               char escape) {
95   if (buflen <= 0)
96     return 0;
97 
98   unsigned char h1, h2;
99   size_t srcpos = 0, bufpos = 0;
100   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
101     char ch = source[srcpos++];
102     if ((ch == escape)
103         && (srcpos + 1 < srclen)
104         && hex_decode(source[srcpos], &h1)
105         && hex_decode(source[srcpos+1], &h2)) {
106       buffer[bufpos++] = (h1 << 4) | h2;
107       srcpos += 2;
108     } else {
109       buffer[bufpos++] = ch;
110     }
111   }
112   buffer[bufpos] = '\0';
113   return bufpos;
114 }
115 
unsafe_filename_characters()116 const char* unsafe_filename_characters() {
117   // It might be better to have a single specification which is the union of
118   // all operating systems, unless one system is overly restrictive.
119 #if defined(WEBRTC_WIN)
120   return "\\/:*?\"<>|";
121 #else  // !WEBRTC_WIN
122   // TODO(grunell): Should this never be reached?
123   RTC_DCHECK(false);
124   return "";
125 #endif  // !WEBRTC_WIN
126 }
127 
// Bit flags stored in ASCII_CLASS. A set bit means the character has to be
// escaped by the corresponding encoder. XML_UNSAFE and HTML_UNSAFE share the
// same bit, so both encoders treat the same characters as unsafe.
const unsigned char URL_UNSAFE  = 0x1; // 0-32 "#$%&+,/:;<=>?@[\]^`{|} 127
const unsigned char XML_UNSAFE  = 0x2; // "&'<>
const unsigned char HTML_UNSAFE = 0x2; // "&'<>

// Printable ASCII layout matching rows 2-4 of the table below (row 1 covers
// the control characters 0-31; the last entry of row 4 is DEL, 127):
//  ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
//@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _
//` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

// Per-character flag table indexed by 7-bit ASCII code; each entry is a
// bitwise OR of the *_UNSAFE flags above (3 == URL_UNSAFE | XML_UNSAFE).
const unsigned char ASCII_CLASS[128] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,0,3,1,1,1,3,2,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,3,1,3,1,
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,
};
142 
url_encode(char * buffer,size_t buflen,const char * source,size_t srclen)143 size_t url_encode(char * buffer, size_t buflen,
144                   const char * source, size_t srclen) {
145   if (NULL == buffer)
146     return srclen * 3 + 1;
147   if (buflen <= 0)
148     return 0;
149 
150   size_t srcpos = 0, bufpos = 0;
151   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
152     unsigned char ch = source[srcpos++];
153     if ((ch < 128) && (ASCII_CLASS[ch] & URL_UNSAFE)) {
154       if (bufpos + 3 >= buflen) {
155         break;
156       }
157       buffer[bufpos+0] = '%';
158       buffer[bufpos+1] = hex_encode((ch >> 4) & 0xF);
159       buffer[bufpos+2] = hex_encode((ch     ) & 0xF);
160       bufpos += 3;
161     } else {
162       buffer[bufpos++] = ch;
163     }
164   }
165   buffer[bufpos] = '\0';
166   return bufpos;
167 }
168 
url_decode(char * buffer,size_t buflen,const char * source,size_t srclen)169 size_t url_decode(char * buffer, size_t buflen,
170                   const char * source, size_t srclen) {
171   if (NULL == buffer)
172     return srclen + 1;
173   if (buflen <= 0)
174     return 0;
175 
176   unsigned char h1, h2;
177   size_t srcpos = 0, bufpos = 0;
178   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
179     unsigned char ch = source[srcpos++];
180     if (ch == '+') {
181       buffer[bufpos++] = ' ';
182     } else if ((ch == '%')
183                && (srcpos + 1 < srclen)
184                && hex_decode(source[srcpos], &h1)
185                && hex_decode(source[srcpos+1], &h2))
186     {
187       buffer[bufpos++] = (h1 << 4) | h2;
188       srcpos += 2;
189     } else {
190       buffer[bufpos++] = ch;
191     }
192   }
193   buffer[bufpos] = '\0';
194   return bufpos;
195 }
196 
// Decodes the single UTF-8 sequence (1 to 4 bytes) that starts at |source|.
// At most |srclen| bytes are examined. On success the decoded value is stored
// in |*value| and the number of bytes consumed is returned. Returns 0, leaving
// |*value| untouched, when |srclen| is 0, the sequence is truncated, a
// trailing byte is not of the form 10xxxxxx, or the lead byte is not a valid
// 1-4 byte lead. Overlong encodings are not rejected.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value) {
  // Nothing to decode; reading s[0] would be out of bounds.
  if (srclen == 0) {
    return 0;
  }
  const unsigned char* s = reinterpret_cast<const unsigned char*>(source);
  if ((s[0] & 0x80) == 0x00) {                    // Check s[0] == 0xxxxxxx
    *value = s[0];
    return 1;
  }
  if ((srclen < 2) || ((s[1] & 0xC0) != 0x80)) {  // Check s[1] != 10xxxxxx
    return 0;
  }
  // Accumulate the trailer byte values in |trailer|, and combine it with the
  // relevant bits from s[0], once we've determined the sequence length.
  unsigned long trailer = (s[1] & 0x3F);
  if ((s[0] & 0xE0) == 0xC0) {                    // Check s[0] == 110xxxxx
    *value = ((s[0] & 0x1F) << 6) | trailer;
    return 2;
  }
  if ((srclen < 3) || ((s[2] & 0xC0) != 0x80)) {  // Check s[2] != 10xxxxxx
    return 0;
  }
  trailer = (trailer << 6) | (s[2] & 0x3F);
  if ((s[0] & 0xF0) == 0xE0) {                    // Check s[0] == 1110xxxx
    *value = ((s[0] & 0x0F) << 12) | trailer;
    return 3;
  }
  if ((srclen < 4) || ((s[3] & 0xC0) != 0x80)) {  // Check s[3] != 10xxxxxx
    return 0;
  }
  trailer = (trailer << 6) | (s[3] & 0x3F);
  if ((s[0] & 0xF8) == 0xF0) {                    // Check s[0] == 11110xxx
    *value = ((s[0] & 0x07) << 18) | trailer;
    return 4;
  }
  return 0;
}
231 
// Writes the UTF-8 encoding of |value| into |buffer| (no terminator is
// added). Values up to 0x1FFFFF are supported, using 1 to 4 bytes.
// Returns the number of bytes written, or 0 (writing nothing) when |value| is
// larger than 0x1FFFFF or the encoding does not fit in |buflen| bytes.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value) {
  // Pick the sequence length from the magnitude of the value.
  size_t needed;
  if (value <= 0x7F) {
    needed = 1;
  } else if (value <= 0x7FF) {
    needed = 2;
  } else if (value <= 0xFFFF) {
    needed = 3;
  } else if (value <= 0x1FFFFF) {
    needed = 4;
  } else {
    return 0;
  }
  if (buflen < needed) {
    return 0;
  }

  // Lead-byte marker bits, indexed by sequence length.
  const unsigned char kLeadMarker[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };

  // Fill trailing bytes from the end, six payload bits each; whatever is left
  // goes into the lead byte.
  unsigned long remaining = value;
  for (size_t i = needed - 1; i > 0; --i) {
    buffer[i] = static_cast<char>(0x80 | (remaining & 0x3F));
    remaining >>= 6;
  }
  buffer[0] = static_cast<char>(kLeadMarker[needed] |
                                static_cast<unsigned char>(remaining));
  return needed;
}
257 
html_encode(char * buffer,size_t buflen,const char * source,size_t srclen)258 size_t html_encode(char * buffer, size_t buflen,
259                    const char * source, size_t srclen) {
260   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
261   if (buflen <= 0)
262     return 0;
263 
264   size_t srcpos = 0, bufpos = 0;
265   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
266     unsigned char ch = source[srcpos];
267     if (ch < 128) {
268       srcpos += 1;
269       if (ASCII_CLASS[ch] & HTML_UNSAFE) {
270         const char * escseq = 0;
271         size_t esclen = 0;
272         switch (ch) {
273           case '<':  escseq = "&lt;";   esclen = 4; break;
274           case '>':  escseq = "&gt;";   esclen = 4; break;
275           case '\'': escseq = "&#39;";  esclen = 5; break;
276           case '\"': escseq = "&quot;"; esclen = 6; break;
277           case '&':  escseq = "&amp;";  esclen = 5; break;
278           default: RTC_DCHECK(false);
279         }
280         if (bufpos + esclen >= buflen) {
281           break;
282         }
283         memcpy(buffer + bufpos, escseq, esclen);
284         bufpos += esclen;
285       } else {
286         buffer[bufpos++] = ch;
287       }
288     } else {
289       // Largest value is 0x1FFFFF => &#2097151;  (10 characters)
290       const size_t kEscseqSize = 11;
291       char escseq[kEscseqSize];
292       unsigned long val;
293       if (size_t vallen = utf8_decode(&source[srcpos], srclen - srcpos, &val)) {
294         srcpos += vallen;
295       } else {
296         // Not a valid utf8 sequence, just use the raw character.
297         val = static_cast<unsigned char>(source[srcpos++]);
298       }
299       size_t esclen = sprintfn(escseq, kEscseqSize, "&#%lu;", val);
300       if (bufpos + esclen >= buflen) {
301         break;
302       }
303       memcpy(buffer + bufpos, escseq, esclen);
304       bufpos += esclen;
305     }
306   }
307   buffer[bufpos] = '\0';
308   return bufpos;
309 }
310 
html_decode(char * buffer,size_t buflen,const char * source,size_t srclen)311 size_t html_decode(char * buffer, size_t buflen,
312                    const char * source, size_t srclen) {
313   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
314   return xml_decode(buffer, buflen, source, srclen);
315 }
316 
xml_encode(char * buffer,size_t buflen,const char * source,size_t srclen)317 size_t xml_encode(char * buffer, size_t buflen,
318                   const char * source, size_t srclen) {
319   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
320   if (buflen <= 0)
321     return 0;
322 
323   size_t srcpos = 0, bufpos = 0;
324   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
325     unsigned char ch = source[srcpos++];
326     if ((ch < 128) && (ASCII_CLASS[ch] & XML_UNSAFE)) {
327       const char * escseq = 0;
328       size_t esclen = 0;
329       switch (ch) {
330         case '<':  escseq = "&lt;";   esclen = 4; break;
331         case '>':  escseq = "&gt;";   esclen = 4; break;
332         case '\'': escseq = "&apos;"; esclen = 6; break;
333         case '\"': escseq = "&quot;"; esclen = 6; break;
334         case '&':  escseq = "&amp;";  esclen = 5; break;
335         default: RTC_DCHECK(false);
336       }
337       if (bufpos + esclen >= buflen) {
338         break;
339       }
340       memcpy(buffer + bufpos, escseq, esclen);
341       bufpos += esclen;
342     } else {
343       buffer[bufpos++] = ch;
344     }
345   }
346   buffer[bufpos] = '\0';
347   return bufpos;
348 }
349 
xml_decode(char * buffer,size_t buflen,const char * source,size_t srclen)350 size_t xml_decode(char * buffer, size_t buflen,
351                   const char * source, size_t srclen) {
352   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
353   if (buflen <= 0)
354     return 0;
355 
356   size_t srcpos = 0, bufpos = 0;
357   while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
358     unsigned char ch = source[srcpos++];
359     if (ch != '&') {
360       buffer[bufpos++] = ch;
361     } else if ((srcpos + 2 < srclen)
362                && (memcmp(source + srcpos, "lt;", 3) == 0)) {
363       buffer[bufpos++] = '<';
364       srcpos += 3;
365     } else if ((srcpos + 2 < srclen)
366                && (memcmp(source + srcpos, "gt;", 3) == 0)) {
367       buffer[bufpos++] = '>';
368       srcpos += 3;
369     } else if ((srcpos + 4 < srclen)
370                && (memcmp(source + srcpos, "apos;", 5) == 0)) {
371       buffer[bufpos++] = '\'';
372       srcpos += 5;
373     } else if ((srcpos + 4 < srclen)
374                && (memcmp(source + srcpos, "quot;", 5) == 0)) {
375       buffer[bufpos++] = '\"';
376       srcpos += 5;
377     } else if ((srcpos + 3 < srclen)
378                && (memcmp(source + srcpos, "amp;", 4) == 0)) {
379       buffer[bufpos++] = '&';
380       srcpos += 4;
381     } else if ((srcpos < srclen) && (source[srcpos] == '#')) {
382       int int_base = 10;
383       if ((srcpos + 1 < srclen) && (source[srcpos+1] == 'x')) {
384         int_base = 16;
385         srcpos += 1;
386       }
387       char * ptr;
388       // TODO(grunell): Fix hack (ptr may go past end of data)
389       unsigned long val = strtoul(source + srcpos + 1, &ptr, int_base);
390       if ((static_cast<size_t>(ptr - source) < srclen) && (*ptr == ';')) {
391         srcpos = ptr - source + 1;
392       } else {
393         // Not a valid escape sequence.
394         break;
395       }
396       if (size_t esclen = utf8_encode(buffer + bufpos, buflen - bufpos, val)) {
397         bufpos += esclen;
398       } else {
399         // Not enough room to encode the character, or illegal character
400         break;
401       }
402     } else {
403       // Unrecognized escape sequence.
404       break;
405     }
406   }
407   buffer[bufpos] = '\0';
408   return bufpos;
409 }
410 
411 static const char HEX[] = "0123456789abcdef";
412 
hex_encode(unsigned char val)413 char hex_encode(unsigned char val) {
414   RTC_DCHECK_LT(val, 16);
415   return (val < 16) ? HEX[val] : '!';
416 }
417 
// Converts the hexadecimal digit |ch| ('0'-'9', 'a'-'f' or 'A'-'F') to its
// numeric value. On success stores the value (0-15) in |*val| and returns
// true. For any other character returns false and leaves |*val| untouched.
bool hex_decode(char ch, unsigned char* val) {
  if ((ch >= '0') && (ch <= '9')) {
    *val = ch - '0';
  } else if ((ch >= 'A') && (ch <= 'F')) {
    // Only A-F are hex digits; G-Z would produce values above 15.
    *val = (ch - 'A') + 10;
  } else if ((ch >= 'a') && (ch <= 'f')) {
    *val = (ch - 'a') + 10;
  } else {
    return false;
  }
  return true;
}
430 
hex_encode(char * buffer,size_t buflen,const char * csource,size_t srclen)431 size_t hex_encode(char* buffer, size_t buflen,
432                   const char* csource, size_t srclen) {
433   return hex_encode_with_delimiter(buffer, buflen, csource, srclen, 0);
434 }
435 
hex_encode_with_delimiter(char * buffer,size_t buflen,const char * csource,size_t srclen,char delimiter)436 size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
437                                  const char* csource, size_t srclen,
438                                  char delimiter) {
439   RTC_DCHECK(buffer);  // TODO(grunell): estimate output size
440   if (buflen == 0)
441     return 0;
442 
443   // Init and check bounds.
444   const unsigned char* bsource =
445       reinterpret_cast<const unsigned char*>(csource);
446   size_t srcpos = 0, bufpos = 0;
447   size_t needed = delimiter ? (srclen * 3) : (srclen * 2 + 1);
448   if (buflen < needed)
449     return 0;
450 
451   while (srcpos < srclen) {
452     unsigned char ch = bsource[srcpos++];
453     buffer[bufpos  ] = hex_encode((ch >> 4) & 0xF);
454     buffer[bufpos+1] = hex_encode((ch     ) & 0xF);
455     bufpos += 2;
456 
457     // Don't write a delimiter after the last byte.
458     if (delimiter && (srcpos < srclen)) {
459       buffer[bufpos] = delimiter;
460       ++bufpos;
461     }
462   }
463 
464   // Null terminate.
465   buffer[bufpos] = '\0';
466   return bufpos;
467 }
468 
hex_encode(const std::string & str)469 std::string hex_encode(const std::string& str) {
470   return hex_encode(str.c_str(), str.size());
471 }
472 
hex_encode(const char * source,size_t srclen)473 std::string hex_encode(const char* source, size_t srclen) {
474   return hex_encode_with_delimiter(source, srclen, 0);
475 }
476 
hex_encode_with_delimiter(const char * source,size_t srclen,char delimiter)477 std::string hex_encode_with_delimiter(const char* source, size_t srclen,
478                                       char delimiter) {
479   const size_t kBufferSize = srclen * 3;
480   char* buffer = STACK_ARRAY(char, kBufferSize);
481   size_t length = hex_encode_with_delimiter(buffer, kBufferSize,
482                                             source, srclen, delimiter);
483   RTC_DCHECK(srclen == 0 || length > 0);
484   return std::string(buffer, length);
485 }
486 
hex_decode(char * cbuffer,size_t buflen,const char * source,size_t srclen)487 size_t hex_decode(char * cbuffer, size_t buflen,
488                   const char * source, size_t srclen) {
489   return hex_decode_with_delimiter(cbuffer, buflen, source, srclen, 0);
490 }
491 
hex_decode_with_delimiter(char * cbuffer,size_t buflen,const char * source,size_t srclen,char delimiter)492 size_t hex_decode_with_delimiter(char* cbuffer, size_t buflen,
493                                  const char* source, size_t srclen,
494                                  char delimiter) {
495   RTC_DCHECK(cbuffer);  // TODO(grunell): estimate output size
496   if (buflen == 0)
497     return 0;
498 
499   // Init and bounds check.
500   unsigned char* bbuffer = reinterpret_cast<unsigned char*>(cbuffer);
501   size_t srcpos = 0, bufpos = 0;
502   size_t needed = (delimiter) ? (srclen + 1) / 3 : srclen / 2;
503   if (buflen < needed)
504     return 0;
505 
506   while (srcpos < srclen) {
507     if ((srclen - srcpos) < 2) {
508       // This means we have an odd number of bytes.
509       return 0;
510     }
511 
512     unsigned char h1, h2;
513     if (!hex_decode(source[srcpos], &h1) ||
514         !hex_decode(source[srcpos + 1], &h2))
515       return 0;
516 
517     bbuffer[bufpos++] = (h1 << 4) | h2;
518     srcpos += 2;
519 
520     // Remove the delimiter if needed.
521     if (delimiter && (srclen - srcpos) > 1) {
522       if (source[srcpos] != delimiter)
523         return 0;
524       ++srcpos;
525     }
526   }
527 
528   return bufpos;
529 }
530 
hex_decode(char * buffer,size_t buflen,const std::string & source)531 size_t hex_decode(char* buffer, size_t buflen, const std::string& source) {
532   return hex_decode_with_delimiter(buffer, buflen, source, 0);
533 }
hex_decode_with_delimiter(char * buffer,size_t buflen,const std::string & source,char delimiter)534 size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
535                                  const std::string& source, char delimiter) {
536   return hex_decode_with_delimiter(buffer, buflen,
537                                    source.c_str(), source.length(), delimiter);
538 }
539 
transform(std::string & value,size_t maxlen,const std::string & source,Transform t)540 size_t transform(std::string& value, size_t maxlen, const std::string& source,
541                  Transform t) {
542   char* buffer = STACK_ARRAY(char, maxlen + 1);
543   size_t length = t(buffer, maxlen + 1, source.data(), source.length());
544   value.assign(buffer, length);
545   return length;
546 }
547 
s_transform(const std::string & source,Transform t)548 std::string s_transform(const std::string& source, Transform t) {
549   // Ask transformation function to approximate the destination size (returns upper bound)
550   size_t maxlen = t(NULL, 0, source.data(), source.length());
551   char * buffer = STACK_ARRAY(char, maxlen);
552   size_t len = t(buffer, maxlen, source.data(), source.length());
553   std::string result(buffer, len);
554   return result;
555 }
556 
// Splits |source| at every |delimiter| and stores the non-empty pieces in
// |fields|, replacing its previous contents. Consecutive, leading and
// trailing delimiters therefore produce no empty tokens.
// Returns the number of tokens stored.
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields) {
  fields->clear();

  const size_t length = source.length();
  size_t begin = 0;
  while (begin < length) {
    size_t end = source.find(delimiter, begin);
    if (end == std::string::npos) {
      end = length;
    }
    // Skip empty pieces between adjacent delimiters.
    if (end != begin) {
      fields->push_back(source.substr(begin, end - begin));
    }
    begin = end + 1;
  }
  return fields->size();
}
574 
// Splits |source| at every |delimiter| and stores all pieces, including empty
// ones, in |fields|, replacing its previous contents. The result always holds
// one more entry than there are delimiters in |source|.
// Returns the number of tokens stored.
size_t tokenize_with_empty_tokens(const std::string& source,
                                  char delimiter,
                                  std::vector<std::string>* fields) {
  fields->clear();

  size_t begin = 0;
  for (size_t end = source.find(delimiter);
       end != std::string::npos;
       end = source.find(delimiter, begin)) {
    fields->push_back(source.substr(begin, end - begin));
    begin = end + 1;
  }
  // Whatever follows the last delimiter (possibly nothing) is the final token.
  fields->push_back(source.substr(begin));
  return fields->size();
}
589 
tokenize_append(const std::string & source,char delimiter,std::vector<std::string> * fields)590 size_t tokenize_append(const std::string& source, char delimiter,
591                        std::vector<std::string>* fields) {
592   if (!fields) return 0;
593 
594   std::vector<std::string> new_fields;
595   tokenize(source, delimiter, &new_fields);
596   fields->insert(fields->end(), new_fields.begin(), new_fields.end());
597   return fields->size();
598 }
599 
tokenize(const std::string & source,char delimiter,char start_mark,char end_mark,std::vector<std::string> * fields)600 size_t tokenize(const std::string& source, char delimiter, char start_mark,
601                 char end_mark, std::vector<std::string>* fields) {
602   if (!fields) return 0;
603   fields->clear();
604 
605   std::string remain_source = source;
606   while (!remain_source.empty()) {
607     size_t start_pos = remain_source.find(start_mark);
608     if (std::string::npos == start_pos) break;
609     std::string pre_mark;
610     if (start_pos > 0) {
611       pre_mark = remain_source.substr(0, start_pos - 1);
612     }
613 
614     ++start_pos;
615     size_t end_pos = remain_source.find(end_mark, start_pos);
616     if (std::string::npos == end_pos) break;
617 
618     // We have found the matching marks. First tokenize the pre-mask. Then add
619     // the marked part as a single field. Finally, loop back for the post-mark.
620     tokenize_append(pre_mark, delimiter, fields);
621     fields->push_back(remain_source.substr(start_pos, end_pos - start_pos));
622     remain_source = remain_source.substr(end_pos + 1);
623   }
624 
625   return tokenize_append(remain_source, delimiter, fields);
626 }
627 
// Splits |source| at the first run of |delimiter| characters. On success
// |*token| receives everything before the run, |*rest| everything after it
// (possibly empty), and true is returned. When |source| contains no
// |delimiter|, returns false and leaves |*token| and |*rest| untouched.
bool tokenize_first(const std::string& source,
                    const char delimiter,
                    std::string* token,
                    std::string* rest) {
  // Find the first delimiter
  const size_t left_pos = source.find(delimiter);
  if (left_pos == std::string::npos) {
    return false;
  }

  // Skip additional occurrences of delimiter. The search is bounded by the
  // string length, so it stays in range even when |delimiter| is '\0'.
  const size_t right_pos = source.find_first_not_of(delimiter, left_pos + 1);

  // Build both results before assigning, so the outputs may alias |source|.
  std::string head = source.substr(0, left_pos);
  std::string tail = (right_pos == std::string::npos)
                         ? std::string()
                         : source.substr(right_pos);
  token->swap(head);
  rest->swap(tail);
  return true;
}
648 
split(const std::string & source,char delimiter,std::vector<std::string> * fields)649 size_t split(const std::string& source, char delimiter,
650              std::vector<std::string>* fields) {
651   RTC_DCHECK(fields);
652   fields->clear();
653   size_t last = 0;
654   for (size_t i = 0; i < source.length(); ++i) {
655     if (source[i] == delimiter) {
656       fields->push_back(source.substr(last, i - last));
657       last = i + 1;
658     }
659   }
660   fields->push_back(source.substr(last, source.length() - last));
661   return fields->size();
662 }
663 
// Returns '_' when |c| compares below 32 or is one of < > : " / \ | * ?,
// and |c| itself otherwise.
// Note: the comparison is done on plain char, so on platforms where char is
// signed, bytes with the high bit set also compare below 32 and are replaced.
char make_char_safe_for_filename(char c) {
  if (c < 32)
    return '_';

  const char kReserved[] = { '<', '>', ':', '"', '/', '\\', '|', '*', '?' };
  for (size_t i = 0; i < sizeof(kReserved); ++i) {
    if (c == kReserved[i])
      return '_';
  }
  return c;
}
684 
685 /*
686 void sprintf(std::string& value, size_t maxlen, const char * format, ...) {
687   char * buffer = STACK_ARRAY(char, maxlen + 1);
688   va_list args;
689   va_start(args, format);
690   value.assign(buffer, vsprintfn(buffer, maxlen + 1, format, args));
691   va_end(args);
692 }
693 */
694 
695 /////////////////////////////////////////////////////////////////////////////
696 
697 }  // namespace rtc
698