1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_URI_H_
6 #define V8_URI_H_
7 
8 #include "src/v8.h"
9 
10 #include "src/conversions.h"
11 #include "src/string-search.h"
12 #include "src/utils.h"
13 
14 namespace v8 {
15 namespace internal {
16 
17 
18 template <typename Char>
19 static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
20 
21 
22 template <>
GetCharVector(Handle<String> string)23 Vector<const uint8_t> GetCharVector(Handle<String> string) {
24   String::FlatContent flat = string->GetFlatContent();
25   DCHECK(flat.IsOneByte());
26   return flat.ToOneByteVector();
27 }
28 
29 
30 template <>
GetCharVector(Handle<String> string)31 Vector<const uc16> GetCharVector(Handle<String> string) {
32   String::FlatContent flat = string->GetFlatContent();
33   DCHECK(flat.IsTwoByte());
34   return flat.ToUC16Vector();
35 }
36 
37 
38 class URIUnescape : public AllStatic {
39  public:
40   template<typename Char>
41   MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
42                                                       Handle<String> source);
43 
44  private:
45   static const signed char kHexValue['g'];
46 
47   template<typename Char>
48   MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(
49       Isolate* isolate, Handle<String> string, int start_index);
50 
51   static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
52 
53   template <typename Char>
54   static INLINE(int UnescapeChar(Vector<const Char> vector,
55                                  int i,
56                                  int length,
57                                  int* step));
58 };
59 
60 
61 const signed char URIUnescape::kHexValue[] = {
62     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
64     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
65     -0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
66     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
67     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
68     -1, 10, 11, 12, 13, 14, 15 };
69 
70 
71 template<typename Char>
Unescape(Isolate * isolate,Handle<String> source)72 MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
73                                           Handle<String> source) {
74   int index;
75   { DisallowHeapAllocation no_allocation;
76     StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
77     index = search.Search(GetCharVector<Char>(source), 0);
78     if (index < 0) return source;
79   }
80   return UnescapeSlow<Char>(isolate, source, index);
81 }
82 
83 
84 template <typename Char>
UnescapeSlow(Isolate * isolate,Handle<String> string,int start_index)85 MaybeHandle<String> URIUnescape::UnescapeSlow(
86     Isolate* isolate, Handle<String> string, int start_index) {
87   bool one_byte = true;
88   int length = string->length();
89 
90   int unescaped_length = 0;
91   { DisallowHeapAllocation no_allocation;
92     Vector<const Char> vector = GetCharVector<Char>(string);
93     for (int i = start_index; i < length; unescaped_length++) {
94       int step;
95       if (UnescapeChar(vector, i, length, &step) >
96               String::kMaxOneByteCharCode) {
97         one_byte = false;
98       }
99       i += step;
100     }
101   }
102 
103   DCHECK(start_index < length);
104   Handle<String> first_part =
105       isolate->factory()->NewProperSubString(string, 0, start_index);
106 
107   int dest_position = 0;
108   Handle<String> second_part;
109   DCHECK(unescaped_length <= String::kMaxLength);
110   if (one_byte) {
111     Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString(
112         unescaped_length).ToHandleChecked();
113     DisallowHeapAllocation no_allocation;
114     Vector<const Char> vector = GetCharVector<Char>(string);
115     for (int i = start_index; i < length; dest_position++) {
116       int step;
117       dest->SeqOneByteStringSet(dest_position,
118                                 UnescapeChar(vector, i, length, &step));
119       i += step;
120     }
121     second_part = dest;
122   } else {
123     Handle<SeqTwoByteString> dest = isolate->factory()->NewRawTwoByteString(
124         unescaped_length).ToHandleChecked();
125     DisallowHeapAllocation no_allocation;
126     Vector<const Char> vector = GetCharVector<Char>(string);
127     for (int i = start_index; i < length; dest_position++) {
128       int step;
129       dest->SeqTwoByteStringSet(dest_position,
130                                 UnescapeChar(vector, i, length, &step));
131       i += step;
132     }
133     second_part = dest;
134   }
135   return isolate->factory()->NewConsString(first_part, second_part);
136 }
137 
138 
TwoDigitHex(uint16_t character1,uint16_t character2)139 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
140   if (character1 > 'f') return -1;
141   int hi = kHexValue[character1];
142   if (hi == -1) return -1;
143   if (character2 > 'f') return -1;
144   int lo = kHexValue[character2];
145   if (lo == -1) return -1;
146   return (hi << 4) + lo;
147 }
148 
149 
150 template <typename Char>
UnescapeChar(Vector<const Char> vector,int i,int length,int * step)151 int URIUnescape::UnescapeChar(Vector<const Char> vector,
152                               int i,
153                               int length,
154                               int* step) {
155   uint16_t character = vector[i];
156   int32_t hi = 0;
157   int32_t lo = 0;
158   if (character == '%' &&
159       i <= length - 6 &&
160       vector[i + 1] == 'u' &&
161       (hi = TwoDigitHex(vector[i + 2],
162                         vector[i + 3])) != -1 &&
163       (lo = TwoDigitHex(vector[i + 4],
164                         vector[i + 5])) != -1) {
165     *step = 6;
166     return (hi << 8) + lo;
167   } else if (character == '%' &&
168       i <= length - 3 &&
169       (lo = TwoDigitHex(vector[i + 1],
170                         vector[i + 2])) != -1) {
171     *step = 3;
172     return lo;
173   } else {
174     *step = 1;
175     return character;
176   }
177 }
178 
179 
180 class URIEscape : public AllStatic {
181  public:
182   template<typename Char>
183   MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
184                                                     Handle<String> string);
185 
186  private:
187   static const char kHexChars[17];
188   static const char kNotEscaped[256];
189 
IsNotEscaped(uint16_t c)190   static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
191 };
192 
193 
194 const char URIEscape::kHexChars[] = "0123456789ABCDEF";
195 
196 
197 // kNotEscaped is generated by the following:
198 //
199 // #!/bin/perl
200 // for (my $i = 0; $i < 256; $i++) {
201 //   print "\n" if $i % 16 == 0;
202 //   my $c = chr($i);
203 //   my $escaped = 1;
204 //   $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
205 //   print $escaped ? "0, " : "1, ";
206 // }
207 
208 const char URIEscape::kNotEscaped[] = {
209     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
212     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
213     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
214     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
215     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
216     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
217     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
225 
226 
227 template<typename Char>
Escape(Isolate * isolate,Handle<String> string)228 MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
229   DCHECK(string->IsFlat());
230   int escaped_length = 0;
231   int length = string->length();
232 
233   { DisallowHeapAllocation no_allocation;
234     Vector<const Char> vector = GetCharVector<Char>(string);
235     for (int i = 0; i < length; i++) {
236       uint16_t c = vector[i];
237       if (c >= 256) {
238         escaped_length += 6;
239       } else if (IsNotEscaped(c)) {
240         escaped_length++;
241       } else {
242         escaped_length += 3;
243       }
244 
245       // We don't allow strings that are longer than a maximal length.
246       DCHECK(String::kMaxLength < 0x7fffffff - 6);  // Cannot overflow.
247       if (escaped_length > String::kMaxLength) break;  // Provoke exception.
248     }
249   }
250 
251   // No length change implies no change.  Return original string if no change.
252   if (escaped_length == length) return string;
253 
254   Handle<SeqOneByteString> dest;
255   ASSIGN_RETURN_ON_EXCEPTION(
256       isolate, dest,
257       isolate->factory()->NewRawOneByteString(escaped_length),
258       String);
259   int dest_position = 0;
260 
261   { DisallowHeapAllocation no_allocation;
262     Vector<const Char> vector = GetCharVector<Char>(string);
263     for (int i = 0; i < length; i++) {
264       uint16_t c = vector[i];
265       if (c >= 256) {
266         dest->SeqOneByteStringSet(dest_position, '%');
267         dest->SeqOneByteStringSet(dest_position+1, 'u');
268         dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]);
269         dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]);
270         dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]);
271         dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]);
272         dest_position += 6;
273       } else if (IsNotEscaped(c)) {
274         dest->SeqOneByteStringSet(dest_position, c);
275         dest_position++;
276       } else {
277         dest->SeqOneByteStringSet(dest_position, '%');
278         dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]);
279         dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]);
280         dest_position += 3;
281       }
282     }
283   }
284 
285   return dest;
286 }
287 
288 } }  // namespace v8::internal
289 
290 #endif  // V8_URI_H_
291