1 /*
2  * Copyright (C) 2006 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.webkit;
18 
19 import android.annotation.Nullable;
20 import android.net.ParseException;
21 import android.net.Uri;
22 import android.net.WebAddress;
23 import android.util.Log;
24 
25 import java.io.UnsupportedEncodingException;
26 import java.util.Locale;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29 
30 public final class URLUtil {
31 
32     private static final String LOGTAG = "webkit";
33     private static final boolean TRACE = false;
34 
35     // to refer to bar.png under your package's asset/foo/ directory, use
36     // "file:///android_asset/foo/bar.png".
37     static final String ASSET_BASE = "file:///android_asset/";
38     // to refer to bar.png under your package's res/drawable/ directory, use
39     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
40     // "drawable-hdpi" directory as well.
41     static final String RESOURCE_BASE = "file:///android_res/";
42     static final String FILE_BASE = "file:";
43     static final String PROXY_BASE = "file:///cookieless_proxy/";
44     static final String CONTENT_BASE = "content:";
45 
46     /**
47      * Cleans up (if possible) user-entered web addresses
48      */
guessUrl(String inUrl)49     public static String guessUrl(String inUrl) {
50 
51         String retVal = inUrl;
52         WebAddress webAddress;
53 
54         if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
55 
56         if (inUrl.length() == 0) return inUrl;
57         if (inUrl.startsWith("about:")) return inUrl;
58         // Do not try to interpret data scheme URLs
59         if (inUrl.startsWith("data:")) return inUrl;
60         // Do not try to interpret file scheme URLs
61         if (inUrl.startsWith("file:")) return inUrl;
62         // Do not try to interpret javascript scheme URLs
63         if (inUrl.startsWith("javascript:")) return inUrl;
64 
65         // bug 762454: strip period off end of url
66         if (inUrl.endsWith(".") == true) {
67             inUrl = inUrl.substring(0, inUrl.length() - 1);
68         }
69 
70         try {
71             webAddress = new WebAddress(inUrl);
72         } catch (ParseException ex) {
73 
74             if (TRACE) {
75                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
76             }
77             return retVal;
78         }
79 
80         // Check host
81         if (webAddress.getHost().indexOf('.') == -1) {
82             // no dot: user probably entered a bare domain.  try .com
83             webAddress.setHost("www." + webAddress.getHost() + ".com");
84         }
85         return webAddress.toString();
86     }
87 
composeSearchUrl(String inQuery, String template, String queryPlaceHolder)88     public static String composeSearchUrl(String inQuery, String template,
89                                           String queryPlaceHolder) {
90         int placeHolderIndex = template.indexOf(queryPlaceHolder);
91         if (placeHolderIndex < 0) {
92             return null;
93         }
94 
95         String query;
96         StringBuilder buffer = new StringBuilder();
97         buffer.append(template.substring(0, placeHolderIndex));
98 
99         try {
100             query = java.net.URLEncoder.encode(inQuery, "utf-8");
101             buffer.append(query);
102         } catch (UnsupportedEncodingException ex) {
103             return null;
104         }
105 
106         buffer.append(template.substring(
107                 placeHolderIndex + queryPlaceHolder.length()));
108 
109         return buffer.toString();
110     }
111 
decode(byte[] url)112     public static byte[] decode(byte[] url) throws IllegalArgumentException {
113         if (url.length == 0) {
114             return new byte[0];
115         }
116 
117         // Create a new byte array with the same length to ensure capacity
118         byte[] tempData = new byte[url.length];
119 
120         int tempCount = 0;
121         for (int i = 0; i < url.length; i++) {
122             byte b = url[i];
123             if (b == '%') {
124                 if (url.length - i > 2) {
125                     b = (byte) (parseHex(url[i + 1]) * 16
126                             + parseHex(url[i + 2]));
127                     i += 2;
128                 } else {
129                     throw new IllegalArgumentException("Invalid format");
130                 }
131             }
132             tempData[tempCount++] = b;
133         }
134         byte[] retData = new byte[tempCount];
135         System.arraycopy(tempData, 0, retData, 0, tempCount);
136         return retData;
137     }
138 
139     /**
140      * @return {@code true} if the url is correctly URL encoded
141      */
verifyURLEncoding(String url)142     static boolean verifyURLEncoding(String url) {
143         int count = url.length();
144         if (count == 0) {
145             return false;
146         }
147 
148         int index = url.indexOf('%');
149         while (index >= 0 && index < count) {
150             if (index < count - 2) {
151                 try {
152                     parseHex((byte) url.charAt(++index));
153                     parseHex((byte) url.charAt(++index));
154                 } catch (IllegalArgumentException e) {
155                     return false;
156                 }
157             } else {
158                 return false;
159             }
160             index = url.indexOf('%', index + 1);
161         }
162         return true;
163     }
164 
parseHex(byte b)165     private static int parseHex(byte b) {
166         if (b >= '0' && b <= '9') return (b - '0');
167         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
168         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
169 
170         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
171     }
172 
173     /**
174      * @return {@code true} if the url is an asset file.
175      */
isAssetUrl(String url)176     public static boolean isAssetUrl(String url) {
177         return (null != url) && url.startsWith(ASSET_BASE);
178     }
179 
180     /**
181      * @return {@code true} if the url is a resource file.
182      * @hide
183      */
isResourceUrl(String url)184     public static boolean isResourceUrl(String url) {
185         return (null != url) && url.startsWith(RESOURCE_BASE);
186     }
187 
188     /**
189      * @return {@code true} if the url is a proxy url to allow cookieless network
190      * requests from a file url.
191      * @deprecated Cookieless proxy is no longer supported.
192      */
193     @Deprecated
isCookielessProxyUrl(String url)194     public static boolean isCookielessProxyUrl(String url) {
195         return (null != url) && url.startsWith(PROXY_BASE);
196     }
197 
198     /**
199      * @return {@code true} if the url is a local file.
200      */
isFileUrl(String url)201     public static boolean isFileUrl(String url) {
202         return (null != url) && (url.startsWith(FILE_BASE) &&
203                                  !url.startsWith(ASSET_BASE) &&
204                                  !url.startsWith(PROXY_BASE));
205     }
206 
207     /**
208      * @return {@code true} if the url is an about: url.
209      */
isAboutUrl(String url)210     public static boolean isAboutUrl(String url) {
211         return (null != url) && url.startsWith("about:");
212     }
213 
214     /**
215      * @return {@code true} if the url is a data: url.
216      */
isDataUrl(String url)217     public static boolean isDataUrl(String url) {
218         return (null != url) && url.startsWith("data:");
219     }
220 
221     /**
222      * @return {@code true} if the url is a javascript: url.
223      */
isJavaScriptUrl(String url)224     public static boolean isJavaScriptUrl(String url) {
225         return (null != url) && url.startsWith("javascript:");
226     }
227 
228     /**
229      * @return {@code true} if the url is an http: url.
230      */
isHttpUrl(String url)231     public static boolean isHttpUrl(String url) {
232         return (null != url) &&
233                (url.length() > 6) &&
234                url.substring(0, 7).equalsIgnoreCase("http://");
235     }
236 
237     /**
238      * @return {@code true} if the url is an https: url.
239      */
isHttpsUrl(String url)240     public static boolean isHttpsUrl(String url) {
241         return (null != url) &&
242                (url.length() > 7) &&
243                url.substring(0, 8).equalsIgnoreCase("https://");
244     }
245 
246     /**
247      * @return {@code true} if the url is a network url.
248      */
isNetworkUrl(String url)249     public static boolean isNetworkUrl(String url) {
250         if (url == null || url.length() == 0) {
251             return false;
252         }
253         return isHttpUrl(url) || isHttpsUrl(url);
254     }
255 
256     /**
257      * @return {@code true} if the url is a content: url.
258      */
isContentUrl(String url)259     public static boolean isContentUrl(String url) {
260         return (null != url) && url.startsWith(CONTENT_BASE);
261     }
262 
263     /**
264      * @return {@code true} if the url is valid.
265      */
isValidUrl(String url)266     public static boolean isValidUrl(String url) {
267         if (url == null || url.length() == 0) {
268             return false;
269         }
270 
271         return (isAssetUrl(url) ||
272                 isResourceUrl(url) ||
273                 isFileUrl(url) ||
274                 isAboutUrl(url) ||
275                 isHttpUrl(url) ||
276                 isHttpsUrl(url) ||
277                 isJavaScriptUrl(url) ||
278                 isContentUrl(url));
279     }
280 
281     /**
282      * Strips the url of the anchor.
283      */
stripAnchor(String url)284     public static String stripAnchor(String url) {
285         int anchorIndex = url.indexOf('#');
286         if (anchorIndex != -1) {
287             return url.substring(0, anchorIndex);
288         }
289         return url;
290     }
291 
292     /**
293      * Guesses canonical filename that a download would have, using
294      * the URL and contentDisposition. File extension, if not defined,
295      * is added based on the mimetype
296      * @param url Url to the content
297      * @param contentDisposition Content-Disposition HTTP header or {@code null}
298      * @param mimeType Mime-type of the content or {@code null}
299      *
300      * @return suggested filename
301      */
guessFileName( String url, @Nullable String contentDisposition, @Nullable String mimeType)302     public static final String guessFileName(
303             String url,
304             @Nullable String contentDisposition,
305             @Nullable String mimeType) {
306         String filename = null;
307         String extension = null;
308 
309         // If we couldn't do anything with the hint, move toward the content disposition
310         if (filename == null && contentDisposition != null) {
311             filename = parseContentDisposition(contentDisposition);
312             if (filename != null) {
313                 int index = filename.lastIndexOf('/') + 1;
314                 if (index > 0) {
315                     filename = filename.substring(index);
316                 }
317             }
318         }
319 
320         // If all the other http-related approaches failed, use the plain uri
321         if (filename == null) {
322             String decodedUrl = Uri.decode(url);
323             if (decodedUrl != null) {
324                 int queryIndex = decodedUrl.indexOf('?');
325                 // If there is a query string strip it, same as desktop browsers
326                 if (queryIndex > 0) {
327                     decodedUrl = decodedUrl.substring(0, queryIndex);
328                 }
329                 if (!decodedUrl.endsWith("/")) {
330                     int index = decodedUrl.lastIndexOf('/') + 1;
331                     if (index > 0) {
332                         filename = decodedUrl.substring(index);
333                     }
334                 }
335             }
336         }
337 
338         // Finally, if couldn't get filename from URI, get a generic filename
339         if (filename == null) {
340             filename = "downloadfile";
341         }
342 
343         // Split filename between base and extension
344         // Add an extension if filename does not have one
345         int dotIndex = filename.indexOf('.');
346         if (dotIndex < 0) {
347             if (mimeType != null) {
348                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
349                 if (extension != null) {
350                     extension = "." + extension;
351                 }
352             }
353             if (extension == null) {
354                 if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
355                     if (mimeType.equalsIgnoreCase("text/html")) {
356                         extension = ".html";
357                     } else {
358                         extension = ".txt";
359                     }
360                 } else {
361                     extension = ".bin";
362                 }
363             }
364         } else {
365             if (mimeType != null) {
366                 // Compare the last segment of the extension against the mime type.
367                 // If there's a mismatch, discard the entire extension.
368                 int lastDotIndex = filename.lastIndexOf('.');
369                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
370                         filename.substring(lastDotIndex + 1));
371                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
372                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
373                     if (extension != null) {
374                         extension = "." + extension;
375                     }
376                 }
377             }
378             if (extension == null) {
379                 extension = filename.substring(dotIndex);
380             }
381             filename = filename.substring(0, dotIndex);
382         }
383 
384         return filename + extension;
385     }
386 
387     /** Regex used to parse content-disposition headers */
388     private static final Pattern CONTENT_DISPOSITION_PATTERN =
389             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
390             Pattern.CASE_INSENSITIVE);
391 
392     /**
393      * Parse the Content-Disposition HTTP Header. The format of the header
394      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
395      * This header provides a filename for content that is going to be
396      * downloaded to the file system. We only support the attachment type.
397      * Note that RFC 2616 specifies the filename value must be double-quoted.
398      * Unfortunately some servers do not quote the value so to maintain
399      * consistent behaviour with other browsers, we allow unquoted values too.
400      */
parseContentDisposition(String contentDisposition)401     static String parseContentDisposition(String contentDisposition) {
402         try {
403             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
404             if (m.find()) {
405                 return m.group(2);
406             }
407         } catch (IllegalStateException ex) {
408              // This function is defined as returning null when it can't parse the header
409         }
410         return null;
411     }
412 }
413