1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.io;
18 
19 import java.io.File;
20 import java.util.ArrayList;
21 import java.util.Collection;
22 import java.util.Iterator;
23 import java.util.Stack;
24 
25 /**
26  * General filename and filepath manipulation utilities.
27  * <p>
28  * When dealing with filenames you can hit problems when moving from a Windows
29  * based development machine to a Unix based production machine.
30  * This class aims to help avoid those problems.
31  * <p>
32  * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
33  * using JDK {@link java.io.File File} objects and the two argument constructor
34  * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
35  * <p>
36  * Most methods on this class are designed to work the same on both Unix and Windows.
37  * Those that don't include 'System', 'Unix' or 'Windows' in their name.
38  * <p>
39  * Most methods recognise both separators (forward and back), and both
40  * sets of prefixes. See the javadoc of each method for details.
41  * <p>
42  * This class defines six components within a filename
43  * (example C:\dev\project\file.txt):
44  * <ul>
45  * <li>the prefix - C:\</li>
46  * <li>the path - dev\project\</li>
47  * <li>the full path - C:\dev\project\</li>
48  * <li>the name - file.txt</li>
49  * <li>the base name - file</li>
50  * <li>the extension - txt</li>
51  * </ul>
52  * Note that this class works best if directory filenames end with a separator.
53  * If you omit the last separator, it is impossible to determine if the filename
54  * corresponds to a file or a directory. As a result, we have chosen to say
55  * it corresponds to a file.
56  * <p>
57  * This class only supports Unix and Windows style names.
58  * Prefixes are matched as follows:
59  * <pre>
60  * Windows:
61  * a\b\c.txt           --> ""          --> relative
62  * \a\b\c.txt          --> "\"         --> current drive absolute
63  * C:a\b\c.txt         --> "C:"        --> drive relative
64  * C:\a\b\c.txt        --> "C:\"       --> absolute
65  * \\server\a\b\c.txt  --> "\\server\" --> UNC
66  *
67  * Unix:
68  * a/b/c.txt           --> ""          --> relative
69  * /a/b/c.txt          --> "/"         --> absolute
70  * ~/a/b/c.txt         --> "~/"        --> current user
71  * ~                   --> "~/"        --> current user (slash added)
72  * ~user/a/b/c.txt     --> "~user/"    --> named user
73  * ~user               --> "~user/"    --> named user (slash added)
74  * </pre>
75  * Both prefix styles are matched always, irrespective of the machine that you are
76  * currently running on.
77  * <p>
78  * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
79  *
80  * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A>
81  * @author <a href="mailto:sanders@apache.org">Scott Sanders</a>
82  * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
83  * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a>
84  * @author <a href="mailto:peter@apache.org">Peter Donald</a>
85  * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
86  * @author Matthew Hawthorne
87  * @author Martin Cooper
88  * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
89  * @author Stephen Colebourne
90  * @version $Id: FilenameUtils.java 609870 2008-01-08 04:46:26Z niallp $
91  * @since Commons IO 1.1
92  */
93 public class FilenameUtils {
94 
95     /**
96      * The extension separator character.
97      * @since Commons IO 1.4
98      */
99     public static final char EXTENSION_SEPARATOR = '.';
100 
101     /**
102      * The extension separator String.
103      * @since Commons IO 1.4
104      */
105     public static final String EXTENSION_SEPARATOR_STR = (new Character(EXTENSION_SEPARATOR)).toString();
106 
107     /**
108      * The Unix separator character.
109      */
110     private static final char UNIX_SEPARATOR = '/';
111 
112     /**
113      * The Windows separator character.
114      */
115     private static final char WINDOWS_SEPARATOR = '\\';
116 
117     /**
118      * The system separator character.
119      */
120     private static final char SYSTEM_SEPARATOR = File.separatorChar;
121 
122     /**
123      * The separator character that is the opposite of the system separator.
124      */
125     private static final char OTHER_SEPARATOR;
126     static {
127         if (isSystemWindows()) {
128             OTHER_SEPARATOR = UNIX_SEPARATOR;
129         } else {
130             OTHER_SEPARATOR = WINDOWS_SEPARATOR;
131         }
132     }
133 
134     /**
135      * Instances should NOT be constructed in standard programming.
136      */
FilenameUtils()137     public FilenameUtils() {
138         super();
139     }
140 
141     //-----------------------------------------------------------------------
142     /**
143      * Determines if Windows file system is in use.
144      *
145      * @return true if the system is Windows
146      */
isSystemWindows()147     static boolean isSystemWindows() {
148         return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
149     }
150 
151     //-----------------------------------------------------------------------
152     /**
153      * Checks if the character is a separator.
154      *
155      * @param ch  the character to check
156      * @return true if it is a separator character
157      */
isSeparator(char ch)158     private static boolean isSeparator(char ch) {
159         return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR);
160     }
161 
162     //-----------------------------------------------------------------------
163     /**
164      * Normalizes a path, removing double and single dot path steps.
165      * <p>
166      * This method normalizes a path to a standard format.
167      * The input may contain separators in either Unix or Windows format.
168      * The output will contain separators in the format of the system.
169      * <p>
170      * A trailing slash will be retained.
171      * A double slash will be merged to a single slash (but UNC names are handled).
172      * A single dot path segment will be removed.
173      * A double dot will cause that path segment and the one before to be removed.
174      * If the double dot has no parent path segment to work with, <code>null</code>
175      * is returned.
176      * <p>
177      * The output will be the same on both Unix and Windows except
178      * for the separator character.
179      * <pre>
180      * /foo//               -->   /foo/
181      * /foo/./              -->   /foo/
182      * /foo/../bar          -->   /bar
183      * /foo/../bar/         -->   /bar/
184      * /foo/../bar/../baz   -->   /baz
185      * //foo//./bar         -->   /foo/bar
186      * /../                 -->   null
187      * ../foo               -->   null
188      * foo/bar/..           -->   foo/
189      * foo/../../bar        -->   null
190      * foo/../bar           -->   bar
191      * //server/foo/../bar  -->   //server/bar
192      * //server/../bar      -->   null
193      * C:\foo\..\bar        -->   C:\bar
194      * C:\..\bar            -->   null
195      * ~/foo/../bar/        -->   ~/bar/
196      * ~/../bar             -->   null
197      * </pre>
198      * (Note the file separator returned will be correct for Windows/Unix)
199      *
200      * @param filename  the filename to normalize, null returns null
201      * @return the normalized filename, or null if invalid
202      */
normalize(String filename)203     public static String normalize(String filename) {
204         return doNormalize(filename, true);
205     }
206 
207     //-----------------------------------------------------------------------
208     /**
209      * Normalizes a path, removing double and single dot path steps,
210      * and removing any final directory separator.
211      * <p>
212      * This method normalizes a path to a standard format.
213      * The input may contain separators in either Unix or Windows format.
214      * The output will contain separators in the format of the system.
215      * <p>
216      * A trailing slash will be removed.
217      * A double slash will be merged to a single slash (but UNC names are handled).
218      * A single dot path segment will be removed.
219      * A double dot will cause that path segment and the one before to be removed.
220      * If the double dot has no parent path segment to work with, <code>null</code>
221      * is returned.
222      * <p>
223      * The output will be the same on both Unix and Windows except
224      * for the separator character.
225      * <pre>
226      * /foo//               -->   /foo
227      * /foo/./              -->   /foo
228      * /foo/../bar          -->   /bar
229      * /foo/../bar/         -->   /bar
230      * /foo/../bar/../baz   -->   /baz
231      * //foo//./bar         -->   /foo/bar
232      * /../                 -->   null
233      * ../foo               -->   null
234      * foo/bar/..           -->   foo
235      * foo/../../bar        -->   null
236      * foo/../bar           -->   bar
237      * //server/foo/../bar  -->   //server/bar
238      * //server/../bar      -->   null
239      * C:\foo\..\bar        -->   C:\bar
240      * C:\..\bar            -->   null
241      * ~/foo/../bar/        -->   ~/bar
242      * ~/../bar             -->   null
243      * </pre>
244      * (Note the file separator returned will be correct for Windows/Unix)
245      *
246      * @param filename  the filename to normalize, null returns null
247      * @return the normalized filename, or null if invalid
248      */
normalizeNoEndSeparator(String filename)249     public static String normalizeNoEndSeparator(String filename) {
250         return doNormalize(filename, false);
251     }
252 
253     /**
254      * Internal method to perform the normalization.
255      *
256      * @param filename  the filename
257      * @param keepSeparator  true to keep the final separator
258      * @return the normalized filename
259      */
doNormalize(String filename, boolean keepSeparator)260     private static String doNormalize(String filename, boolean keepSeparator) {
261         if (filename == null) {
262             return null;
263         }
264         int size = filename.length();
265         if (size == 0) {
266             return filename;
267         }
268         int prefix = getPrefixLength(filename);
269         if (prefix < 0) {
270             return null;
271         }
272 
273         char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
274         filename.getChars(0, filename.length(), array, 0);
275 
276         // fix separators throughout
277         for (int i = 0; i < array.length; i++) {
278             if (array[i] == OTHER_SEPARATOR) {
279                 array[i] = SYSTEM_SEPARATOR;
280             }
281         }
282 
283         // add extra separator on the end to simplify code below
284         boolean lastIsDirectory = true;
285         if (array[size - 1] != SYSTEM_SEPARATOR) {
286             array[size++] = SYSTEM_SEPARATOR;
287             lastIsDirectory = false;
288         }
289 
290         // adjoining slashes
291         for (int i = prefix + 1; i < size; i++) {
292             if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == SYSTEM_SEPARATOR) {
293                 System.arraycopy(array, i, array, i - 1, size - i);
294                 size--;
295                 i--;
296             }
297         }
298 
299         // dot slash
300         for (int i = prefix + 1; i < size; i++) {
301             if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' &&
302                     (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)) {
303                 if (i == size - 1) {
304                     lastIsDirectory = true;
305                 }
306                 System.arraycopy(array, i + 1, array, i - 1, size - i);
307                 size -=2;
308                 i--;
309             }
310         }
311 
312         // double dot slash
313         outer:
314         for (int i = prefix + 2; i < size; i++) {
315             if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && array[i - 2] == '.' &&
316                     (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)) {
317                 if (i == prefix + 2) {
318                     return null;
319                 }
320                 if (i == size - 1) {
321                     lastIsDirectory = true;
322                 }
323                 int j;
324                 for (j = i - 4 ; j >= prefix; j--) {
325                     if (array[j] == SYSTEM_SEPARATOR) {
326                         // remove b/../ from a/b/../c
327                         System.arraycopy(array, i + 1, array, j + 1, size - i);
328                         size -= (i - j);
329                         i = j + 1;
330                         continue outer;
331                     }
332                 }
333                 // remove a/../ from a/../c
334                 System.arraycopy(array, i + 1, array, prefix, size - i);
335                 size -= (i + 1 - prefix);
336                 i = prefix + 1;
337             }
338         }
339 
340         if (size <= 0) {  // should never be less than 0
341             return "";
342         }
343         if (size <= prefix) {  // should never be less than prefix
344             return new String(array, 0, size);
345         }
346         if (lastIsDirectory && keepSeparator) {
347             return new String(array, 0, size);  // keep trailing separator
348         }
349         return new String(array, 0, size - 1);  // lose trailing separator
350     }
351 
352     //-----------------------------------------------------------------------
353     /**
354      * Concatenates a filename to a base path using normal command line style rules.
355      * <p>
356      * The effect is equivalent to resultant directory after changing
357      * directory to the first argument, followed by changing directory to
358      * the second argument.
359      * <p>
360      * The first argument is the base path, the second is the path to concatenate.
361      * The returned path is always normalized via {@link #normalize(String)},
362      * thus <code>..</code> is handled.
363      * <p>
364      * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
365      * it will be normalized and returned.
366      * Otherwise, the paths will be joined, normalized and returned.
367      * <p>
368      * The output will be the same on both Unix and Windows except
369      * for the separator character.
370      * <pre>
371      * /foo/ + bar          -->   /foo/bar
372      * /foo + bar           -->   /foo/bar
373      * /foo + /bar          -->   /bar
374      * /foo + C:/bar        -->   C:/bar
375      * /foo + C:bar         -->   C:bar (*)
376      * /foo/a/ + ../bar     -->   foo/bar
377      * /foo/ + ../../bar    -->   null
378      * /foo/ + /bar         -->   /bar
379      * /foo/.. + /bar       -->   /bar
380      * /foo + bar/c.txt     -->   /foo/bar/c.txt
381      * /foo/c.txt + bar     -->   /foo/c.txt/bar (!)
382      * </pre>
383      * (*) Note that the Windows relative drive prefix is unreliable when
384      * used with this method.
385      * (!) Note that the first parameter must be a path. If it ends with a name, then
386      * the name will be built into the concatenated path. If this might be a problem,
387      * use {@link #getFullPath(String)} on the base path argument.
388      *
389      * @param basePath  the base path to attach to, always treated as a path
390      * @param fullFilenameToAdd  the filename (or path) to attach to the base
391      * @return the concatenated path, or null if invalid
392      */
concat(String basePath, String fullFilenameToAdd)393     public static String concat(String basePath, String fullFilenameToAdd) {
394         int prefix = getPrefixLength(fullFilenameToAdd);
395         if (prefix < 0) {
396             return null;
397         }
398         if (prefix > 0) {
399             return normalize(fullFilenameToAdd);
400         }
401         if (basePath == null) {
402             return null;
403         }
404         int len = basePath.length();
405         if (len == 0) {
406             return normalize(fullFilenameToAdd);
407         }
408         char ch = basePath.charAt(len - 1);
409         if (isSeparator(ch)) {
410             return normalize(basePath + fullFilenameToAdd);
411         } else {
412             return normalize(basePath + '/' + fullFilenameToAdd);
413         }
414     }
415 
416     //-----------------------------------------------------------------------
417     /**
418      * Converts all separators to the Unix separator of forward slash.
419      *
420      * @param path  the path to be changed, null ignored
421      * @return the updated path
422      */
separatorsToUnix(String path)423     public static String separatorsToUnix(String path) {
424         if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
425             return path;
426         }
427         return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
428     }
429 
430     /**
431      * Converts all separators to the Windows separator of backslash.
432      *
433      * @param path  the path to be changed, null ignored
434      * @return the updated path
435      */
separatorsToWindows(String path)436     public static String separatorsToWindows(String path) {
437         if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
438             return path;
439         }
440         return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
441     }
442 
443     /**
444      * Converts all separators to the system separator.
445      *
446      * @param path  the path to be changed, null ignored
447      * @return the updated path
448      */
separatorsToSystem(String path)449     public static String separatorsToSystem(String path) {
450         if (path == null) {
451             return null;
452         }
453         if (isSystemWindows()) {
454             return separatorsToWindows(path);
455         } else {
456             return separatorsToUnix(path);
457         }
458     }
459 
460     //-----------------------------------------------------------------------
461     /**
462      * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
463      * <p>
464      * This method will handle a file in either Unix or Windows format.
465      * <p>
466      * The prefix length includes the first slash in the full filename
467      * if applicable. Thus, it is possible that the length returned is greater
468      * than the length of the input string.
469      * <pre>
470      * Windows:
471      * a\b\c.txt           --> ""          --> relative
472      * \a\b\c.txt          --> "\"         --> current drive absolute
473      * C:a\b\c.txt         --> "C:"        --> drive relative
474      * C:\a\b\c.txt        --> "C:\"       --> absolute
475      * \\server\a\b\c.txt  --> "\\server\" --> UNC
476      *
477      * Unix:
478      * a/b/c.txt           --> ""          --> relative
479      * /a/b/c.txt          --> "/"         --> absolute
480      * ~/a/b/c.txt         --> "~/"        --> current user
481      * ~                   --> "~/"        --> current user (slash added)
482      * ~user/a/b/c.txt     --> "~user/"    --> named user
483      * ~user               --> "~user/"    --> named user (slash added)
484      * </pre>
485      * <p>
486      * The output will be the same irrespective of the machine that the code is running on.
487      * ie. both Unix and Windows prefixes are matched regardless.
488      *
489      * @param filename  the filename to find the prefix in, null returns -1
490      * @return the length of the prefix, -1 if invalid or null
491      */
getPrefixLength(String filename)492     public static int getPrefixLength(String filename) {
493         if (filename == null) {
494             return -1;
495         }
496         int len = filename.length();
497         if (len == 0) {
498             return 0;
499         }
500         char ch0 = filename.charAt(0);
501         if (ch0 == ':') {
502             return -1;
503         }
504         if (len == 1) {
505             if (ch0 == '~') {
506                 return 2;  // return a length greater than the input
507             }
508             return (isSeparator(ch0) ? 1 : 0);
509         } else {
510             if (ch0 == '~') {
511                 int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
512                 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
513                 if (posUnix == -1 && posWin == -1) {
514                     return len + 1;  // return a length greater than the input
515                 }
516                 posUnix = (posUnix == -1 ? posWin : posUnix);
517                 posWin = (posWin == -1 ? posUnix : posWin);
518                 return Math.min(posUnix, posWin) + 1;
519             }
520             char ch1 = filename.charAt(1);
521             if (ch1 == ':') {
522                 ch0 = Character.toUpperCase(ch0);
523                 if (ch0 >= 'A' && ch0 <= 'Z') {
524                     if (len == 2 || isSeparator(filename.charAt(2)) == false) {
525                         return 2;
526                     }
527                     return 3;
528                 }
529                 return -1;
530 
531             } else if (isSeparator(ch0) && isSeparator(ch1)) {
532                 int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
533                 int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
534                 if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) {
535                     return -1;
536                 }
537                 posUnix = (posUnix == -1 ? posWin : posUnix);
538                 posWin = (posWin == -1 ? posUnix : posWin);
539                 return Math.min(posUnix, posWin) + 1;
540             } else {
541                 return (isSeparator(ch0) ? 1 : 0);
542             }
543         }
544     }
545 
546     /**
547      * Returns the index of the last directory separator character.
548      * <p>
549      * This method will handle a file in either Unix or Windows format.
550      * The position of the last forward or backslash is returned.
551      * <p>
552      * The output will be the same irrespective of the machine that the code is running on.
553      *
554      * @param filename  the filename to find the last path separator in, null returns -1
555      * @return the index of the last separator character, or -1 if there
556      * is no such character
557      */
indexOfLastSeparator(String filename)558     public static int indexOfLastSeparator(String filename) {
559         if (filename == null) {
560             return -1;
561         }
562         int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
563         int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
564         return Math.max(lastUnixPos, lastWindowsPos);
565     }
566 
567     /**
568      * Returns the index of the last extension separator character, which is a dot.
569      * <p>
570      * This method also checks that there is no directory separator after the last dot.
571      * To do this it uses {@link #indexOfLastSeparator(String)} which will
572      * handle a file in either Unix or Windows format.
573      * <p>
574      * The output will be the same irrespective of the machine that the code is running on.
575      *
576      * @param filename  the filename to find the last path separator in, null returns -1
577      * @return the index of the last separator character, or -1 if there
578      * is no such character
579      */
indexOfExtension(String filename)580     public static int indexOfExtension(String filename) {
581         if (filename == null) {
582             return -1;
583         }
584         int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
585         int lastSeparator = indexOfLastSeparator(filename);
586         return (lastSeparator > extensionPos ? -1 : extensionPos);
587     }
588 
589     //-----------------------------------------------------------------------
590     /**
591      * Gets the prefix from a full filename, such as <code>C:/</code>
592      * or <code>~/</code>.
593      * <p>
594      * This method will handle a file in either Unix or Windows format.
595      * The prefix includes the first slash in the full filename where applicable.
596      * <pre>
597      * Windows:
598      * a\b\c.txt           --> ""          --> relative
599      * \a\b\c.txt          --> "\"         --> current drive absolute
600      * C:a\b\c.txt         --> "C:"        --> drive relative
601      * C:\a\b\c.txt        --> "C:\"       --> absolute
602      * \\server\a\b\c.txt  --> "\\server\" --> UNC
603      *
604      * Unix:
605      * a/b/c.txt           --> ""          --> relative
606      * /a/b/c.txt          --> "/"         --> absolute
607      * ~/a/b/c.txt         --> "~/"        --> current user
608      * ~                   --> "~/"        --> current user (slash added)
609      * ~user/a/b/c.txt     --> "~user/"    --> named user
610      * ~user               --> "~user/"    --> named user (slash added)
611      * </pre>
612      * <p>
613      * The output will be the same irrespective of the machine that the code is running on.
614      * ie. both Unix and Windows prefixes are matched regardless.
615      *
616      * @param filename  the filename to query, null returns null
617      * @return the prefix of the file, null if invalid
618      */
getPrefix(String filename)619     public static String getPrefix(String filename) {
620         if (filename == null) {
621             return null;
622         }
623         int len = getPrefixLength(filename);
624         if (len < 0) {
625             return null;
626         }
627         if (len > filename.length()) {
628             return filename + UNIX_SEPARATOR;  // we know this only happens for unix
629         }
630         return filename.substring(0, len);
631     }
632 
633     /**
634      * Gets the path from a full filename, which excludes the prefix.
635      * <p>
636      * This method will handle a file in either Unix or Windows format.
637      * The method is entirely text based, and returns the text before and
638      * including the last forward or backslash.
639      * <pre>
640      * C:\a\b\c.txt --> a\b\
641      * ~/a/b/c.txt  --> a/b/
642      * a.txt        --> ""
643      * a/b/c        --> a/b/
644      * a/b/c/       --> a/b/c/
645      * </pre>
646      * <p>
647      * The output will be the same irrespective of the machine that the code is running on.
648      * <p>
649      * This method drops the prefix from the result.
650      * See {@link #getFullPath(String)} for the method that retains the prefix.
651      *
652      * @param filename  the filename to query, null returns null
653      * @return the path of the file, an empty string if none exists, null if invalid
654      */
getPath(String filename)655     public static String getPath(String filename) {
656         return doGetPath(filename, 1);
657     }
658 
659     /**
660      * Gets the path from a full filename, which excludes the prefix, and
661      * also excluding the final directory separator.
662      * <p>
663      * This method will handle a file in either Unix or Windows format.
664      * The method is entirely text based, and returns the text before the
665      * last forward or backslash.
666      * <pre>
667      * C:\a\b\c.txt --> a\b
668      * ~/a/b/c.txt  --> a/b
669      * a.txt        --> ""
670      * a/b/c        --> a/b
671      * a/b/c/       --> a/b/c
672      * </pre>
673      * <p>
674      * The output will be the same irrespective of the machine that the code is running on.
675      * <p>
676      * This method drops the prefix from the result.
677      * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
678      *
679      * @param filename  the filename to query, null returns null
680      * @return the path of the file, an empty string if none exists, null if invalid
681      */
getPathNoEndSeparator(String filename)682     public static String getPathNoEndSeparator(String filename) {
683         return doGetPath(filename, 0);
684     }
685 
686     /**
687      * Does the work of getting the path.
688      *
689      * @param filename  the filename
690      * @param separatorAdd  0 to omit the end separator, 1 to return it
691      * @return the path
692      */
doGetPath(String filename, int separatorAdd)693     private static String doGetPath(String filename, int separatorAdd) {
694         if (filename == null) {
695             return null;
696         }
697         int prefix = getPrefixLength(filename);
698         if (prefix < 0) {
699             return null;
700         }
701         int index = indexOfLastSeparator(filename);
702         if (prefix >= filename.length() || index < 0) {
703             return "";
704         }
705         return filename.substring(prefix, index + separatorAdd);
706     }
707 
708     /**
709      * Gets the full path from a full filename, which is the prefix + path.
710      * <p>
711      * This method will handle a file in either Unix or Windows format.
712      * The method is entirely text based, and returns the text before and
713      * including the last forward or backslash.
714      * <pre>
715      * C:\a\b\c.txt --> C:\a\b\
716      * ~/a/b/c.txt  --> ~/a/b/
717      * a.txt        --> ""
718      * a/b/c        --> a/b/
719      * a/b/c/       --> a/b/c/
720      * C:           --> C:
721      * C:\          --> C:\
722      * ~            --> ~/
723      * ~/           --> ~/
724      * ~user        --> ~user/
725      * ~user/       --> ~user/
726      * </pre>
727      * <p>
728      * The output will be the same irrespective of the machine that the code is running on.
729      *
730      * @param filename  the filename to query, null returns null
731      * @return the path of the file, an empty string if none exists, null if invalid
732      */
getFullPath(String filename)733     public static String getFullPath(String filename) {
734         return doGetFullPath(filename, true);
735     }
736 
737     /**
738      * Gets the full path from a full filename, which is the prefix + path,
739      * and also excluding the final directory separator.
740      * <p>
741      * This method will handle a file in either Unix or Windows format.
742      * The method is entirely text based, and returns the text before the
743      * last forward or backslash.
744      * <pre>
745      * C:\a\b\c.txt --> C:\a\b
746      * ~/a/b/c.txt  --> ~/a/b
747      * a.txt        --> ""
748      * a/b/c        --> a/b
749      * a/b/c/       --> a/b/c
750      * C:           --> C:
751      * C:\          --> C:\
752      * ~            --> ~
753      * ~/           --> ~
754      * ~user        --> ~user
755      * ~user/       --> ~user
756      * </pre>
757      * <p>
758      * The output will be the same irrespective of the machine that the code is running on.
759      *
760      * @param filename  the filename to query, null returns null
761      * @return the path of the file, an empty string if none exists, null if invalid
762      */
getFullPathNoEndSeparator(String filename)763     public static String getFullPathNoEndSeparator(String filename) {
764         return doGetFullPath(filename, false);
765     }
766 
767     /**
768      * Does the work of getting the path.
769      *
770      * @param filename  the filename
771      * @param includeSeparator  true to include the end separator
772      * @return the path
773      */
doGetFullPath(String filename, boolean includeSeparator)774     private static String doGetFullPath(String filename, boolean includeSeparator) {
775         if (filename == null) {
776             return null;
777         }
778         int prefix = getPrefixLength(filename);
779         if (prefix < 0) {
780             return null;
781         }
782         if (prefix >= filename.length()) {
783             if (includeSeparator) {
784                 return getPrefix(filename);  // add end slash if necessary
785             } else {
786                 return filename;
787             }
788         }
789         int index = indexOfLastSeparator(filename);
790         if (index < 0) {
791             return filename.substring(0, prefix);
792         }
793         int end = index + (includeSeparator ?  1 : 0);
794         return filename.substring(0, end);
795     }
796 
797     /**
798      * Gets the name minus the path from a full filename.
799      * <p>
800      * This method will handle a file in either Unix or Windows format.
801      * The text after the last forward or backslash is returned.
802      * <pre>
803      * a/b/c.txt --> c.txt
804      * a.txt     --> a.txt
805      * a/b/c     --> c
806      * a/b/c/    --> ""
807      * </pre>
808      * <p>
809      * The output will be the same irrespective of the machine that the code is running on.
810      *
811      * @param filename  the filename to query, null returns null
812      * @return the name of the file without the path, or an empty string if none exists
813      */
getName(String filename)814     public static String getName(String filename) {
815         if (filename == null) {
816             return null;
817         }
818         int index = indexOfLastSeparator(filename);
819         return filename.substring(index + 1);
820     }
821 
822     /**
823      * Gets the base name, minus the full path and extension, from a full filename.
824      * <p>
825      * This method will handle a file in either Unix or Windows format.
826      * The text after the last forward or backslash and before the last dot is returned.
827      * <pre>
828      * a/b/c.txt --> c
829      * a.txt     --> a
830      * a/b/c     --> c
831      * a/b/c/    --> ""
832      * </pre>
833      * <p>
834      * The output will be the same irrespective of the machine that the code is running on.
835      *
836      * @param filename  the filename to query, null returns null
837      * @return the name of the file without the path, or an empty string if none exists
838      */
getBaseName(String filename)839     public static String getBaseName(String filename) {
840         return removeExtension(getName(filename));
841     }
842 
843     /**
844      * Gets the extension of a filename.
845      * <p>
846      * This method returns the textual part of the filename after the last dot.
847      * There must be no directory separator after the dot.
848      * <pre>
849      * foo.txt      --> "txt"
850      * a/b/c.jpg    --> "jpg"
851      * a/b.txt/c    --> ""
852      * a/b/c        --> ""
853      * </pre>
854      * <p>
855      * The output will be the same irrespective of the machine that the code is running on.
856      *
857      * @param filename the filename to retrieve the extension of.
858      * @return the extension of the file or an empty string if none exists.
859      */
getExtension(String filename)860     public static String getExtension(String filename) {
861         if (filename == null) {
862             return null;
863         }
864         int index = indexOfExtension(filename);
865         if (index == -1) {
866             return "";
867         } else {
868             return filename.substring(index + 1);
869         }
870     }
871 
872     //-----------------------------------------------------------------------
873     /**
874      * Removes the extension from a filename.
875      * <p>
876      * This method returns the textual part of the filename before the last dot.
877      * There must be no directory separator after the dot.
878      * <pre>
879      * foo.txt    --> foo
880      * a\b\c.jpg  --> a\b\c
881      * a\b\c      --> a\b\c
882      * a.b\c      --> a.b\c
883      * </pre>
884      * <p>
885      * The output will be the same irrespective of the machine that the code is running on.
886      *
887      * @param filename  the filename to query, null returns null
888      * @return the filename minus the extension
889      */
removeExtension(String filename)890     public static String removeExtension(String filename) {
891         if (filename == null) {
892             return null;
893         }
894         int index = indexOfExtension(filename);
895         if (index == -1) {
896             return filename;
897         } else {
898             return filename.substring(0, index);
899         }
900     }
901 
902     //-----------------------------------------------------------------------
903     /**
904      * Checks whether two filenames are equal exactly.
905      * <p>
906      * No processing is performed on the filenames other than comparison,
907      * thus this is merely a null-safe case-sensitive equals.
908      *
909      * @param filename1  the first filename to query, may be null
910      * @param filename2  the second filename to query, may be null
911      * @return true if the filenames are equal, null equals null
912      * @see IOCase#SENSITIVE
913      */
equals(String filename1, String filename2)914     public static boolean equals(String filename1, String filename2) {
915         return equals(filename1, filename2, false, IOCase.SENSITIVE);
916     }
917 
918     /**
919      * Checks whether two filenames are equal using the case rules of the system.
920      * <p>
921      * No processing is performed on the filenames other than comparison.
922      * The check is case-sensitive on Unix and case-insensitive on Windows.
923      *
924      * @param filename1  the first filename to query, may be null
925      * @param filename2  the second filename to query, may be null
926      * @return true if the filenames are equal, null equals null
927      * @see IOCase#SYSTEM
928      */
equalsOnSystem(String filename1, String filename2)929     public static boolean equalsOnSystem(String filename1, String filename2) {
930         return equals(filename1, filename2, false, IOCase.SYSTEM);
931     }
932 
933     //-----------------------------------------------------------------------
934     /**
935      * Checks whether two filenames are equal after both have been normalized.
936      * <p>
937      * Both filenames are first passed to {@link #normalize(String)}.
938      * The check is then performed in a case-sensitive manner.
939      *
940      * @param filename1  the first filename to query, may be null
941      * @param filename2  the second filename to query, may be null
942      * @return true if the filenames are equal, null equals null
943      * @see IOCase#SENSITIVE
944      */
equalsNormalized(String filename1, String filename2)945     public static boolean equalsNormalized(String filename1, String filename2) {
946         return equals(filename1, filename2, true, IOCase.SENSITIVE);
947     }
948 
949     /**
950      * Checks whether two filenames are equal after both have been normalized
951      * and using the case rules of the system.
952      * <p>
953      * Both filenames are first passed to {@link #normalize(String)}.
954      * The check is then performed case-sensitive on Unix and
955      * case-insensitive on Windows.
956      *
957      * @param filename1  the first filename to query, may be null
958      * @param filename2  the second filename to query, may be null
959      * @return true if the filenames are equal, null equals null
960      * @see IOCase#SYSTEM
961      */
equalsNormalizedOnSystem(String filename1, String filename2)962     public static boolean equalsNormalizedOnSystem(String filename1, String filename2) {
963         return equals(filename1, filename2, true, IOCase.SYSTEM);
964     }
965 
966     /**
967      * Checks whether two filenames are equal, optionally normalizing and providing
968      * control over the case-sensitivity.
969      *
970      * @param filename1  the first filename to query, may be null
971      * @param filename2  the second filename to query, may be null
972      * @param normalized  whether to normalize the filenames
973      * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
974      * @return true if the filenames are equal, null equals null
975      * @since Commons IO 1.3
976      */
equals( String filename1, String filename2, boolean normalized, IOCase caseSensitivity)977     public static boolean equals(
978             String filename1, String filename2,
979             boolean normalized, IOCase caseSensitivity) {
980 
981         if (filename1 == null || filename2 == null) {
982             return filename1 == filename2;
983         }
984         if (normalized) {
985             filename1 = normalize(filename1);
986             filename2 = normalize(filename2);
987             if (filename1 == null || filename2 == null) {
988                 throw new NullPointerException(
989                     "Error normalizing one or both of the file names");
990             }
991         }
992         if (caseSensitivity == null) {
993             caseSensitivity = IOCase.SENSITIVE;
994         }
995         return caseSensitivity.checkEquals(filename1, filename2);
996     }
997 
998     //-----------------------------------------------------------------------
999     /**
1000      * Checks whether the extension of the filename is that specified.
1001      * <p>
1002      * This method obtains the extension as the textual part of the filename
1003      * after the last dot. There must be no directory separator after the dot.
1004      * The extension check is case-sensitive on all platforms.
1005      *
1006      * @param filename  the filename to query, null returns false
1007      * @param extension  the extension to check for, null or empty checks for no extension
1008      * @return true if the filename has the specified extension
1009      */
isExtension(String filename, String extension)1010     public static boolean isExtension(String filename, String extension) {
1011         if (filename == null) {
1012             return false;
1013         }
1014         if (extension == null || extension.length() == 0) {
1015             return (indexOfExtension(filename) == -1);
1016         }
1017         String fileExt = getExtension(filename);
1018         return fileExt.equals(extension);
1019     }
1020 
1021     /**
1022      * Checks whether the extension of the filename is one of those specified.
1023      * <p>
1024      * This method obtains the extension as the textual part of the filename
1025      * after the last dot. There must be no directory separator after the dot.
1026      * The extension check is case-sensitive on all platforms.
1027      *
1028      * @param filename  the filename to query, null returns false
1029      * @param extensions  the extensions to check for, null checks for no extension
1030      * @return true if the filename is one of the extensions
1031      */
isExtension(String filename, String[] extensions)1032     public static boolean isExtension(String filename, String[] extensions) {
1033         if (filename == null) {
1034             return false;
1035         }
1036         if (extensions == null || extensions.length == 0) {
1037             return (indexOfExtension(filename) == -1);
1038         }
1039         String fileExt = getExtension(filename);
1040         for (int i = 0; i < extensions.length; i++) {
1041             if (fileExt.equals(extensions[i])) {
1042                 return true;
1043             }
1044         }
1045         return false;
1046     }
1047 
1048     /**
1049      * Checks whether the extension of the filename is one of those specified.
1050      * <p>
1051      * This method obtains the extension as the textual part of the filename
1052      * after the last dot. There must be no directory separator after the dot.
1053      * The extension check is case-sensitive on all platforms.
1054      *
1055      * @param filename  the filename to query, null returns false
1056      * @param extensions  the extensions to check for, null checks for no extension
1057      * @return true if the filename is one of the extensions
1058      */
isExtension(String filename, Collection<String> extensions)1059     public static boolean isExtension(String filename, Collection<String> extensions) {
1060         if (filename == null) {
1061             return false;
1062         }
1063         if (extensions == null || extensions.isEmpty()) {
1064             return (indexOfExtension(filename) == -1);
1065         }
1066         String fileExt = getExtension(filename);
1067         for (Iterator<String> it = extensions.iterator(); it.hasNext();) {
1068             if (fileExt.equals(it.next())) {
1069                 return true;
1070             }
1071         }
1072         return false;
1073     }
1074 
1075     //-----------------------------------------------------------------------
1076     /**
1077      * Checks a filename to see if it matches the specified wildcard matcher,
1078      * always testing case-sensitive.
1079      * <p>
1080      * The wildcard matcher uses the characters '?' and '*' to represent a
1081      * single or multiple wildcard characters.
1082      * This is the same as often found on Dos/Unix command lines.
1083      * The check is case-sensitive always.
1084      * <pre>
1085      * wildcardMatch("c.txt", "*.txt")      --> true
1086      * wildcardMatch("c.txt", "*.jpg")      --> false
1087      * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1088      * wildcardMatch("c.txt", "*.???")      --> true
1089      * wildcardMatch("c.txt", "*.????")     --> false
1090      * </pre>
1091      *
1092      * @param filename  the filename to match on
1093      * @param wildcardMatcher  the wildcard string to match against
1094      * @return true if the filename matches the wilcard string
1095      * @see IOCase#SENSITIVE
1096      */
wildcardMatch(String filename, String wildcardMatcher)1097     public static boolean wildcardMatch(String filename, String wildcardMatcher) {
1098         return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1099     }
1100 
1101     /**
1102      * Checks a filename to see if it matches the specified wildcard matcher
1103      * using the case rules of the system.
1104      * <p>
1105      * The wildcard matcher uses the characters '?' and '*' to represent a
1106      * single or multiple wildcard characters.
1107      * This is the same as often found on Dos/Unix command lines.
1108      * The check is case-sensitive on Unix and case-insensitive on Windows.
1109      * <pre>
1110      * wildcardMatch("c.txt", "*.txt")      --> true
1111      * wildcardMatch("c.txt", "*.jpg")      --> false
1112      * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1113      * wildcardMatch("c.txt", "*.???")      --> true
1114      * wildcardMatch("c.txt", "*.????")     --> false
1115      * </pre>
1116      *
1117      * @param filename  the filename to match on
1118      * @param wildcardMatcher  the wildcard string to match against
1119      * @return true if the filename matches the wilcard string
1120      * @see IOCase#SYSTEM
1121      */
wildcardMatchOnSystem(String filename, String wildcardMatcher)1122     public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) {
1123         return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1124     }
1125 
1126     /**
1127      * Checks a filename to see if it matches the specified wildcard matcher
1128      * allowing control over case-sensitivity.
1129      * <p>
1130      * The wildcard matcher uses the characters '?' and '*' to represent a
1131      * single or multiple wildcard characters.
1132      *
1133      * @param filename  the filename to match on
1134      * @param wildcardMatcher  the wildcard string to match against
1135      * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1136      * @return true if the filename matches the wilcard string
1137      * @since Commons IO 1.3
1138      */
wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity)1139     public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) {
1140         if (filename == null && wildcardMatcher == null) {
1141             return true;
1142         }
1143         if (filename == null || wildcardMatcher == null) {
1144             return false;
1145         }
1146         if (caseSensitivity == null) {
1147             caseSensitivity = IOCase.SENSITIVE;
1148         }
1149         filename = caseSensitivity.convertCase(filename);
1150         wildcardMatcher = caseSensitivity.convertCase(wildcardMatcher);
1151         String[] wcs = splitOnTokens(wildcardMatcher);
1152         boolean anyChars = false;
1153         int textIdx = 0;
1154         int wcsIdx = 0;
1155         Stack<int[]> backtrack = new Stack<int[]>();
1156 
1157         // loop around a backtrack stack, to handle complex * matching
1158         do {
1159             if (backtrack.size() > 0) {
1160                 int[] array = backtrack.pop();
1161                 wcsIdx = array[0];
1162                 textIdx = array[1];
1163                 anyChars = true;
1164             }
1165 
1166             // loop whilst tokens and text left to process
1167             while (wcsIdx < wcs.length) {
1168 
1169                 if (wcs[wcsIdx].equals("?")) {
1170                     // ? so move to next text char
1171                     textIdx++;
1172                     anyChars = false;
1173 
1174                 } else if (wcs[wcsIdx].equals("*")) {
1175                     // set any chars status
1176                     anyChars = true;
1177                     if (wcsIdx == wcs.length - 1) {
1178                         textIdx = filename.length();
1179                     }
1180 
1181                 } else {
1182                     // matching text token
1183                     if (anyChars) {
1184                         // any chars then try to locate text token
1185                         textIdx = filename.indexOf(wcs[wcsIdx], textIdx);
1186                         if (textIdx == -1) {
1187                             // token not found
1188                             break;
1189                         }
1190                         int repeat = filename.indexOf(wcs[wcsIdx], textIdx + 1);
1191                         if (repeat >= 0) {
1192                             backtrack.push(new int[] {wcsIdx, repeat});
1193                         }
1194                     } else {
1195                         // matching from current position
1196                         if (!filename.startsWith(wcs[wcsIdx], textIdx)) {
1197                             // couldnt match token
1198                             break;
1199                         }
1200                     }
1201 
1202                     // matched text token, move text index to end of matched token
1203                     textIdx += wcs[wcsIdx].length();
1204                     anyChars = false;
1205                 }
1206 
1207                 wcsIdx++;
1208             }
1209 
1210             // full match
1211             if (wcsIdx == wcs.length && textIdx == filename.length()) {
1212                 return true;
1213             }
1214 
1215         } while (backtrack.size() > 0);
1216 
1217         return false;
1218     }
1219 
1220     /**
1221      * Splits a string into a number of tokens.
1222      *
1223      * @param text  the text to split
1224      * @return the tokens, never null
1225      */
splitOnTokens(String text)1226     static String[] splitOnTokens(String text) {
1227         // used by wildcardMatch
1228         // package level so a unit test may run on this
1229 
1230         if (text.indexOf("?") == -1 && text.indexOf("*") == -1) {
1231             return new String[] { text };
1232         }
1233 
1234         char[] array = text.toCharArray();
1235         ArrayList<String> list = new ArrayList<String>();
1236         StringBuffer buffer = new StringBuffer();
1237         for (int i = 0; i < array.length; i++) {
1238             if (array[i] == '?' || array[i] == '*') {
1239                 if (buffer.length() != 0) {
1240                     list.add(buffer.toString());
1241                     buffer.setLength(0);
1242                 }
1243                 if (array[i] == '?') {
1244                     list.add("?");
1245                 } else if (list.size() == 0 ||
1246                         (i > 0 && list.get(list.size() - 1).equals("*") == false)) {
1247                     list.add("*");
1248                 }
1249             } else {
1250                 buffer.append(array[i]);
1251             }
1252         }
1253         if (buffer.length() != 0) {
1254             list.add(buffer.toString());
1255         }
1256 
1257         return list.toArray( new String[ list.size() ] );
1258     }
1259 
1260 }
1261