1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package java.util.regex;
18 
19 import java.util.ArrayList;
20 import java.util.List;
21 import libcore.util.EmptyArray;
22 
23 /**
24  * Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
25  * @hide
26  */
27 public class Splitter {
28     // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
29     private static final String METACHARACTERS = "\\?*+[](){}^$.|";
30 
Splitter()31     private Splitter() {
32     }
33 
34     /**
35      * Returns a result equivalent to {@code s.split(separator, limit)} if it's able
36      * to compute it more cheaply than ICU, or null if the caller should fall back to
37      * using ICU.
38      */
fastSplit(String re, String input, int limit)39     public static String[] fastSplit(String re, String input, int limit) {
40         // Can we do it cheaply?
41         int len = re.length();
42         if (len == 0) {
43             return null;
44         }
45         char ch = re.charAt(0);
46         if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
47             // We're looking for a single non-metacharacter. Easy.
48         } else if (len == 2 && ch == '\\') {
49             // We're looking for a quoted character.
50             // Quoted metacharacters are effectively single non-metacharacters.
51             ch = re.charAt(1);
52             if (METACHARACTERS.indexOf(ch) == -1) {
53                 return null;
54             }
55         } else {
56             return null;
57         }
58 
59         // We can do this cheaply...
60 
61         // Unlike Perl, which considers the result of splitting the empty string to be the empty
62         // array, Java returns an array containing the empty string.
63         if (input.isEmpty()) {
64             return new String[] { "" };
65         }
66 
67         // Count separators
68         int separatorCount = 0;
69         int begin = 0;
70         int end;
71         while (separatorCount + 1 != limit && (end = input.indexOf(ch, begin)) != -1) {
72             ++separatorCount;
73             begin = end + 1;
74         }
75         int lastPartEnd = input.length();
76         if (limit == 0 && begin == lastPartEnd) {
77             // Last part is empty for limit == 0, remove all trailing empty matches.
78             if (separatorCount == lastPartEnd) {
79                 // Input contains only separators.
80                 return EmptyArray.STRING;
81             }
82             // Find the beginning of trailing separators.
83             do {
84                 --begin;
85             } while (input.charAt(begin - 1) == ch);
86             // Reduce separatorCount and fix lastPartEnd.
87             separatorCount -= input.length() - begin;
88             lastPartEnd = begin;
89         }
90 
91         // Collect the result parts.
92         String[] result = new String[separatorCount + 1];
93         begin = 0;
94         for (int i = 0; i != separatorCount; ++i) {
95             end = input.indexOf(ch, begin);
96             result[i] = input.substring(begin, end);
97             begin = end + 1;
98         }
99         // Add last part.
100         result[separatorCount] = input.substring(begin, lastPartEnd);
101         return result;
102     }
103 
split(Pattern pattern, String re, String input, int limit)104     public static String[] split(Pattern pattern, String re, String input, int limit) {
105         String[] fastResult = fastSplit(re, input, limit);
106         if (fastResult != null) {
107             return fastResult;
108         }
109 
110         // Unlike Perl, which considers the result of splitting the empty string to be the empty
111         // array, Java returns an array containing the empty string.
112         if (input.isEmpty()) {
113             return new String[] { "" };
114         }
115 
116         // Collect text preceding each occurrence of the separator, while there's enough space.
117         ArrayList<String> list = new ArrayList<String>();
118         Matcher matcher = new Matcher(pattern, input);
119         int begin = 0;
120         while (list.size() + 1 != limit && matcher.find()) {
121             list.add(input.substring(begin, matcher.start()));
122             begin = matcher.end();
123         }
124         return finishSplit(list, input, begin, limit);
125     }
126 
finishSplit(List<String> list, String input, int begin, int limit)127     private static String[] finishSplit(List<String> list, String input, int begin, int limit) {
128         // Add trailing text.
129         if (begin < input.length()) {
130             list.add(input.substring(begin));
131         } else if (limit != 0) {
132             list.add("");
133         } else {
134             // Remove all trailing empty matches in the limit == 0 case.
135             int i = list.size() - 1;
136             while (i >= 0 && list.get(i).isEmpty()) {
137                 list.remove(i);
138                 i--;
139             }
140         }
141         // Convert to an array.
142         return list.toArray(new String[list.size()]);
143     }
144 }
145