1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package java.util.regex; 18 19 import java.util.ArrayList; 20 import java.util.List; 21 import libcore.util.EmptyArray; 22 23 /** 24 * Used to make {@code String.split} fast (and to help {@code Pattern.split} too). 25 * @hide 26 */ 27 public class Splitter { 28 // The RI allows regular expressions beginning with ] or }, but that's probably a bug. 29 private static final String METACHARACTERS = "\\?*+[](){}^$.|"; 30 Splitter()31 private Splitter() { 32 } 33 34 /** 35 * Returns a result equivalent to {@code s.split(separator, limit)} if it's able 36 * to compute it more cheaply than ICU, or null if the caller should fall back to 37 * using ICU. 38 */ fastSplit(String re, String input, int limit)39 public static String[] fastSplit(String re, String input, int limit) { 40 // Can we do it cheaply? 41 int len = re.length(); 42 if (len == 0) { 43 return null; 44 } 45 char ch = re.charAt(0); 46 if (len == 1 && METACHARACTERS.indexOf(ch) == -1) { 47 // We're looking for a single non-metacharacter. Easy. 48 } else if (len == 2 && ch == '\\') { 49 // We're looking for a quoted character. 50 // Quoted metacharacters are effectively single non-metacharacters. 51 ch = re.charAt(1); 52 if (METACHARACTERS.indexOf(ch) == -1) { 53 return null; 54 } 55 } else { 56 return null; 57 } 58 59 // We can do this cheaply... 60 61 // Unlike Perl, which considers the result of splitting the empty string to be the empty 62 // array, Java returns an array containing the empty string. 63 if (input.isEmpty()) { 64 return new String[] { "" }; 65 } 66 67 // Count separators 68 int separatorCount = 0; 69 int begin = 0; 70 int end; 71 while (separatorCount + 1 != limit && (end = input.indexOf(ch, begin)) != -1) { 72 ++separatorCount; 73 begin = end + 1; 74 } 75 int lastPartEnd = input.length(); 76 if (limit == 0 && begin == lastPartEnd) { 77 // Last part is empty for limit == 0, remove all trailing empty matches. 78 if (separatorCount == lastPartEnd) { 79 // Input contains only separators. 80 return EmptyArray.STRING; 81 } 82 // Find the beginning of trailing separators. 83 do { 84 --begin; 85 } while (input.charAt(begin - 1) == ch); 86 // Reduce separatorCount and fix lastPartEnd. 87 separatorCount -= input.length() - begin; 88 lastPartEnd = begin; 89 } 90 91 // Collect the result parts. 92 String[] result = new String[separatorCount + 1]; 93 begin = 0; 94 for (int i = 0; i != separatorCount; ++i) { 95 end = input.indexOf(ch, begin); 96 result[i] = input.substring(begin, end); 97 begin = end + 1; 98 } 99 // Add last part. 100 result[separatorCount] = input.substring(begin, lastPartEnd); 101 return result; 102 } 103 split(Pattern pattern, String re, String input, int limit)104 public static String[] split(Pattern pattern, String re, String input, int limit) { 105 String[] fastResult = fastSplit(re, input, limit); 106 if (fastResult != null) { 107 return fastResult; 108 } 109 110 // Unlike Perl, which considers the result of splitting the empty string to be the empty 111 // array, Java returns an array containing the empty string. 112 if (input.isEmpty()) { 113 return new String[] { "" }; 114 } 115 116 // Collect text preceding each occurrence of the separator, while there's enough space. 117 ArrayList<String> list = new ArrayList<String>(); 118 Matcher matcher = new Matcher(pattern, input); 119 int begin = 0; 120 while (list.size() + 1 != limit && matcher.find()) { 121 list.add(input.substring(begin, matcher.start())); 122 begin = matcher.end(); 123 } 124 return finishSplit(list, input, begin, limit); 125 } 126 finishSplit(List<String> list, String input, int begin, int limit)127 private static String[] finishSplit(List<String> list, String input, int begin, int limit) { 128 // Add trailing text. 129 if (begin < input.length()) { 130 list.add(input.substring(begin)); 131 } else if (limit != 0) { 132 list.add(""); 133 } else { 134 // Remove all trailing empty matches in the limit == 0 case. 135 int i = list.size() - 1; 136 while (i >= 0 && list.get(i).isEmpty()) { 137 list.remove(i); 138 i--; 139 } 140 } 141 // Convert to an array. 142 return list.toArray(new String[list.size()]); 143 } 144 } 145