/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.tradefed.util;

import com.android.ddmlib.Log;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for splitting a line into tokens while honoring double-quoted sections, and for
 * joining tokens back into a single line.
 */
public class QuotationAwareTokenizer {
    private static final String LOG_TAG = "TOKEN";

    /**
     * Matches either an escaped character (a backslash followed by any character) or a single
     * character; the escaped form takes precedence. {@link Pattern#DOTALL} is required so that
     * line terminators are matched too; without it they would be skipped by
     * {@link Matcher#find()} and silently dropped from the tokens.
     */
    private static final Pattern CHAR_PATTERN = Pattern.compile("\\\\.|.", Pattern.DOTALL);

    /** Matches a single whitespace character; used to decide whether a token needs quoting. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");

    /**
     * Tokenizes the string, splitting on the specified delimiter. Does not split inside a section
     * enclosed in unescaped double-quote marks.
     * <p>
     * How the tokenizer works:
     * <ol>
     *   <li> Split the string into "characters" where each "character" is either an escaped
     *        character like \" (that is, "\\\"") or a single real character like f (just "f").
     *   <li> For each "character"
     *     <ol>
     *       <li> If it equals the delimiter, finish a token unless we're being quoted
     *       <li> If it's a quotation mark, flip the "we're being quoted" bit
     *       <li> Otherwise, add it to the token being built. An escaped character is added
     *            verbatim, i.e. including its leading backslash.
     *     </ol>
     *   <li> At EOL, we typically haven't added the final token to the list of tokens
     *     <ol>
     *       <li> If the last "character" is a lone escape character, throw an exception; that's
     *            not valid
     *       <li> If we're in the middle of a quotation, throw an exception; that's not valid
     *       <li> Otherwise, add the final token to the list of tokens
     *     </ol>
     *   <li> Return a String[] version of the list of tokens
     * </ol>
     * <p>
     * Empty tokens are never emitted: consecutive delimiters collapse, and an empty quoted
     * section ({@code ""}) on its own produces no token.
     *
     * @param line A {@link String} to be tokenized
     * @param delim the delimiter to split on. Each "character" is compared against it as a whole,
     *        so only a delimiter that is a single character (or a backslash-escaped character)
     *        can ever match.
     * @return A tokenized version of the string
     * @throws IllegalArgumentException if {@code line} or {@code delim} is null, or if the line
     *         cannot be parsed
     */
    public static String[] tokenizeLine(String line, String delim) throws IllegalArgumentException {
        if (line == null) {
            throw new IllegalArgumentException("line is null");
        }
        if (delim == null) {
            // Fail the same way as for a null line instead of with an NPE from inside the loop.
            throw new IllegalArgumentException("delim is null");
        }

        final List<String> tokens = new ArrayList<String>();
        final StringBuilder token = new StringBuilder();
        final Matcher charMatcher = CHAR_PATTERN.matcher(line);
        String aChar = "";
        boolean quotation = false;

        Log.d(LOG_TAG, String.format("Trying to tokenize the line '%s'", line));
        while (charMatcher.find()) {
            aChar = charMatcher.group();

            if (delim.equals(aChar)) {
                if (quotation) {
                    // inside a quotation; treat the delimiter as part of the token
                    token.append(aChar);
                } else if (token.length() > 0) {
                    // this is the end of a non-empty token; dump it in our list of tokens,
                    // clear our temp storage, and keep rolling
                    Log.d(LOG_TAG, String.format("Finished token '%s'", token.toString()));
                    tokens.add(token.toString());
                    token.setLength(0);
                }
                // otherwise, this is the non-first in a sequence of delimiters; ignore.
            } else if ("\"".equals(aChar)) {
                // unescaped quotation mark; flip quotation state
                Log.v(LOG_TAG, "Flipped quotation state");
                quotation = !quotation;
            } else {
                // default case: add the character to the token being built
                token.append(aChar);
            }
        }

        if (quotation || "\\".equals(aChar)) {
            // We ended in a quotation or with an escape character; this is not valid
            throw new IllegalArgumentException("Unexpected EOL in a quotation or after an escape " +
                    "character");
        }

        // Add the final token to the tokens list.
        if (token.length() > 0) {
            Log.v(LOG_TAG, String.format("Finished final token '%s'", token.toString()));
            tokens.add(token.toString());
        }

        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Tokenizes the string, splitting on spaces. Does not split inside a section enclosed in
     * unescaped double-quote marks.
     * <p>
     * See also {@link #tokenizeLine(String, String)}
     *
     * @param line A {@link String} to be tokenized
     * @return A tokenized version of the string
     * @throws IllegalArgumentException if the line is null or cannot be parsed
     */
    public static String[] tokenizeLine(String line) throws IllegalArgumentException {
        return tokenizeLine(line, " ");
    }

    /**
     * Perform the reverse of {@link #tokenizeLine(String)}.
     * <p>
     * Given array of tokens, combine them into a single line, separated by single spaces. A token
     * that contains any whitespace is wrapped in double-quote marks; all other tokens are emitted
     * as-is. Token contents are copied verbatim, so double-quote marks inside a token are not
     * escaped.
     *
     * @param tokens the tokens to combine, in output order
     * @return A {@link String} created from all the tokens.
     */
    public static String combineTokens(String... tokens) {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tokens.length; i++) {
            final String token = tokens[i];
            if (WHITESPACE_PATTERN.matcher(token).find()) {
                // quote the token so that it survives being tokenized again
                sb.append('"');
                sb.append(token);
                sb.append('"');
            } else {
                sb.append(token);
            }
            if (i < (tokens.length - 1)) {
                // don't output space after last token
                sb.append(' ');
            }
        }
        return sb.toString();
    }
}