1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.tradefed.util;
17 
18 import com.android.ddmlib.Log;
19 
20 import java.util.ArrayList;
21 import java.util.regex.Matcher;
22 import java.util.regex.Pattern;
23 
24 public class QuotationAwareTokenizer {
25     private static final String LOG_TAG = "TOKEN";
26 
27     /**
28      * Tokenizes the string, splitting on specified delimiter.  Does not split between consecutive,
29      * unquoted double-quote marks.
30      * <p/>
31      * How the tokenizer works:
32      * <ol>
33      *     <li> Split the string into "characters" where each "character" is either an escaped
34      *          character like \" (that is, "\\\"") or a single real character like f (just "f").
35      *     <li> For each "character"
36      *     <ol>
37      *         <li> If it's a space, finish a token unless we're being quoted
38      *         <li> If it's a quotation mark, flip the "we're being quoted" bit
39      *         <li> Otherwise, add it to the token being built
40      *     </ol>
41      *     <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList}
42      *     <ol>
43      *         <li> If the last "character" is an escape character, throw an exception; that's not
44      *              valid
45      *         <li> If we're in the middle of a quotation, throw an exception; that's not valid
46      *         <li> Otherwise, add the final token to (tokens)
47      *     </ol>
48      *     <li> Return a String[] version of (tokens)
49      * </ol>
50      *
51      * @param line A {@link String} to be tokenized
52      * @return A tokenized version of the string
53      * @throws IllegalArgumentException if the line cannot be parsed
54      */
tokenizeLine(String line, String delim)55     public static String[] tokenizeLine(String line, String delim) throws IllegalArgumentException {
56         if (line == null) {
57             throw new IllegalArgumentException("line is null");
58         }
59 
60         ArrayList<String> tokens = new ArrayList<String>();
61         StringBuilder token = new StringBuilder();
62         // This pattern matches an escaped character or a character.  Escaped char takes precedence
63         final Pattern charPattern = Pattern.compile("\\\\.|.");
64         final Matcher charMatcher = charPattern.matcher(line);
65         String aChar = "";
66         boolean quotation = false;
67 
68         Log.d(LOG_TAG, String.format("Trying to tokenize the line '%s'", line));
69         while (charMatcher.find()) {
70             aChar = charMatcher.group();
71 
72             if (delim.equals(aChar)) {
73                 if (quotation) {
74                     // inside a quotation; treat spaces as part of the token
75                     token.append(aChar);
76                 } else {
77                     if (token.length() > 0) {
78                         // this is the end of a non-empty token; dump it in our list of tokens,
79                         // clear our temp storage, and keep rolling
80                         Log.d(LOG_TAG, String.format("Finished token '%s'", token.toString()));
81                         tokens.add(token.toString());
82                         token.delete(0, token.length());
83                     }
84                     // otherwise, this is the non-first in a sequence of spaces; ignore.
85                 }
86             } else if ("\"".equals(aChar)) {
87                 // unescaped quotation mark; flip quotation state
88                 Log.v(LOG_TAG, "Flipped quotation state");
89                 quotation ^= true;
90             } else {
91                 // default case: add the character to the token being built
92                 token.append(aChar);
93             }
94         }
95 
96         if (quotation || "\\".equals(aChar)) {
97             // We ended in a quotation or with an escape character; this is not valid
98             throw new IllegalArgumentException("Unexpected EOL in a quotation or after an escape " +
99                     "character");
100         }
101 
102         // Add the final token to the tokens array.
103         if (token.length() > 0) {
104             Log.v(LOG_TAG, String.format("Finished final token '%s'", token.toString()));
105             tokens.add(token.toString());
106             token.delete(0, token.length());
107         }
108 
109         String[] tokensArray = new String[tokens.size()];
110         return tokens.toArray(tokensArray);
111     }
112 
113     /**
114      * Tokenizes the string, splitting on spaces.  Does not split between consecutive,
115      * unquoted double-quote marks.
116      * <p>
117      * See also {@link #tokenizeLine(String, String)}
118      */
tokenizeLine(String line)119     public static String[] tokenizeLine(String line) throws IllegalArgumentException {
120         return tokenizeLine(line, " ");
121     }
122 
123     /**
124      * Perform the reverse of {@link #tokenizeLine(String)}. <br/>
125      * Given array of tokens, combine them into a single line.
126      *
127      * @param tokens
128      * @return A {@link String} created from all the tokens.
129      */
combineTokens(String... tokens)130     public static String combineTokens(String... tokens) {
131         final Pattern wsPattern = Pattern.compile("\\s");
132         StringBuilder sb = new StringBuilder();
133         for (int i=0; i < tokens.length; i++) {
134             final String token = tokens[i];
135             final Matcher wsMatcher = wsPattern.matcher(token);
136             if (wsMatcher.find()) {
137                 sb.append('"');
138                 sb.append(token);
139                 sb.append('"');
140             } else {
141                 sb.append(token);
142             }
143             if (i < (tokens.length - 1)) {
144                 // don't output space after last token
145                 sb.append(' ');
146             }
147         }
148         return sb.toString();
149     }
150 }
151