1 
2 /*
3  * Copyright (C) 2020 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 
19 package com.android.build.config;
20 
21 import java.io.IOException;
22 import java.io.Reader;
23 import java.util.ArrayList;
24 import java.util.HashMap;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Map;
28 
/**
 * A small, streaming CSV parser.
 *
 * <p>The accepted format is:
 * <ul>
 *   <li>Fields are separated by commas ({@code ,}).</li>
 *   <li>Records are terminated by a newline ({@code \n}) or by the end of input.</li>
 *   <li>A field that begins with a double quote ({@code "}) is a quoted field. It runs
 *       until the matching closing quote and may contain commas and newlines. Two
 *       consecutive double quotes inside a quoted field produce one literal double
 *       quote.</li>
 *   <li>Blank lines are skipped; empty fields are reported as empty strings.</li>
 * </ul>
 *
 * <p>A carriage return ({@code \r}) is not treated specially: in an unquoted field it is
 * kept as part of the field's text, and directly after a closing quote it is rejected
 * like any other unexpected character.
 *
 * <p>This class only has static entry points and cannot be instantiated.
 */
public class CsvParser {
    /**
     * Initial capacity, in chars, of the buffer that accumulates the current field.
     */
    private static final int CHUNK_SIZE = 64 * 1024;

    /**
     * Value returned by {@link Reader#read()} when the end of the input is reached.
     */
    private static final int EOF = -1;

    /**
     * Error parsing.
     *
     * <p>Carries the position in the input at which the problem was detected.
     */
    public static class ParseException extends Exception {
        private final int mLine;
        private final int mColumn;

        /**
         * @param line 1-based line number at which the error was detected.
         * @param column 1-based column at which the error was detected.
         * @param message description of the problem.
         */
        public ParseException(int line, int column, String message) {
            super(message);
            mLine = line;
            mColumn = column;
        }

        /**
         * Line number in source file.
         */
        public int getLine() {
            return mLine;
        }

        /**
         * Column in source file.
         */
        public int getColumn() {
            return mColumn;
        }
    }

    /**
     * One parsed CSV record: its fields plus the line on which it ended.
     */
    public static class Line {
        private final int mLineNumber;
        private final List<String> mFields;

        Line(int lineno, List<String> fields) {
            mLineNumber = lineno;
            mFields = fields;
        }

        /**
         * The 1-based input line on which this record was completed. For a record whose
         * quoted fields contain newlines this is the last line the record occupies.
         */
        public int getLine() {
            return mLineNumber;
        }

        /**
         * The fields of this record, in input order. The returned list is the record's
         * own backing list, not a copy.
         */
        public List<String> getFields() {
            return mFields;
        }
    }

    // Parser States
    private static final int STATE_START_LINE = 0;
    private static final int STATE_START_FIELD = 1;
    private static final int STATE_INSIDE_QUOTED_FIELD = 2;
    private static final int STATE_FIRST_QUOTATION_MARK = 3;
    private static final int STATE_INSIDE_UNQUOTED_FIELD = 4;
    private static final int STATE_DONE = 5;

    // Parser Actions (bit flags, may be combined)
    private static final int ACTION_APPEND_CHAR = 1;
    private static final int ACTION_FIELD_COMPLETE = 2;
    private static final int ACTION_LINE_COMPLETE = 4;

    /**
     * Not instantiable; use {@link #parse(Reader)}.
     */
    private CsvParser() {
    }

    /**
     * Reads CSV and returns a list of Line objects.
     *
     * <p>Handles newlines inside fields quoted with double quotes (").
     *
     * <p>Doesn't report blank lines, but does include empty fields.
     *
     * <p>The input is consumed one character at a time via {@link Reader#read()} until
     * the end of input; the reader is not closed. Equal field values within one call
     * share a single {@code String} instance to reduce peak memory usage.
     *
     * @param reader the source of CSV text.
     * @return the non-blank records, in input order.
     * @throws ParseException if the input ends inside a quoted field, or if a closing
     *         quote is followed by anything other than a quote, a comma, a newline or
     *         the end of input.
     * @throws IOException if reading from {@code reader} fails.
     */
    public static List<Line> parse(Reader reader)
            throws ParseException, IOException {
        final List<Line> result = new ArrayList<>();
        final Map<String, String> stringPool = new HashMap<>();
        final StringBuilder buf = new StringBuilder(CHUNK_SIZE);
        List<String> fields = new ArrayList<>();
        int line = 1;
        int column = 1;

        int state = STATE_START_LINE;
        while (state != STATE_DONE) {
            // Only EOF (-1) ends the input; a NUL character (0) is ordinary data.
            final int c = reader.read();
            int action = 0;

            switch (state) {
                case STATE_START_LINE:
                    if (c == EOF) {
                        // No data, skip ACTION_LINE_COMPLETE.
                        state = STATE_DONE;
                    } else if (c == '"') {
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        // Blank line: consume the newline, state stays STATE_START_LINE.
                    } else {
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_UNQUOTED_FIELD;
                    }
                    break;

                case STATE_START_FIELD:
                    if (c == EOF) {
                        // Field will be empty
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == '"') {
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_UNQUOTED_FIELD;
                    }
                    break;

                case STATE_INSIDE_QUOTED_FIELD:
                    if (c == EOF) {
                        throw new ParseException(line, column,
                                "Bad input: End of input inside quoted field.");
                    } else if (c == '"') {
                        // Either the closing quote or the first half of an escaped quote;
                        // the next character decides.
                        state = STATE_FIRST_QUOTATION_MARK;
                    } else {
                        action = ACTION_APPEND_CHAR;
                    }
                    break;

                case STATE_FIRST_QUOTATION_MARK:
                    if (c == EOF) {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == '"') {
                        // Doubled quote: emit one literal quote and stay in the field.
                        action = ACTION_APPEND_CHAR;
                        state = STATE_INSIDE_QUOTED_FIELD;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        throw new ParseException(line, column,
                                "Bad input: Character after field ended or unquoted '\"'.");
                    }
                    break;

                case STATE_INSIDE_UNQUOTED_FIELD:
                    if (c == EOF) {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_DONE;
                    } else if (c == ',') {
                        action = ACTION_FIELD_COMPLETE;
                        state = STATE_START_FIELD;
                    } else if (c == '\n') {
                        action = ACTION_FIELD_COMPLETE | ACTION_LINE_COMPLETE;
                        state = STATE_START_LINE;
                    } else {
                        action = ACTION_APPEND_CHAR;
                    }
                    break;

                default:
                    throw new IllegalStateException("Unknown parser state: " + state);
            }

            if ((action & ACTION_APPEND_CHAR) != 0) {
                buf.append((char) c);
            }
            if ((action & ACTION_FIELD_COMPLETE) != 0) {
                fields.add(intern(stringPool, buf.toString()));
                buf.setLength(0);
            }
            if ((action & ACTION_LINE_COMPLETE) != 0) {
                // Only report lines with any contents
                if (!fields.isEmpty()) {
                    result.add(new Line(line, fields));
                    fields = new ArrayList<>();
                }
            }

            // Track the position of the next character for error reporting.
            if (c == '\n') {
                line++;
                column = 1;
            } else {
                column++;
            }
        }

        return result;
    }

    /**
     * Returns the pooled instance equal to {@code value}, adding {@code value} to the
     * pool if no equal string is present yet.
     *
     * <p>A lot of the strings are duplicated, so pooling them reduces peak memory usage.
     */
    private static String intern(Map<String, String> pool, String value) {
        final String cached = pool.get(value);
        if (cached != null) {
            return cached;
        }
        pool.put(value, value);
        return value;
    }
}
243