1 /*
2  * Copyright (C) 2008-2009 Marc Blank
3  * Licensed to The Android Open Source Project.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 package com.android.exchange.adapter;
19 
20 import android.content.Context;
21 
22 import com.android.exchange.Eas;
23 import com.android.exchange.EasException;
24 import com.android.exchange.service.EasService;
25 import com.android.exchange.utility.FileLogger;
26 import com.android.mail.utils.LogUtils;
27 import com.google.common.annotations.VisibleForTesting;
28 
29 import java.io.ByteArrayOutputStream;
30 import java.io.FileNotFoundException;
31 import java.io.FileOutputStream;
32 import java.io.IOException;
33 import java.io.InputStream;
34 import java.util.ArrayDeque;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.Deque;
38 
39 /**
40  * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
41  * EAS uses (as defined in the EAS specification).
42  *
43  * Supports:
44  *      WBXML tokens to encode XML tags
45  *      WBXML code pages to support multiple XML namespaces
46  *      Inline strings
47  *      Opaque data
48  *
49  * Does not support: (throws EasParserException)
50  *      String tables
51  *      Entities
52  *      Processing instructions
53  *      Attribute encoding
54  *
55  */
56 public abstract class Parser {
57     private static final boolean LOG_VERBOSE = false;
58 
59     private static final String LOG_TAG = Eas.LOG_TAG;
60 
61     // The following constants are Wbxml standard
62     public static final int START_DOCUMENT = 0;
63     public static final int END_DOCUMENT = 1;
64     private static final int DONE = 1;
65     private static final int START = 2;
66     public static final int END = 3;
67     private static final int TEXT = 4;
68     private static final int OPAQUE = 5;
69     private static final int NOT_ENDED = Integer.MIN_VALUE;
70     private static final int EOF_BYTE = -1;
71 
72     private boolean capture = false;
73 
74     private ArrayList<Integer> captureArray;
75 
76     // The input stream for this parser
77     private InputStream in;
78 
79     // The stack of names of tags being processed; used when debug = true
80     private String[] nameArray = new String[32];
81 
82     public class Tag {
83         private final int mPage;
84         private final int mIndex;
85         // Whether the tag is associated with content (a value)
86         public final boolean mNoContent;
87         private final String mName;
88 
Tag(final int page, final int id)89         public Tag(final int page, final int id) {
90             mPage = page;
91             // The tag is in the low 6 bits
92             mIndex = id & Tags.PAGE_MASK;
93             // If the high bit is set, there is content (a value) to be read
94             mNoContent = (id & Wbxml.WITH_CONTENT) == 0;
95             if (Tags.isGlobalTag(mIndex)) {
96                 mName = "unsupported-WBXML";
97             } else if (!Tags.isValidTag(mPage, mIndex)) {
98                 mName = "unknown";
99             } else {
100                 mName = Tags.getTagName(mPage, mIndex);
101             }
102         }
103 
getTagNum()104         public int getTagNum() {
105             if (Tags.isGlobalTag(mIndex)) {
106                 return mIndex;
107             }
108             return (mPage << Tags.PAGE_SHIFT) | mIndex;
109         }
110 
111         @Override
toString()112         public String toString() {
113             return mName;
114         }
115     }
116 
    // The stack of tags being processed; the most recently opened tag is at the head
    // (push() uses addFirst, pop() and the no-content path of getNext() use removeFirst)
    private final Deque<Tag> startTagArray = new ArrayDeque<Tag>();

    // The tag most recently assigned by push() (on START) or pop() (on END)
    private Tag startTag;

    // The type of the last token read (eg, TEXT, OPAQUE, END, etc).
    private int type;

    // The current page. As of EAS 14.1, this is a value 0-24.
    private int page;

    // The current tag. The low order 6 bits contain the tag index and the
    // higher order bits the page number. The format matches that used for
    // the tag enums defined in Tags.java. Assigned by nextTag() when it returns a START tag.
    public int tag;

    // Whether the current tag is associated with content (a value). When true, the next
    // call to getNext() reports END for the tag without reading from the stream.
    public boolean noContent;

    // The value read, as a String; reset to null at the start of every getNext()
    private String text;

    // The value read, as bytes; reset to null at the start of every getNext()
    private byte[] bytes;
141 
142     // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.
143 
    /**
     * Generated when the parser comes to EOF prematurely during parsing (i.e. in error).
     * Within this class it is thrown by readByte(), readInlineString() and skipTag() when the
     * stream ends before the expected data has been read.
     */
    public class EofException extends IOException {
        private static final long serialVersionUID = 1L;
    }
150 
    /**
     * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
     * input stream; in other words, the stream had no content. It is thrown by setInput() when
     * the initial version byte cannot be read.
     */
    public class EmptyStreamException extends EofException {
        private static final long serialVersionUID = 1L;
    }
158 
    /**
     * Generated by nextTag() when the end of the document is reached while the caller was
     * waiting for an ending tag other than START_DOCUMENT.
     */
    public class EodException extends IOException {
        private static final long serialVersionUID = 1L;
    }
162 
163     public class EasParserException extends IOException {
164         private static final long serialVersionUID = 1L;
165 
EasParserException()166         EasParserException() {
167             super("WBXML format error");
168         }
169 
EasParserException(final String reason)170         EasParserException(final String reason) {
171             super(reason);
172         }
173     }
174 
    /**
     * Parses the input. This base implementation reads nothing and always returns false;
     * presumably concrete parsers override it -- confirm against the subclasses.
     *
     * @return false in this base implementation
     * @throws IOException declared for overriding implementations
     * @throws EasException declared for overriding implementations
     */
    public boolean parse() throws IOException, EasException {
        return false;
    }
178 
    /**
     * Creates a parser over the given stream and, when the stream is non-null, consumes the
     * WBXML header by calling setInput(in, true).
     *
     * @param in the stream to parse
     * @throws IOException if the header cannot be read or declares a string table
     */
    public Parser(final InputStream in) throws IOException {
        setInput(in, true);
    }
182 
    /**
     * Constructor for use when switching parsers within a input stream. The new parser shares
     * the existing parser's stream and does not re-read the WBXML header (setInput is called
     * with initialize = false); only the stream is taken from the existing parser.
     *
     * @param parser an existing, initialized parser
     * @throws IOException
     */
    public Parser(final Parser parser) throws IOException {
        setInput(parser.in, false);
    }
191 
    /**
     * @return the input stream this parser is currently reading from (may be null if a null
     *     stream was supplied)
     */
    protected InputStream getInput() {
        return in;
    }
195 
196     /**
197      * Turns on data capture; this is used to create test streams that represent "live" data and
198      * can be used against the various parsers.
199      */
captureOn()200     public void captureOn() {
201         capture = true;
202         captureArray = new ArrayList<Integer>();
203     }
204 
205     /**
206      * Turns off data capture; writes the captured data to a specified file.
207      */
captureOff(final Context context, final String file)208     public void captureOff(final Context context, final String file) {
209         try {
210             final FileOutputStream out = context.openFileOutput(file,
211                     Context.MODE_WORLD_WRITEABLE);
212             out.write(captureArray.toString().getBytes());
213             out.close();
214         } catch (FileNotFoundException e) {
215             // This is debug code; exceptions aren't interesting.
216         } catch (IOException e) {
217             // This is debug code; exceptions aren't interesting.
218         }
219     }
220 
221     /**
222      * Return the value of the current tag, as a byte array. Throws EasParserException
223      * if neither opaque nor text data is present. Never returns null--returns
224      * an empty byte[] array for empty data.
225      *
226      * @return the byte array value of the current tag
227      * @throws IOException
228      */
getValueBytes()229     public byte[] getValueBytes() throws IOException {
230         final String name = startTag.toString();
231 
232         getNext();
233         // This means there was no value given, just <Foo/>; we'll return empty array
234         if (type == END) {
235             log("No value for tag: " + name);
236             return new byte[0];
237         } else if (type != OPAQUE && type != TEXT) {
238             throw new EasParserException("Expected OPAQUE or TEXT data for tag " + name);
239         }
240 
241         // Save the value
242         final byte[] val = type == OPAQUE ? bytes : text.getBytes("UTF-8");
243         // Read the next token; it had better be the end of the current tag
244         getNext();
245         // If not, throw an exception
246         if (type != END) {
247             throw new EasParserException("No END found for tag " + name);
248         }
249         return val;
250     }
251 
252     /**
253      * Return the value of the current tag, as a String. Throws EasParserException
254      * for non-text data. Never returns null--returns an empty string if no data.
255      *
256      * @return the String value of the current tag
257      * @throws IOException
258      */
getValue()259     public String getValue() throws IOException {
260         final String name = startTag.toString();
261 
262         getNext();
263         // This means there was no value given, just <Foo/>; we'll return empty string for now
264         if (type == END) {
265             log("No value for tag: " + name);
266             return "";
267         } else if (type != TEXT) {
268             throw new EasParserException("Expected TEXT data for tag " + name);
269         }
270 
271         // Save the value
272         final String val = text;
273         // Read the next token; it had better be the end of the current tag
274         getNext();
275         // If not, throw an exception
276         if (type != END) {
277             throw new EasParserException("No END found for tag " + name);
278         }
279         return val;
280     }
281 
282     /**
283      * Return the value of the current tag, as an integer. Throws EasParserException
284      * for non text data, and text data that doesn't parse as an integer. Returns
285      * 0 for empty data.
286      *
287      * @return the integer value of the current tag
288      * @throws IOException
289      */
getValueInt()290     public int getValueInt() throws IOException {
291         final String val = getValue();
292         if (val.length() == 0) {
293             return 0;
294         }
295 
296         int num;
297         try {
298             num = Integer.parseInt(val);
299         } catch (NumberFormatException e) {
300             throw new EasParserException("Tag " + startTag + ": " + e.getMessage());
301         }
302         return num;
303     }
304 
305     /**
306      * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
307      * mark the end of the current tag and end of document.  If we hit end of document without
308      * looking for it, generate an EodException.  The tag returned consists of the page number
309      * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
310      * are unique.
311      *
312      * @param endingTag the tag that would represent the end of the tag we're processing
313      * @return the next tag found
314      * @throws IOException
315      */
nextTag(final int endingTag)316     public int nextTag(final int endingTag) throws IOException {
317         while (getNext() != DONE) {
318             // If we're a start, set tag to include the page and return it
319             if (type == START) {
320                 tag = startTag.getTagNum();
321                 return tag;
322             // If we're at the ending tag we're looking for, return the END signal
323             } else if (type == END && startTag.getTagNum() == endingTag) {
324                 return END;
325             }
326         }
327         // We're at end of document here.  If we're looking for it, return END_DOCUMENT
328         if (endingTag == START_DOCUMENT) {
329             return END_DOCUMENT;
330         }
331         // Otherwise, we've prematurely hit end of document, so exception out
332         // EodException is a subclass of IOException; this will be treated as an IO error by
333         // EasService
334         throw new EodException();
335     }
336 
337     /**
338      * Skip anything found in the stream until the end of the current tag is reached.  This can be
339      * used to ignore stretches of xml that aren't needed by the parser.
340      *
341      * @throws IOException
342      */
skipTag()343     public void skipTag() throws IOException {
344         final int thisTag = startTag.getTagNum();
345         // Just loop until we hit the end of the current tag
346         while (getNext() != DONE) {
347             if (type == END && startTag.getTagNum() == thisTag) {
348                 return;
349             }
350         }
351 
352         // If we're at end of document, that's bad
353         throw new EofException();
354     }
355 
356     /**
357      * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
358      * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
359      * page).
360      *
361      * @param in the InputStream associated with this parser
362      * @throws IOException
363      */
setInput(final InputStream in, final boolean initialize)364     public void setInput(final InputStream in, final boolean initialize) throws IOException {
365         this.in = in;
366         if ((in != null) && initialize) {
367             // If we fail on the very first byte, report an empty stream
368             try {
369                 final int version = readByte(); // version
370             } catch (EofException e) {
371                 throw new EmptyStreamException();
372             }
373             readInt();  // public identifier
374             readInt();  // 106 (UTF-8)
375             final int stringTableLength = readInt();  // string table length
376             if (stringTableLength != 0) {
377                 throw new EasParserException("WBXML string table unsupported");
378             }
379         }
380     }
381 
382     @VisibleForTesting
resetInput(final InputStream in)383     void resetInput(final InputStream in) {
384         this.in = in;
385         try {
386             // Read leading zero
387             read();
388         } catch (IOException e) {
389         }
390     }
391 
log(final String str)392     void log(final String str) {
393         if (!EasService.getProtocolLogging()) {
394             return;
395         }
396         final String logStr;
397         int cr = str.indexOf('\n');
398         if (cr > 0) {
399             logStr = str.substring(0, cr);
400         } else {
401             logStr = str;
402         }
403         final char [] charArray = new char[startTagArray.size() * 2];
404         Arrays.fill(charArray, ' ');
405         final String indent = new String(charArray);
406         LogUtils.d(LOG_TAG, "%s", indent + logStr);
407         if (EasService.getFileLogging()) {
408             FileLogger.log(LOG_TAG, logStr);
409         }
410     }
411 
logVerbose(final String str)412     void logVerbose(final String str) {
413         if (LOG_VERBOSE) {
414             log(str);
415         }
416     }
417 
    /**
     * Makes the given tag the current start tag, as if it had just been read from the stream.
     * The current page is set from the bits of {@code id} above Tags.PAGE_SHIFT before the tag
     * is pushed onto the tag stack.
     *
     * @param id the tag number, with the page in the bits above Tags.PAGE_SHIFT
     */
    protected void pushTag(final int id) {
        // The page must be set first; push() builds the Tag from the current page
        page = id >>> Tags.PAGE_SHIFT;
        push(id);
    }
422 
pop()423     protected void pop() {
424         // Retrieve the now-current startTag from our stack
425         startTag = startTagArray.removeFirst();
426         log("</" + startTag + '>');
427     }
428 
push(final int id)429     private void push(final int id) {
430         startTag = new Tag(page, id);
431         noContent = startTag.mNoContent;
432         log("<" + startTag + (noContent ? '/' : "") + '>');
433         // Save the startTag to our stack
434         startTagArray.addFirst(startTag);
435     }
436 
437     /**
438      * Return the next piece of data from the stream.  The return value indicates the type of data
439      * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
440      * TEXT (the value of a tag)
441      *
442      * @return the type of data retrieved
443      * @throws IOException
444      */
getNext()445     private final int getNext() throws IOException {
446         bytes = null;
447         text = null;
448 
449         if (noContent) {
450             startTagArray.removeFirst();
451             type = END;
452             noContent = false;
453             return type;
454         }
455 
456         int id = read();
457         while (id == Wbxml.SWITCH_PAGE) {
458             // Get the new page number
459             page = readByte();
460             // Retrieve the current tag table
461             if (!Tags.isValidPage(page)) {
462                 // Unknown code page. These seem to happen mostly because of
463                 // invalid data from the server so throw an exception here.
464                 throw new EasParserException("Unknown code page " + page);
465             }
466             logVerbose("Page: " + page);
467             id = read();
468         }
469 
470         switch (id) {
471             case EOF_BYTE:
472                 // End of document
473                 type = DONE;
474                 break;
475 
476             case Wbxml.END:
477                 type = END;
478                 pop();
479                 break;
480 
481             case Wbxml.STR_I:
482                 // Inline string
483                 type = TEXT;
484                 text = readInlineString();
485                 log(startTag + ": " + text);
486                 break;
487 
488             case Wbxml.OPAQUE:
489                 // Integer length + opaque data
490                 type = OPAQUE;
491                 final int length = readInt();
492                 bytes = new byte[length];
493                 for (int i = 0; i < length; i++) {
494                     bytes[i] = (byte)readByte();
495                 }
496                 log(startTag + ": (opaque:" + length + ") ");
497                 break;
498 
499             default:
500                 if (Tags.isGlobalTag(id & Tags.PAGE_MASK)) {
501                     throw new EasParserException(String.format(
502                                     "Unhandled WBXML global token 0x%02X", id));
503                 }
504                 if ((id & Wbxml.WITH_ATTRIBUTES) != 0) {
505                     throw new EasParserException(String.format(
506                                     "Attributes unsupported, tag 0x%02X", id));
507                 }
508                 type = START;
509                 push(id);
510         }
511 
512         // Return the type of data we're dealing with
513         return type;
514     }
515 
516     /**
517      * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
518      * price to pay...
519      *
520      * @return the int read
521      * @throws IOException
522      */
read()523     private int read() throws IOException {
524         int i;
525         i = in.read();
526         if (capture) {
527             captureArray.add(i);
528         }
529         logVerbose("Byte: " + i);
530         return i;
531     }
532 
readByte()533     private int readByte() throws IOException {
534         int i = read();
535         if (i == EOF_BYTE) {
536             throw new EofException();
537         }
538         return i;
539     }
540 
541     /**
542      * Throws EasParserException if detects integer encoded with more than 5
543      * bytes. A uint_32 needs 5 bytes to fully encode 32 bits so if the high
544      * bit is set for more than 4 bytes, something is wrong with the data
545      * stream.
546      */
readInt()547     private int readInt() throws IOException {
548         int result = 0;
549         int i;
550         int numBytes = 0;
551 
552         do {
553             if (++numBytes > 5) {
554                 throw new EasParserException("Invalid integer encoding, too many bytes");
555             }
556             i = readByte();
557             result = (result << 7) | (i & 0x7f);
558         } while ((i & 0x80) != 0);
559 
560         return result;
561     }
562 
563     /**
564      * Read an inline string from the stream
565      *
566      * @return the String as parsed from the stream
567      * @throws IOException
568      */
readInlineString()569     private String readInlineString() throws IOException {
570         final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
571         while (true) {
572             final int i = read();
573             if (i == 0) {
574                 break;
575             } else if (i == EOF_BYTE) {
576                 throw new EofException();
577             }
578             outputStream.write(i);
579         }
580         outputStream.flush();
581         final String res = outputStream.toString("UTF-8");
582         outputStream.close();
583         return res;
584     }
585 }
586