/*
 * Copyright (C) 2008-2009 Marc Blank
 * Licensed to The Android Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.exchange.adapter;

import android.content.Context;

import com.android.exchange.Eas;
import com.android.exchange.EasException;
import com.android.exchange.service.EasService;
import com.android.exchange.utility.FileLogger;
import com.android.mail.utils.LogUtils;
import com.google.common.annotations.VisibleForTesting;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;

/**
 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
 * EAS uses (as defined in the EAS specification).
 *
 * Supports:
 *      WBXML tokens to encode XML tags
 *      WBXML code pages to support multiple XML namespaces
 *      Inline strings
 *      Opaque data
 *
 * Does not support: (throws EasParserException)
 *      String tables
 *      Entities
 *      Processing instructions
 *      Attribute encoding
 *
 */
public abstract class Parser {
    private static final boolean LOG_VERBOSE = false;

    private static final String LOG_TAG = Eas.LOG_TAG;

    // Token types reported by the parser. Note that END_DOCUMENT and DONE share the value 1.
    public static final int START_DOCUMENT = 0;
    public static final int END_DOCUMENT = 1;
    private static final int DONE = 1;
    private static final int START = 2;
    public static final int END = 3;
    private static final int TEXT = 4;
    private static final int OPAQUE = 5;
    private static final int NOT_ENDED = Integer.MIN_VALUE;
    // Value returned by InputStream.read() at end of stream
    private static final int EOF_BYTE = -1;

    // Whether every byte read from the stream is also recorded in captureArray
    private boolean capture = false;

    // The bytes recorded since captureOn() was called; null until then
    private ArrayList<Integer> captureArray;

    // The input stream for this parser
    private InputStream in;

    /**
     * A single WBXML tag token, resolved against the code page that was current when it was read.
     */
    public class Tag {
        private final int mPage;
        private final int mIndex;
        // Whether the tag is associated with content (a value)
        public final boolean mNoContent;
        private final String mName;

        /**
         * @param page the code page in effect for this tag
         * @param id the raw tag token as read from the stream (index plus flag bits)
         */
        public Tag(final int page, final int id) {
            mPage = page;
            // The tag index is in the low bits selected by PAGE_MASK
            mIndex = id & Tags.PAGE_MASK;
            // If the WITH_CONTENT flag is set, there is content (a value) to be read
            mNoContent = (id & Wbxml.WITH_CONTENT) == 0;
            if (Tags.isGlobalTag(mIndex)) {
                mName = "unsupported-WBXML";
            } else if (!Tags.isValidTag(mPage, mIndex)) {
                mName = "unknown";
            } else {
                mName = Tags.getTagName(mPage, mIndex);
            }
        }

        /**
         * @return the bare index for global tokens; otherwise the page number shifted by
         *         PAGE_SHIFT and OR'd with the index
         */
        public int getTagNum() {
            if (Tags.isGlobalTag(mIndex)) {
                return mIndex;
            }
            return (mPage << Tags.PAGE_SHIFT) | mIndex;
        }

        @Override
        public String toString() {
            return mName;
        }
    }

    // The stack of tags being processed
    private final Deque<Tag> startTagArray = new ArrayDeque<Tag>();

    // The most recently started tag, or (after an END) the tag that was just closed
    private Tag startTag;

    // The type of the last token read (eg, TEXT, OPAQUE, END, etc).
    private int type;

    // The current page. As of EAS 14.1, this is a value 0-24.
    private int page;

    // The current tag. The low order 6 bits contain the tag index and the
    // higher order bits the page number. The format matches that used for
    // the tag enums defined in Tags.java.
    public int tag;

    // Whether the current tag is associated with content (a value)
    public boolean noContent;

    // The value read, as a String
    private String text;

    // The value read, as bytes
    private byte[] bytes;

    // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.

    /**
     * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
     */
    public class EofException extends IOException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * An EmptyStreamException is an EofException that occurs reading the first byte in the
     * parser's input stream; in other words, the stream had no content.
     */
    public class EmptyStreamException extends EofException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * Generated by {@link #nextTag(int)} when the end of the document is reached while a
     * different ending tag was expected.
     */
    public class EodException extends IOException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * Generated when the stream contains data this parser cannot handle: unsupported WBXML
     * features, unknown code pages, or values of an unexpected type.
     */
    public class EasParserException extends IOException {
        private static final long serialVersionUID = 1L;

        EasParserException() {
            super("WBXML format error");
        }

        EasParserException(final String reason) {
            super(reason);
        }
    }

    /**
     * Parses the stream. This base implementation reads nothing and returns false.
     *
     * @return false in this base implementation
     * @throws IOException declared for overriding implementations
     * @throws EasException declared for overriding implementations
     */
    public boolean parse() throws IOException, EasException {
        return false;
    }

    /**
     * Creates a parser on a fresh stream and consumes the WBXML header.
     *
     * @param in the stream to parse
     * @throws IOException if the header cannot be read or declares a string table
     */
    public Parser(final InputStream in) throws IOException {
        setInput(in, true);
    }

    /**
     * Constructor for use when switching parsers within a input stream
     * @param parser an existing, initialized parser
     * @throws IOException
     */
    public Parser(final Parser parser) throws IOException {
        setInput(parser.in, false);
    }

    /**
     * @return the stream this parser reads from
     */
    protected InputStream getInput() {
        return in;
    }

    /**
     * Turns on data capture; this is used to create test streams that represent "live" data and
     * can be used against the various parsers.
     */
    public void captureOn() {
        capture = true;
        captureArray = new ArrayList<Integer>();
    }

    /**
     * Turns off data capture; writes the captured data to a specified file. Does nothing beyond
     * disabling capture if {@link #captureOn()} was never called.
     *
     * @param context the context used to open the output file
     * @param file the name of the file to write the captured bytes to
     */
    public void captureOff(final Context context, final String file) {
        // Actually stop recording, whether or not the write below succeeds
        capture = false;
        if (captureArray == null) {
            return;
        }
        FileOutputStream out = null;
        try {
            // NOTE(review): MODE_WORLD_WRITEABLE is kept from the original debug code; confirm it
            // is still acceptable on the platform versions this runs on.
            out = context.openFileOutput(file, Context.MODE_WORLD_WRITEABLE);
            out.write(captureArray.toString().getBytes("UTF-8"));
        } catch (FileNotFoundException e) {
            // This is debug code; exceptions aren't interesting.
        } catch (IOException e) {
            // This is debug code; exceptions aren't interesting.
        } finally {
            // Close on every path so a failed write doesn't leak the descriptor
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // This is debug code; exceptions aren't interesting.
                }
            }
        }
    }

    /**
     * Return the value of the current tag, as a byte array. Throws EasParserException
     * if neither opaque nor text data is present. Never returns null--returns
     * an empty byte[] array for empty data.
     *
     * @return the byte array value of the current tag
     * @throws IOException
     */
    public byte[] getValueBytes() throws IOException {
        final String name = startTag.toString();

        getNext();
        // This means there was no value given, just <Foo/>; we'll return empty array
        if (type == END) {
            log("No value for tag: " + name);
            return new byte[0];
        } else if (type != OPAQUE && type != TEXT) {
            throw new EasParserException("Expected OPAQUE or TEXT data for tag " + name);
        }

        // Save the value
        final byte[] val = type == OPAQUE ? bytes : text.getBytes("UTF-8");
        // Read the next token; it had better be the end of the current tag
        getNext();
        // If not, throw an exception
        if (type != END) {
            throw new EasParserException("No END found for tag " + name);
        }
        return val;
    }

    /**
     * Return the value of the current tag, as a String. Throws EasParserException
     * for non-text data. Never returns null--returns an empty string if no data.
     *
     * @return the String value of the current tag
     * @throws IOException
     */
    public String getValue() throws IOException {
        final String name = startTag.toString();

        getNext();
        // This means there was no value given, just <Foo/>; we'll return empty string for now
        if (type == END) {
            log("No value for tag: " + name);
            return "";
        } else if (type != TEXT) {
            throw new EasParserException("Expected TEXT data for tag " + name);
        }

        // Save the value
        final String val = text;
        // Read the next token; it had better be the end of the current tag
        getNext();
        // If not, throw an exception
        if (type != END) {
            throw new EasParserException("No END found for tag " + name);
        }
        return val;
    }

    /**
     * Return the value of the current tag, as an integer. Throws EasParserException
     * for non text data, and text data that doesn't parse as an integer. Returns
     * 0 for empty data.
     *
     * @return the integer value of the current tag
     * @throws IOException
     */
    public int getValueInt() throws IOException {
        final String val = getValue();
        if (val.length() == 0) {
            return 0;
        }

        try {
            return Integer.parseInt(val);
        } catch (NumberFormatException e) {
            final EasParserException parseError =
                    new EasParserException("Tag " + startTag + ": " + e.getMessage());
            // Keep the original failure attached for debugging
            parseError.initCause(e);
            throw parseError;
        }
    }

    /**
     * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
     * mark the end of the current tag and end of document. If we hit end of document without
     * looking for it, generate an EodException. The tag returned consists of the page number
     * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream. Thus, all tags
     * returned are unique.
     *
     * @param endingTag the tag that would represent the end of the tag we're processing
     * @return the next tag found
     * @throws IOException
     */
    public int nextTag(final int endingTag) throws IOException {
        while (getNext() != DONE) {
            // If we're a start, set tag to include the page and return it
            if (type == START) {
                tag = startTag.getTagNum();
                return tag;
            // If we're at the ending tag we're looking for, return the END signal
            } else if (type == END && startTag.getTagNum() == endingTag) {
                return END;
            }
        }
        // We're at end of document here. If we're looking for it, return END_DOCUMENT
        if (endingTag == START_DOCUMENT) {
            return END_DOCUMENT;
        }
        // Otherwise, we've prematurely hit end of document, so exception out
        // EodException is a subclass of IOException; this will be treated as an IO error by
        // EasService
        throw new EodException();
    }

    /**
     * Skip anything found in the stream until the end of the current tag is reached. This can be
     * used to ignore stretches of xml that aren't needed by the parser.
     *
     * @throws IOException
     */
    public void skipTag() throws IOException {
        final int thisTag = startTag.getTagNum();
        // Just loop until we hit the end of the current tag
        while (getNext() != DONE) {
            if (type == END && startTag.getTagNum() == thisTag) {
                return;
            }
        }

        // If we're at end of document, that's bad
        throw new EofException();
    }

    /**
     * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
     * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
     * page).
     *
     * @param in the InputStream associated with this parser
     * @param initialize whether to consume the WBXML header (version, public identifier,
     *        charset and string table length) from the stream; skipped when false or when
     *        {@code in} is null
     * @throws IOException
     */
    public void setInput(final InputStream in, final boolean initialize) throws IOException {
        this.in = in;
        if ((in != null) && initialize) {
            // If we fail on the very first byte, report an empty stream
            try {
                readByte(); // version
            } catch (EofException e) {
                throw new EmptyStreamException();
            }
            readInt();  // public identifier
            readInt();  // 106 (UTF-8)
            final int stringTableLength = readInt();  // string table length
            if (stringTableLength != 0) {
                throw new EasParserException("WBXML string table unsupported");
            }
        }
    }

    /**
     * Replaces the input stream and consumes one leading byte from it, ignoring any read error.
     *
     * @param in the new stream to read from
     */
    @VisibleForTesting
    void resetInput(final InputStream in) {
        this.in = in;
        try {
            // Read leading zero
            read();
        } catch (IOException ignored) {
            // Test-only helper: a failure here will resurface on the next real read.
        }
    }

    /**
     * Logs the first line of a message, indented by the current tag depth, when protocol logging
     * is enabled; the unindented line is also sent to the file logger when file logging is on.
     *
     * @param str the message to log
     */
    void log(final String str) {
        if (!EasService.getProtocolLogging()) {
            return;
        }
        final String logStr;
        int cr = str.indexOf('\n');
        if (cr > 0) {
            logStr = str.substring(0, cr);
        } else {
            logStr = str;
        }
        // Two spaces of indentation per open tag
        final char [] charArray = new char[startTagArray.size() * 2];
        Arrays.fill(charArray, ' ');
        final String indent = new String(charArray);
        LogUtils.d(LOG_TAG, "%s", indent + logStr);
        if (EasService.getFileLogging()) {
            FileLogger.log(LOG_TAG, logStr);
        }
    }

    /**
     * Logs a message via {@link #log(String)} only when LOG_VERBOSE is compiled in.
     *
     * @param str the message to log
     */
    void logVerbose(final String str) {
        if (LOG_VERBOSE) {
            log(str);
        }
    }

    /**
     * Sets the current page from the upper bits of a page-qualified tag and pushes the tag onto
     * the stack of open tags, as if it had just been read from the stream.
     *
     * @param id a tag in the combined page/index format
     */
    protected void pushTag(final int id) {
        page = id >>> Tags.PAGE_SHIFT;
        push(id);
    }

    /**
     * Removes the innermost open tag from the stack and makes it the current startTag, so that
     * callers can see which tag was just closed.
     */
    protected void pop() {
        // Retrieve the now-current startTag from our stack
        startTag = startTagArray.removeFirst();
        log("</" + startTag + '>');
    }

    /**
     * Creates a Tag for the token on the current page, records whether it has content, and
     * pushes it on the stack of open tags.
     *
     * @param id the raw tag token
     */
    private void push(final int id) {
        startTag = new Tag(page, id);
        noContent = startTag.mNoContent;
        log("<" + startTag + (noContent ? '/' : "") + '>');
        // Save the startTag to our stack
        startTagArray.addFirst(startTag);
    }

    /**
     * Return the next piece of data from the stream. The return value indicates the type of data
     * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream),
     * TEXT (the string value of a tag) or OPAQUE (the binary value of a tag)
     *
     * @return the type of data retrieved
     * @throws IOException
     */
    private final int getNext() throws IOException {
        bytes = null;
        text = null;

        // A tag without content has no END token in the stream; synthesize one
        if (noContent) {
            startTagArray.removeFirst();
            type = END;
            noContent = false;
            return type;
        }

        int id = read();
        while (id == Wbxml.SWITCH_PAGE) {
            // Get the new page number
            page = readByte();
            // Retrieve the current tag table
            if (!Tags.isValidPage(page)) {
                // Unknown code page. These seem to happen mostly because of
                // invalid data from the server so throw an exception here.
                throw new EasParserException("Unknown code page " + page);
            }
            logVerbose("Page: " + page);
            id = read();
        }

        switch (id) {
            case EOF_BYTE:
                // End of document
                type = DONE;
                break;

            case Wbxml.END:
                // Malformed data: report it as a parse error instead of letting the
                // empty stack raise an unchecked NoSuchElementException
                if (startTagArray.isEmpty()) {
                    throw new EasParserException("END token found with no open tag");
                }
                type = END;
                pop();
                break;

            case Wbxml.STR_I:
                // Inline string
                type = TEXT;
                text = readInlineString();
                log(startTag + ": " + text);
                break;

            case Wbxml.OPAQUE:
                // Integer length + opaque data
                type = OPAQUE;
                final int length = readInt();
                // A 5-byte encoding can overflow an int; don't let that become an
                // unchecked NegativeArraySizeException
                if (length < 0) {
                    throw new EasParserException("Invalid opaque data length " + length);
                }
                bytes = new byte[length];
                for (int i = 0; i < length; i++) {
                    bytes[i] = (byte)readByte();
                }
                log(startTag + ": (opaque:" + length + ") ");
                break;

            default:
                if (Tags.isGlobalTag(id & Tags.PAGE_MASK)) {
                    throw new EasParserException(String.format(
                                    "Unhandled WBXML global token 0x%02X", id));
                }
                if ((id & Wbxml.WITH_ATTRIBUTES) != 0) {
                    throw new EasParserException(String.format(
                                    "Attributes unsupported, tag 0x%02X", id));
                }
                type = START;
                push(id);
        }

        // Return the type of data we're dealing with
        return type;
    }

    /**
     * Read an int from the input stream, and capture it if necessary for debugging. Seems a small
     * price to pay...
     *
     * @return the int read
     * @throws IOException
     */
    private int read() throws IOException {
        final int i = in.read();
        if (capture) {
            captureArray.add(i);
        }
        logVerbose("Byte: " + i);
        return i;
    }

    /**
     * Reads one byte, treating end of stream as an error.
     *
     * @return the byte read
     * @throws EofException if the stream has ended
     * @throws IOException
     */
    private int readByte() throws IOException {
        final int i = read();
        if (i == EOF_BYTE) {
            throw new EofException();
        }
        return i;
    }

    /**
     * Reads a multi-byte integer: 7 bits per byte, most significant group first, with the high
     * bit of each byte set when another byte follows.
     *
     * Throws EasParserException if detects integer encoded with more than 5
     * bytes. A uint_32 needs 5 bytes to fully encode 32 bits so if the high
     * bit is set for more than 4 bytes, something is wrong with the data
     * stream.
     *
     * @return the decoded value
     * @throws IOException
     */
    private int readInt() throws IOException {
        int result = 0;
        int i;
        int numBytes = 0;

        do {
            if (++numBytes > 5) {
                throw new EasParserException("Invalid integer encoding, too many bytes");
            }
            i = readByte();
            result = (result << 7) | (i & 0x7f);
        } while ((i & 0x80) != 0);

        return result;
    }

    /**
     * Read an inline string from the stream: bytes up to (not including) a zero terminator,
     * decoded as UTF-8.
     *
     * @return the String as parsed from the stream
     * @throws EofException if the stream ends before the terminator
     * @throws IOException
     */
    private String readInlineString() throws IOException {
        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
        while (true) {
            final int i = read();
            if (i == 0) {
                break;
            } else if (i == EOF_BYTE) {
                throw new EofException();
            }
            outputStream.write(i);
        }
        return outputStream.toString("UTF-8");
    }
}