1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.vcard;
17 
import android.text.TextUtils;
import android.util.Base64;
import android.util.Log;

import com.android.vcard.exception.VCardAgentNotSupportedException;
import com.android.vcard.exception.VCardException;
import com.android.vcard.exception.VCardInvalidCommentLineException;
import com.android.vcard.exception.VCardInvalidLineException;
import com.android.vcard.exception.VCardVersionException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
38 
39 /**
40  * <p>
41  * Basic implementation achieving vCard parsing. Based on vCard 2.1.
42  * </p>
43  * @hide
44  */
45 /* package */ class VCardParserImpl_V21 {
46     private static final String LOG_TAG = VCardConstants.LOG_TAG;
47 
48     protected static final class CustomBufferedReader extends BufferedReader {
49         private long mTime;
50 
51         /**
52          * Needed since "next line" may be null due to end of line.
53          */
54         private boolean mNextLineIsValid;
55         private String mNextLine;
56 
CustomBufferedReader(Reader in)57         public CustomBufferedReader(Reader in) {
58             super(in);
59         }
60 
61         @Override
readLine()62         public String readLine() throws IOException {
63             if (mNextLineIsValid) {
64                 final String ret = mNextLine;
65                 mNextLine = null;
66                 mNextLineIsValid = false;
67                 return ret;
68             }
69 
70             final long start = System.currentTimeMillis();
71             final String line = super.readLine();
72             final long end = System.currentTimeMillis();
73             mTime += end - start;
74             return line;
75         }
76 
77         /**
78          * Read one line, but make this object store it in its queue.
79          */
peekLine()80         public String peekLine() throws IOException {
81             if (!mNextLineIsValid) {
82                 final long start = System.currentTimeMillis();
83                 final String line = super.readLine();
84                 final long end = System.currentTimeMillis();
85                 mTime += end - start;
86 
87                 mNextLine = line;
88                 mNextLineIsValid = true;
89             }
90 
91             return mNextLine;
92         }
93 
getTotalmillisecond()94         public long getTotalmillisecond() {
95             return mTime;
96         }
97     }
98 
    // Encoding that mCurrentEncoding is reset to before each vCard and each item.
    private static final String DEFAULT_ENCODING = "8BIT";
    // Charset that mCurrentCharset is reset to before each vCard.
    private static final String DEFAULT_CHARSET = "UTF-8";

    // Assigned from VCardConfig.DEFAULT_INTERMEDIATE_CHARSET in the constructor.
    protected final String mIntermediateCharset;

    // Interpreters notified about entry start/end and about each created property.
    private final List<VCardInterpreter> mInterpreterList = new ArrayList<VCardInterpreter>();
    // NOTE(review): presumably a cancellation flag; its readers/writers are outside this view.
    private boolean mCanceled;

    /**
     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time a new item comes.
     * </p>
     * <p>
     * "Encoding" in vCard is different from "Charset". It is mainly used for
     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
     * "QUOTED-PRINTABLE" are known examples.
     * </p>
     */
    protected String mCurrentEncoding;

    /**
     * The charset most recently announced by a CHARSET parameter, or the default
     * charset when the current vCard has not announced one yet.
     */
    protected String mCurrentCharset;

    /**
     * <p>
     * The reader object to be used internally.
     * </p>
     * <p>
     * Developers should not directly read a line from this object. Use
     * getLine() unless there is some reason not to.
     * </p>
     */
    protected CustomBufferedReader mReader;

    /**
     * <p>
     * Set for storing unknown TYPE attributes, which are not acceptable in the vCard
     * specification, but happen to be seen in real world vCards. Unknown property
     * names are recorded in this set as well.
     * </p>
     * <p>
     * We just accept those invalid types after emitting a warning for each of them.
     * </p>
     */
    protected final Set<String> mUnknownTypeSet = new HashSet<String>();

    /**
     * <p>
     * Set for storing unknown VALUE attributes, which are not acceptable in the
     * vCard specification, but happen to be seen in real world vCards.
     * </p>
     * <p>
     * We just accept those invalid values after emitting a warning for each of them.
     * </p>
     */
    protected final Set<String> mUnknownValueSet = new HashSet<String>();
154 
155 
    /**
     * Creates a parser using {@link VCardConfig#VCARD_TYPE_DEFAULT} as the vCard type.
     */
    public VCardParserImpl_V21() {
        this(VCardConfig.VCARD_TYPE_DEFAULT);
    }

    /**
     * Creates a parser whose intermediate charset is
     * {@link VCardConfig#DEFAULT_INTERMEDIATE_CHARSET}.
     *
     * @param vcardType vCard type flag; this constructor does not read it.
     */
    public VCardParserImpl_V21(int vcardType) {
        mIntermediateCharset =  VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
    }
163 
164     /**
165      * @return true when a given property name is a valid property name.
166      */
isValidPropertyName(final String propertyName)167     protected boolean isValidPropertyName(final String propertyName) {
168         if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
169                 propertyName.startsWith("X-"))
170                 && !mUnknownTypeSet.contains(propertyName)) {
171             mUnknownTypeSet.add(propertyName);
172             Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
173         }
174         return true;
175     }
176 
    /**
     * Reads the next line through {@link #mReader}, consuming a previously peeked
     * line if there is one.
     *
     * @return String. It may be null, or its length may be 0
     * @throws IOException
     */
    protected String getLine() throws IOException {
        return mReader.readLine();
    }
184 
    /**
     * Returns the next line without consuming it; the following {@link #getLine()}
     * returns the same line.
     *
     * @return the next line, which may be null.
     * @throws IOException
     */
    protected String peekLine() throws IOException {
        return mReader.peekLine();
    }
188 
189     /**
190      * @return String with it's length > 0
191      * @throws IOException
192      * @throws VCardException when the stream reached end of line
193      */
getNonEmptyLine()194     protected String getNonEmptyLine() throws IOException, VCardException {
195         String line;
196         while (true) {
197             line = getLine();
198             if (line == null) {
199                 throw new VCardException("Reached end of buffer.");
200             } else if (line.trim().length() > 0) {
201                 return line;
202             }
203         }
204     }
205 
206     /**
207      * <code>
208      * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
209      *         items *CRLF
210      *         "END" [ws] ":" [ws] "VCARD"
211      * </code>
212      * @return False when reaching end of file.
213      */
parseOneVCard()214     private boolean parseOneVCard() throws IOException, VCardException {
215         // reset for this entire vCard.
216         mCurrentEncoding = DEFAULT_ENCODING;
217         mCurrentCharset = DEFAULT_CHARSET;
218 
219         boolean allowGarbage = false;
220         if (!readBeginVCard(allowGarbage)) {
221             return false;
222         }
223         for (VCardInterpreter interpreter : mInterpreterList) {
224             interpreter.onEntryStarted();
225         }
226         parseItems();
227         for (VCardInterpreter interpreter : mInterpreterList) {
228             interpreter.onEntryEnded();
229         }
230         return true;
231     }
232 
233     /**
234      * @return True when successful. False when reaching the end of line
235      * @throws IOException
236      * @throws VCardException
237      */
readBeginVCard(boolean allowGarbage)238     protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
239         // TODO: use consructPropertyLine().
240         String line;
241         do {
242             while (true) {
243                 line = getLine();
244                 if (line == null) {
245                     return false;
246                 } else if (line.trim().length() > 0) {
247                     break;
248                 }
249             }
250             final String[] strArray = line.split(":", 2);
251             final int length = strArray.length;
252 
253             // Although vCard 2.1/3.0 specification does not allow lower cases,
254             // we found vCard file emitted by some external vCard expoter have such
255             // invalid Strings.
256             // e.g. BEGIN:vCard
257             if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
258                     && strArray[1].trim().equalsIgnoreCase("VCARD")) {
259                 return true;
260             } else if (!allowGarbage) {
261                 throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
262                         + "(Instead, \"" + line + "\" came)");
263             }
264         } while (allowGarbage);
265 
266         throw new VCardException("Reached where must not be reached.");
267     }
268 
269     /**
270      * Parses lines other than the first "BEGIN:VCARD". Takes care of "END:VCARD"n and
271      * "BEGIN:VCARD" in nested vCard.
272      */
273     /*
274      * items = *CRLF item / item
275      *
276      * Note: BEGIN/END aren't include in the original spec while this method handles them.
277      */
parseItems()278     protected void parseItems() throws IOException, VCardException {
279         boolean ended = false;
280 
281         try {
282             ended = parseItem();
283         } catch (VCardInvalidCommentLineException e) {
284             Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
285         }
286 
287         while (!ended) {
288             try {
289                 ended = parseItem();
290             } catch (VCardInvalidCommentLineException e) {
291                 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
292             }
293         }
294     }
295 
296     /*
297      * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
298      * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
299      * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
300      * "AGENT" [params] ":" vcard CRLF
301      */
parseItem()302     protected boolean parseItem() throws IOException, VCardException {
303         // Reset for an item.
304         mCurrentEncoding = DEFAULT_ENCODING;
305 
306         final String line = getNonEmptyLine();
307         final VCardProperty propertyData = constructPropertyData(line);
308 
309         final String propertyNameUpper = propertyData.getName().toUpperCase();
310         final String propertyRawValue = propertyData.getRawValue();
311 
312         if (propertyNameUpper.equals(VCardConstants.PROPERTY_BEGIN)) {
313             if (propertyRawValue.equalsIgnoreCase("VCARD")) {
314                 handleNest();
315             } else {
316                 throw new VCardException("Unknown BEGIN type: " + propertyRawValue);
317             }
318         } else if (propertyNameUpper.equals(VCardConstants.PROPERTY_END)) {
319             if (propertyRawValue.equalsIgnoreCase("VCARD")) {
320                 return true;  // Ended.
321             } else {
322                 throw new VCardException("Unknown END type: " + propertyRawValue);
323             }
324         } else {
325             parseItemInter(propertyData, propertyNameUpper);
326         }
327         return false;
328     }
329 
parseItemInter(VCardProperty property, String propertyNameUpper)330     private void parseItemInter(VCardProperty property, String propertyNameUpper)
331             throws IOException, VCardException {
332         String propertyRawValue = property.getRawValue();
333         if (propertyNameUpper.equals(VCardConstants.PROPERTY_AGENT)) {
334             handleAgent(property);
335         } else if (isValidPropertyName(propertyNameUpper)) {
336             if (propertyNameUpper.equals(VCardConstants.PROPERTY_VERSION) &&
337                     !propertyRawValue.equals(getVersionString())) {
338                 throw new VCardVersionException(
339                         "Incompatible version: " + propertyRawValue + " != " + getVersionString());
340             }
341             handlePropertyValue(property, propertyNameUpper);
342         } else {
343             throw new VCardException("Unknown property name: \"" + propertyNameUpper + "\"");
344         }
345     }
346 
handleNest()347     private void handleNest() throws IOException, VCardException {
348         for (VCardInterpreter interpreter : mInterpreterList) {
349             interpreter.onEntryStarted();
350         }
351         parseItems();
352         for (VCardInterpreter interpreter : mInterpreterList) {
353             interpreter.onEntryEnded();
354         }
355     }
356 
357     // For performance reason, the states for group and property name are merged into one.
358     static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
359     static private final int STATE_PARAMS = 1;
360     // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
361     static private final int STATE_PARAMS_IN_DQUOTE = 2;
362 
constructPropertyData(String line)363     protected VCardProperty constructPropertyData(String line) throws VCardException {
364         final VCardProperty propertyData = new VCardProperty();
365 
366         final int length = line.length();
367         if (length > 0 && line.charAt(0) == '#') {
368             throw new VCardInvalidCommentLineException();
369         }
370 
371         int state = STATE_GROUP_OR_PROPERTY_NAME;
372         int nameIndex = 0;
373 
374         // This loop is developed so that we don't have to take care of bottle neck here.
375         // Refactor carefully when you need to do so.
376         for (int i = 0; i < length; i++) {
377             final char ch = line.charAt(i);
378             switch (state) {
379                 case STATE_GROUP_OR_PROPERTY_NAME: {
380                     if (ch == ':') {  // End of a property name.
381                         final String propertyName = line.substring(nameIndex, i);
382                         propertyData.setName(propertyName);
383                         propertyData.setRawValue( i < length - 1 ? line.substring(i + 1) : "");
384                         return propertyData;
385                     } else if (ch == '.') {  // Each group is followed by the dot.
386                         final String groupName = line.substring(nameIndex, i);
387                         if (groupName.length() == 0) {
388                             Log.w(LOG_TAG, "Empty group found. Ignoring.");
389                         } else {
390                             propertyData.addGroup(groupName);
391                         }
392                         nameIndex = i + 1;  // Next should be another group or a property name.
393                     } else if (ch == ';') {  // End of property name and beginneng of parameters.
394                         final String propertyName = line.substring(nameIndex, i);
395                         propertyData.setName(propertyName);
396                         nameIndex = i + 1;
397                         state = STATE_PARAMS;  // Start parameter parsing.
398                     }
399                     // TODO: comma support (in vCard 3.0 and 4.0).
400                     break;
401                 }
402                 case STATE_PARAMS: {
403                     if (ch == '"') {
404                         if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
405                             Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
406                                     "Silently allow it");
407                         }
408                         state = STATE_PARAMS_IN_DQUOTE;
409                     } else if (ch == ';') {  // Starts another param.
410                         handleParams(propertyData, line.substring(nameIndex, i));
411                         nameIndex = i + 1;
412                     } else if (ch == ':') {  // End of param and beginenning of values.
413                         handleParams(propertyData, line.substring(nameIndex, i));
414                         propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : "");
415                         return propertyData;
416                     }
417                     break;
418                 }
419                 case STATE_PARAMS_IN_DQUOTE: {
420                     if (ch == '"') {
421                         if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
422                             Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
423                                     "Silently allow it");
424                         }
425                         state = STATE_PARAMS;
426                     }
427                     break;
428                 }
429             }
430         }
431 
432         throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
433     }
434 
435     /*
436      * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
437      * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
438      * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
439      * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
440      * [ws] word / knowntype
441      */
442     protected void handleParams(VCardProperty propertyData, String params)
443             throws VCardException {
444         final String[] strArray = params.split("=", 2);
445         if (strArray.length == 2) {
446             final String paramName = strArray[0].trim().toUpperCase();
447             String paramValue = strArray[1].trim();
448             if (paramName.equals("TYPE")) {
449                 handleType(propertyData, paramValue);
450             } else if (paramName.equals("VALUE")) {
451                 handleValue(propertyData, paramValue);
452             } else if (paramName.equals("ENCODING")) {
453                 handleEncoding(propertyData, paramValue.toUpperCase());
454             } else if (paramName.equals("CHARSET")) {
455                 handleCharset(propertyData, paramValue);
456             } else if (paramName.equals("LANGUAGE")) {
457                 handleLanguage(propertyData, paramValue);
458             } else if (paramName.startsWith("X-")) {
459                 handleAnyParam(propertyData, paramName, paramValue);
460             } else {
461                 throw new VCardException("Unknown type \"" + paramName + "\"");
462             }
463         } else {
464             handleParamWithoutName(propertyData, strArray[0]);
465         }
466     }
467 
    /**
     * Handles a parameter given without a name by treating its value as a TYPE.
     * vCard 3.0 parser implementation may throw VCardException.
     *
     * @param propertyData the property that receives the parameter.
     * @param paramValue the bare parameter text.
     */
    protected void handleParamWithoutName(VCardProperty propertyData, final String paramValue) {
        handleType(propertyData, paramValue);
    }
474 
475     /*
476      * ptypeval = knowntype / "X-" word
477      */
478     protected void handleType(VCardProperty propertyData, final String ptypeval) {
479         if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
480                 || ptypeval.startsWith("X-"))
481                 && !mUnknownTypeSet.contains(ptypeval)) {
482             mUnknownTypeSet.add(ptypeval);
483             Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
484         }
485         propertyData.addParameter(VCardConstants.PARAM_TYPE, ptypeval);
486     }
487 
488     /*
489      * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
490      */
491     protected void handleValue(VCardProperty propertyData, final String pvalueval) {
492         if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
493                 || pvalueval.startsWith("X-")
494                 || mUnknownValueSet.contains(pvalueval))) {
495             mUnknownValueSet.add(pvalueval);
496             Log.w(LOG_TAG, String.format(
497                     "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
498         }
499         propertyData.addParameter(VCardConstants.PARAM_VALUE, pvalueval);
500     }
501 
502     /*
503      * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
504      */
505     protected void handleEncoding(VCardProperty propertyData, String pencodingval)
506             throws VCardException {
507         if (getAvailableEncodingSet().contains(pencodingval) ||
508                 pencodingval.startsWith("X-")) {
509             propertyData.addParameter(VCardConstants.PARAM_ENCODING, pencodingval);
510             // Update encoding right away, as this is needed to understanding other params.
511             mCurrentEncoding = pencodingval.toUpperCase();
512         } else {
513             throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
514         }
515     }
516 
    /**
     * <p>
     * Records the given charset as the current charset and adds it to the property
     * as a CHARSET parameter.
     * </p>
     * <p>
     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
     * We allow any charset.
     * </p>
     *
     * @param propertyData the property that receives the parameter.
     * @param charsetval the charset name; it is not validated here.
     */
    protected void handleCharset(VCardProperty propertyData, String charsetval) {
        mCurrentCharset = charsetval;
        propertyData.addParameter(VCardConstants.PARAM_CHARSET, charsetval);
    }
528 
529     /**
530      * See also Section 7.1 of RFC 1521
531      */
532     protected void handleLanguage(VCardProperty propertyData, String langval)
533             throws VCardException {
534         String[] strArray = langval.split("-");
535         if (strArray.length != 2) {
536             throw new VCardException("Invalid Language: \"" + langval + "\"");
537         }
538         String tmp = strArray[0];
539         int length = tmp.length();
540         for (int i = 0; i < length; i++) {
541             if (!isAsciiLetter(tmp.charAt(i))) {
542                 throw new VCardException("Invalid Language: \"" + langval + "\"");
543             }
544         }
545         tmp = strArray[1];
546         length = tmp.length();
547         for (int i = 0; i < length; i++) {
548             if (!isAsciiLetter(tmp.charAt(i))) {
549                 throw new VCardException("Invalid Language: \"" + langval + "\"");
550             }
551         }
552         propertyData.addParameter(VCardConstants.PARAM_LANGUAGE, langval);
553     }
554 
555     private boolean isAsciiLetter(char ch) {
556         if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
557             return true;
558         }
559         return false;
560     }
561 
    /**
     * Mainly for "X-" type. This accepts any kind of type without check and adds
     * it to the property as given.
     *
     * @param propertyData the property that receives the parameter.
     * @param paramName the parameter name.
     * @param paramValue the parameter value.
     */
    protected void handleAnyParam(
            VCardProperty propertyData, String paramName, String paramValue) {
        propertyData.addParameter(paramName, paramValue);
    }
569 
570     protected void handlePropertyValue(VCardProperty property, String propertyName)
571             throws IOException, VCardException {
572         final String propertyNameUpper = property.getName().toUpperCase();
573         String propertyRawValue = property.getRawValue();
574         final String sourceCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
575         final Collection<String> charsetCollection =
576                 property.getParameters(VCardConstants.PARAM_CHARSET);
577         String targetCharset =
578                 ((charsetCollection != null) ? charsetCollection.iterator().next() : null);
579         if (TextUtils.isEmpty(targetCharset)) {
580             targetCharset = VCardConfig.DEFAULT_IMPORT_CHARSET;
581         }
582 
583         // TODO: have "separableProperty" which reflects vCard spec..
584         if (propertyNameUpper.equals(VCardConstants.PROPERTY_ADR)
585                 || propertyNameUpper.equals(VCardConstants.PROPERTY_ORG)
586                 || propertyNameUpper.equals(VCardConstants.PROPERTY_N)) {
587             handleAdrOrgN(property, propertyRawValue, sourceCharset, targetCharset);
588             return;
589         }
590 
591         if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP) ||
592                 // If encoding attribute is missing, then attempt to detect QP encoding.
593                 // This is to handle a bug where the android exporter was creating FN properties
594                 // with missing encoding.  b/7292017
595                 (propertyNameUpper.equals(VCardConstants.PROPERTY_FN) &&
596                         property.getParameters(VCardConstants.PARAM_ENCODING) == null &&
597                         VCardUtils.appearsLikeAndroidVCardQuotedPrintable(propertyRawValue))
598                 ) {
599             final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
600             final String propertyEncodedValue =
601                     VCardUtils.parseQuotedPrintable(quotedPrintablePart,
602                             false, sourceCharset, targetCharset);
603             property.setRawValue(quotedPrintablePart);
604             property.setValues(propertyEncodedValue);
605             for (VCardInterpreter interpreter : mInterpreterList) {
606                 interpreter.onPropertyCreated(property);
607             }
608         } else if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
609                 || mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
610             // It is very rare, but some BASE64 data may be so big that
611             // OutOfMemoryError occurs. To ignore such cases, use try-catch.
612             try {
613                 final String base64Property = getBase64(propertyRawValue);
614                 try {
615                     property.setByteValue(Base64.decode(base64Property, Base64.DEFAULT));
616                 } catch (IllegalArgumentException e) {
617                     throw new VCardException("Decode error on base64 photo: " + propertyRawValue);
618                 }
619                 for (VCardInterpreter interpreter : mInterpreterList) {
620                     interpreter.onPropertyCreated(property);
621                 }
622             } catch (OutOfMemoryError error) {
623                 Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
624                 for (VCardInterpreter interpreter : mInterpreterList) {
625                     interpreter.onPropertyCreated(property);
626                 }
627             }
628         } else {
629             if (!(mCurrentEncoding.equals("7BIT") || mCurrentEncoding.equals("8BIT") ||
630                     mCurrentEncoding.startsWith("X-"))) {
631                 Log.w(LOG_TAG,
632                         String.format("The encoding \"%s\" is unsupported by vCard %s",
633                                 mCurrentEncoding, getVersionString()));
634             }
635 
636             // Some device uses line folding defined in RFC 2425, which is not allowed
637             // in vCard 2.1 (while needed in vCard 3.0).
638             //
639             // e.g.
640             // BEGIN:VCARD
641             // VERSION:2.1
642             // N:;Omega;;;
643             // EMAIL;INTERNET:"Omega"
644             //   <omega@example.com>
645             // FN:Omega
646             // END:VCARD
647             //
648             // The vCard above assumes that email address should become:
649             // "Omega" <omega@example.com>
650             //
651             // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
652             //
653             // For more information about line folding,
654             // see "5.8.1. Line delimiting and folding" in RFC 2425.
655             //
656             // We take care of this case more formally in vCard 3.0, so we only need to
657             // do this in vCard 2.1.
658             if (getVersion() == VCardConfig.VERSION_21) {
659                 StringBuilder builder = null;
660                 while (true) {
661                     final String nextLine = peekLine();
662                     // We don't need to care too much about this exceptional case,
663                     // but we should not wrongly eat up "END:VCARD", since it critically
664                     // breaks this parser's state machine.
665                     // Thus we roughly look over the next line and confirm it is at least not
666                     // "END:VCARD". This extra fee is worth paying. This is exceptional
667                     // anyway.
668                     if (!TextUtils.isEmpty(nextLine) &&
669                             nextLine.charAt(0) == ' ' &&
670                             !"END:VCARD".contains(nextLine.toUpperCase())) {
671                         getLine();  // Drop the next line.
672 
673                         if (builder == null) {
674                             builder = new StringBuilder();
675                             builder.append(propertyRawValue);
676                         }
677                         builder.append(nextLine.substring(1));
678                     } else {
679                         break;
680                     }
681                 }
682                 if (builder != null) {
683                     propertyRawValue = builder.toString();
684                 }
685             }
686 
687             ArrayList<String> propertyValueList = new ArrayList<String>();
688             String value = VCardUtils.convertStringCharset(
689                     maybeUnescapeText(propertyRawValue), sourceCharset, targetCharset);
690             propertyValueList.add(value);
691             property.setValues(propertyValueList);
692             for (VCardInterpreter interpreter : mInterpreterList) {
693                 interpreter.onPropertyCreated(property);
694             }
695         }
696     }
697 
698     private void handleAdrOrgN(VCardProperty property, String propertyRawValue,
699             String sourceCharset, String targetCharset) throws VCardException, IOException {
700         List<String> encodedValueList = new ArrayList<String>();
701 
702         // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices emit
703         // such data.
704         if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
705             // First we retrieve Quoted-Printable String from vCard entry, which may include
706             // multiple lines.
707             final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
708 
709             // "Raw value" from the view of users should contain all part of QP string.
710             // TODO: add test for this handling
711             property.setRawValue(quotedPrintablePart);
712 
713             // We split Quoted-Printable String using semi-colon before decoding it, as
714             // the Quoted-Printable may have semi-colon, which confuses splitter.
715             final List<String> quotedPrintableValueList =
716                     VCardUtils.constructListFromValue(quotedPrintablePart, getVersion());
717             for (String quotedPrintableValue : quotedPrintableValueList) {
718                 String encoded = VCardUtils.parseQuotedPrintable(quotedPrintableValue,
719                         false, sourceCharset, targetCharset);
720                 encodedValueList.add(encoded);
721             }
722         } else {
723             final String propertyValue = getPotentialMultiline(propertyRawValue);
724             final List<String> rawValueList =
725                     VCardUtils.constructListFromValue(propertyValue, getVersion());
726             for (String rawValue : rawValueList) {
727                 encodedValueList.add(VCardUtils.convertStringCharset(
728                         rawValue, sourceCharset, targetCharset));
729             }
730         }
731 
732         property.setValues(encodedValueList);
733         for (VCardInterpreter interpreter : mInterpreterList) {
734             interpreter.onPropertyCreated(property);
735         }
736     }
737 
738     /**
739      * <p>
740      * Parses and returns Quoted-Printable.
741      * </p>
742      *
743      * @param firstString The string following a parameter name and attributes.
744      *            Example: "string" in
745      *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
746      * @return whole Quoted-Printable string, including a given argument and
747      *         following lines. Excludes the last empty line following to Quoted
748      *         Printable lines.
749      * @throws IOException
750      * @throws VCardException
751      */
752     private String getQuotedPrintablePart(String firstString)
753             throws IOException, VCardException {
754         // Specifically, there may be some padding between = and CRLF.
755         // See the following:
756         //
757         // qp-line := *(qp-segment transport-padding CRLF)
758         // qp-part transport-padding
759         // qp-segment := qp-section *(SPACE / TAB) "="
760         // ; Maximum length of 76 characters
761         //
762         // e.g. (from RFC 2045)
763         // Now's the time =
764         // for all folk to come=
765         // to the aid of their country.
766         if (firstString.trim().endsWith("=")) {
767             // remove "transport-padding"
768             int pos = firstString.length() - 1;
769             while (firstString.charAt(pos) != '=') {
770             }
771             StringBuilder builder = new StringBuilder();
772             builder.append(firstString.substring(0, pos + 1));
773             builder.append("\r\n");
774             String line;
775             while (true) {
776                 line = getLine();
777                 if (line == null) {
778                     throw new VCardException("File ended during parsing a Quoted-Printable String");
779                 }
780                 if (line.trim().endsWith("=")) {
781                     // remove "transport-padding"
782                     pos = line.length() - 1;
783                     while (line.charAt(pos) != '=') {
784                     }
785                     builder.append(line.substring(0, pos + 1));
786                     builder.append("\r\n");
787                 } else {
788                     builder.append(line);
789                     break;
790                 }
791             }
792             return builder.toString();
793         } else {
794             return firstString;
795         }
796     }
797 
798     /**
799      * Given the first line of a property, checks consecutive lines after it and builds a new
800      * multi-line value if it exists.
801      *
802      * @param firstString The first line of the property.
803      * @return A new property, potentially built from multiple lines.
804      * @throws IOException
805      */
806     private String getPotentialMultiline(String firstString) throws IOException {
807         final StringBuilder builder = new StringBuilder();
808         builder.append(firstString);
809 
810         while (true) {
811             final String line = peekLine();
812             if (line == null || line.length() == 0) {
813                 break;
814             }
815 
816             final String propertyName = getPropertyNameUpperCase(line);
817             if (propertyName != null) {
818                 break;
819             }
820 
821             // vCard 2.1 does not allow multi-line of adr but microsoft vcards may have it.
822             // We will consider the next line to be a part of a multi-line value if it does not
823             // contain a property name (i.e. a colon or semi-colon).
824             // Consume the line.
825             getLine();
826             builder.append(" ").append(line);
827         }
828 
829         return builder.toString();
830     }
831 
832     protected String getBase64(String firstString) throws IOException, VCardException {
833         final StringBuilder builder = new StringBuilder();
834         builder.append(firstString);
835 
836         while (true) {
837             final String line = peekLine();
838             if (line == null) {
839                 throw new VCardException("File ended during parsing BASE64 binary");
840             }
841 
842             // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
843             // have them. We try to detect those cases using colon and semi-colon, given BASE64
844             // does not contain it.
845             // E.g.
846             //      TEL;TYPE=WORK:+5555555
847             // or
848             //      END:VCARD
849             String propertyName = getPropertyNameUpperCase(line);
850             if (getKnownPropertyNameSet().contains(propertyName) ||
851                     VCardConstants.PROPERTY_X_ANDROID_CUSTOM.equals(propertyName)) {
852                 Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
853                         "which must not contain semi-colon or colon. Treat the line as next "
854                         + "property.");
855                 Log.w(LOG_TAG, "Problematic line: " + line.trim());
856                 break;
857             }
858 
859             // Consume the line.
860             getLine();
861 
862             if (line.length() == 0) {
863                 break;
864             }
865             // Trim off any extraneous whitespace to handle 2.1 implementations
866             // that use 3.0 style line continuations. This is safe because space
867             // isn't a Base64 encoding value.
868             builder.append(line.trim());
869         }
870 
871         return builder.toString();
872     }
873 
874     /**
875      * Extracts the property name portion of a given vCard line.
876      * <p>
877      * Properties must contain a colon.
878      * <p>
879      * E.g.
880      *      TEL;TYPE=WORK:+5555555  // returns "TEL"
881      *      END:VCARD // returns "END"
882      *      TEL; // returns null
883      *
884      * @param line The vCard line.
885      * @return The property name portion. {@literal null} if no property name found.
886      */
887     private String getPropertyNameUpperCase(String line) {
888         final int colonIndex = line.indexOf(":");
889         if (colonIndex > -1) {
890             final int semiColonIndex = line.indexOf(";");
891 
892             // Find the minimum index that is greater than -1.
893             final int minIndex;
894             if (colonIndex == -1) {
895                 minIndex = semiColonIndex;
896             } else if (semiColonIndex == -1) {
897                 minIndex = colonIndex;
898             } else {
899                 minIndex = Math.min(colonIndex, semiColonIndex);
900             }
901             return line.substring(0, minIndex).toUpperCase();
902         }
903         return null;
904     }
905 
906     /*
907      * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
908      * error toward the AGENT property.
909      * // TODO: Support AGENT property.
910      * item =
911      * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
912      * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
913      */
914     protected void handleAgent(final VCardProperty property) throws VCardException {
915         if (!property.getRawValue().toUpperCase().contains("BEGIN:VCARD")) {
916             // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
917             for (VCardInterpreter interpreter : mInterpreterList) {
918                 interpreter.onPropertyCreated(property);
919             }
920             return;
921         } else {
922             throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
923         }
924     }
925 
926     /**
927      * For vCard 3.0.
928      */
929     protected String maybeUnescapeText(final String text) {
930         return text;
931     }
932 
933     /**
934      * Returns unescaped String if the character should be unescaped. Return
935      * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
936      * while "\x" should not be.
937      */
938     protected String maybeUnescapeCharacter(final char ch) {
939         return unescapeCharacter(ch);
940     }
941 
942     /* package */ static String unescapeCharacter(final char ch) {
943         // Original vCard 2.1 specification does not allow transformation
944         // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
945         // implementation of
946         // this class allowed them, so keep it as is.
947         if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
948             return String.valueOf(ch);
949         } else {
950             return null;
951         }
952     }
953 
954     /**
955      * @return {@link VCardConfig#VERSION_21}
956      */
957     protected int getVersion() {
958         return VCardConfig.VERSION_21;
959     }
960 
961     /**
962      * @return {@link VCardConfig#VERSION_30}
963      */
964     protected String getVersionString() {
965         return VCardConstants.VERSION_V21;
966     }
967 
    /**
     * Returns the set of property names this parser treats as known. Among other uses, it
     * is consulted while reading BASE64 data to detect the start of the next property.
     *
     * @return {@code VCardParser_V21.sKnownPropertyNameSet}
     */
    protected Set<String> getKnownPropertyNameSet() {
        return VCardParser_V21.sKnownPropertyNameSet;
    }
971 
    /**
     * Returns the set of type names this parser treats as known.
     *
     * @return {@code VCardParser_V21.sKnownTypeSet}
     */
    protected Set<String> getKnownTypeSet() {
        return VCardParser_V21.sKnownTypeSet;
    }
975 
    /**
     * Returns the set of value names this parser treats as known.
     *
     * @return {@code VCardParser_V21.sKnownValueSet}
     */
    protected Set<String> getKnownValueSet() {
        return VCardParser_V21.sKnownValueSet;
    }
979 
    /**
     * Returns the set of encoding names this parser treats as available.
     *
     * @return {@code VCardParser_V21.sAvailableEncoding}
     */
    protected Set<String> getAvailableEncodingSet() {
        return VCardParser_V21.sAvailableEncoding;
    }
983 
    /**
     * Returns the default encoding of this parser.
     *
     * @return the {@code DEFAULT_ENCODING} constant.
     */
    protected String getDefaultEncoding() {
        return DEFAULT_ENCODING;
    }
987 
    /**
     * Returns the default charset of this parser.
     *
     * @return the {@code DEFAULT_CHARSET} constant.
     */
    protected String getDefaultCharset() {
        return DEFAULT_CHARSET;
    }
991 
    /**
     * Returns the charset currently held in {@code mCurrentCharset}.
     *
     * @return the current value of {@code mCurrentCharset}.
     */
    protected String getCurrentCharset() {
        return mCurrentCharset;
    }
995 
    /**
     * Registers an interpreter by appending it to {@code mInterpreterList}. Every
     * interpreter in that list is notified of parse events such as
     * {@code onVCardStarted()}, {@code onPropertyCreated()} and {@code onVCardEnded()}.
     *
     * @param interpreter the interpreter to add.
     */
    public void addInterpreter(VCardInterpreter interpreter) {
        mInterpreterList.add(interpreter);
    }
999 
1000     public void parse(InputStream is) throws IOException, VCardException {
1001         if (is == null) {
1002             throw new NullPointerException("InputStream must not be null.");
1003         }
1004 
1005         final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1006         mReader = new CustomBufferedReader(tmpReader);
1007 
1008         final long start = System.currentTimeMillis();
1009         for (VCardInterpreter interpreter : mInterpreterList) {
1010             interpreter.onVCardStarted();
1011         }
1012 
1013         // vcard_file = [wsls] vcard [wsls]
1014         while (true) {
1015             synchronized (this) {
1016                 if (mCanceled) {
1017                     Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
1018                     break;
1019                 }
1020             }
1021             if (!parseOneVCard()) {
1022                 break;
1023             }
1024         }
1025 
1026         for (VCardInterpreter interpreter : mInterpreterList) {
1027             interpreter.onVCardEnded();
1028         }
1029     }
1030 
1031     public void parseOne(InputStream is) throws IOException, VCardException {
1032         if (is == null) {
1033             throw new NullPointerException("InputStream must not be null.");
1034         }
1035 
1036         final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1037         mReader = new CustomBufferedReader(tmpReader);
1038 
1039         final long start = System.currentTimeMillis();
1040         for (VCardInterpreter interpreter : mInterpreterList) {
1041             interpreter.onVCardStarted();
1042         }
1043         parseOneVCard();
1044         for (VCardInterpreter interpreter : mInterpreterList) {
1045             interpreter.onVCardEnded();
1046         }
1047     }
1048 
    /**
     * Requests cancellation of an ongoing {@link #parse(InputStream)} call by setting
     * {@code mCanceled} to true.
     * <p>
     * The method is synchronized on this instance; {@code parse()} reads the flag under the
     * same monitor before parsing each entry, so an entry already being parsed is not
     * interrupted.
     */
    public final synchronized void cancel() {
        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
        mCanceled = true;
    }
1053 }
1054