1 /*
2  * Copyright 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.example.android.basicsyncadapter.net;
18 
19 import android.text.format.Time;
20 import android.util.Xml;
21 
22 import org.xmlpull.v1.XmlPullParser;
23 import org.xmlpull.v1.XmlPullParserException;
24 
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.text.ParseException;
28 import java.util.ArrayList;
29 import java.util.List;
30 
31 /**
32  * This class parses generic Atom feeds.
33  *
34  * <p>Given an InputStream representation of a feed, it returns a List of entries,
35  * where each list element represents a single entry (post) in the XML feed.
36  *
37  * <p>An example of an Atom feed can be found at:
38  * http://en.wikipedia.org/w/index.php?title=Atom_(standard)&oldid=560239173#Example_of_an_Atom_1.0_feed
39  */
40 public class FeedParser {
41 
42     // Constants indicting XML element names that we're interested in
43     private static final int TAG_ID = 1;
44     private static final int TAG_TITLE = 2;
45     private static final int TAG_PUBLISHED = 3;
46     private static final int TAG_LINK = 4;
47 
48     // We don't use XML namespaces
49     private static final String ns = null;
50 
51     /** Parse an Atom feed, returning a collection of Entry objects.
52      *
53      * @param in Atom feed, as a stream.
54      * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
55      * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
56      * @throws java.io.IOException on I/O error.
57      */
parse(InputStream in)58     public List<Entry> parse(InputStream in)
59             throws XmlPullParserException, IOException, ParseException {
60         try {
61             XmlPullParser parser = Xml.newPullParser();
62             parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, false);
63             parser.setInput(in, null);
64             parser.nextTag();
65             return readFeed(parser);
66         } finally {
67             in.close();
68         }
69     }
70 
71     /**
72      * Decode a feed attached to an XmlPullParser.
73      *
74      * @param parser Incoming XMl
75      * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
76      * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
77      * @throws java.io.IOException on I/O error.
78      */
readFeed(XmlPullParser parser)79     private List<Entry> readFeed(XmlPullParser parser)
80             throws XmlPullParserException, IOException, ParseException {
81         List<Entry> entries = new ArrayList<Entry>();
82 
83         // Search for <feed> tags. These wrap the beginning/end of an Atom document.
84         //
85         // Example:
86         // <?xml version="1.0" encoding="utf-8"?>
87         // <feed xmlns="http://www.w3.org/2005/Atom">
88         // ...
89         // </feed>
90         parser.require(XmlPullParser.START_TAG, ns, "feed");
91         while (parser.next() != XmlPullParser.END_TAG) {
92             if (parser.getEventType() != XmlPullParser.START_TAG) {
93                 continue;
94             }
95             String name = parser.getName();
96             // Starts by looking for the <entry> tag. This tag repeates inside of <feed> for each
97             // article in the feed.
98             //
99             // Example:
100             // <entry>
101             //   <title>Article title</title>
102             //   <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
103             //   <link rel="edit" href="http://example.com/admin/article/1234"/>
104             //   <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
105             //   <published>2003-06-27T12:00:00Z</published>
106             //   <updated>2003-06-28T12:00:00Z</updated>
107             //   <summary>Article summary goes here.</summary>
108             //   <author>
109             //     <name>Rick Deckard</name>
110             //     <email>deckard@example.com</email>
111             //   </author>
112             // </entry>
113             if (name.equals("entry")) {
114                 entries.add(readEntry(parser));
115             } else {
116                 skip(parser);
117             }
118         }
119         return entries;
120     }
121 
122     /**
123      * Parses the contents of an entry. If it encounters a title, summary, or link tag, hands them
124      * off to their respective "read" methods for processing. Otherwise, skips the tag.
125      */
readEntry(XmlPullParser parser)126     private Entry readEntry(XmlPullParser parser)
127             throws XmlPullParserException, IOException, ParseException {
128         parser.require(XmlPullParser.START_TAG, ns, "entry");
129         String id = null;
130         String title = null;
131         String link = null;
132         long publishedOn = 0;
133 
134         while (parser.next() != XmlPullParser.END_TAG) {
135             if (parser.getEventType() != XmlPullParser.START_TAG) {
136                 continue;
137             }
138             String name = parser.getName();
139             if (name.equals("id")){
140                 // Example: <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
141                 id = readTag(parser, TAG_ID);
142             } else if (name.equals("title")) {
143                 // Example: <title>Article title</title>
144                 title = readTag(parser, TAG_TITLE);
145             } else if (name.equals("link")) {
146                 // Example: <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
147                 //
148                 // Multiple link types can be included. readAlternateLink() will only return
149                 // non-null when reading an "alternate"-type link. Ignore other responses.
150                 String tempLink = readTag(parser, TAG_LINK);
151                 if (tempLink != null) {
152                     link = tempLink;
153                 }
154             } else if (name.equals("published")) {
155                 // Example: <published>2003-06-27T12:00:00Z</published>
156                 Time t = new Time();
157                 t.parse3339(readTag(parser, TAG_PUBLISHED));
158                 publishedOn = t.toMillis(false);
159             } else {
160                 skip(parser);
161             }
162         }
163         return new Entry(id, title, link, publishedOn);
164     }
165 
166     /**
167      * Process an incoming tag and read the selected value from it.
168      */
readTag(XmlPullParser parser, int tagType)169     private String readTag(XmlPullParser parser, int tagType)
170             throws IOException, XmlPullParserException {
171         String tag = null;
172         String endTag = null;
173 
174         switch (tagType) {
175             case TAG_ID:
176                 return readBasicTag(parser, "id");
177             case TAG_TITLE:
178                 return readBasicTag(parser, "title");
179             case TAG_PUBLISHED:
180                 return readBasicTag(parser, "published");
181             case TAG_LINK:
182                 return readAlternateLink(parser);
183             default:
184                 throw new IllegalArgumentException("Unknown tag type: " + tagType);
185         }
186     }
187 
188     /**
189      * Reads the body of a basic XML tag, which is guaranteed not to contain any nested elements.
190      *
191      * <p>You probably want to call readTag().
192      *
193      * @param parser Current parser object
194      * @param tag XML element tag name to parse
195      * @return Body of the specified tag
196      * @throws java.io.IOException
197      * @throws org.xmlpull.v1.XmlPullParserException
198      */
readBasicTag(XmlPullParser parser, String tag)199     private String readBasicTag(XmlPullParser parser, String tag)
200             throws IOException, XmlPullParserException {
201         parser.require(XmlPullParser.START_TAG, ns, tag);
202         String result = readText(parser);
203         parser.require(XmlPullParser.END_TAG, ns, tag);
204         return result;
205     }
206 
207     /**
208      * Processes link tags in the feed.
209      */
readAlternateLink(XmlPullParser parser)210     private String readAlternateLink(XmlPullParser parser)
211             throws IOException, XmlPullParserException {
212         String link = null;
213         parser.require(XmlPullParser.START_TAG, ns, "link");
214         String tag = parser.getName();
215         String relType = parser.getAttributeValue(null, "rel");
216         if (relType.equals("alternate")) {
217             link = parser.getAttributeValue(null, "href");
218         }
219         while (true) {
220             if (parser.nextTag() == XmlPullParser.END_TAG) break;
221             // Intentionally break; consumes any remaining sub-tags.
222         }
223         return link;
224     }
225 
226     /**
227      * For the tags title and summary, extracts their text values.
228      */
readText(XmlPullParser parser)229     private String readText(XmlPullParser parser) throws IOException, XmlPullParserException {
230         String result = null;
231         if (parser.next() == XmlPullParser.TEXT) {
232             result = parser.getText();
233             parser.nextTag();
234         }
235         return result;
236     }
237 
238     /**
239      * Skips tags the parser isn't interested in. Uses depth to handle nested tags. i.e.,
240      * if the next tag after a START_TAG isn't a matching END_TAG, it keeps going until it
241      * finds the matching END_TAG (as indicated by the value of "depth" being 0).
242      */
skip(XmlPullParser parser)243     private void skip(XmlPullParser parser) throws XmlPullParserException, IOException {
244         if (parser.getEventType() != XmlPullParser.START_TAG) {
245             throw new IllegalStateException();
246         }
247         int depth = 1;
248         while (depth != 0) {
249             switch (parser.next()) {
250                 case XmlPullParser.END_TAG:
251                     depth--;
252                     break;
253                 case XmlPullParser.START_TAG:
254                     depth++;
255                     break;
256             }
257         }
258     }
259 
260     /**
261      * This class represents a single entry (post) in the XML feed.
262      *
263      * <p>It includes the data members "title," "link," and "summary."
264      */
265     public static class Entry {
266         public final String id;
267         public final String title;
268         public final String link;
269         public final long published;
270 
Entry(String id, String title, String link, long published)271         Entry(String id, String title, String link, long published) {
272             this.id = id;
273             this.title = title;
274             this.link = link;
275             this.published = published;
276         }
277     }
278 }
279