1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS.  All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 #include "webm/webm_parser.h"
9 
10 #include <cassert>
11 #include <cstdint>
12 
13 #include "src/ebml_parser.h"
14 #include "src/master_parser.h"
15 #include "src/segment_parser.h"
16 #include "src/unknown_parser.h"
17 #include "webm/element.h"
18 
19 namespace webm {
20 
21 // Parses WebM EBML documents (i.e. level-0 WebM elements).
22 class WebmParser::DocumentParser {
23  public:
24   // Resets the parser after a seek to a new position in the reader.
DidSeek()25   void DidSeek() {
26     PrepareForNextChild();
27     did_seek_ = true;
28     state_ = State::kBegin;
29   }
30 
31   // Feeds the parser; will return Status::kOkCompleted when the reader returns
32   // Status::kEndOfFile, but only if the parser has already completed parsing
33   // its child elements.
Feed(Callback * callback,Reader * reader)34   Status Feed(Callback* callback, Reader* reader) {
35     assert(callback != nullptr);
36     assert(reader != nullptr);
37 
38     Callback* const original_callback = callback;
39     if (action_ == Action::kSkip) {
40       callback = &skip_callback_;
41     }
42 
43     Status status;
44     std::uint64_t num_bytes_read;
45     while (true) {
46       switch (state_) {
47         case State::kBegin: {
48           child_metadata_.header_size = 0;
49           child_metadata_.position = reader->Position();
50           state_ = State::kReadingChildId;
51           continue;
52         }
53 
54         case State::kReadingChildId: {
55           assert(child_parser_ == nullptr);
56           status = id_parser_.Feed(callback, reader, &num_bytes_read);
57           child_metadata_.header_size += num_bytes_read;
58           if (!status.completed_ok()) {
59             if (status.code == Status::kEndOfFile &&
60                 reader->Position() == child_metadata_.position) {
61               state_ = State::kEndReached;
62               continue;
63             }
64             return status;
65           }
66           state_ = State::kReadingChildSize;
67           continue;
68         }
69 
70         case State::kReadingChildSize: {
71           assert(child_parser_ == nullptr);
72           status = size_parser_.Feed(callback, reader, &num_bytes_read);
73           child_metadata_.header_size += num_bytes_read;
74           if (!status.completed_ok()) {
75             return status;
76           }
77           child_metadata_.id = id_parser_.id();
78           child_metadata_.size = size_parser_.size();
79           state_ = State::kValidatingChildSize;
80           continue;
81         }
82 
83         case State::kValidatingChildSize: {
84           assert(child_parser_ == nullptr);
85 
86           if (child_metadata_.id == Id::kSegment) {
87             child_parser_ = &segment_parser_;
88             did_seek_ = false;
89             state_ = State::kGettingAction;
90             continue;
91           } else if (child_metadata_.id == Id::kEbml) {
92             child_parser_ = &ebml_parser_;
93             did_seek_ = false;
94             state_ = State::kGettingAction;
95             continue;
96           }
97 
98           Ancestory ancestory;
99           if (did_seek_ && Ancestory::ById(child_metadata_.id, &ancestory)) {
100             assert(!ancestory.empty());
101             assert(ancestory.id() == Id::kSegment ||
102                    ancestory.id() == Id::kEbml);
103 
104             if (ancestory.id() == Id::kSegment) {
105               child_parser_ = &segment_parser_;
106             } else {
107               child_parser_ = &ebml_parser_;
108             }
109 
110             child_parser_->InitAfterSeek(ancestory.next(), child_metadata_);
111             child_metadata_.id = ancestory.id();
112             child_metadata_.header_size = kUnknownHeaderSize;
113             child_metadata_.size = kUnknownElementSize;
114             child_metadata_.position = kUnknownElementPosition;
115             did_seek_ = false;
116             action_ = Action::kRead;
117             state_ = State::kReadingChildBody;
118             continue;
119           }
120 
121           if (child_metadata_.id == Id::kVoid) {
122             child_parser_ = &void_parser_;
123           } else {
124             if (child_metadata_.size == kUnknownElementSize) {
125               return Status(Status::kIndefiniteUnknownElement);
126             }
127             child_parser_ = &unknown_parser_;
128           }
129           state_ = State::kGettingAction;
130           continue;
131         }
132 
133         case State::kGettingAction: {
134           assert(child_parser_ != nullptr);
135           status = callback->OnElementBegin(child_metadata_, &action_);
136           if (!status.completed_ok()) {
137             return status;
138           }
139 
140           if (action_ == Action::kSkip) {
141             callback = &skip_callback_;
142             if (child_metadata_.size != kUnknownElementSize) {
143               child_parser_ = &skip_parser_;
144             }
145           }
146           state_ = State::kInitializingChildParser;
147           continue;
148         }
149 
150         case State::kInitializingChildParser: {
151           assert(child_parser_ != nullptr);
152           status = child_parser_->Init(child_metadata_, child_metadata_.size);
153           if (!status.completed_ok()) {
154             return status;
155           }
156           state_ = State::kReadingChildBody;
157           continue;
158         }
159 
160         case State::kReadingChildBody: {
161           assert(child_parser_ != nullptr);
162           status = child_parser_->Feed(callback, reader, &num_bytes_read);
163           if (!status.completed_ok()) {
164             return status;
165           }
166           if (child_parser_->GetCachedMetadata(&child_metadata_)) {
167             state_ = State::kValidatingChildSize;
168           } else {
169             child_metadata_.header_size = 0;
170             state_ = State::kReadingChildId;
171           }
172           PrepareForNextChild();
173           callback = original_callback;
174           child_metadata_.position = reader->Position();
175           continue;
176         }
177 
178         case State::kEndReached: {
179           return Status(Status::kOkCompleted);
180         }
181       }
182     }
183   }
184 
185  private:
186   // Parsing states for the finite-state machine.
187   enum class State {
188     /* clang-format off */
189     // State                      Transitions to state      When
190     kBegin,                    // kReadingChildId           done
191     kReadingChildId,           // kReadingChildSize         done
192                                // kEndReached               EOF
193     kReadingChildSize,         // kValidatingChildSize      done
194     kValidatingChildSize,      // kGettingAction            done
195     kGettingAction,            // kInitializingChildParser  done
196     kInitializingChildParser,  // kReadingChildBody         done
197     kReadingChildBody,         // kValidatingChildSize      cached metadata
198                                // kReadingChildId           otherwise
199     kEndReached,               // No transitions from here
200     /* clang-format on */
201   };
202 
203   // The parser for parsing child element Ids.
204   IdParser id_parser_;
205 
206   // The parser for parsing child element sizes.
207   SizeParser size_parser_;
208 
209   // The parser for Id::kEbml elements.
210   EbmlParser ebml_parser_;
211 
212   // The parser for Id::kSegment child elements.
213   SegmentParser segment_parser_;
214 
215   // The parser for Id::kVoid child elements.
216   VoidParser void_parser_;
217 
218   // The parser used when skipping elements (if the element's size is known).
219   SkipParser skip_parser_;
220 
221   // The parser used for unknown children.
222   UnknownParser unknown_parser_;
223 
224   // The callback used when skipping elements.
225   SkipCallback skip_callback_;
226 
227   // The parser that is parsing the current child element.
228   ElementParser* child_parser_ = nullptr;
229 
230   // Metadata for the current child being parsed.
231   ElementMetadata child_metadata_ = {};
232 
233   // Action for the current child being parsed.
234   Action action_ = Action::kRead;
235 
236   // True if a seek was performed and the parser needs to handle it.
237   bool did_seek_ = false;
238 
239   // The current state of the finite state machine.
240   State state_ = State::kBegin;
241 
242   // Resets state in preparation for parsing a child element.
PrepareForNextChild()243   void PrepareForNextChild() {
244     id_parser_ = {};
245     size_parser_ = {};
246     child_parser_ = nullptr;
247     action_ = Action::kRead;
248   }
249 };
250 
251 // We have to explicitly declare a destructor (even if it's just defaulted)
252 // because using the pimpl idiom with std::unique_ptr requires it. See Herb
253 // Sutter's GotW #100 for further explanation.
254 WebmParser::~WebmParser() = default;
255 
WebmParser()256 WebmParser::WebmParser() : parser_(new DocumentParser) {}
257 
DidSeek()258 void WebmParser::DidSeek() {
259   parser_->DidSeek();
260   parsing_status_ = Status(Status::kOkPartial);
261 }
262 
Feed(Callback * callback,Reader * reader)263 Status WebmParser::Feed(Callback* callback, Reader* reader) {
264   assert(callback != nullptr);
265   assert(reader != nullptr);
266 
267   if (parsing_status_.is_parsing_error()) {
268     return parsing_status_;
269   }
270   parsing_status_ = parser_->Feed(callback, reader);
271   return parsing_status_;
272 }
273 
Swap(WebmParser * other)274 void WebmParser::Swap(WebmParser* other) {
275   assert(other != nullptr);
276   parser_.swap(other->parser_);
277   std::swap(parsing_status_, other->parsing_status_);
278 }
279 
swap(WebmParser & left,WebmParser & right)280 void swap(WebmParser& left, WebmParser& right) { left.Swap(&right); }
281 
282 }  // namespace webm
283