1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 #include "webm/webm_parser.h"
9
10 #include <cassert>
11 #include <cstdint>
12
13 #include "src/ebml_parser.h"
14 #include "src/master_parser.h"
15 #include "src/segment_parser.h"
16 #include "src/unknown_parser.h"
17 #include "webm/element.h"
18
19 namespace webm {
20
21 // Parses WebM EBML documents (i.e. level-0 WebM elements).
22 class WebmParser::DocumentParser {
23 public:
24 // Resets the parser after a seek to a new position in the reader.
DidSeek()25 void DidSeek() {
26 PrepareForNextChild();
27 did_seek_ = true;
28 state_ = State::kBegin;
29 }
30
31 // Feeds the parser; will return Status::kOkCompleted when the reader returns
32 // Status::kEndOfFile, but only if the parser has already completed parsing
33 // its child elements.
Feed(Callback * callback,Reader * reader)34 Status Feed(Callback* callback, Reader* reader) {
35 assert(callback != nullptr);
36 assert(reader != nullptr);
37
38 Callback* const original_callback = callback;
39 if (action_ == Action::kSkip) {
40 callback = &skip_callback_;
41 }
42
43 Status status;
44 std::uint64_t num_bytes_read;
45 while (true) {
46 switch (state_) {
47 case State::kBegin: {
48 child_metadata_.header_size = 0;
49 child_metadata_.position = reader->Position();
50 state_ = State::kReadingChildId;
51 continue;
52 }
53
54 case State::kReadingChildId: {
55 assert(child_parser_ == nullptr);
56 status = id_parser_.Feed(callback, reader, &num_bytes_read);
57 child_metadata_.header_size += num_bytes_read;
58 if (!status.completed_ok()) {
59 if (status.code == Status::kEndOfFile &&
60 reader->Position() == child_metadata_.position) {
61 state_ = State::kEndReached;
62 continue;
63 }
64 return status;
65 }
66 state_ = State::kReadingChildSize;
67 continue;
68 }
69
70 case State::kReadingChildSize: {
71 assert(child_parser_ == nullptr);
72 status = size_parser_.Feed(callback, reader, &num_bytes_read);
73 child_metadata_.header_size += num_bytes_read;
74 if (!status.completed_ok()) {
75 return status;
76 }
77 child_metadata_.id = id_parser_.id();
78 child_metadata_.size = size_parser_.size();
79 state_ = State::kValidatingChildSize;
80 continue;
81 }
82
83 case State::kValidatingChildSize: {
84 assert(child_parser_ == nullptr);
85
86 if (child_metadata_.id == Id::kSegment) {
87 child_parser_ = &segment_parser_;
88 did_seek_ = false;
89 state_ = State::kGettingAction;
90 continue;
91 } else if (child_metadata_.id == Id::kEbml) {
92 child_parser_ = &ebml_parser_;
93 did_seek_ = false;
94 state_ = State::kGettingAction;
95 continue;
96 }
97
98 Ancestory ancestory;
99 if (did_seek_ && Ancestory::ById(child_metadata_.id, &ancestory)) {
100 assert(!ancestory.empty());
101 assert(ancestory.id() == Id::kSegment ||
102 ancestory.id() == Id::kEbml);
103
104 if (ancestory.id() == Id::kSegment) {
105 child_parser_ = &segment_parser_;
106 } else {
107 child_parser_ = &ebml_parser_;
108 }
109
110 child_parser_->InitAfterSeek(ancestory.next(), child_metadata_);
111 child_metadata_.id = ancestory.id();
112 child_metadata_.header_size = kUnknownHeaderSize;
113 child_metadata_.size = kUnknownElementSize;
114 child_metadata_.position = kUnknownElementPosition;
115 did_seek_ = false;
116 action_ = Action::kRead;
117 state_ = State::kReadingChildBody;
118 continue;
119 }
120
121 if (child_metadata_.id == Id::kVoid) {
122 child_parser_ = &void_parser_;
123 } else {
124 if (child_metadata_.size == kUnknownElementSize) {
125 return Status(Status::kIndefiniteUnknownElement);
126 }
127 child_parser_ = &unknown_parser_;
128 }
129 state_ = State::kGettingAction;
130 continue;
131 }
132
133 case State::kGettingAction: {
134 assert(child_parser_ != nullptr);
135 status = callback->OnElementBegin(child_metadata_, &action_);
136 if (!status.completed_ok()) {
137 return status;
138 }
139
140 if (action_ == Action::kSkip) {
141 callback = &skip_callback_;
142 if (child_metadata_.size != kUnknownElementSize) {
143 child_parser_ = &skip_parser_;
144 }
145 }
146 state_ = State::kInitializingChildParser;
147 continue;
148 }
149
150 case State::kInitializingChildParser: {
151 assert(child_parser_ != nullptr);
152 status = child_parser_->Init(child_metadata_, child_metadata_.size);
153 if (!status.completed_ok()) {
154 return status;
155 }
156 state_ = State::kReadingChildBody;
157 continue;
158 }
159
160 case State::kReadingChildBody: {
161 assert(child_parser_ != nullptr);
162 status = child_parser_->Feed(callback, reader, &num_bytes_read);
163 if (!status.completed_ok()) {
164 return status;
165 }
166 if (child_parser_->GetCachedMetadata(&child_metadata_)) {
167 state_ = State::kValidatingChildSize;
168 } else {
169 child_metadata_.header_size = 0;
170 state_ = State::kReadingChildId;
171 }
172 PrepareForNextChild();
173 callback = original_callback;
174 child_metadata_.position = reader->Position();
175 continue;
176 }
177
178 case State::kEndReached: {
179 return Status(Status::kOkCompleted);
180 }
181 }
182 }
183 }
184
185 private:
186 // Parsing states for the finite-state machine.
187 enum class State {
188 /* clang-format off */
189 // State Transitions to state When
190 kBegin, // kReadingChildId done
191 kReadingChildId, // kReadingChildSize done
192 // kEndReached EOF
193 kReadingChildSize, // kValidatingChildSize done
194 kValidatingChildSize, // kGettingAction done
195 kGettingAction, // kInitializingChildParser done
196 kInitializingChildParser, // kReadingChildBody done
197 kReadingChildBody, // kValidatingChildSize cached metadata
198 // kReadingChildId otherwise
199 kEndReached, // No transitions from here
200 /* clang-format on */
201 };
202
203 // The parser for parsing child element Ids.
204 IdParser id_parser_;
205
206 // The parser for parsing child element sizes.
207 SizeParser size_parser_;
208
209 // The parser for Id::kEbml elements.
210 EbmlParser ebml_parser_;
211
212 // The parser for Id::kSegment child elements.
213 SegmentParser segment_parser_;
214
215 // The parser for Id::kVoid child elements.
216 VoidParser void_parser_;
217
218 // The parser used when skipping elements (if the element's size is known).
219 SkipParser skip_parser_;
220
221 // The parser used for unknown children.
222 UnknownParser unknown_parser_;
223
224 // The callback used when skipping elements.
225 SkipCallback skip_callback_;
226
227 // The parser that is parsing the current child element.
228 ElementParser* child_parser_ = nullptr;
229
230 // Metadata for the current child being parsed.
231 ElementMetadata child_metadata_ = {};
232
233 // Action for the current child being parsed.
234 Action action_ = Action::kRead;
235
236 // True if a seek was performed and the parser needs to handle it.
237 bool did_seek_ = false;
238
239 // The current state of the finite state machine.
240 State state_ = State::kBegin;
241
242 // Resets state in preparation for parsing a child element.
PrepareForNextChild()243 void PrepareForNextChild() {
244 id_parser_ = {};
245 size_parser_ = {};
246 child_parser_ = nullptr;
247 action_ = Action::kRead;
248 }
249 };
250
251 // We have to explicitly declare a destructor (even if it's just defaulted)
252 // because using the pimpl idiom with std::unique_ptr requires it. See Herb
253 // Sutter's GotW #100 for further explanation.
254 WebmParser::~WebmParser() = default;
255
WebmParser()256 WebmParser::WebmParser() : parser_(new DocumentParser) {}
257
DidSeek()258 void WebmParser::DidSeek() {
259 parser_->DidSeek();
260 parsing_status_ = Status(Status::kOkPartial);
261 }
262
Feed(Callback * callback,Reader * reader)263 Status WebmParser::Feed(Callback* callback, Reader* reader) {
264 assert(callback != nullptr);
265 assert(reader != nullptr);
266
267 if (parsing_status_.is_parsing_error()) {
268 return parsing_status_;
269 }
270 parsing_status_ = parser_->Feed(callback, reader);
271 return parsing_status_;
272 }
273
Swap(WebmParser * other)274 void WebmParser::Swap(WebmParser* other) {
275 assert(other != nullptr);
276 parser_.swap(other->parser_);
277 std::swap(parsing_status_, other->parsing_status_);
278 }
279
swap(WebmParser & left,WebmParser & right)280 void swap(WebmParser& left, WebmParser& right) { left.Swap(&right); }
281
282 } // namespace webm
283