// Copyright (c) 2016 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #include "webm/webm_parser.h" #include #include #include "src/ebml_parser.h" #include "src/master_parser.h" #include "src/segment_parser.h" #include "src/unknown_parser.h" #include "webm/element.h" namespace webm { // Parses WebM EBML documents (i.e. level-0 WebM elements). class WebmParser::DocumentParser { public: // Resets the parser after a seek to a new position in the reader. void DidSeek() { PrepareForNextChild(); did_seek_ = true; state_ = State::kBegin; } // Feeds the parser; will return Status::kOkCompleted when the reader returns // Status::kEndOfFile, but only if the parser has already completed parsing // its child elements. Status Feed(Callback* callback, Reader* reader) { assert(callback != nullptr); assert(reader != nullptr); Callback* const original_callback = callback; if (action_ == Action::kSkip) { callback = &skip_callback_; } Status status; std::uint64_t num_bytes_read; while (true) { switch (state_) { case State::kBegin: { child_metadata_.header_size = 0; child_metadata_.position = reader->Position(); state_ = State::kReadingChildId; continue; } case State::kReadingChildId: { assert(child_parser_ == nullptr); status = id_parser_.Feed(callback, reader, &num_bytes_read); child_metadata_.header_size += num_bytes_read; if (!status.completed_ok()) { if (status.code == Status::kEndOfFile && reader->Position() == child_metadata_.position) { state_ = State::kEndReached; continue; } return status; } state_ = State::kReadingChildSize; continue; } case State::kReadingChildSize: { assert(child_parser_ == nullptr); status = size_parser_.Feed(callback, reader, &num_bytes_read); child_metadata_.header_size += num_bytes_read; if (!status.completed_ok()) { return status; } child_metadata_.id = id_parser_.id(); child_metadata_.size = size_parser_.size(); state_ = State::kValidatingChildSize; continue; } case State::kValidatingChildSize: { assert(child_parser_ == nullptr); if (child_metadata_.id == Id::kSegment) { child_parser_ = &segment_parser_; did_seek_ = false; state_ = State::kGettingAction; continue; } else if (child_metadata_.id == Id::kEbml) { child_parser_ = &ebml_parser_; did_seek_ = false; state_ = State::kGettingAction; continue; } Ancestory ancestory; if (did_seek_ && Ancestory::ById(child_metadata_.id, &ancestory)) { assert(!ancestory.empty()); assert(ancestory.id() == Id::kSegment || ancestory.id() == Id::kEbml); if (ancestory.id() == Id::kSegment) { child_parser_ = &segment_parser_; } else { child_parser_ = &ebml_parser_; } child_parser_->InitAfterSeek(ancestory.next(), child_metadata_); child_metadata_.id = ancestory.id(); child_metadata_.header_size = kUnknownHeaderSize; child_metadata_.size = kUnknownElementSize; child_metadata_.position = kUnknownElementPosition; did_seek_ = false; action_ = Action::kRead; state_ = State::kReadingChildBody; continue; } if (child_metadata_.id == Id::kVoid) { child_parser_ = &void_parser_; } else { if (child_metadata_.size == kUnknownElementSize) { return Status(Status::kIndefiniteUnknownElement); } child_parser_ = &unknown_parser_; } state_ = State::kGettingAction; continue; } case State::kGettingAction: { assert(child_parser_ != nullptr); status = callback->OnElementBegin(child_metadata_, &action_); if (!status.completed_ok()) { return status; } if (action_ == Action::kSkip) { callback = &skip_callback_; if (child_metadata_.size != kUnknownElementSize) { child_parser_ = &skip_parser_; } } state_ = State::kInitializingChildParser; continue; } case State::kInitializingChildParser: { assert(child_parser_ != nullptr); status = child_parser_->Init(child_metadata_, child_metadata_.size); if (!status.completed_ok()) { return status; } state_ = State::kReadingChildBody; continue; } case State::kReadingChildBody: { assert(child_parser_ != nullptr); status = child_parser_->Feed(callback, reader, &num_bytes_read); if (!status.completed_ok()) { return status; } if (child_parser_->GetCachedMetadata(&child_metadata_)) { state_ = State::kValidatingChildSize; } else { child_metadata_.header_size = 0; state_ = State::kReadingChildId; } PrepareForNextChild(); callback = original_callback; child_metadata_.position = reader->Position(); continue; } case State::kEndReached: { return Status(Status::kOkCompleted); } } } } private: // Parsing states for the finite-state machine. enum class State { /* clang-format off */ // State Transitions to state When kBegin, // kReadingChildId done kReadingChildId, // kReadingChildSize done // kEndReached EOF kReadingChildSize, // kValidatingChildSize done kValidatingChildSize, // kGettingAction done kGettingAction, // kInitializingChildParser done kInitializingChildParser, // kReadingChildBody done kReadingChildBody, // kValidatingChildSize cached metadata // kReadingChildId otherwise kEndReached, // No transitions from here /* clang-format on */ }; // The parser for parsing child element Ids. IdParser id_parser_; // The parser for parsing child element sizes. SizeParser size_parser_; // The parser for Id::kEbml elements. EbmlParser ebml_parser_; // The parser for Id::kSegment child elements. SegmentParser segment_parser_; // The parser for Id::kVoid child elements. VoidParser void_parser_; // The parser used when skipping elements (if the element's size is known). SkipParser skip_parser_; // The parser used for unknown children. UnknownParser unknown_parser_; // The callback used when skipping elements. SkipCallback skip_callback_; // The parser that is parsing the current child element. ElementParser* child_parser_ = nullptr; // Metadata for the current child being parsed. ElementMetadata child_metadata_ = {}; // Action for the current child being parsed. Action action_ = Action::kRead; // True if a seek was performed and the parser needs to handle it. bool did_seek_ = false; // The current state of the finite state machine. State state_ = State::kBegin; // Resets state in preparation for parsing a child element. void PrepareForNextChild() { id_parser_ = {}; size_parser_ = {}; child_parser_ = nullptr; action_ = Action::kRead; } }; // We have to explicitly declare a destructor (even if it's just defaulted) // because using the pimpl idiom with std::unique_ptr requires it. See Herb // Sutter's GotW #100 for further explanation. WebmParser::~WebmParser() = default; WebmParser::WebmParser() : parser_(new DocumentParser) {} void WebmParser::DidSeek() { parser_->DidSeek(); parsing_status_ = Status(Status::kOkPartial); } Status WebmParser::Feed(Callback* callback, Reader* reader) { assert(callback != nullptr); assert(reader != nullptr); if (parsing_status_.is_parsing_error()) { return parsing_status_; } parsing_status_ = parser_->Feed(callback, reader); return parsing_status_; } void WebmParser::Swap(WebmParser* other) { assert(other != nullptr); parser_.swap(other->parser_); std::swap(parsing_status_, other->parsing_status_); } void swap(WebmParser& left, WebmParser& right) { left.Swap(&right); } } // namespace webm