1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS.  All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 #ifndef SRC_MASTER_PARSER_H_
9 #define SRC_MASTER_PARSER_H_
10 
11 #include <cassert>
12 #include <cstdint>
13 #include <functional>
14 #include <memory>
15 #include <type_traits>
16 #include <unordered_map>
17 #include <utility>
18 
19 #include "src/element_parser.h"
20 #include "src/id_parser.h"
21 #include "src/size_parser.h"
22 #include "src/skip_parser.h"
23 #include "src/unknown_parser.h"
24 #include "src/void_parser.h"
25 #include "webm/callback.h"
26 #include "webm/element.h"
27 #include "webm/id.h"
28 #include "webm/reader.h"
29 #include "webm/status.h"
30 
31 namespace webm {
32 
33 // A general purpose parser for EBML master elements.
34 //
35 // For example, if a document specification defines a Foo master element that
36 // has two boolean children (Bar and Baz), then a FooParser capable of parsing
37 // the Foo master element could be defined as follows:
38 //
39 // struct FooParser : public MasterParser {
40 //   FooParser()
41 //       : MasterParser(MakeChild<BoolParser>(Id::kBar),
42 //                      MakeChild<BoolParser>(Id::kBaz)) {}
43 // };
44 //
45 // See the MasterValueParser for an alternative class for parsing master
46 // elements into a data structure.
47 class MasterParser : public ElementParser {
48  public:
49   // Constructs a new MasterParser that uses the given
50   // {Id, std::unique_ptr<ElementParser>} pairs to map child IDs to the
51   // appropriate parser/handler. Each argument must be of type
52   // std::pair<Id, std::unique_ptr<ElementParser>>. If a parser is not
53   // explicitly provided for Id::kVoid, a VoidParser will automatically be used
54   // for it.
55   //
56   // Initializer lists don't support move-only types (i.e. std::unique_ptr), so
57   // instead a variadic template is used.
58   template <typename... T>
MasterParser(T &&...parser_pairs)59   explicit MasterParser(T&&... parser_pairs) {
60     // Prefer an odd reserve size. This makes libc++ use a prime number for the
61     // bucket count. Otherwise, if it happens to be a power of 2, then libc++
62     // will use a power-of-2 bucket count (and since Matroska EBML IDs have low
63     // entropy in the low bits, there will be a lot of collisions). libstdc++
64     // always prefers a prime bucket count. I'm not sure how MSVC or others are
65     // implemented, but this shouldn't adversely affect them even if they are
66     // implemented differently. Add one to the count because we'll likely need
67     // to insert a parser for Id::kVoid.
68     parsers_.reserve((sizeof...(T) + 1) | 1);
69 
70     // This dummy initializer list is just used to force the parameter pack to
71     // be expanded, which turns the expression into a for-each "loop" that
72     // inserts each argument into the map.
73     auto dummy = {0, (InsertParser(std::forward<T>(parser_pairs)), 0)...};
74     (void)dummy;  // Silence unused variable warning.
75 
76     if (parsers_.find(Id::kVoid) == parsers_.end()) {
77       InsertParser(MakeChild<VoidParser>(Id::kVoid));
78     }
79   }
80 
81   MasterParser(const MasterParser&) = delete;
82   MasterParser& operator=(const MasterParser&) = delete;
83 
84   Status Init(const ElementMetadata& metadata, std::uint64_t max_size) override;
85 
86   void InitAfterSeek(const Ancestory& child_ancestory,
87                      const ElementMetadata& child_metadata) override;
88 
89   Status Feed(Callback* callback, Reader* reader,
90               std::uint64_t* num_bytes_read) override;
91 
92   bool GetCachedMetadata(ElementMetadata* metadata) override;
93 
header_size()94   std::uint32_t header_size() const { return header_size_; }
95 
96   // Gets the size of this element. May be called before the parse is fully
97   // complete (but only after Init() has already been called and successfully
98   // returned).
size()99   std::uint64_t size() const { return my_size_; }
100 
101   // Gets absolute byte position of the start of the element in the byte stream.
102   // May be called before the parse is fully complete (but only after Init() has
103   // already been called and successfully returned).
position()104   std::uint64_t position() const { return my_position_; }
105 
106   // Gets the metadata for the child that is currently being parsed. This may
107   // only be called while the child's body (not its header information like ID
108   // and size) is being parsed.
child_metadata()109   const ElementMetadata& child_metadata() const {
110     assert(state_ == State::kValidatingChildSize ||
111            state_ == State::kGettingAction ||
112            state_ == State::kInitializingChildParser ||
113            state_ == State::kReadingChildBody);
114     return child_metadata_;
115   }
116 
117  protected:
118   // Allocates a new parser of type T, forwarding args to the constructor, and
119   // creates a std::pair<Id, std::unique_ptr<ElementParser>> using the given id
120   // and the allocated parser.
121   template <typename T, typename... Args>
MakeChild(Id id,Args &&...args)122   static std::pair<Id, std::unique_ptr<ElementParser>> MakeChild(
123       Id id, Args&&... args) {
124     std::unique_ptr<ElementParser> ptr(new T(std::forward<Args>(args)...));
125     return std::pair<Id, std::unique_ptr<ElementParser>>(id, std::move(ptr));
126   }
127 
128  private:
129   // Parsing states for the finite-state machine.
130   enum class State {
131     /* clang-format off */
132     // State                      Transitions to state      When
133     kFirstReadOfChildId,       // kFinishingReadingChildId  size(id)  > 1
134                                // kReadingChildSize         size(id) == 1
135                                // kEndReached               EOF
136     kFinishingReadingChildId,  // kReadingChildSize         done
137     kReadingChildSize,         // kValidatingChildSize      done
138     kValidatingChildSize,      // kGettingAction            done
139                                // kEndReached               unknown id & unsized
140     kGettingAction,            // kInitializingChildParser  done
141     kInitializingChildParser,  // kReadingChildBody         done
142     kReadingChildBody,         // kChildFullyParsed         child parse done
143     kChildFullyParsed,         // kValidatingChildSize      cached metadata
144                                // kFirstReadOfChildId       read  < my_size_
145                                // kEndReached               read == my_size_
146     kEndReached,               // No transitions from here (must call Init)
147     /* clang-format on */
148   };
149 
150   using StdHashId = std::hash<std::underlying_type<Id>::type>;
151 
152   // Hash functor for hashing Id enums for storage in std::unordered_map.
153   struct IdHash : StdHashId {
154     // Type aliases for conforming to the std::hash interface.
155     using argument_type = Id;
156     using result_type = StdHashId::result_type;
157 
158     // Returns the hash of the given id.
operatorIdHash159     result_type operator()(argument_type id) const {
160       return StdHashId::operator()(static_cast<StdHashId::argument_type>(id));
161     }
162   };
163 
164   // The parser for parsing element Ids.
165   IdParser id_parser_;
166 
167   // The parser for parsing element sizes.
168   SizeParser size_parser_;
169 
170   // Metadata for the child element that is currently being parsed.
171   ElementMetadata child_metadata_;
172 
173   // Maps child IDs to the appropriate parser that can handle that child.
174   std::unordered_map<Id, std::unique_ptr<ElementParser>, IdHash> parsers_;
175 
176   // The parser that is used to parse unknown children.
177   UnknownParser unknown_parser_;
178 
179   // The parser that is used to skip over children.
180   SkipParser skip_parser_;
181 
182   // The parser that is being used to parse the current child. This must be null
183   // or a pointer in parsers_.
184   ElementParser* child_parser_;
185 
186   // The current parsing action for the child that is currently being parsed.
187   Action action_ = Action::kRead;
188 
189   // The current state of the parser.
190   State state_;
191 
192   std::uint32_t header_size_;
193 
194   // The size of this element.
195   std::uint64_t my_size_;
196 
197   std::uint64_t my_position_;
198 
199   std::uint64_t max_size_;
200 
201   // The total number of bytes read by this parser.
202   std::uint64_t total_bytes_read_;
203 
204   // Set to true if parsing has completed and this parser consumed an extra
205   // element header (ID and size) that wasn't from a child.
206   bool has_cached_metadata_ = false;
207 
208   // Inserts the parser into the parsers_ map and asserts it is the only parser
209   // registers to parse the corresponding Id.
210   template <typename T>
InsertParser(T && parser)211   void InsertParser(T&& parser) {
212     bool inserted = parsers_.insert(std::forward<T>(parser)).second;
213     (void)inserted;  // Silence unused variable warning.
214     assert(inserted);  // Make sure there aren't duplicates.
215   }
216 
217   // Common initialization logic for Init/InitAfterseek.
218   void InitSetup(std::uint32_t header_size, std::uint64_t size_in_bytes,
219                  std::uint64_t position);
220 
221   // Resets the internal parsers in preparation for parsing the next child.
222   void PrepareForNextChild();
223 };
224 
225 }  // namespace webm
226 
227 #endif  // SRC_MASTER_PARSER_H_
228