1 // Copyright (c) 2012 The WebM project authors. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS.  All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 
9 #ifndef WEBVTT_WEBVTTPARSER_H_
10 #define WEBVTT_WEBVTTPARSER_H_
11 
12 #include <list>
13 #include <string>
14 
15 namespace libwebvtt {
16 
// Abstract source of characters: implementations supply GetChar().
class Reader {
 public:
  // Fetches the next character from the stream; |c| is the argument
  // through which the character is handed back.
  // Return value:
  //   negative - an error occurred
  //   zero     - a character is available
  //   positive - end-of-stream
  virtual int GetChar(char* c) = 0;

 protected:
  // Protected, so outside code cannot destroy an object through a Reader*.
  virtual ~Reader();
};
27 
28 class LineReader : protected Reader {
29  public:
30   // Consume a line of text from the stream, stripping off
31   // the line terminator characters.  Returns negative if error,
32   // 0 on success, and positive at end-of-stream.
33   int GetLine(std::string* line);
34 
35  protected:
36   virtual ~LineReader();
37 
38   // Puts a character back into the stream.
39   virtual void UngetChar(char c) = 0;
40 };
41 
// Presentation values are expressed in milliseconds: a value of 1 means
// 0.001 seconds and a value of 1000 means 1 second.
typedef long long presentation_t;  // NOLINT

// A time value broken down into hours/minutes/seconds/milliseconds fields.
// No constructors are declared, so a default-initialized local Time has
// indeterminate field values until they are assigned.
struct Time {
  int hours;
  int minutes;
  int seconds;
  int milliseconds;

  // Relational comparisons between two Time values (defined out of line).
  bool operator==(const Time& rhs) const;
  bool operator<(const Time& rhs) const;
  bool operator>(const Time& rhs) const;
  bool operator<=(const Time& rhs) const;
  bool operator>=(const Time& rhs) const;

  // Getter/setter pair working in presentation_t units.
  // NOTE(review): presumably these convert between the four fields and a
  // single millisecond count -- confirm against the implementation.
  presentation_t presentation() const;
  Time& presentation(presentation_t value);

  // Arithmetic with a presentation_t operand; the compound forms return
  // a Time&, the binary form a new Time.
  // NOTE(review): presumably the operand is a millisecond offset -- confirm.
  Time& operator+=(presentation_t delta);
  Time operator+(presentation_t delta) const;

  Time& operator-=(presentation_t delta);

  // Subtracting another Time yields a presentation_t.
  // NOTE(review): presumably the difference in milliseconds -- confirm.
  presentation_t operator-(const Time& rhs) const;
};
68 
// One name/value pair. Cue::settings_t is a list of these.
struct Setting {
  std::string name;   // the setting's name
  std::string value;  // the text associated with |name|
};
73 
74 struct Cue {
75   std::string identifier;
76 
77   Time start_time;
78   Time stop_time;
79 
80   typedef std::list<Setting> settings_t;
81   settings_t settings;
82 
83   typedef std::list<std::string> payload_t;
84   payload_t payload;
85 };
86 
87 class Parser : private LineReader {
88  public:
89   explicit Parser(Reader* r);
90   virtual ~Parser();
91 
92   // Pre-parse enough of the stream to determine whether
93   // this is really a WEBVTT file. Returns 0 on success,
94   // negative if error.
95   int Init();
96 
97   // Parse the next WebVTT cue from the stream. Returns 0 if
98   // an entire cue was parsed, negative if error, and positive
99   // at end-of-stream.
100   int Parse(Cue* cue);
101 
102  private:
103   // Returns the next character in the stream, using the look-back character
104   // if present (as per Reader::GetChar).
105   virtual int GetChar(char* c);
106 
107   // Puts a character back into the stream (as per LineReader::UngetChar).
108   virtual void UngetChar(char c);
109 
110   // Check for presence of a UTF-8 BOM in the stream.  Returns
111   // negative if error, 0 on success, and positive at end-of-stream.
112   int ParseBOM();
113 
114   // Parse the distinguished "cue timings" line, which includes the start
115   // and stop times and settings.  Argument |line| contains the complete
116   // line of text (as returned by ParseLine()), which the function is free
117   // to modify as it sees fit, to facilitate scanning.  Argument |arrow_pos|
118   // is the offset of the arrow token ("-->"), which indicates that this is
119   // the timings line.  Returns negative if error, 0 on success.
120   //
121   static int ParseTimingsLine(std::string* line,
122                               std::string::size_type arrow_pos,
123                               Time* start_time, Time* stop_time,
124                               Cue::settings_t* settings);
125 
126   // Parse a single time specifier (from the timings line), starting
127   // at the given offset; lexical scanning stops when a NUL character
128   // is detected. The function modifies offset |off| by the number of
129   // characters consumed.  Returns negative if error, 0 on success.
130   //
131   static int ParseTime(const std::string& line, std::string::size_type* off,
132                        Time* time);
133 
134   // Parse the cue settings from the timings line, starting at the
135   // given offset.  Returns negative if error, 0 on success.
136   //
137   static int ParseSettings(const std::string& line, std::string::size_type off,
138                            Cue::settings_t* settings);
139 
140   // Parse a non-negative integer from the characters in |line| beginning
141   // at offset |off|.  The function increments |off| by the number
142   // of characters consumed.  Returns the value, or negative if error.
143   //
144   static int ParseNumber(const std::string& line, std::string::size_type* off);
145 
146   Reader* const reader_;
147 
148   // Provides one character's worth of look-back, to facilitate scanning.
149   int unget_;
150 
151   // Disable copy ctor and copy assign for Parser.
152   Parser(const Parser&);
153   Parser& operator=(const Parser&);
154 };
155 
156 }  // namespace libwebvtt
157 
158 #endif  // WEBVTT_WEBVTTPARSER_H_
159