1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.android.exoplayer2.text.webvtt;
17 
18 import static com.google.android.exoplayer2.text.span.SpanUtil.addOrReplaceSpan;
19 import static java.lang.annotation.RetentionPolicy.SOURCE;
20 
21 import android.graphics.Color;
22 import android.graphics.Typeface;
23 import android.text.Layout;
24 import android.text.SpannableStringBuilder;
25 import android.text.Spanned;
26 import android.text.SpannedString;
27 import android.text.TextUtils;
28 import android.text.style.AbsoluteSizeSpan;
29 import android.text.style.AlignmentSpan;
30 import android.text.style.BackgroundColorSpan;
31 import android.text.style.ForegroundColorSpan;
32 import android.text.style.RelativeSizeSpan;
33 import android.text.style.StrikethroughSpan;
34 import android.text.style.StyleSpan;
35 import android.text.style.TypefaceSpan;
36 import android.text.style.UnderlineSpan;
37 import androidx.annotation.IntDef;
38 import androidx.annotation.Nullable;
39 import com.google.android.exoplayer2.text.Cue;
40 import com.google.android.exoplayer2.text.span.HorizontalTextInVerticalContextSpan;
41 import com.google.android.exoplayer2.text.span.RubySpan;
42 import com.google.android.exoplayer2.util.Assertions;
43 import com.google.android.exoplayer2.util.Log;
44 import com.google.android.exoplayer2.util.ParsableByteArray;
45 import com.google.android.exoplayer2.util.Util;
46 import java.lang.annotation.Documented;
47 import java.lang.annotation.Retention;
48 import java.util.ArrayDeque;
49 import java.util.ArrayList;
50 import java.util.Collections;
51 import java.util.Comparator;
52 import java.util.HashMap;
53 import java.util.List;
54 import java.util.Map;
55 import java.util.regex.Matcher;
56 import java.util.regex.Pattern;
57 import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
58 
59 /** Parser for WebVTT cues. (https://w3c.github.io/webvtt/#cues) */
60 public final class WebvttCueParser {
61 
62   /**
63    * Valid values for {@link WebvttCueInfoBuilder#textAlignment}.
64    *
65    * <p>We use a custom list (and not {@link Layout.Alignment} directly) in order to include both
66    * {@code START}/{@code LEFT} and {@code END}/{@code RIGHT}. The distinction is important for
67    * {@link WebvttCueInfoBuilder#derivePosition(int)}.
68    *
69    * <p>These correspond to the valid values for the 'align' cue setting in the <a
70    * href="https://www.w3.org/TR/webvtt1/#webvtt-cue-text-alignment">WebVTT spec</a>.
71    */
72   @Documented
73   @Retention(SOURCE)
74   @IntDef({
75     TEXT_ALIGNMENT_START,
76     TEXT_ALIGNMENT_CENTER,
77     TEXT_ALIGNMENT_END,
78     TEXT_ALIGNMENT_LEFT,
79     TEXT_ALIGNMENT_RIGHT
80   })
81   private @interface TextAlignment {}
82 
  /**
   * {@link TextAlignment} value for WebVTT's <a
   * href="https://www.w3.org/TR/webvtt1/#webvtt-cue-start-alignment">align:start</a>.
   */
  private static final int TEXT_ALIGNMENT_START = 1;

  /**
   * {@link TextAlignment} value for WebVTT's <a
   * href="https://www.w3.org/TR/webvtt1/#webvtt-cue-center-alignment">align:center</a>. This is
   * also the value used when no valid alignment is given.
   */
  private static final int TEXT_ALIGNMENT_CENTER = 2;

  /**
   * {@link TextAlignment} value for WebVTT's <a
   * href="https://www.w3.org/TR/webvtt1/#webvtt-cue-end-alignment">align:end</a>.
   */
  private static final int TEXT_ALIGNMENT_END = 3;

  /**
   * {@link TextAlignment} value for WebVTT's <a
   * href="https://www.w3.org/TR/webvtt1/#webvtt-cue-left-alignment">align:left</a>.
   */
  private static final int TEXT_ALIGNMENT_LEFT = 4;

  /**
   * {@link TextAlignment} value for WebVTT's <a
   * href="https://www.w3.org/TR/webvtt1/#webvtt-cue-right-alignment">align:right</a>.
   */
  private static final int TEXT_ALIGNMENT_RIGHT = 5;
110 
  /**
   * Matches a cue header line. Group 1 is the text before {@code -->} (the start timestamp), group
   * 2 is the first whitespace-free token after it (the end timestamp), and group 3 is whatever
   * follows on the line (the cue settings list, possibly empty).
   */
  public static final Pattern CUE_HEADER_PATTERN = Pattern
      .compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
  /** Matches one {@code name:value} cue setting. Group 1 is the name, group 2 the value. */
  private static final Pattern CUE_SETTING_PATTERN = Pattern.compile("(\\S+?):(\\S+)");

  // Characters with special meaning while scanning cue text markup.
  private static final char CHAR_LESS_THAN = '<';
  private static final char CHAR_GREATER_THAN = '>';
  private static final char CHAR_SLASH = '/';
  private static final char CHAR_AMPERSAND = '&';
  private static final char CHAR_SEMI_COLON = ';';
  private static final char CHAR_SPACE = ' ';

  // Names of the character entities handled by applyEntity (without the leading '&' and the
  // trailing ';').
  private static final String ENTITY_LESS_THAN = "lt";
  private static final String ENTITY_GREATER_THAN = "gt";
  private static final String ENTITY_AMPERSAND = "amp";
  private static final String ENTITY_NON_BREAK_SPACE = "nbsp";

  // Names of the cue text tags accepted by isSupportedTag.
  private static final String TAG_BOLD = "b";
  private static final String TAG_CLASS = "c";
  private static final String TAG_ITALIC = "i";
  private static final String TAG_LANG = "lang";
  private static final String TAG_RUBY = "ruby";
  private static final String TAG_RUBY_TEXT = "rt";
  private static final String TAG_UNDERLINE = "u";
  private static final String TAG_VOICE = "v";

  // Typeface styles passed to StyleSpan for the bold and italic tags.
  private static final int STYLE_BOLD = Typeface.BOLD;
  private static final int STYLE_ITALIC = Typeface.ITALIC;

  /** Position value of 0.5, i.e. half way along the axis the position is measured on. */
  /* package */ static final float DEFAULT_POSITION = 0.5f;

  /** Tag used for log messages emitted by this class. */
  private static final String TAG = "WebvttCueParser";
142 
143   /**
144    * See WebVTT's <a href="https://www.w3.org/TR/webvtt1/#default-text-color">default text
145    * colors</a>.
146    */
147   private static final Map<String, Integer> DEFAULT_TEXT_COLORS;
148 
149   static {
150     Map<String, Integer> defaultColors = new HashMap<>();
151     defaultColors.put("white", Color.rgb(255, 255, 255));
152     defaultColors.put("lime", Color.rgb(0, 255, 0));
153     defaultColors.put("cyan", Color.rgb(0, 255, 255));
154     defaultColors.put("red", Color.rgb(255, 0, 0));
155     defaultColors.put("yellow", Color.rgb(255, 255, 0));
156     defaultColors.put("magenta", Color.rgb(255, 0, 255));
157     defaultColors.put("blue", Color.rgb(0, 0, 255));
158     defaultColors.put("black", Color.rgb(0, 0, 0));
159     DEFAULT_TEXT_COLORS = Collections.unmodifiableMap(defaultColors);
160   }
161 
162   /**
163    * See WebVTT's <a href="https://www.w3.org/TR/webvtt1/#default-text-background">default text
164    * background colors</a>.
165    */
166   private static final Map<String, Integer> DEFAULT_BACKGROUND_COLORS;
167 
168   static {
169     Map<String, Integer> defaultBackgroundColors = new HashMap<>();
170     defaultBackgroundColors.put("bg_white", Color.rgb(255, 255, 255));
171     defaultBackgroundColors.put("bg_lime", Color.rgb(0, 255, 0));
172     defaultBackgroundColors.put("bg_cyan", Color.rgb(0, 255, 255));
173     defaultBackgroundColors.put("bg_red", Color.rgb(255, 0, 0));
174     defaultBackgroundColors.put("bg_yellow", Color.rgb(255, 255, 0));
175     defaultBackgroundColors.put("bg_magenta", Color.rgb(255, 0, 255));
176     defaultBackgroundColors.put("bg_blue", Color.rgb(0, 0, 255));
177     defaultBackgroundColors.put("bg_black", Color.rgb(0, 0, 0));
178     DEFAULT_BACKGROUND_COLORS = Collections.unmodifiableMap(defaultBackgroundColors);
179   }
180 
181   /**
182    * Parses the next valid WebVTT cue in a parsable array, including timestamps, settings and text.
183    *
184    * @param webvttData Parsable WebVTT file data.
185    * @param styles List of styles defined by the CSS style blocks preceding the cues.
186    * @return The parsed cue info, or null if no valid cue was found.
187    */
188   @Nullable
parseCue(ParsableByteArray webvttData, List<WebvttCssStyle> styles)189   public static WebvttCueInfo parseCue(ParsableByteArray webvttData, List<WebvttCssStyle> styles) {
190     @Nullable String firstLine = webvttData.readLine();
191     if (firstLine == null) {
192       return null;
193     }
194     Matcher cueHeaderMatcher = WebvttCueParser.CUE_HEADER_PATTERN.matcher(firstLine);
195     if (cueHeaderMatcher.matches()) {
196       // We have found the timestamps in the first line. No id present.
197       return parseCue(null, cueHeaderMatcher, webvttData, styles);
198     }
199     // The first line is not the timestamps, but could be the cue id.
200     @Nullable String secondLine = webvttData.readLine();
201     if (secondLine == null) {
202       return null;
203     }
204     cueHeaderMatcher = WebvttCueParser.CUE_HEADER_PATTERN.matcher(secondLine);
205     if (cueHeaderMatcher.matches()) {
206       // We can do the rest of the parsing, including the id.
207       return parseCue(firstLine.trim(), cueHeaderMatcher, webvttData, styles);
208     }
209     return null;
210   }
211 
212   /**
213    * Parses a string containing a list of cue settings.
214    *
215    * @param cueSettingsList String containing the settings for a given cue.
216    * @return The cue settings parsed into a {@link Cue.Builder}.
217    */
parseCueSettingsList(String cueSettingsList)218   /* package */ static Cue.Builder parseCueSettingsList(String cueSettingsList) {
219     WebvttCueInfoBuilder builder = new WebvttCueInfoBuilder();
220     parseCueSettingsList(cueSettingsList, builder);
221     return builder.toCueBuilder();
222   }
223 
224   /** Create a new {@link Cue} containing {@code text} and with WebVTT default values. */
newCueForText(CharSequence text)225   /* package */ static Cue newCueForText(CharSequence text) {
226     WebvttCueInfoBuilder infoBuilder = new WebvttCueInfoBuilder();
227     infoBuilder.text = text;
228     return infoBuilder.toCueBuilder().build();
229   }
230 
231   /**
232    * Parses the text payload of a WebVTT Cue and returns it as a styled {@link SpannedString}.
233    *
234    * @param id ID of the cue, {@code null} if it is not present.
235    * @param markup The markup text to be parsed.
236    * @param styles List of styles defined by the CSS style blocks preceding the cues.
237    * @return The styled cue text.
238    */
parseCueText( @ullable String id, String markup, List<WebvttCssStyle> styles)239   /* package */ static SpannedString parseCueText(
240       @Nullable String id, String markup, List<WebvttCssStyle> styles) {
241     SpannableStringBuilder spannedText = new SpannableStringBuilder();
242     ArrayDeque<StartTag> startTagStack = new ArrayDeque<>();
243     List<StyleMatch> scratchStyleMatches = new ArrayList<>();
244     int pos = 0;
245     List<Element> nestedElements = new ArrayList<>();
246     while (pos < markup.length()) {
247       char curr = markup.charAt(pos);
248       switch (curr) {
249         case CHAR_LESS_THAN:
250           if (pos + 1 >= markup.length()) {
251             pos++;
252             break; // avoid ArrayOutOfBoundsException
253           }
254           int ltPos = pos;
255           boolean isClosingTag = markup.charAt(ltPos + 1) == CHAR_SLASH;
256           pos = findEndOfTag(markup, ltPos + 1);
257           boolean isVoidTag = markup.charAt(pos - 2) == CHAR_SLASH;
258           String fullTagExpression = markup.substring(ltPos + (isClosingTag ? 2 : 1),
259               isVoidTag ? pos - 2 : pos - 1);
260           if (fullTagExpression.trim().isEmpty()) {
261             continue;
262           }
263           String tagName = getTagName(fullTagExpression);
264           if (!isSupportedTag(tagName)) {
265             continue;
266           }
267           if (isClosingTag) {
268             StartTag startTag;
269             do {
270               if (startTagStack.isEmpty()) {
271                 break;
272               }
273               startTag = startTagStack.pop();
274               applySpansForTag(
275                   id, startTag, nestedElements, spannedText, styles, scratchStyleMatches);
276               if (!startTagStack.isEmpty()) {
277                 nestedElements.add(new Element(startTag, spannedText.length()));
278               } else {
279                 nestedElements.clear();
280               }
281             } while (!startTag.name.equals(tagName));
282           } else if (!isVoidTag) {
283             startTagStack.push(StartTag.buildStartTag(fullTagExpression, spannedText.length()));
284           }
285           break;
286         case CHAR_AMPERSAND:
287           int semiColonEndIndex = markup.indexOf(CHAR_SEMI_COLON, pos + 1);
288           int spaceEndIndex = markup.indexOf(CHAR_SPACE, pos + 1);
289           int entityEndIndex = semiColonEndIndex == -1 ? spaceEndIndex
290               : (spaceEndIndex == -1 ? semiColonEndIndex
291                   : Math.min(semiColonEndIndex, spaceEndIndex));
292           if (entityEndIndex != -1) {
293             applyEntity(markup.substring(pos + 1, entityEndIndex), spannedText);
294             if (entityEndIndex == spaceEndIndex) {
295               spannedText.append(" ");
296             }
297             pos = entityEndIndex + 1;
298           } else {
299             spannedText.append(curr);
300             pos++;
301           }
302           break;
303         default:
304           spannedText.append(curr);
305           pos++;
306           break;
307       }
308     }
309     // apply unclosed tags
310     while (!startTagStack.isEmpty()) {
311       applySpansForTag(
312           id, startTagStack.pop(), nestedElements, spannedText, styles, scratchStyleMatches);
313     }
314     applySpansForTag(
315         id,
316         StartTag.buildWholeCueVirtualTag(),
317         /* nestedElements= */ Collections.emptyList(),
318         spannedText,
319         styles,
320         scratchStyleMatches);
321     return SpannedString.valueOf(spannedText);
322   }
323 
324   // Internal methods
325 
326   @Nullable
parseCue( @ullable String id, Matcher cueHeaderMatcher, ParsableByteArray webvttData, List<WebvttCssStyle> styles)327   private static WebvttCueInfo parseCue(
328       @Nullable String id,
329       Matcher cueHeaderMatcher,
330       ParsableByteArray webvttData,
331       List<WebvttCssStyle> styles) {
332     WebvttCueInfoBuilder builder = new WebvttCueInfoBuilder();
333     try {
334       // Parse the cue start and end times.
335       builder.startTimeUs =
336           WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(1)));
337       builder.endTimeUs =
338           WebvttParserUtil.parseTimestampUs(Assertions.checkNotNull(cueHeaderMatcher.group(2)));
339     } catch (NumberFormatException e) {
340       Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
341       return null;
342     }
343 
344     parseCueSettingsList(Assertions.checkNotNull(cueHeaderMatcher.group(3)), builder);
345 
346     // Parse the cue text.
347     StringBuilder textBuilder = new StringBuilder();
348     for (String line = webvttData.readLine();
349         !TextUtils.isEmpty(line);
350         line = webvttData.readLine()) {
351       if (textBuilder.length() > 0) {
352         textBuilder.append("\n");
353       }
354       textBuilder.append(line.trim());
355     }
356     builder.text = parseCueText(id, textBuilder.toString(), styles);
357     return builder.build();
358   }
359 
parseCueSettingsList(String cueSettingsList, WebvttCueInfoBuilder builder)360   private static void parseCueSettingsList(String cueSettingsList, WebvttCueInfoBuilder builder) {
361     // Parse the cue settings list.
362     Matcher cueSettingMatcher = CUE_SETTING_PATTERN.matcher(cueSettingsList);
363 
364     while (cueSettingMatcher.find()) {
365       String name = Assertions.checkNotNull(cueSettingMatcher.group(1));
366       String value = Assertions.checkNotNull(cueSettingMatcher.group(2));
367       try {
368         if ("line".equals(name)) {
369           parseLineAttribute(value, builder);
370         } else if ("align".equals(name)) {
371           builder.textAlignment = parseTextAlignment(value);
372         } else if ("position".equals(name)) {
373           parsePositionAttribute(value, builder);
374         } else if ("size".equals(name)) {
375           builder.size = WebvttParserUtil.parsePercentage(value);
376         } else if ("vertical".equals(name)) {
377           builder.verticalType = parseVerticalAttribute(value);
378         } else {
379           Log.w(TAG, "Unknown cue setting " + name + ":" + value);
380         }
381       } catch (NumberFormatException e) {
382         Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group());
383       }
384     }
385   }
386 
parseLineAttribute(String s, WebvttCueInfoBuilder builder)387   private static void parseLineAttribute(String s, WebvttCueInfoBuilder builder) {
388     int commaIndex = s.indexOf(',');
389     if (commaIndex != -1) {
390       builder.lineAnchor = parseLineAnchor(s.substring(commaIndex + 1));
391       s = s.substring(0, commaIndex);
392     }
393     if (s.endsWith("%")) {
394       builder.line = WebvttParserUtil.parsePercentage(s);
395       builder.lineType = Cue.LINE_TYPE_FRACTION;
396     } else {
397       int lineNumber = Integer.parseInt(s);
398       if (lineNumber < 0) {
399         // WebVTT defines line -1 as last visible row when lineAnchor is ANCHOR_TYPE_START, where-as
400         // Cue defines it to be the first row that's not visible.
401         lineNumber--;
402       }
403       builder.line = lineNumber;
404       builder.lineType = Cue.LINE_TYPE_NUMBER;
405     }
406   }
407 
408   @Cue.AnchorType
parseLineAnchor(String s)409   private static int parseLineAnchor(String s) {
410     switch (s) {
411       case "start":
412         return Cue.ANCHOR_TYPE_START;
413       case "center":
414       case "middle":
415         return Cue.ANCHOR_TYPE_MIDDLE;
416       case "end":
417         return Cue.ANCHOR_TYPE_END;
418       default:
419         Log.w(TAG, "Invalid anchor value: " + s);
420         return Cue.TYPE_UNSET;
421     }
422   }
423 
parsePositionAttribute(String s, WebvttCueInfoBuilder builder)424   private static void parsePositionAttribute(String s, WebvttCueInfoBuilder builder) {
425     int commaIndex = s.indexOf(',');
426     if (commaIndex != -1) {
427       builder.positionAnchor = parsePositionAnchor(s.substring(commaIndex + 1));
428       s = s.substring(0, commaIndex);
429     }
430     builder.position = WebvttParserUtil.parsePercentage(s);
431   }
432 
433   @Cue.AnchorType
parsePositionAnchor(String s)434   private static int parsePositionAnchor(String s) {
435     switch (s) {
436       case "line-left":
437       case "start":
438         return Cue.ANCHOR_TYPE_START;
439       case "center":
440       case "middle":
441         return Cue.ANCHOR_TYPE_MIDDLE;
442       case "line-right":
443       case "end":
444         return Cue.ANCHOR_TYPE_END;
445       default:
446         Log.w(TAG, "Invalid anchor value: " + s);
447         return Cue.TYPE_UNSET;
448     }
449   }
450 
451   @Cue.VerticalType
parseVerticalAttribute(String s)452   private static int parseVerticalAttribute(String s) {
453     switch (s) {
454       case "rl":
455         return Cue.VERTICAL_TYPE_RL;
456       case "lr":
457         return Cue.VERTICAL_TYPE_LR;
458       default:
459         Log.w(TAG, "Invalid 'vertical' value: " + s);
460         return Cue.TYPE_UNSET;
461     }
462   }
463 
464   @TextAlignment
parseTextAlignment(String s)465   private static int parseTextAlignment(String s) {
466     switch (s) {
467       case "start":
468         return TEXT_ALIGNMENT_START;
469       case "left":
470         return TEXT_ALIGNMENT_LEFT;
471       case "center":
472       case "middle":
473         return TEXT_ALIGNMENT_CENTER;
474       case "end":
475         return TEXT_ALIGNMENT_END;
476       case "right":
477         return TEXT_ALIGNMENT_RIGHT;
478       default:
479         Log.w(TAG, "Invalid alignment value: " + s);
480         // Default value: https://www.w3.org/TR/webvtt1/#webvtt-cue-text-alignment
481         return TEXT_ALIGNMENT_CENTER;
482     }
483   }
484 
485   /**
486    * Find end of tag (&gt;). The position returned is the position of the &gt; plus one (exclusive).
487    *
488    * @param markup The WebVTT cue markup to be parsed.
489    * @param startPos The position from where to start searching for the end of tag.
490    * @return The position of the end of tag plus 1 (one).
491    */
findEndOfTag(String markup, int startPos)492   private static int findEndOfTag(String markup, int startPos) {
493     int index = markup.indexOf(CHAR_GREATER_THAN, startPos);
494     return index == -1 ? markup.length() : index + 1;
495   }
496 
applyEntity(String entity, SpannableStringBuilder spannedText)497   private static void applyEntity(String entity, SpannableStringBuilder spannedText) {
498     switch (entity) {
499       case ENTITY_LESS_THAN:
500         spannedText.append('<');
501         break;
502       case ENTITY_GREATER_THAN:
503         spannedText.append('>');
504         break;
505       case ENTITY_NON_BREAK_SPACE:
506         spannedText.append(' ');
507         break;
508       case ENTITY_AMPERSAND:
509         spannedText.append('&');
510         break;
511       default:
512         Log.w(TAG, "ignoring unsupported entity: '&" + entity + ";'");
513         break;
514     }
515   }
516 
isSupportedTag(String tagName)517   private static boolean isSupportedTag(String tagName) {
518     switch (tagName) {
519       case TAG_BOLD:
520       case TAG_CLASS:
521       case TAG_ITALIC:
522       case TAG_LANG:
523       case TAG_RUBY:
524       case TAG_RUBY_TEXT:
525       case TAG_UNDERLINE:
526       case TAG_VOICE:
527         return true;
528       default:
529         return false;
530     }
531   }
532 
applySpansForTag( @ullable String cueId, StartTag startTag, List<Element> nestedElements, SpannableStringBuilder text, List<WebvttCssStyle> styles, List<StyleMatch> scratchStyleMatches)533   private static void applySpansForTag(
534       @Nullable String cueId,
535       StartTag startTag,
536       List<Element> nestedElements,
537       SpannableStringBuilder text,
538       List<WebvttCssStyle> styles,
539       List<StyleMatch> scratchStyleMatches) {
540     int start = startTag.position;
541     int end = text.length();
542     switch(startTag.name) {
543       case TAG_BOLD:
544         text.setSpan(new StyleSpan(STYLE_BOLD), start, end,
545             Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
546         break;
547       case TAG_ITALIC:
548         text.setSpan(new StyleSpan(STYLE_ITALIC), start, end,
549             Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
550         break;
551       case TAG_RUBY:
552         applyRubySpans(nestedElements, text, start);
553         break;
554       case TAG_UNDERLINE:
555         text.setSpan(new UnderlineSpan(), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
556         break;
557       case TAG_CLASS:
558         applyDefaultColors(text, startTag.classes, start, end);
559         break;
560       case TAG_LANG:
561       case TAG_VOICE:
562       case "": // Case of the "whole cue" virtual tag.
563         break;
564       default:
565         return;
566     }
567     scratchStyleMatches.clear();
568     getApplicableStyles(styles, cueId, startTag, scratchStyleMatches);
569     int styleMatchesCount = scratchStyleMatches.size();
570     for (int i = 0; i < styleMatchesCount; i++) {
571       applyStyleToText(text, scratchStyleMatches.get(i).style, start, end);
572     }
573   }
574 
applyRubySpans( List<Element> nestedElements, SpannableStringBuilder text, int startTagPosition)575   private static void applyRubySpans(
576       List<Element> nestedElements, SpannableStringBuilder text, int startTagPosition) {
577     List<Element> sortedNestedElements = new ArrayList<>(nestedElements.size());
578     sortedNestedElements.addAll(nestedElements);
579     Collections.sort(sortedNestedElements, Element.BY_START_POSITION_ASC);
580     int deletedCharCount = 0;
581     int lastRubyTextEnd = startTagPosition;
582     for (int i = 0; i < sortedNestedElements.size(); i++) {
583       if (!TAG_RUBY_TEXT.equals(sortedNestedElements.get(i).startTag.name)) {
584         continue;
585       }
586       Element rubyTextElement = sortedNestedElements.get(i);
587       // Move the rubyText from spannedText into the RubySpan.
588       int adjustedRubyTextStart = rubyTextElement.startTag.position - deletedCharCount;
589       int adjustedRubyTextEnd = rubyTextElement.endPosition - deletedCharCount;
590       CharSequence rubyText = text.subSequence(adjustedRubyTextStart, adjustedRubyTextEnd);
591       text.delete(adjustedRubyTextStart, adjustedRubyTextEnd);
592       text.setSpan(
593           new RubySpan(rubyText.toString(), RubySpan.POSITION_OVER),
594           lastRubyTextEnd,
595           adjustedRubyTextStart,
596           Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
597       deletedCharCount += rubyText.length();
598       // The ruby text has been deleted, so new-start == old-end.
599       lastRubyTextEnd = adjustedRubyTextStart;
600     }
601   }
602 
603   /**
604    * Adds {@link ForegroundColorSpan}s and {@link BackgroundColorSpan}s to {@code text} for entries
605    * in {@code classes} that match WebVTT's <a
606    * href="https://www.w3.org/TR/webvtt1/#default-text-color">default text colors</a> or <a
607    * href="https://www.w3.org/TR/webvtt1/#default-text-background">default text background
608    * colors</a>.
609    */
applyDefaultColors( SpannableStringBuilder text, String[] classes, int start, int end)610   private static void applyDefaultColors(
611       SpannableStringBuilder text, String[] classes, int start, int end) {
612     for (String className : classes) {
613       if (DEFAULT_TEXT_COLORS.containsKey(className)) {
614         int color = DEFAULT_TEXT_COLORS.get(className);
615         text.setSpan(new ForegroundColorSpan(color), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
616       } else if (DEFAULT_BACKGROUND_COLORS.containsKey(className)) {
617         int color = DEFAULT_BACKGROUND_COLORS.get(className);
618         text.setSpan(new BackgroundColorSpan(color), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
619       }
620     }
621   }
622 
applyStyleToText(SpannableStringBuilder spannedText, WebvttCssStyle style, int start, int end)623   private static void applyStyleToText(SpannableStringBuilder spannedText, WebvttCssStyle style,
624       int start, int end) {
625     if (style == null) {
626       return;
627     }
628     if (style.getStyle() != WebvttCssStyle.UNSPECIFIED) {
629       addOrReplaceSpan(
630           spannedText,
631           new StyleSpan(style.getStyle()),
632           start,
633           end,
634           Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
635     }
636     if (style.isLinethrough()) {
637       spannedText.setSpan(new StrikethroughSpan(), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
638     }
639     if (style.isUnderline()) {
640       spannedText.setSpan(new UnderlineSpan(), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
641     }
642     if (style.hasFontColor()) {
643       addOrReplaceSpan(
644           spannedText,
645           new ForegroundColorSpan(style.getFontColor()),
646           start,
647           end,
648           Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
649     }
650     if (style.hasBackgroundColor()) {
651       addOrReplaceSpan(
652           spannedText,
653           new BackgroundColorSpan(style.getBackgroundColor()),
654           start,
655           end,
656           Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
657     }
658     if (style.getFontFamily() != null) {
659       addOrReplaceSpan(
660           spannedText,
661           new TypefaceSpan(style.getFontFamily()),
662           start,
663           end,
664           Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
665     }
666     Layout.Alignment textAlign = style.getTextAlign();
667     if (textAlign != null) {
668       addOrReplaceSpan(
669           spannedText,
670           new AlignmentSpan.Standard(textAlign),
671           start,
672           end,
673           Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
674     }
675     switch (style.getFontSizeUnit()) {
676       case WebvttCssStyle.FONT_SIZE_UNIT_PIXEL:
677         addOrReplaceSpan(
678             spannedText,
679             new AbsoluteSizeSpan((int) style.getFontSize(), true),
680             start,
681             end,
682             Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
683         break;
684       case WebvttCssStyle.FONT_SIZE_UNIT_EM:
685         addOrReplaceSpan(
686             spannedText,
687             new RelativeSizeSpan(style.getFontSize()),
688             start,
689             end,
690             Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
691         break;
692       case WebvttCssStyle.FONT_SIZE_UNIT_PERCENT:
693         addOrReplaceSpan(
694             spannedText,
695             new RelativeSizeSpan(style.getFontSize() / 100),
696             start,
697             end,
698             Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
699         break;
700       case WebvttCssStyle.UNSPECIFIED:
701         // Do nothing.
702         break;
703     }
704     if (style.getCombineUpright()) {
705       spannedText.setSpan(
706           new HorizontalTextInVerticalContextSpan(), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
707     }
708   }
709 
710   /**
711    * Returns the tag name for the given tag contents.
712    *
713    * @param tagExpression Characters between &amp;lt: and &amp;gt; of a start or end tag.
714    * @return The name of tag.
715    */
getTagName(String tagExpression)716   private static String getTagName(String tagExpression) {
717     tagExpression = tagExpression.trim();
718     Assertions.checkArgument(!tagExpression.isEmpty());
719     return Util.splitAtFirst(tagExpression, "[ \\.]")[0];
720   }
721 
getApplicableStyles( List<WebvttCssStyle> declaredStyles, @Nullable String id, StartTag tag, List<StyleMatch> output)722   private static void getApplicableStyles(
723       List<WebvttCssStyle> declaredStyles,
724       @Nullable String id,
725       StartTag tag,
726       List<StyleMatch> output) {
727     int styleCount = declaredStyles.size();
728     for (int i = 0; i < styleCount; i++) {
729       WebvttCssStyle style = declaredStyles.get(i);
730       int score = style.getSpecificityScore(id, tag.name, tag.classes, tag.voice);
731       if (score > 0) {
732         output.add(new StyleMatch(score, style));
733       }
734     }
735     Collections.sort(output);
736   }
737 
738   private static final class WebvttCueInfoBuilder {
739 
    // Cue start and end times; parseCue fills them from WebvttParserUtil.parseTimestampUs.
    public long startTimeUs;
    public long endTimeUs;
    // The cue text; stays null until assigned, in which case toCueBuilder() sets no text.
    public @MonotonicNonNull CharSequence text;
    // Value of the 'align' cue setting.
    @TextAlignment public int textAlignment;
    // Value of the 'line' cue setting, interpreted according to lineType.
    public float line;
    // Equivalent to WebVTT's snap-to-lines flag:
    // https://www.w3.org/TR/webvtt1/#webvtt-cue-snap-to-lines-flag
    @Cue.LineType public int lineType;
    // Anchor part of the 'line' cue setting.
    @Cue.AnchorType public int lineAnchor;
    // Value of the 'position' cue setting; Cue.DIMEN_UNSET means it is derived from textAlignment.
    public float position;
    // Anchor part of the 'position' cue setting; Cue.TYPE_UNSET means it is derived from
    // textAlignment.
    @Cue.AnchorType public int positionAnchor;
    // Value of the 'size' cue setting.
    public float size;
    // Value of the 'vertical' cue setting.
    @Cue.VerticalType public int verticalType;
753 
WebvttCueInfoBuilder()754     public WebvttCueInfoBuilder() {
755       startTimeUs = 0;
756       endTimeUs = 0;
757       // Default: https://www.w3.org/TR/webvtt1/#webvtt-cue-text-alignment
758       textAlignment = TEXT_ALIGNMENT_CENTER;
759       line = Cue.DIMEN_UNSET;
760       // Defaults to NUMBER (true): https://www.w3.org/TR/webvtt1/#webvtt-cue-snap-to-lines-flag
761       lineType = Cue.LINE_TYPE_NUMBER;
762       // Default: https://www.w3.org/TR/webvtt1/#webvtt-cue-line-alignment
763       lineAnchor = Cue.ANCHOR_TYPE_START;
764       position = Cue.DIMEN_UNSET;
765       positionAnchor = Cue.TYPE_UNSET;
766       // Default: https://www.w3.org/TR/webvtt1/#webvtt-cue-size
767       size = 1.0f;
768       verticalType = Cue.TYPE_UNSET;
769     }
770 
build()771     public WebvttCueInfo build() {
772       return new WebvttCueInfo(toCueBuilder().build(), startTimeUs, endTimeUs);
773     }
774 
toCueBuilder()775     public Cue.Builder toCueBuilder() {
776       float position =
777           this.position != Cue.DIMEN_UNSET ? this.position : derivePosition(textAlignment);
778       @Cue.AnchorType
779       int positionAnchor =
780           this.positionAnchor != Cue.TYPE_UNSET
781               ? this.positionAnchor
782               : derivePositionAnchor(textAlignment);
783       Cue.Builder cueBuilder =
784           new Cue.Builder()
785               .setTextAlignment(convertTextAlignment(textAlignment))
786               .setLine(computeLine(line, lineType), lineType)
787               .setLineAnchor(lineAnchor)
788               .setPosition(position)
789               .setPositionAnchor(positionAnchor)
790               .setSize(Math.min(size, deriveMaxSize(positionAnchor, position)))
791               .setVerticalType(verticalType);
792 
793       if (text != null) {
794         cueBuilder.setText(text);
795       }
796 
797       return cueBuilder;
798     }
799 
800     // https://www.w3.org/TR/webvtt1/#webvtt-cue-line
computeLine(float line, @Cue.LineType int lineType)801     private static float computeLine(float line, @Cue.LineType int lineType) {
802       if (line != Cue.DIMEN_UNSET
803           && lineType == Cue.LINE_TYPE_FRACTION
804           && (line < 0.0f || line > 1.0f)) {
805         return 1.0f; // Step 1
806       } else if (line != Cue.DIMEN_UNSET) {
807         // Step 2: Do nothing, line is already correct.
808         return line;
809       } else if (lineType == Cue.LINE_TYPE_FRACTION) {
810         return 1.0f; // Step 3
811       } else {
812         // Steps 4 - 10 (stacking multiple simultaneous cues) are handled by
813         // WebvttSubtitle.getCues(long) and WebvttSubtitle.isNormal(Cue).
814         return Cue.DIMEN_UNSET;
815       }
816     }
817 
818     // https://www.w3.org/TR/webvtt1/#webvtt-cue-position
derivePosition(@extAlignment int textAlignment)819     private static float derivePosition(@TextAlignment int textAlignment) {
820       switch (textAlignment) {
821         case TEXT_ALIGNMENT_LEFT:
822           return 0.0f;
823         case TEXT_ALIGNMENT_RIGHT:
824           return 1.0f;
825         case TEXT_ALIGNMENT_START:
826         case TEXT_ALIGNMENT_CENTER:
827         case TEXT_ALIGNMENT_END:
828         default:
829           return DEFAULT_POSITION;
830       }
831     }
832 
833     // https://www.w3.org/TR/webvtt1/#webvtt-cue-position-alignment
834     @Cue.AnchorType
derivePositionAnchor(@extAlignment int textAlignment)835     private static int derivePositionAnchor(@TextAlignment int textAlignment) {
836       switch (textAlignment) {
837         case TEXT_ALIGNMENT_LEFT:
838         case TEXT_ALIGNMENT_START:
839           return Cue.ANCHOR_TYPE_START;
840         case TEXT_ALIGNMENT_RIGHT:
841         case TEXT_ALIGNMENT_END:
842           return Cue.ANCHOR_TYPE_END;
843         case TEXT_ALIGNMENT_CENTER:
844         default:
845           return Cue.ANCHOR_TYPE_MIDDLE;
846       }
847     }
848 
849     @Nullable
convertTextAlignment(@extAlignment int textAlignment)850     private static Layout.Alignment convertTextAlignment(@TextAlignment int textAlignment) {
851       switch (textAlignment) {
852         case TEXT_ALIGNMENT_START:
853         case TEXT_ALIGNMENT_LEFT:
854           return Layout.Alignment.ALIGN_NORMAL;
855         case TEXT_ALIGNMENT_CENTER:
856           return Layout.Alignment.ALIGN_CENTER;
857         case TEXT_ALIGNMENT_END:
858         case TEXT_ALIGNMENT_RIGHT:
859           return Layout.Alignment.ALIGN_OPPOSITE;
860         default:
861           Log.w(TAG, "Unknown textAlignment: " + textAlignment);
862           return null;
863       }
864     }
865 
866     // Step 2 here: https://www.w3.org/TR/webvtt1/#processing-cue-settings
deriveMaxSize(@ue.AnchorType int positionAnchor, float position)867     private static float deriveMaxSize(@Cue.AnchorType int positionAnchor, float position) {
868       switch (positionAnchor) {
869         case Cue.ANCHOR_TYPE_START:
870           return 1.0f - position;
871         case Cue.ANCHOR_TYPE_END:
872           return position;
873         case Cue.ANCHOR_TYPE_MIDDLE:
874           if (position <= 0.5f) {
875             return position * 2;
876           } else {
877             return (1.0f - position) * 2;
878           }
879         case Cue.TYPE_UNSET:
880         default:
881           throw new IllegalStateException(String.valueOf(positionAnchor));
882       }
883     }
884   }
885 
886   private static final class StyleMatch implements Comparable<StyleMatch> {
887 
888     public final int score;
889     public final WebvttCssStyle style;
890 
StyleMatch(int score, WebvttCssStyle style)891     public StyleMatch(int score, WebvttCssStyle style) {
892       this.score = score;
893       this.style = style;
894     }
895 
896     @Override
compareTo(StyleMatch another)897     public int compareTo(StyleMatch another) {
898       return this.score - another.score;
899     }
900 
901   }
902 
903   private static final class StartTag {
904 
905     private static final String[] NO_CLASSES = new String[0];
906 
907     public final String name;
908     public final int position;
909     public final String voice;
910     public final String[] classes;
911 
StartTag(String name, int position, String voice, String[] classes)912     private StartTag(String name, int position, String voice, String[] classes) {
913       this.position = position;
914       this.name = name;
915       this.voice = voice;
916       this.classes = classes;
917     }
918 
buildStartTag(String fullTagExpression, int position)919     public static StartTag buildStartTag(String fullTagExpression, int position) {
920       fullTagExpression = fullTagExpression.trim();
921       Assertions.checkArgument(!fullTagExpression.isEmpty());
922       int voiceStartIndex = fullTagExpression.indexOf(" ");
923       String voice;
924       if (voiceStartIndex == -1) {
925         voice = "";
926       } else {
927         voice = fullTagExpression.substring(voiceStartIndex).trim();
928         fullTagExpression = fullTagExpression.substring(0, voiceStartIndex);
929       }
930       String[] nameAndClasses = Util.split(fullTagExpression, "\\.");
931       String name = nameAndClasses[0];
932       String[] classes;
933       if (nameAndClasses.length > 1) {
934         classes = Util.nullSafeArrayCopyOfRange(nameAndClasses, 1, nameAndClasses.length);
935       } else {
936         classes = NO_CLASSES;
937       }
938       return new StartTag(name, position, voice, classes);
939     }
940 
buildWholeCueVirtualTag()941     public static StartTag buildWholeCueVirtualTag() {
942       return new StartTag("", 0, "", new String[0]);
943     }
944 
945   }
946 
947   /** Information about a complete element (i.e. start tag and end position). */
948   private static class Element {
949     private static final Comparator<Element> BY_START_POSITION_ASC =
950         (e1, e2) -> Integer.compare(e1.startTag.position, e2.startTag.position);
951 
952     private final StartTag startTag;
953     /**
954      * The position of the end of this element's text in the un-marked-up cue text (i.e. the
955      * corollary to {@link StartTag#position}).
956      */
957     private final int endPosition;
958 
Element(StartTag startTag, int endPosition)959     private Element(StartTag startTag, int endPosition) {
960       this.startTag = startTag;
961       this.endPosition = endPosition;
962     }
963   }
964 }
965