• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.android.exoplayer2.text.subrip;
17 
18 import android.text.Html;
19 import android.text.Spanned;
20 import android.text.TextUtils;
21 import androidx.annotation.Nullable;
22 import com.google.android.exoplayer2.text.Cue;
23 import com.google.android.exoplayer2.text.SimpleSubtitleDecoder;
24 import com.google.android.exoplayer2.text.Subtitle;
25 import com.google.android.exoplayer2.util.Assertions;
26 import com.google.android.exoplayer2.util.Log;
27 import com.google.android.exoplayer2.util.LongArray;
28 import com.google.android.exoplayer2.util.ParsableByteArray;
29 import java.util.ArrayList;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 
33 /**
34  * A {@link SimpleSubtitleDecoder} for SubRip.
35  */
36 public final class SubripDecoder extends SimpleSubtitleDecoder {
37 
38   // Fractional positions for use when alignment tags are present.
39   private static final float START_FRACTION = 0.08f;
40   private static final float END_FRACTION = 1 - START_FRACTION;
41   private static final float MID_FRACTION = 0.5f;
42 
43   private static final String TAG = "SubripDecoder";
44 
45   // Some SRT files don't include hours or milliseconds in the timecode, so we use optional groups.
46   private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d+))?";
47   private static final Pattern SUBRIP_TIMING_LINE =
48       Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*");
49 
50   // NOTE: Android Studio's suggestion to simplify '\\}' is incorrect [internal: b/144480183].
51   private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}");
52   private static final String SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}";
53 
54   // Alignment tags for SSA V4+.
55   private static final String ALIGN_BOTTOM_LEFT = "{\\an1}";
56   private static final String ALIGN_BOTTOM_MID = "{\\an2}";
57   private static final String ALIGN_BOTTOM_RIGHT = "{\\an3}";
58   private static final String ALIGN_MID_LEFT = "{\\an4}";
59   private static final String ALIGN_MID_MID = "{\\an5}";
60   private static final String ALIGN_MID_RIGHT = "{\\an6}";
61   private static final String ALIGN_TOP_LEFT = "{\\an7}";
62   private static final String ALIGN_TOP_MID = "{\\an8}";
63   private static final String ALIGN_TOP_RIGHT = "{\\an9}";
64 
65   private final StringBuilder textBuilder;
66   private final ArrayList<String> tags;
67 
SubripDecoder()68   public SubripDecoder() {
69     super("SubripDecoder");
70     textBuilder = new StringBuilder();
71     tags = new ArrayList<>();
72   }
73 
74   @Override
decode(byte[] bytes, int length, boolean reset)75   protected Subtitle decode(byte[] bytes, int length, boolean reset) {
76     ArrayList<Cue> cues = new ArrayList<>();
77     LongArray cueTimesUs = new LongArray();
78     ParsableByteArray subripData = new ParsableByteArray(bytes, length);
79 
80     @Nullable String currentLine;
81     while ((currentLine = subripData.readLine()) != null) {
82       if (currentLine.length() == 0) {
83         // Skip blank lines.
84         continue;
85       }
86 
87       // Parse the index line as a sanity check.
88       try {
89         Integer.parseInt(currentLine);
90       } catch (NumberFormatException e) {
91         Log.w(TAG, "Skipping invalid index: " + currentLine);
92         continue;
93       }
94 
95       // Read and parse the timing line.
96       currentLine = subripData.readLine();
97       if (currentLine == null) {
98         Log.w(TAG, "Unexpected end");
99         break;
100       }
101 
102       Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine);
103       if (matcher.matches()) {
104         cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 1));
105         cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 6));
106       } else {
107         Log.w(TAG, "Skipping invalid timing: " + currentLine);
108         continue;
109       }
110 
111       // Read and parse the text and tags.
112       textBuilder.setLength(0);
113       tags.clear();
114       currentLine = subripData.readLine();
115       while (!TextUtils.isEmpty(currentLine)) {
116         if (textBuilder.length() > 0) {
117           textBuilder.append("<br>");
118         }
119         textBuilder.append(processLine(currentLine, tags));
120         currentLine = subripData.readLine();
121       }
122 
123       Spanned text = Html.fromHtml(textBuilder.toString());
124 
125       @Nullable String alignmentTag = null;
126       for (int i = 0; i < tags.size(); i++) {
127         String tag = tags.get(i);
128         if (tag.matches(SUBRIP_ALIGNMENT_TAG)) {
129           alignmentTag = tag;
130           // Subsequent alignment tags should be ignored.
131           break;
132         }
133       }
134       cues.add(buildCue(text, alignmentTag));
135       cues.add(Cue.EMPTY);
136     }
137 
138     Cue[] cuesArray = cues.toArray(new Cue[0]);
139     long[] cueTimesUsArray = cueTimesUs.toArray();
140     return new SubripSubtitle(cuesArray, cueTimesUsArray);
141   }
142 
143   /**
144    * Trims and removes tags from the given line. The removed tags are added to {@code tags}.
145    *
146    * @param line The line to process.
147    * @param tags A list to which removed tags will be added.
148    * @return The processed line.
149    */
processLine(String line, ArrayList<String> tags)150   private String processLine(String line, ArrayList<String> tags) {
151     line = line.trim();
152 
153     int removedCharacterCount = 0;
154     StringBuilder processedLine = new StringBuilder(line);
155     Matcher matcher = SUBRIP_TAG_PATTERN.matcher(line);
156     while (matcher.find()) {
157       String tag = matcher.group();
158       tags.add(tag);
159       int start = matcher.start() - removedCharacterCount;
160       int tagLength = tag.length();
161       processedLine.replace(start, /* end= */ start + tagLength, /* str= */ "");
162       removedCharacterCount += tagLength;
163     }
164 
165     return processedLine.toString();
166   }
167 
168   /**
169    * Build a {@link Cue} based on the given text and alignment tag.
170    *
171    * @param text The text.
172    * @param alignmentTag The alignment tag, or {@code null} if no alignment tag is available.
173    * @return Built cue
174    */
buildCue(Spanned text, @Nullable String alignmentTag)175   private Cue buildCue(Spanned text, @Nullable String alignmentTag) {
176     if (alignmentTag == null) {
177       return new Cue(text);
178     }
179 
180     // Horizontal alignment.
181     @Cue.AnchorType int positionAnchor;
182     switch (alignmentTag) {
183       case ALIGN_BOTTOM_LEFT:
184       case ALIGN_MID_LEFT:
185       case ALIGN_TOP_LEFT:
186         positionAnchor = Cue.ANCHOR_TYPE_START;
187         break;
188       case ALIGN_BOTTOM_RIGHT:
189       case ALIGN_MID_RIGHT:
190       case ALIGN_TOP_RIGHT:
191         positionAnchor = Cue.ANCHOR_TYPE_END;
192         break;
193       case ALIGN_BOTTOM_MID:
194       case ALIGN_MID_MID:
195       case ALIGN_TOP_MID:
196       default:
197         positionAnchor = Cue.ANCHOR_TYPE_MIDDLE;
198         break;
199     }
200 
201     // Vertical alignment.
202     @Cue.AnchorType int lineAnchor;
203     switch (alignmentTag) {
204       case ALIGN_BOTTOM_LEFT:
205       case ALIGN_BOTTOM_MID:
206       case ALIGN_BOTTOM_RIGHT:
207         lineAnchor = Cue.ANCHOR_TYPE_END;
208         break;
209       case ALIGN_TOP_LEFT:
210       case ALIGN_TOP_MID:
211       case ALIGN_TOP_RIGHT:
212         lineAnchor = Cue.ANCHOR_TYPE_START;
213         break;
214       case ALIGN_MID_LEFT:
215       case ALIGN_MID_MID:
216       case ALIGN_MID_RIGHT:
217       default:
218         lineAnchor = Cue.ANCHOR_TYPE_MIDDLE;
219         break;
220     }
221 
222     return new Cue(
223         text,
224         /* textAlignment= */ null,
225         getFractionalPositionForAnchorType(lineAnchor),
226         Cue.LINE_TYPE_FRACTION,
227         lineAnchor,
228         getFractionalPositionForAnchorType(positionAnchor),
229         positionAnchor,
230         Cue.DIMEN_UNSET);
231   }
232 
parseTimecode(Matcher matcher, int groupOffset)233   private static long parseTimecode(Matcher matcher, int groupOffset) {
234     @Nullable String hours = matcher.group(groupOffset + 1);
235     long timestampMs = hours != null ? Long.parseLong(hours) * 60 * 60 * 1000 : 0;
236     timestampMs +=
237         Long.parseLong(Assertions.checkNotNull(matcher.group(groupOffset + 2))) * 60 * 1000;
238     timestampMs += Long.parseLong(Assertions.checkNotNull(matcher.group(groupOffset + 3))) * 1000;
239     @Nullable String millis = matcher.group(groupOffset + 4);
240     if (millis != null) {
241       timestampMs += Long.parseLong(millis);
242     }
243     return timestampMs * 1000;
244   }
245 
getFractionalPositionForAnchorType(@ue.AnchorType int anchorType)246   /* package */ static float getFractionalPositionForAnchorType(@Cue.AnchorType int anchorType) {
247     switch (anchorType) {
248       case Cue.ANCHOR_TYPE_START:
249         return SubripDecoder.START_FRACTION;
250       case Cue.ANCHOR_TYPE_MIDDLE:
251         return SubripDecoder.MID_FRACTION;
252       case Cue.ANCHOR_TYPE_END:
253         return SubripDecoder.END_FRACTION;
254       case Cue.TYPE_UNSET:
255       default:
256         // Should never happen.
257         throw new IllegalArgumentException();
258     }
259   }
260 }
261