1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.android.exoplayer2.text.subrip; 17 18 import android.text.Html; 19 import android.text.Spanned; 20 import android.text.TextUtils; 21 import androidx.annotation.Nullable; 22 import com.google.android.exoplayer2.text.Cue; 23 import com.google.android.exoplayer2.text.SimpleSubtitleDecoder; 24 import com.google.android.exoplayer2.text.Subtitle; 25 import com.google.android.exoplayer2.util.Assertions; 26 import com.google.android.exoplayer2.util.Log; 27 import com.google.android.exoplayer2.util.LongArray; 28 import com.google.android.exoplayer2.util.ParsableByteArray; 29 import java.util.ArrayList; 30 import java.util.regex.Matcher; 31 import java.util.regex.Pattern; 32 33 /** 34 * A {@link SimpleSubtitleDecoder} for SubRip. 35 */ 36 public final class SubripDecoder extends SimpleSubtitleDecoder { 37 38 // Fractional positions for use when alignment tags are present. 39 private static final float START_FRACTION = 0.08f; 40 private static final float END_FRACTION = 1 - START_FRACTION; 41 private static final float MID_FRACTION = 0.5f; 42 43 private static final String TAG = "SubripDecoder"; 44 45 // Some SRT files don't include hours or milliseconds in the timecode, so we use optional groups. 46 private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+)(?:,(\\d+))?"; 47 private static final Pattern SUBRIP_TIMING_LINE = 48 Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*"); 49 50 // NOTE: Android Studio's suggestion to simplify '\\}' is incorrect [internal: b/144480183]. 51 private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}"); 52 private static final String SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}"; 53 54 // Alignment tags for SSA V4+. 55 private static final String ALIGN_BOTTOM_LEFT = "{\\an1}"; 56 private static final String ALIGN_BOTTOM_MID = "{\\an2}"; 57 private static final String ALIGN_BOTTOM_RIGHT = "{\\an3}"; 58 private static final String ALIGN_MID_LEFT = "{\\an4}"; 59 private static final String ALIGN_MID_MID = "{\\an5}"; 60 private static final String ALIGN_MID_RIGHT = "{\\an6}"; 61 private static final String ALIGN_TOP_LEFT = "{\\an7}"; 62 private static final String ALIGN_TOP_MID = "{\\an8}"; 63 private static final String ALIGN_TOP_RIGHT = "{\\an9}"; 64 65 private final StringBuilder textBuilder; 66 private final ArrayList<String> tags; 67 SubripDecoder()68 public SubripDecoder() { 69 super("SubripDecoder"); 70 textBuilder = new StringBuilder(); 71 tags = new ArrayList<>(); 72 } 73 74 @Override decode(byte[] bytes, int length, boolean reset)75 protected Subtitle decode(byte[] bytes, int length, boolean reset) { 76 ArrayList<Cue> cues = new ArrayList<>(); 77 LongArray cueTimesUs = new LongArray(); 78 ParsableByteArray subripData = new ParsableByteArray(bytes, length); 79 80 @Nullable String currentLine; 81 while ((currentLine = subripData.readLine()) != null) { 82 if (currentLine.length() == 0) { 83 // Skip blank lines. 84 continue; 85 } 86 87 // Parse the index line as a sanity check. 88 try { 89 Integer.parseInt(currentLine); 90 } catch (NumberFormatException e) { 91 Log.w(TAG, "Skipping invalid index: " + currentLine); 92 continue; 93 } 94 95 // Read and parse the timing line. 96 currentLine = subripData.readLine(); 97 if (currentLine == null) { 98 Log.w(TAG, "Unexpected end"); 99 break; 100 } 101 102 Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine); 103 if (matcher.matches()) { 104 cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 1)); 105 cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 6)); 106 } else { 107 Log.w(TAG, "Skipping invalid timing: " + currentLine); 108 continue; 109 } 110 111 // Read and parse the text and tags. 112 textBuilder.setLength(0); 113 tags.clear(); 114 currentLine = subripData.readLine(); 115 while (!TextUtils.isEmpty(currentLine)) { 116 if (textBuilder.length() > 0) { 117 textBuilder.append("<br>"); 118 } 119 textBuilder.append(processLine(currentLine, tags)); 120 currentLine = subripData.readLine(); 121 } 122 123 Spanned text = Html.fromHtml(textBuilder.toString()); 124 125 @Nullable String alignmentTag = null; 126 for (int i = 0; i < tags.size(); i++) { 127 String tag = tags.get(i); 128 if (tag.matches(SUBRIP_ALIGNMENT_TAG)) { 129 alignmentTag = tag; 130 // Subsequent alignment tags should be ignored. 131 break; 132 } 133 } 134 cues.add(buildCue(text, alignmentTag)); 135 cues.add(Cue.EMPTY); 136 } 137 138 Cue[] cuesArray = cues.toArray(new Cue[0]); 139 long[] cueTimesUsArray = cueTimesUs.toArray(); 140 return new SubripSubtitle(cuesArray, cueTimesUsArray); 141 } 142 143 /** 144 * Trims and removes tags from the given line. The removed tags are added to {@code tags}. 145 * 146 * @param line The line to process. 147 * @param tags A list to which removed tags will be added. 148 * @return The processed line. 149 */ processLine(String line, ArrayList<String> tags)150 private String processLine(String line, ArrayList<String> tags) { 151 line = line.trim(); 152 153 int removedCharacterCount = 0; 154 StringBuilder processedLine = new StringBuilder(line); 155 Matcher matcher = SUBRIP_TAG_PATTERN.matcher(line); 156 while (matcher.find()) { 157 String tag = matcher.group(); 158 tags.add(tag); 159 int start = matcher.start() - removedCharacterCount; 160 int tagLength = tag.length(); 161 processedLine.replace(start, /* end= */ start + tagLength, /* str= */ ""); 162 removedCharacterCount += tagLength; 163 } 164 165 return processedLine.toString(); 166 } 167 168 /** 169 * Build a {@link Cue} based on the given text and alignment tag. 170 * 171 * @param text The text. 172 * @param alignmentTag The alignment tag, or {@code null} if no alignment tag is available. 173 * @return Built cue 174 */ buildCue(Spanned text, @Nullable String alignmentTag)175 private Cue buildCue(Spanned text, @Nullable String alignmentTag) { 176 if (alignmentTag == null) { 177 return new Cue(text); 178 } 179 180 // Horizontal alignment. 181 @Cue.AnchorType int positionAnchor; 182 switch (alignmentTag) { 183 case ALIGN_BOTTOM_LEFT: 184 case ALIGN_MID_LEFT: 185 case ALIGN_TOP_LEFT: 186 positionAnchor = Cue.ANCHOR_TYPE_START; 187 break; 188 case ALIGN_BOTTOM_RIGHT: 189 case ALIGN_MID_RIGHT: 190 case ALIGN_TOP_RIGHT: 191 positionAnchor = Cue.ANCHOR_TYPE_END; 192 break; 193 case ALIGN_BOTTOM_MID: 194 case ALIGN_MID_MID: 195 case ALIGN_TOP_MID: 196 default: 197 positionAnchor = Cue.ANCHOR_TYPE_MIDDLE; 198 break; 199 } 200 201 // Vertical alignment. 202 @Cue.AnchorType int lineAnchor; 203 switch (alignmentTag) { 204 case ALIGN_BOTTOM_LEFT: 205 case ALIGN_BOTTOM_MID: 206 case ALIGN_BOTTOM_RIGHT: 207 lineAnchor = Cue.ANCHOR_TYPE_END; 208 break; 209 case ALIGN_TOP_LEFT: 210 case ALIGN_TOP_MID: 211 case ALIGN_TOP_RIGHT: 212 lineAnchor = Cue.ANCHOR_TYPE_START; 213 break; 214 case ALIGN_MID_LEFT: 215 case ALIGN_MID_MID: 216 case ALIGN_MID_RIGHT: 217 default: 218 lineAnchor = Cue.ANCHOR_TYPE_MIDDLE; 219 break; 220 } 221 222 return new Cue( 223 text, 224 /* textAlignment= */ null, 225 getFractionalPositionForAnchorType(lineAnchor), 226 Cue.LINE_TYPE_FRACTION, 227 lineAnchor, 228 getFractionalPositionForAnchorType(positionAnchor), 229 positionAnchor, 230 Cue.DIMEN_UNSET); 231 } 232 parseTimecode(Matcher matcher, int groupOffset)233 private static long parseTimecode(Matcher matcher, int groupOffset) { 234 @Nullable String hours = matcher.group(groupOffset + 1); 235 long timestampMs = hours != null ? Long.parseLong(hours) * 60 * 60 * 1000 : 0; 236 timestampMs += 237 Long.parseLong(Assertions.checkNotNull(matcher.group(groupOffset + 2))) * 60 * 1000; 238 timestampMs += Long.parseLong(Assertions.checkNotNull(matcher.group(groupOffset + 3))) * 1000; 239 @Nullable String millis = matcher.group(groupOffset + 4); 240 if (millis != null) { 241 timestampMs += Long.parseLong(millis); 242 } 243 return timestampMs * 1000; 244 } 245 getFractionalPositionForAnchorType(@ue.AnchorType int anchorType)246 /* package */ static float getFractionalPositionForAnchorType(@Cue.AnchorType int anchorType) { 247 switch (anchorType) { 248 case Cue.ANCHOR_TYPE_START: 249 return SubripDecoder.START_FRACTION; 250 case Cue.ANCHOR_TYPE_MIDDLE: 251 return SubripDecoder.MID_FRACTION; 252 case Cue.ANCHOR_TYPE_END: 253 return SubripDecoder.END_FRACTION; 254 case Cue.TYPE_UNSET: 255 default: 256 // Should never happen. 257 throw new IllegalArgumentException(); 258 } 259 } 260 } 261