1 /*
2  * IndexDecoder
3  *
4  * Author: Lasse Collin <lasse.collin@tukaani.org>
5  *
6  * This file has been put into the public domain.
7  * You can do whatever you want with this file.
8  */
9 
10 package org.tukaani.xz.index;
11 
12 import java.io.IOException;
13 import java.io.EOFException;
14 import java.util.zip.CheckedInputStream;
15 import org.tukaani.xz.common.DecoderUtil;
16 import org.tukaani.xz.common.StreamFlags;
17 import org.tukaani.xz.SeekableInputStream;
18 import org.tukaani.xz.CorruptedInputException;
19 import org.tukaani.xz.MemoryLimitException;
20 import org.tukaani.xz.UnsupportedOptionsException;
21 
22 public class IndexDecoder extends IndexBase {
23     private final StreamFlags streamFlags;
24     private final long streamPadding;
25     private final int memoryUsage;
26 
27     // Unpadded Size and Uncompressed Size fields
28     private final long[] unpadded;
29     private final long[] uncompressed;
30 
31     // Uncompressed size of the largest Block. It is used by
32     // SeekableXZInputStream to find out the largest Block of the .xz file.
33     private long largestBlockSize = 0;
34 
35     // Offsets relative to the beginning of the .xz file. These are all zero
36     // for the first Stream in the file.
37     private int recordOffset = 0;
38     private long compressedOffset = 0;
39     private long uncompressedOffset = 0;
40 
IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags, long streamPadding, int memoryLimit)41     public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
42                         long streamPadding, int memoryLimit)
43             throws IOException {
44         super(new CorruptedInputException("XZ Index is corrupt"));
45         this.streamFlags = streamFooterFlags;
46         this.streamPadding = streamPadding;
47 
48         // If endPos is exceeded before the CRC32 field has been decoded,
49         // the Index is corrupt.
50         long endPos = in.position() + streamFooterFlags.backwardSize - 4;
51 
52         java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
53         CheckedInputStream inChecked = new CheckedInputStream(in, crc32);
54 
55         // Index Indicator
56         if (inChecked.read() != 0x00)
57             throw new CorruptedInputException("XZ Index is corrupt");
58 
59         try {
60             // Number of Records
61             long count = DecoderUtil.decodeVLI(inChecked);
62 
63             // Catch Record counts that obviously too high to be valid.
64             // This test isn't exact because it ignores Index Indicator,
65             // Number of Records, and CRC32 fields, but this is good enough
66             // to catch the most obvious problems.
67             if (count >= streamFooterFlags.backwardSize / 2)
68                 throw new CorruptedInputException("XZ Index is corrupt");
69 
70             // If the Record count doesn't fit into an int, we cannot
71             // allocate the arrays to hold the Records.
72             if (count > Integer.MAX_VALUE)
73                 throw new UnsupportedOptionsException("XZ Index has over "
74                         + Integer.MAX_VALUE + " Records");
75 
76             // Calculate approximate memory requirements and check the
77             // memory usage limit.
78             memoryUsage = 1 + (int)((16L * count + 1023) / 1024);
79             if (memoryLimit >= 0 && memoryUsage > memoryLimit)
80                 throw new MemoryLimitException(memoryUsage, memoryLimit);
81 
82             // Allocate the arrays for the Records.
83             unpadded = new long[(int)count];
84             uncompressed = new long[(int)count];
85             int record = 0;
86 
87             // Decode the Records.
88             for (int i = (int)count; i > 0; --i) {
89                 // Get the next Record.
90                 long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
91                 long uncompressedSize = DecoderUtil.decodeVLI(inChecked);
92 
93                 // Check that the input position stays sane. Since this is
94                 // checked only once per loop iteration instead of for
95                 // every input byte read, it's still possible that
96                 // EOFException gets thrown with corrupt input.
97                 if (in.position() > endPos)
98                     throw new CorruptedInputException("XZ Index is corrupt");
99 
100                 // Add the new Record.
101                 unpadded[record] = blocksSum + unpaddedSize;
102                 uncompressed[record] = uncompressedSum + uncompressedSize;
103                 ++record;
104                 super.add(unpaddedSize, uncompressedSize);
105                 assert record == recordCount;
106 
107                 // Remember the uncompressed size of the largest Block.
108                 if (largestBlockSize < uncompressedSize)
109                     largestBlockSize = uncompressedSize;
110             }
111         } catch (EOFException e) {
112             // EOFException is caught just in case a corrupt input causes
113             // DecoderUtil.decodeVLI to read too much at once.
114             throw new CorruptedInputException("XZ Index is corrupt");
115         }
116 
117         // Validate that the size of the Index field matches
118         // Backward Size.
119         int indexPaddingSize = getIndexPaddingSize();
120         if (in.position() + indexPaddingSize != endPos)
121             throw new CorruptedInputException("XZ Index is corrupt");
122 
123         // Index Padding
124         while (indexPaddingSize-- > 0)
125             if (inChecked.read() != 0x00)
126                 throw new CorruptedInputException("XZ Index is corrupt");
127 
128         // CRC32
129         long value = crc32.getValue();
130         for (int i = 0; i < 4; ++i)
131             if (((value >>> (i * 8)) & 0xFF) != in.read())
132                 throw new CorruptedInputException("XZ Index is corrupt");
133     }
134 
setOffsets(IndexDecoder prev)135     public void setOffsets(IndexDecoder prev) {
136         // NOTE: SeekableXZInputStream checks that the total number of Blocks
137         // in concatenated Streams fits into an int.
138         recordOffset = prev.recordOffset + (int)prev.recordCount;
139         compressedOffset = prev.compressedOffset
140                            + prev.getStreamSize() + prev.streamPadding;
141         assert (compressedOffset & 3) == 0;
142         uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
143     }
144 
getMemoryUsage()145     public int getMemoryUsage() {
146         return memoryUsage;
147     }
148 
getStreamFlags()149     public StreamFlags getStreamFlags() {
150         return streamFlags;
151     }
152 
getRecordCount()153     public int getRecordCount() {
154         // It was already checked in the constructor that it fits into an int.
155         // Otherwise we couldn't have allocated the arrays.
156         return (int)recordCount;
157     }
158 
getUncompressedSize()159     public long getUncompressedSize() {
160         return uncompressedSum;
161     }
162 
getLargestBlockSize()163     public long getLargestBlockSize() {
164         return largestBlockSize;
165     }
166 
hasUncompressedOffset(long pos)167     public boolean hasUncompressedOffset(long pos) {
168         return pos >= uncompressedOffset
169                && pos < uncompressedOffset + uncompressedSum;
170     }
171 
hasRecord(int blockNumber)172     public boolean hasRecord(int blockNumber) {
173         return blockNumber >= recordOffset
174                && blockNumber < recordOffset + recordCount;
175     }
176 
locateBlock(BlockInfo info, long target)177     public void locateBlock(BlockInfo info, long target) {
178         assert target >= uncompressedOffset;
179         target -= uncompressedOffset;
180         assert target < uncompressedSum;
181 
182         int left = 0;
183         int right = unpadded.length - 1;
184 
185         while (left < right) {
186             int i = left + (right - left) / 2;
187 
188             if (uncompressed[i] <= target)
189                 left = i + 1;
190             else
191                 right = i;
192         }
193 
194         setBlockInfo(info, recordOffset + left);
195     }
196 
197     public void setBlockInfo(BlockInfo info, int blockNumber) {
198         // The caller has checked that the given Block number is inside
199         // this Index.
200         assert blockNumber >= recordOffset;
201         assert blockNumber - recordOffset < recordCount;
202 
203         info.index = this;
204         info.blockNumber = blockNumber;
205 
206         int pos = blockNumber - recordOffset;
207 
208         if (pos == 0) {
209             info.compressedOffset = 0;
210             info.uncompressedOffset = 0;
211         } else {
212             info.compressedOffset = (unpadded[pos - 1] + 3) & ~3;
213             info.uncompressedOffset = uncompressed[pos - 1];
214         }
215 
216         info.unpaddedSize = unpadded[pos] - info.compressedOffset;
217         info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;
218 
219         info.compressedOffset += compressedOffset
220                                  + DecoderUtil.STREAM_HEADER_SIZE;
221         info.uncompressedOffset += uncompressedOffset;
222     }
223 }
224