1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied.  See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 package org.apache.commons.compress.compressors.lzw;
20 
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.nio.ByteOrder;
24 
25 import org.apache.commons.compress.MemoryLimitException;
26 import org.apache.commons.compress.compressors.CompressorInputStream;
27 import org.apache.commons.compress.utils.BitInputStream;
28 import org.apache.commons.compress.utils.InputStreamStatistics;
29 
30 /**
31  * <p>Generic LZW implementation. It is used internally for
32  * the Z decompressor and the Unshrinking Zip file compression method,
33  * but may be useful for third-party projects in implementing their own LZW variations.</p>
34  *
35  * @NotThreadSafe
36  * @since 1.10
37  */
38 public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
39     protected static final int DEFAULT_CODE_SIZE = 9;
40     protected static final int UNUSED_PREFIX = -1;
41 
42     private final byte[] oneByte = new byte[1];
43 
44     protected final BitInputStream in;
45     private int clearCode = -1;
46     private int codeSize = DEFAULT_CODE_SIZE;
47     private byte previousCodeFirstChar;
48     private int previousCode = UNUSED_PREFIX;
49     private int tableSize;
50     private int[] prefixes;
51     private byte[] characters;
52     private byte[] outputStack;
53     private int outputStackLocation;
54 
LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder)55     protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
56         this.in = new BitInputStream(inputStream, byteOrder);
57     }
58 
59     @Override
close()60     public void close() throws IOException {
61         in.close();
62     }
63 
64     @Override
read()65     public int read() throws IOException {
66         final int ret = read(oneByte);
67         if (ret < 0) {
68             return ret;
69         }
70         return 0xff & oneByte[0];
71     }
72 
73     @Override
read(final byte[] b, final int off, final int len)74     public int read(final byte[] b, final int off, final int len) throws IOException {
75         int bytesRead = readFromStack(b, off, len);
76         while (len - bytesRead > 0) {
77             final int result = decompressNextSymbol();
78             if (result < 0) {
79                 if (bytesRead > 0) {
80                     count(bytesRead);
81                     return bytesRead;
82                 }
83                 return result;
84             }
85             bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
86         }
87         count(bytesRead);
88         return bytesRead;
89     }
90 
91     /**
92      * @since 1.17
93      */
94     @Override
getCompressedCount()95     public long getCompressedCount() {
96         return in.getBytesRead();
97     }
98 
99     /**
100      * Read the next code and expand it.
101      * @return the expanded next code
102      * @throws IOException on error
103      */
decompressNextSymbol()104     protected abstract int decompressNextSymbol() throws IOException;
105 
106     /**
107      * Add a new entry to the dictionary.
108      * @param previousCode the previous code
109      * @param character the next character to append
110      * @return the new code
111      * @throws IOException on error
112      */
addEntry(int previousCode, byte character)113     protected abstract int addEntry(int previousCode, byte character)
114         throws IOException;
115 
116     /**
117      * Sets the clear code based on the code size.
118      * @param codeSize code size
119      */
setClearCode(final int codeSize)120     protected void setClearCode(final int codeSize) {
121         clearCode = (1 << (codeSize - 1));
122     }
123 
124     /**
125      * Initializes the arrays based on the maximum code size.
126      * First checks that the estimated memory usage is below memoryLimitInKb
127      *
128      * @param maxCodeSize maximum code size
129      * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
130      * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb
131      */
initializeTables(final int maxCodeSize, final int memoryLimitInKb)132     protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb)
133             throws MemoryLimitException {
134 
135         if (memoryLimitInKb > -1) {
136             final int maxTableSize = 1 << maxCodeSize;
137             //account for potential overflow
138             long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack))
139             long memoryUsageInKb = memoryUsageInBytes >> 10;
140 
141             if (memoryUsageInKb > memoryLimitInKb) {
142                 throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
143             }
144         }
145         initializeTables(maxCodeSize);
146     }
147 
148     /**
149      * Initializes the arrays based on the maximum code size.
150      * @param maxCodeSize maximum code size
151      */
initializeTables(final int maxCodeSize)152     protected void initializeTables(final int maxCodeSize) {
153         final int maxTableSize = 1 << maxCodeSize;
154         prefixes = new int[maxTableSize];
155         characters = new byte[maxTableSize];
156         outputStack = new byte[maxTableSize];
157         outputStackLocation = maxTableSize;
158         final int max = 1 << 8;
159         for (int i = 0; i < max; i++) {
160             prefixes[i] = -1;
161             characters[i] = (byte) i;
162         }
163     }
164 
165     /**
166      * Reads the next code from the stream.
167      * @return the next code
168      * @throws IOException on error
169      */
readNextCode()170     protected int readNextCode() throws IOException {
171         if (codeSize > 31) {
172             throw new IllegalArgumentException("code size must not be bigger than 31");
173         }
174         return (int) in.readBits(codeSize);
175     }
176 
177     /**
178      * Adds a new entry if the maximum table size hasn't been exceeded
179      * and returns the new index.
180      * @param previousCode the previous code
181      * @param character the character to append
182      * @param maxTableSize the maximum table size
183      * @return the new code
184      */
addEntry(final int previousCode, final byte character, final int maxTableSize)185     protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
186         if (tableSize < maxTableSize) {
187             prefixes[tableSize] = previousCode;
188             characters[tableSize] = character;
189             return tableSize++;
190         }
191         return -1;
192     }
193 
194     /**
195      * Add entry for repeat of previousCode we haven't added, yet.
196      * @return new code for a repeat of the previous code
197      * @throws IOException on error
198      */
addRepeatOfPreviousCode()199     protected int addRepeatOfPreviousCode() throws IOException {
200         if (previousCode == -1) {
201             // can't have a repeat for the very first code
202             throw new IOException("The first code can't be a reference to its preceding code");
203         }
204         return addEntry(previousCode, previousCodeFirstChar);
205     }
206 
207     /**
208      * Expands the entry with index code to the output stack and may
209      * create a new entry
210      * @param code the code
211      * @param addedUnfinishedEntry whether unfinished entries have been added
212      * @return the new location of the output stack
213      * @throws IOException on error
214      */
expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)215     protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)
216         throws IOException {
217         for (int entry = code; entry >= 0; entry = prefixes[entry]) {
218             outputStack[--outputStackLocation] = characters[entry];
219         }
220         if (previousCode != -1 && !addedUnfinishedEntry) {
221             addEntry(previousCode, outputStack[outputStackLocation]);
222         }
223         previousCode = code;
224         previousCodeFirstChar = outputStack[outputStackLocation];
225         return outputStackLocation;
226     }
227 
readFromStack(final byte[] b, final int off, final int len)228     private int readFromStack(final byte[] b, final int off, final int len) {
229         final int remainingInStack = outputStack.length - outputStackLocation;
230         if (remainingInStack > 0) {
231             final int maxLength = Math.min(remainingInStack, len);
232             System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
233             outputStackLocation += maxLength;
234             return maxLength;
235         }
236         return 0;
237     }
238 
getCodeSize()239     protected int getCodeSize() {
240         return codeSize;
241     }
242 
resetCodeSize()243     protected void resetCodeSize() {
244         setCodeSize(DEFAULT_CODE_SIZE);
245     }
246 
setCodeSize(final int cs)247     protected void setCodeSize(final int cs) {
248         this.codeSize = cs;
249     }
250 
incrementCodeSize()251     protected void incrementCodeSize() {
252         codeSize++;
253     }
254 
resetPreviousCode()255     protected void resetPreviousCode() {
256         this.previousCode = -1;
257     }
258 
getPrefix(final int offset)259     protected int getPrefix(final int offset) {
260         return prefixes[offset];
261     }
262 
setPrefix(final int offset, final int value)263     protected void setPrefix(final int offset, final int value) {
264         prefixes[offset] = value;
265     }
266 
getPrefixesLength()267     protected int getPrefixesLength() {
268         return prefixes.length;
269     }
270 
getClearCode()271     protected int getClearCode() {
272         return clearCode;
273     }
274 
getTableSize()275     protected int getTableSize() {
276         return tableSize;
277     }
278 
setTableSize(final int newSize)279     protected void setTableSize(final int newSize) {
280         tableSize = newSize;
281     }
282 
283 }
284