1 /*
2  * Copyright (C) 2014 Square, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package okio;
17 
18 import java.io.EOFException;
19 import java.io.IOException;
20 import java.util.zip.CRC32;
21 import java.util.zip.Inflater;
22 
23 /**
24  * A source that uses <a href="http://www.ietf.org/rfc/rfc1952.txt">GZIP</a> to
25  * decompress data read from another source.
26  */
27 public final class GzipSource implements Source {
28   private static final byte FHCRC = 1;
29   private static final byte FEXTRA = 2;
30   private static final byte FNAME = 3;
31   private static final byte FCOMMENT = 4;
32 
33   private static final byte SECTION_HEADER = 0;
34   private static final byte SECTION_BODY = 1;
35   private static final byte SECTION_TRAILER = 2;
36   private static final byte SECTION_DONE = 3;
37 
38   /** The current section. Always progresses forward. */
39   private int section = SECTION_HEADER;
40 
41   /**
42    * Our source should yield a GZIP header (which we consume directly), followed
43    * by deflated bytes (which we consume via an InflaterSource), followed by a
44    * GZIP trailer (which we also consume directly).
45    */
46   private final BufferedSource source;
47 
48   /** The inflater used to decompress the deflated body. */
49   private final Inflater inflater;
50 
51   /**
52    * The inflater source takes care of moving data between compressed source and
53    * decompressed sink buffers.
54    */
55   private final InflaterSource inflaterSource;
56 
57   /** Checksum used to check both the GZIP header and decompressed body. */
58   private final CRC32 crc = new CRC32();
59 
GzipSource(Source source)60   public GzipSource(Source source) {
61     if (source == null) throw new IllegalArgumentException("source == null");
62     this.inflater = new Inflater(true);
63     this.source = Okio.buffer(source);
64     this.inflaterSource = new InflaterSource(this.source, inflater);
65   }
66 
read(Buffer sink, long byteCount)67   @Override public long read(Buffer sink, long byteCount) throws IOException {
68     if (byteCount < 0) throw new IllegalArgumentException("byteCount < 0: " + byteCount);
69     if (byteCount == 0) return 0;
70 
71     // If we haven't consumed the header, we must consume it before anything else.
72     if (section == SECTION_HEADER) {
73       consumeHeader();
74       section = SECTION_BODY;
75     }
76 
77     // Attempt to read at least a byte of the body. If we do, we're done.
78     if (section == SECTION_BODY) {
79       long offset = sink.size;
80       long result = inflaterSource.read(sink, byteCount);
81       if (result != -1) {
82         updateCrc(sink, offset, result);
83         return result;
84       }
85       section = SECTION_TRAILER;
86     }
87 
88     // The body is exhausted; time to read the trailer. We always consume the
89     // trailer before returning a -1 exhausted result; that way if you read to
90     // the end of a GzipSource you guarantee that the CRC has been checked.
91     if (section == SECTION_TRAILER) {
92       consumeTrailer();
93       section = SECTION_DONE;
94 
95       // Gzip streams self-terminate: they return -1 before their underlying
96       // source returns -1. Here we attempt to force the underlying stream to
97       // return -1 which may trigger it to release its resources. If it doesn't
98       // return -1, then our Gzip data finished prematurely!
99       if (!source.exhausted()) {
100         throw new IOException("gzip finished without exhausting source");
101       }
102     }
103 
104     return -1;
105   }
106 
consumeHeader()107   private void consumeHeader() throws IOException {
108     // Read the 10-byte header. We peek at the flags byte first so we know if we
109     // need to CRC the entire header. Then we read the magic ID1ID2 sequence.
110     // We can skip everything else in the first 10 bytes.
111     // +---+---+---+---+---+---+---+---+---+---+
112     // |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
113     // +---+---+---+---+---+---+---+---+---+---+
114     source.require(10);
115     byte flags = source.buffer().getByte(3);
116     boolean fhcrc = ((flags >> FHCRC) & 1) == 1;
117     if (fhcrc) updateCrc(source.buffer(), 0, 10);
118 
119     short id1id2 = source.readShort();
120     checkEqual("ID1ID2", (short) 0x1f8b, id1id2);
121     source.skip(8);
122 
123     // Skip optional extra fields.
124     // +---+---+=================================+
125     // | XLEN  |...XLEN bytes of "extra field"...| (more-->)
126     // +---+---+=================================+
127     if (((flags >> FEXTRA) & 1) == 1) {
128       source.require(2);
129       if (fhcrc) updateCrc(source.buffer(), 0, 2);
130       int xlen = source.buffer().readShortLe();
131       source.require(xlen);
132       if (fhcrc) updateCrc(source.buffer(), 0, xlen);
133       source.skip(xlen);
134     }
135 
136     // Skip an optional 0-terminated name.
137     // +=========================================+
138     // |...original file name, zero-terminated...| (more-->)
139     // +=========================================+
140     if (((flags >> FNAME) & 1) == 1) {
141       long index = source.indexOf((byte) 0);
142       if (index == -1) throw new EOFException();
143       if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
144       source.skip(index + 1);
145     }
146 
147     // Skip an optional 0-terminated comment.
148     // +===================================+
149     // |...file comment, zero-terminated...| (more-->)
150     // +===================================+
151     if (((flags >> FCOMMENT) & 1) == 1) {
152       long index = source.indexOf((byte) 0);
153       if (index == -1) throw new EOFException();
154       if (fhcrc) updateCrc(source.buffer(), 0, index + 1);
155       source.skip(index + 1);
156     }
157 
158     // Confirm the optional header CRC.
159     // +---+---+
160     // | CRC16 |
161     // +---+---+
162     if (fhcrc) {
163       checkEqual("FHCRC", source.readShortLe(), (short) crc.getValue());
164       crc.reset();
165     }
166   }
167 
consumeTrailer()168   private void consumeTrailer() throws IOException {
169     // Read the eight-byte trailer. Confirm the body's CRC and size.
170     // +---+---+---+---+---+---+---+---+
171     // |     CRC32     |     ISIZE     |
172     // +---+---+---+---+---+---+---+---+
173     checkEqual("CRC", source.readIntLe(), (int) crc.getValue());
174     checkEqual("ISIZE", source.readIntLe(), inflater.getTotalOut());
175   }
176 
timeout()177   @Override public Timeout timeout() {
178     return source.timeout();
179   }
180 
close()181   @Override public void close() throws IOException {
182     inflaterSource.close();
183   }
184 
185   /** Updates the CRC with the given bytes. */
updateCrc(Buffer buffer, long offset, long byteCount)186   private void updateCrc(Buffer buffer, long offset, long byteCount) {
187     // Skip segments that we aren't checksumming.
188     Segment s = buffer.head;
189     for (; offset >= (s.limit - s.pos); s = s.next) {
190       offset -= (s.limit - s.pos);
191     }
192 
193     // Checksum one segment at a time.
194     for (; byteCount > 0; s = s.next) {
195       int pos = (int) (s.pos + offset);
196       int toUpdate = (int) Math.min(s.limit - pos, byteCount);
197       crc.update(s.data, pos, toUpdate);
198       byteCount -= toUpdate;
199       offset = 0;
200     }
201   }
202 
checkEqual(String name, int expected, int actual)203   private void checkEqual(String name, int expected, int actual) throws IOException {
204     if (actual != expected) {
205       throw new IOException(String.format(
206           "%s: actual 0x%08x != expected 0x%08x", name, actual, expected));
207     }
208   }
209 }
210