1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_LIB_IO_SNAPPY_SNAPPY_INPUTBUFFER_H_
17 #define TENSORFLOW_CORE_LIB_IO_SNAPPY_SNAPPY_INPUTBUFFER_H_
18 
#include <memory>
#include <string>

#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/io/inputstream_interface.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/snappy.h"
#include "tensorflow/core/platform/types.h"
26 
27 namespace tensorflow {
28 namespace io {
29 
30 // An SnappyInputBuffer provides support for reading from a file compressed
31 // using snappy (https://github.com/google/snappy).
32 //
33 // A given instance of an SnappyInputBuffer is NOT safe for concurrent use
34 // by multiple threads
35 class SnappyInputBuffer : public InputStreamInterface {
36  public:
37   // Create a SnappyInputBuffer for `file` with a buffer of size
38   // `input_buffer_bytes` bytes for reading contents from `file` and another
39   // buffer with size `output_buffer_bytes` for caching decompressed contents.
40   // Does *not* take ownership of "file".
41   SnappyInputBuffer(RandomAccessFile* file, size_t input_buffer_bytes,
42                     size_t output_buffer_bytes);
43 
44   // Reads bytes_to_read bytes into *result, overwriting *result.
45   //
46   // Return Status codes:
47   // OK:
48   //   If successful.
49   // OUT_OF_RANGE:
50   //   If there are not enough bytes to read before the end of the file.
51   // DATA_LOSS:
52   //   If uncompression failed or if the file is corrupted.
53   // RESOURCE_EXHAUSTED:
54   //   If input_buffer_ is smaller in size than a compressed block.
55   // others:
56   //   If reading from file failed.
57   Status ReadNBytes(int64 bytes_to_read, tstring* result) override;
58 
59   int64 Tell() const override;
60 
61   Status Reset() override;
62 
63  private:
64   // Reads data from `file_` and tries to fill up `input_buffer_` if enough
65   // unread data is left in `file_`.
66   //
67   // Looks up `next_in_` to check how much data in `input_buffer_`
68   // has already been read. The used data is removed and new data is added to
69   // after any unread data in `input_buffer_`.
70   // After this call `next_in` points to the start of `input_buffer_`
71   // and `avail_in_` stores the number of readable bytes in
72   // `input_buffer_`.
73   //
74   // Returns OutOfRange error if NO data could be read from file. Note that this
75   // won't return an OutOfRange if there wasn't sufficient data in file to
76   // completely fill up `input_buffer_`.
77   Status ReadFromFile();
78 
79   // Reads the length of the next compressed block stored in the next 4 bytes at
80   // `next_in_`. Uncompresses the next compressed block and writes the output
81   // produced to the output_buffer_.
82   // Should be called only after the cached output has been consumed.
83   Status Inflate();
84 
85   // Starts reading bytes at `next_out_` until either `bytes_to_read`
86   // bytes have been read or `next_out_` is reached.
87   // Returns the number of bytes read and advances the `next_out_`
88   // pointer to the next location to read from.
89   size_t ReadBytesFromCache(size_t bytes_to_read, char* result);
90 
91   // Reads the length of the next *compressed* block and stores in `length`.
92   // The length is stored in 4 bytes in little endian notation.
93   Status ReadCompressedBlockLength(uint32* length);
94 
95   RandomAccessFile* file_;         // Not owned
96   int64 file_pos_ = 0;             // Next position to read from in `file_`
97   size_t input_buffer_capacity_;   // Size of `input_buffer_`.
98                                    // Must be at least as big as the size of
99                                    // the largest compressed block.
100   size_t output_buffer_capacity_;  // Size of `output_buffer_`
101 
102   // Buffer for storing contents read from compressed file.
103   // TODO(srbs): Consider using circular buffers. That would greatly simplify
104   // the implementation.
105   std::unique_ptr<char[]> input_buffer_;
106 
107   // Buffer for storing inflated contents of `file_`.
108   std::unique_ptr<char[]> output_buffer_;
109 
110   // Next unread byte in `input_buffer_`.
111   char* next_in_;
112 
113   // Next unread byte in `output_buffer_`
114   char* next_out_;
115 
116   // Number of unread bytes available at `next_in_` in `input_buffer_`.
117   size_t avail_in_ = 0;
118 
119   // Number of unread bytes available at `next_out_` in `output_buffer_`.
120   size_t avail_out_ = 0;
121 
122   // Number of *uncompressed* bytes that have been read from this stream.
123   int64 bytes_read_;
124 
125   TF_DISALLOW_COPY_AND_ASSIGN(SnappyInputBuffer);
126 };
127 
128 }  // namespace io
129 }  // namespace tensorflow
130 
131 #endif  // TENSORFLOW_CORE_LIB_IO_SNAPPY_SNAPPY_INPUTBUFFER_H_
132