1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef _BSDIFF_ENDSLEY_PATCH_WRITER_H_
6 #define _BSDIFF_ENDSLEY_PATCH_WRITER_H_
7 
8 #include <memory>
9 #include <string>
10 #include <vector>
11 
12 #include "bsdiff/compressor_interface.h"
13 #include "bsdiff/constants.h"
14 #include "bsdiff/patch_writer_interface.h"
15 
16 namespace bsdiff {
17 
18 // A PatchWriterInterface class compatible with the format used by Android Play
19 // Store's bsdiff implementation, which is based on Matthew Endsley's bsdiff
20 // implementation. See https://github.com/mendsley/bsdiff for the original
21 // implementation of this format. See also Google's APK patch size estimator for
22 // more information on the file-by-file format used by Play Store:
23 // https://github.com/googlesamples/apk-patch-size-estimator
24 
25 // This format, identified by the "ENDSLEY/BSDIFF43" magic string, uses a single
26 // stream with the control entries, diff data and extra data interleaved. After
27 // the header, each Control Entry is stored in 24 bytes followed by the diff
28 // stream data for that entry only, and then followed by the extra stream data
29 // for that entry only. The format doesn't handle the compression of the data,
30 // instead, the whole file (including the magic string) is compressed with any
31 // compression algorithm.
32 
// This format is easier to parse and allows the patch to be streamed, but by
// mixing the diff and extra data into the same compression context it offers a
// slightly worse compression ratio (about 3.5% compared to upstream's format).
36 
37 class EndsleyPatchWriter : public PatchWriterInterface {
38  public:
39   // Create the patch writer that will write the data to the passed vector
40   // |patch|, resizing it as needed. The |patch| vector must be valid until
41   // Close() is called or this patch is destroyed. The data in |patch| will be
42   // compressed using the compressor type |type|.
EndsleyPatchWriter(std::vector<uint8_t> * patch,CompressorType type,int brotli_quality)43   EndsleyPatchWriter(std::vector<uint8_t>* patch,
44                      CompressorType type,
45                      int brotli_quality)
46       : patch_(patch),
47         compressor_type_(type),
48         brotli_quality_(brotli_quality) {}
49 
50   // PatchWriterInterface overrides.
51   bool Init(size_t new_size) override;
52   bool WriteDiffStream(const uint8_t* data, size_t size) override;
53   bool WriteExtraStream(const uint8_t* data, size_t size) override;
54   bool AddControlEntry(const ControlEntry& entry) override;
55   bool Close() override;
56 
57  private:
58   // Emit at the end of the |patch_| vector the passed control entry.
59   void EmitControlEntry(const ControlEntry& entry);
60 
61   // Emit at the end of the |patch_| vector the passed buffer.
62   void EmitBuffer(const uint8_t* data, size_t size);
63 
64   // Flush as much as possible of the pending data.
65   void Flush();
66 
67   // The vector we are writing to, owned by the caller.
68   std::vector<uint8_t>* patch_;
69 
70   // The compressor type to use and its quality (if any).
71   CompressorType compressor_type_;
72   int brotli_quality_;
73 
74   std::unique_ptr<CompressorInterface> compressor_;
75 
76   // The pending diff and extra data to be encoded in the file. These vectors
77   // would not be used whenever is possible to the data directly to the patch_
78   // vector; namely when the control, diff and extra stream data are provided in
79   // that order for each control entry.
80   std::vector<uint8_t> diff_data_;
81   std::vector<uint8_t> extra_data_;
82   std::vector<ControlEntry> control_;
83 
84   // Defined as the sum of all the diff_size and extra_size values in
85   // |control_|. This is used to determine whether it is worth Flushing the
86   // pending data.
87   size_t pending_control_data_{0};
88 
89   // Number of bytes in the diff and extra stream that are pending in the
90   // last control entry encoded in the |patch_|. If both are zero the last
91   // control entry was completely emitted.
92   size_t pending_diff_{0};
93   size_t pending_extra_{0};
94 };
95 
96 }  // namespace bsdiff
97 
98 #endif  // _BSDIFF_ENDSLEY_PATCH_WRITER_H_
99