1 /*-
2  * Copyright 2003-2005 Colin Percival
3  * All rights reserved
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted providing that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #if 0
28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $");
29 #endif
30 
31 #include "bspatch.h"
32 
33 #include <bzlib.h>
34 #include <err.h>
35 #include <fcntl.h>
36 #include <inttypes.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <sys/types.h>
41 
42 #include <algorithm>
43 #include <memory>
44 #include <limits>
45 
46 #include "extents.h"
47 #include "extents_file.h"
48 #include "file.h"
49 #include "file_interface.h"
50 
51 namespace {
52 
// Decodes the 8-byte integer stored at |buf|.
//
// The encoding is little-endian sign-magnitude: bytes 0..6 plus the low seven
// bits of byte 7 hold the magnitude (byte 0 is least significant), and the top
// bit of byte 7 is the sign flag (set means negative).
int64_t ParseInt64(u_char* buf) {
  // Start with the most significant seven magnitude bits, then fold in the
  // remaining bytes from most to least significant.
  int64_t magnitude = buf[7] & 0x7F;
  for (int idx = 6; idx >= 0; --idx)
    magnitude = magnitude * 256 + buf[idx];

  const bool is_negative = (buf[7] & 0x80) != 0;
  return is_negative ? -magnitude : magnitude;
}
77 
78 }  // namespace
79 
80 namespace bsdiff {
81 
bspatch(const char * old_filename,const char * new_filename,const char * patch_filename,const char * old_extents,const char * new_extents)82 int bspatch(
83     const char* old_filename, const char* new_filename,
84     const char* patch_filename,
85     const char* old_extents, const char* new_extents) {
86   FILE* f, *cpf, *dpf, *epf;
87   BZFILE* cpfbz2, *dpfbz2, *epfbz2;
88   int cbz2err, dbz2err, ebz2err;
89   ssize_t bzctrllen, bzdatalen;
90   u_char header[32], buf[8];
91   u_char* new_buf;
92   off_t ctrl[3];
93   off_t lenread;
94 
95   int using_extents = (old_extents != NULL || new_extents != NULL);
96 
97   // Open patch file.
98   if ((f = fopen(patch_filename, "r")) == NULL)
99     err(1, "fopen(%s)", patch_filename);
100 
101   // File format:
102   //   0       8    "BSDIFF40"
103   //   8       8    X
104   //   16      8    Y
105   //   24      8    sizeof(new_filename)
106   //   32      X    bzip2(control block)
107   //   32+X    Y    bzip2(diff block)
108   //   32+X+Y  ???  bzip2(extra block)
109   // with control block a set of triples (x,y,z) meaning "add x bytes
110   // from oldfile to x bytes from the diff block; copy y bytes from the
111   // extra block; seek forwards in oldfile by z bytes".
112 
113   // Read header.
114   if (fread(header, 1, 32, f) < 32) {
115     if (feof(f))
116       errx(1, "Corrupt patch\n");
117     err(1, "fread(%s)", patch_filename);
118   }
119 
120   // Check for appropriate magic.
121   if (memcmp(header, "BSDIFF40", 8) != 0)
122     errx(1, "Corrupt patch\n");
123 
124   // Read lengths from header.
125   uint64_t oldsize, newsize;
126   bzctrllen = ParseInt64(header + 8);
127   bzdatalen = ParseInt64(header + 16);
128   int64_t signed_newsize = ParseInt64(header + 24);
129   newsize = signed_newsize;
130   if ((bzctrllen < 0) || (bzdatalen < 0) || (signed_newsize < 0))
131     errx(1, "Corrupt patch\n");
132 
133   // Close patch file and re-open it via libbzip2 at the right places.
134   if (fclose(f))
135     err(1, "fclose(%s)", patch_filename);
136   if ((cpf = fopen(patch_filename, "r")) == NULL)
137     err(1, "fopen(%s)", patch_filename);
138   if (fseek(cpf, 32, SEEK_SET))
139     err(1, "fseeko(%s, %lld)", patch_filename, (long long)32);
140   if ((cpfbz2 = BZ2_bzReadOpen(&cbz2err, cpf, 0, 0, NULL, 0)) == NULL)
141     errx(1, "BZ2_bzReadOpen, bz2err = %d", cbz2err);
142   if ((dpf = fopen(patch_filename, "r")) == NULL)
143     err(1, "fopen(%s)", patch_filename);
144   if (fseek(dpf, 32 + bzctrllen, SEEK_SET))
145     err(1, "fseeko(%s, %lld)", patch_filename, (long long)(32 + bzctrllen));
146   if ((dpfbz2 = BZ2_bzReadOpen(&dbz2err, dpf, 0, 0, NULL, 0)) == NULL)
147     errx(1, "BZ2_bzReadOpen, bz2err = %d", dbz2err);
148   if ((epf = fopen(patch_filename, "r")) == NULL)
149     err(1, "fopen(%s)", patch_filename);
150   if (fseek(epf, 32 + bzctrllen + bzdatalen, SEEK_SET))
151     err(1, "fseeko(%s, %lld)", patch_filename,
152         (long long)(32 + bzctrllen + bzdatalen));
153   if ((epfbz2 = BZ2_bzReadOpen(&ebz2err, epf, 0, 0, NULL, 0)) == NULL)
154     errx(1, "BZ2_bzReadOpen, bz2err = %d", ebz2err);
155 
156   // Open input file for reading.
157   std::unique_ptr<FileInterface> old_file = File::FOpen(old_filename, O_RDONLY);
158   if (!old_file)
159     err(1, "Error opening the old filename");
160 
161   if (using_extents) {
162     std::vector<ex_t> parsed_old_extents;
163     if (!ParseExtentStr(old_extents, &parsed_old_extents))
164       errx(1, "Error parsing the old extents");
165     old_file.reset(new ExtentsFile(std::move(old_file), parsed_old_extents));
166   }
167 
168   if (!old_file->GetSize(&oldsize))
169     err(1, "cannot obtain the size of %s", old_filename);
170   uint64_t old_file_pos = 0;
171 
172   if ((new_buf = static_cast<u_char*>(malloc(newsize + 1))) == NULL)
173     err(1, NULL);
174 
175   // The oldpos can be negative, but the new pos is only incremented linearly.
176   int64_t oldpos = 0;
177   uint64_t newpos = 0;
178   std::vector<u_char> old_buf(1024 * 1024);
179   while (newpos < newsize) {
180     int64_t i, j;
181     // Read control data.
182     for (i = 0; i <= 2; i++) {
183       lenread = BZ2_bzRead(&cbz2err, cpfbz2, buf, 8);
184       if ((lenread < 8) || ((cbz2err != BZ_OK) && (cbz2err != BZ_STREAM_END)))
185         errx(1, "Corrupt patch\n");
186       ctrl[i] = ParseInt64(buf);
187     };
188 
189     // Sanity-check.
190     if (ctrl[0] < 0 || ctrl[1] < 0)
191       errx(1, "Corrupt patch\n");
192 
193     // Sanity-check.
194     if (newpos + ctrl[0] > newsize)
195       errx(1, "Corrupt patch\n");
196 
197     // Read diff string.
198     lenread = BZ2_bzRead(&dbz2err, dpfbz2, new_buf + newpos, ctrl[0]);
199     if ((lenread < ctrl[0]) ||
200         ((dbz2err != BZ_OK) && (dbz2err != BZ_STREAM_END)))
201       errx(1, "Corrupt patch\n");
202 
203     // Add old data to diff string. It is enough to fseek once, at
204     // the beginning of the sequence, to avoid unnecessary overhead.
205     j = newpos;
206     if ((i = oldpos) < 0) {
207       j -= i;
208       i = 0;
209     }
210     // We just checked that |i| is not negative.
211     if (static_cast<uint64_t>(i) != old_file_pos && !old_file->Seek(i))
212       err(1, "error seeking input file to offset %" PRId64, i);
213     if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
214       old_file_pos = oldsize;
215 
216     uint64_t chunk_size = old_file_pos - i;
217     while (chunk_size > 0) {
218       size_t read_bytes;
219       size_t bytes_to_read =
220           std::min(chunk_size, static_cast<uint64_t>(old_buf.size()));
221       if (!old_file->Read(old_buf.data(), bytes_to_read, &read_bytes))
222         err(1, "error reading from input file");
223       if (!read_bytes)
224         errx(1, "EOF reached while reading from input file");
225       // new_buf already has data from diff block, adds old data to it.
226       for (size_t k = 0; k < read_bytes; k++)
227         new_buf[j++] += old_buf[k];
228       chunk_size -= read_bytes;
229     }
230 
231     // Adjust pointers.
232     newpos += ctrl[0];
233     oldpos += ctrl[0];
234 
235     // Sanity-check.
236     if (newpos + ctrl[1] > newsize)
237       errx(1, "Corrupt patch\n");
238 
239     // Read extra string.
240     lenread = BZ2_bzRead(&ebz2err, epfbz2, new_buf + newpos, ctrl[1]);
241     if ((lenread < ctrl[1]) ||
242         ((ebz2err != BZ_OK) && (ebz2err != BZ_STREAM_END)))
243       errx(1, "Corrupt patch\n");
244 
245     // Adjust pointers.
246     newpos += ctrl[1];
247     oldpos += ctrl[2];
248   };
249 
250   // Close input file.
251   old_file->Close();
252 
253   // Clean up the bzip2 reads.
254   BZ2_bzReadClose(&cbz2err, cpfbz2);
255   BZ2_bzReadClose(&dbz2err, dpfbz2);
256   BZ2_bzReadClose(&ebz2err, epfbz2);
257   if (fclose(cpf) || fclose(dpf) || fclose(epf))
258     err(1, "fclose(%s)", patch_filename);
259 
260   // Write the new file.
261   std::unique_ptr<FileInterface> new_file =
262       File::FOpen(new_filename, O_CREAT | O_WRONLY);
263   if (!new_file)
264     err(1, "Error opening the new filename %s", new_filename);
265 
266   if (using_extents) {
267     std::vector<ex_t> parsed_new_extents;
268     if (!ParseExtentStr(new_extents, &parsed_new_extents))
269       errx(1, "Error parsing the new extents");
270     new_file.reset(new ExtentsFile(std::move(new_file), parsed_new_extents));
271   }
272 
273   u_char* temp_new_buf = new_buf;   // new_buf needed for free()
274   while (newsize > 0) {
275     size_t bytes_written;
276     if (!new_file->Write(temp_new_buf, newsize, &bytes_written))
277       err(1, "Error writing new file %s", new_filename);
278     newsize -= bytes_written;
279     temp_new_buf += bytes_written;
280   }
281 
282   if (!new_file->Close())
283     err(1, "Error closing new file %s", new_filename);
284 
285   free(new_buf);
286 
287   return 0;
288 }
289 
290 }  // namespace bsdiff
291