1 //
2 // Copyright (C) 2015 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "update_engine/payload_generator/delta_diff_utils.h"
18
19 #include <algorithm>
20 #include <map>
21
22 #include <base/files/file_util.h>
23 #include <base/format_macros.h>
24 #include <base/strings/stringprintf.h>
25
26 #include "update_engine/common/hash_calculator.h"
27 #include "update_engine/common/subprocess.h"
28 #include "update_engine/common/utils.h"
29 #include "update_engine/payload_generator/block_mapping.h"
30 #include "update_engine/payload_generator/bzip.h"
31 #include "update_engine/payload_generator/delta_diff_generator.h"
32 #include "update_engine/payload_generator/extent_ranges.h"
33 #include "update_engine/payload_generator/extent_utils.h"
34 #include "update_engine/payload_generator/xz.h"
35
36 using std::map;
37 using std::string;
38 using std::vector;
39
40 namespace chromeos_update_engine {
41 namespace {
42
43 const char* const kBsdiffPath = "bsdiff";
44 const char* const kImgdiffPath = "imgdiff";
45
46 // The maximum destination size allowed for bsdiff. In general, bsdiff should
47 // work for arbitrary big files, but the payload generation and payload
48 // application requires a significant amount of RAM. We put a hard-limit of
49 // 200 MiB that should not affect any released board, but will limit the
50 // Chrome binary in ASan builders.
51 const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024; // bytes
52
53 // The maximum destination size allowed for imgdiff. In general, imgdiff should
54 // work for arbitrary big files, but the payload application is quite memory
55 // intensive, so we limit these operations to 50 MiB.
56 const uint64_t kMaxImgdiffDestinationSize = 50 * 1024 * 1024; // bytes
57
58 // Process a range of blocks from |range_start| to |range_end| in the extent at
59 // position |*idx_p| of |extents|. If |do_remove| is true, this range will be
60 // removed, which may cause the extent to be trimmed, split or removed entirely.
61 // The value of |*idx_p| is updated to point to the next extent to be processed.
62 // Returns true iff the next extent to process is a new or updated one.
ProcessExtentBlockRange(vector<Extent> * extents,size_t * idx_p,const bool do_remove,uint64_t range_start,uint64_t range_end)63 bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
64 const bool do_remove, uint64_t range_start,
65 uint64_t range_end) {
66 size_t idx = *idx_p;
67 uint64_t start_block = (*extents)[idx].start_block();
68 uint64_t num_blocks = (*extents)[idx].num_blocks();
69 uint64_t range_size = range_end - range_start;
70
71 if (do_remove) {
72 if (range_size == num_blocks) {
73 // Remove the entire extent.
74 extents->erase(extents->begin() + idx);
75 } else if (range_end == num_blocks) {
76 // Trim the end of the extent.
77 (*extents)[idx].set_num_blocks(num_blocks - range_size);
78 idx++;
79 } else if (range_start == 0) {
80 // Trim the head of the extent.
81 (*extents)[idx].set_start_block(start_block + range_size);
82 (*extents)[idx].set_num_blocks(num_blocks - range_size);
83 } else {
84 // Trim the middle, splitting the remainder into two parts.
85 (*extents)[idx].set_num_blocks(range_start);
86 Extent e;
87 e.set_start_block(start_block + range_end);
88 e.set_num_blocks(num_blocks - range_end);
89 idx++;
90 extents->insert(extents->begin() + idx, e);
91 }
92 } else if (range_end == num_blocks) {
93 // Done with this extent.
94 idx++;
95 } else {
96 return false;
97 }
98
99 *idx_p = idx;
100 return true;
101 }
102
103 // Remove identical corresponding block ranges in |src_extents| and
104 // |dst_extents|. Used for preventing moving of blocks onto themselves during
105 // MOVE operations. The value of |total_bytes| indicates the actual length of
106 // content; this may be slightly less than the total size of blocks, in which
107 // case the last block is only partly occupied with data. Returns the total
108 // number of bytes removed.
RemoveIdenticalBlockRanges(vector<Extent> * src_extents,vector<Extent> * dst_extents,const size_t total_bytes)109 size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
110 vector<Extent>* dst_extents,
111 const size_t total_bytes) {
112 size_t src_idx = 0;
113 size_t dst_idx = 0;
114 uint64_t src_offset = 0, dst_offset = 0;
115 bool new_src = true, new_dst = true;
116 size_t removed_bytes = 0, nonfull_block_bytes;
117 bool do_remove = false;
118 while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
119 if (new_src) {
120 src_offset = 0;
121 new_src = false;
122 }
123 if (new_dst) {
124 dst_offset = 0;
125 new_dst = false;
126 }
127
128 do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
129 (*dst_extents)[dst_idx].start_block() + dst_offset);
130
131 uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
132 uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
133 uint64_t min_num_blocks = std::min(src_num_blocks - src_offset,
134 dst_num_blocks - dst_offset);
135 uint64_t prev_src_offset = src_offset;
136 uint64_t prev_dst_offset = dst_offset;
137 src_offset += min_num_blocks;
138 dst_offset += min_num_blocks;
139
140 new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
141 prev_src_offset, src_offset);
142 new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
143 prev_dst_offset, dst_offset);
144 if (do_remove)
145 removed_bytes += min_num_blocks * kBlockSize;
146 }
147
148 // If we removed the last block and this block is only partly used by file
149 // content, deduct the unused portion from the total removed byte count.
150 if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
151 removed_bytes -= kBlockSize - nonfull_block_bytes;
152
153 return removed_bytes;
154 }
155
156 // Returns true if the given blob |data| contains gzip header magic.
ContainsGZip(const brillo::Blob & data)157 bool ContainsGZip(const brillo::Blob& data) {
158 const uint8_t kGZipMagic[] = {0x1f, 0x8b, 0x08, 0x00};
159 return std::search(data.begin(),
160 data.end(),
161 std::begin(kGZipMagic),
162 std::end(kGZipMagic)) != data.end();
163 }
164
165 } // namespace
166
167 namespace diff_utils {
168
DeltaReadPartition(vector<AnnotatedOperation> * aops,const PartitionConfig & old_part,const PartitionConfig & new_part,ssize_t hard_chunk_blocks,size_t soft_chunk_blocks,const PayloadVersion & version,BlobFileWriter * blob_file)169 bool DeltaReadPartition(vector<AnnotatedOperation>* aops,
170 const PartitionConfig& old_part,
171 const PartitionConfig& new_part,
172 ssize_t hard_chunk_blocks,
173 size_t soft_chunk_blocks,
174 const PayloadVersion& version,
175 BlobFileWriter* blob_file) {
176 ExtentRanges old_visited_blocks;
177 ExtentRanges new_visited_blocks;
178
179 TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks(
180 aops,
181 old_part.path,
182 new_part.path,
183 old_part.fs_interface ? old_part.fs_interface->GetBlockCount() : 0,
184 new_part.fs_interface->GetBlockCount(),
185 soft_chunk_blocks,
186 version,
187 blob_file,
188 &old_visited_blocks,
189 &new_visited_blocks));
190
191 map<string, vector<Extent>> old_files_map;
192 if (old_part.fs_interface) {
193 vector<FilesystemInterface::File> old_files;
194 old_part.fs_interface->GetFiles(&old_files);
195 for (const FilesystemInterface::File& file : old_files)
196 old_files_map[file.name] = file.extents;
197 }
198
199 TEST_AND_RETURN_FALSE(new_part.fs_interface);
200 vector<FilesystemInterface::File> new_files;
201 new_part.fs_interface->GetFiles(&new_files);
202
203 // The processing is very straightforward here, we generate operations for
204 // every file (and pseudo-file such as the metadata) in the new filesystem
205 // based on the file with the same name in the old filesystem, if any.
206 // Files with overlapping data blocks (like hardlinks or filesystems with tail
207 // packing or compression where the blocks store more than one file) are only
208 // generated once in the new image, but are also used only once from the old
209 // image due to some simplifications (see below).
210 for (const FilesystemInterface::File& new_file : new_files) {
211 // Ignore the files in the new filesystem without blocks. Symlinks with
212 // data blocks (for example, symlinks bigger than 60 bytes in ext2) are
213 // handled as normal files. We also ignore blocks that were already
214 // processed by a previous file.
215 vector<Extent> new_file_extents = FilterExtentRanges(
216 new_file.extents, new_visited_blocks);
217 new_visited_blocks.AddExtents(new_file_extents);
218
219 if (new_file_extents.empty())
220 continue;
221
222 LOG(INFO) << "Encoding file " << new_file.name << " ("
223 << BlocksInExtents(new_file_extents) << " blocks)";
224
225 // We can't visit each dst image inode more than once, as that would
226 // duplicate work. Here, we avoid visiting each source image inode
227 // more than once. Technically, we could have multiple operations
228 // that read the same blocks from the source image for diffing, but
229 // we choose not to avoid complexity. Eventually we will move away
230 // from using a graph/cycle detection/etc to generate diffs, and at that
231 // time, it will be easy (non-complex) to have many operations read
232 // from the same source blocks. At that time, this code can die. -adlr
233 vector<Extent> old_file_extents = FilterExtentRanges(
234 old_files_map[new_file.name], old_visited_blocks);
235 old_visited_blocks.AddExtents(old_file_extents);
236
237 TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
238 old_part.path,
239 new_part.path,
240 old_file_extents,
241 new_file_extents,
242 new_file.name, // operation name
243 hard_chunk_blocks,
244 version,
245 blob_file));
246 }
247 // Process all the blocks not included in any file. We provided all the unused
248 // blocks in the old partition as available data.
249 vector<Extent> new_unvisited = {
250 ExtentForRange(0, new_part.size / kBlockSize)};
251 new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks);
252 if (new_unvisited.empty())
253 return true;
254
255 vector<Extent> old_unvisited;
256 if (old_part.fs_interface) {
257 old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize));
258 old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks);
259 }
260
261 LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited)
262 << " unwritten blocks using chunk size of "
263 << soft_chunk_blocks << " blocks.";
264 // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
265 // really know the structure of this data and we should not expect it to have
266 // redundancy between partitions.
267 TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
268 old_part.path,
269 new_part.path,
270 old_unvisited,
271 new_unvisited,
272 "<non-file-data>", // operation name
273 soft_chunk_blocks,
274 version,
275 blob_file));
276
277 return true;
278 }
279
DeltaMovedAndZeroBlocks(vector<AnnotatedOperation> * aops,const string & old_part,const string & new_part,size_t old_num_blocks,size_t new_num_blocks,ssize_t chunk_blocks,const PayloadVersion & version,BlobFileWriter * blob_file,ExtentRanges * old_visited_blocks,ExtentRanges * new_visited_blocks)280 bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops,
281 const string& old_part,
282 const string& new_part,
283 size_t old_num_blocks,
284 size_t new_num_blocks,
285 ssize_t chunk_blocks,
286 const PayloadVersion& version,
287 BlobFileWriter* blob_file,
288 ExtentRanges* old_visited_blocks,
289 ExtentRanges* new_visited_blocks) {
290 vector<BlockMapping::BlockId> old_block_ids;
291 vector<BlockMapping::BlockId> new_block_ids;
292 TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part,
293 new_part,
294 old_num_blocks * kBlockSize,
295 new_num_blocks * kBlockSize,
296 kBlockSize,
297 &old_block_ids,
298 &new_block_ids));
299
300 // If the update is inplace, we map all the blocks that didn't move,
301 // regardless of the contents since they are already copied and no operation
302 // is required.
303 if (version.InplaceUpdate()) {
304 uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks);
305 for (uint64_t block = 0; block < num_blocks; block++) {
306 if (old_block_ids[block] == new_block_ids[block] &&
307 !old_visited_blocks->ContainsBlock(block) &&
308 !new_visited_blocks->ContainsBlock(block)) {
309 old_visited_blocks->AddBlock(block);
310 new_visited_blocks->AddBlock(block);
311 }
312 }
313 }
314
315 // A mapping from the block_id to the list of block numbers with that block id
316 // in the old partition. This is used to lookup where in the old partition
317 // is a block from the new partition.
318 map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map;
319
320 for (uint64_t block = old_num_blocks; block-- > 0; ) {
321 if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block))
322 old_blocks_map[old_block_ids[block]].push_back(block);
323 }
324
325 // The collection of blocks in the new partition with just zeros. This is a
326 // common case for free-space that's also problematic for bsdiff, so we want
327 // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of
328 // just zeros is so small that it doesn't make sense to spend the I/O reading
329 // zeros from the old partition.
330 vector<Extent> new_zeros;
331
332 vector<Extent> old_identical_blocks;
333 vector<Extent> new_identical_blocks;
334
335 for (uint64_t block = 0; block < new_num_blocks; block++) {
336 // Only produce operations for blocks that were not yet visited.
337 if (new_visited_blocks->ContainsBlock(block))
338 continue;
339 if (new_block_ids[block] == 0) {
340 AppendBlockToExtents(&new_zeros, block);
341 continue;
342 }
343
344 auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]);
345 // Check if the block exists in the old partition at all.
346 if (old_blocks_map_it == old_blocks_map.end() ||
347 old_blocks_map_it->second.empty())
348 continue;
349
350 AppendBlockToExtents(&old_identical_blocks,
351 old_blocks_map_it->second.back());
352 AppendBlockToExtents(&new_identical_blocks, block);
353 // We can't reuse source blocks in minor version 1 because the cycle
354 // breaking algorithm used in the in-place update doesn't support that.
355 if (version.InplaceUpdate())
356 old_blocks_map_it->second.pop_back();
357 }
358
359 // Produce operations for the zero blocks split per output extent.
360 // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile().
361 size_t num_ops = aops->size();
362 new_visited_blocks->AddExtents(new_zeros);
363 for (Extent extent : new_zeros) {
364 TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
365 "",
366 new_part,
367 vector<Extent>(), // old_extents
368 vector<Extent>{extent}, // new_extents
369 "<zeros>",
370 chunk_blocks,
371 version,
372 blob_file));
373 }
374 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
375 << BlocksInExtents(new_zeros) << " zeroed blocks";
376
377 // Produce MOVE/SOURCE_COPY operations for the moved blocks.
378 num_ops = aops->size();
379 if (chunk_blocks == -1)
380 chunk_blocks = new_num_blocks;
381 uint64_t used_blocks = 0;
382 old_visited_blocks->AddExtents(old_identical_blocks);
383 new_visited_blocks->AddExtents(new_identical_blocks);
384 for (Extent extent : new_identical_blocks) {
385 // We split the operation at the extent boundary or when bigger than
386 // chunk_blocks.
387 for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks();
388 op_block_offset += chunk_blocks) {
389 aops->emplace_back();
390 AnnotatedOperation* aop = &aops->back();
391 aop->name = "<identical-blocks>";
392 aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
393 ? InstallOperation::SOURCE_COPY
394 : InstallOperation::MOVE);
395
396 uint64_t chunk_num_blocks =
397 std::min(extent.num_blocks() - op_block_offset,
398 static_cast<uint64_t>(chunk_blocks));
399
400 // The current operation represents the move/copy operation for the
401 // sublist starting at |used_blocks| of length |chunk_num_blocks| where
402 // the src and dst are from |old_identical_blocks| and
403 // |new_identical_blocks| respectively.
404 StoreExtents(
405 ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks),
406 aop->op.mutable_src_extents());
407
408 Extent* op_dst_extent = aop->op.add_dst_extents();
409 op_dst_extent->set_start_block(extent.start_block() + op_block_offset);
410 op_dst_extent->set_num_blocks(chunk_num_blocks);
411 CHECK(
412 vector<Extent>{*op_dst_extent} == // NOLINT(whitespace/braces)
413 ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks));
414
415 used_blocks += chunk_num_blocks;
416 }
417 }
418 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
419 << used_blocks << " identical blocks moved";
420
421 return true;
422 }
423
DeltaReadFile(vector<AnnotatedOperation> * aops,const string & old_part,const string & new_part,const vector<Extent> & old_extents,const vector<Extent> & new_extents,const string & name,ssize_t chunk_blocks,const PayloadVersion & version,BlobFileWriter * blob_file)424 bool DeltaReadFile(vector<AnnotatedOperation>* aops,
425 const string& old_part,
426 const string& new_part,
427 const vector<Extent>& old_extents,
428 const vector<Extent>& new_extents,
429 const string& name,
430 ssize_t chunk_blocks,
431 const PayloadVersion& version,
432 BlobFileWriter* blob_file) {
433 brillo::Blob data;
434 InstallOperation operation;
435
436 uint64_t total_blocks = BlocksInExtents(new_extents);
437 if (chunk_blocks == -1)
438 chunk_blocks = total_blocks;
439
440 for (uint64_t block_offset = 0; block_offset < total_blocks;
441 block_offset += chunk_blocks) {
442 // Split the old/new file in the same chunks. Note that this could drop
443 // some information from the old file used for the new chunk. If the old
444 // file is smaller (or even empty when there's no old file) the chunk will
445 // also be empty.
446 vector<Extent> old_extents_chunk = ExtentsSublist(
447 old_extents, block_offset, chunk_blocks);
448 vector<Extent> new_extents_chunk = ExtentsSublist(
449 new_extents, block_offset, chunk_blocks);
450 NormalizeExtents(&old_extents_chunk);
451 NormalizeExtents(&new_extents_chunk);
452
453 TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
454 new_part,
455 old_extents_chunk,
456 new_extents_chunk,
457 version,
458 &data,
459 &operation));
460
461 // Check if the operation writes nothing.
462 if (operation.dst_extents_size() == 0) {
463 if (operation.type() == InstallOperation::MOVE) {
464 LOG(INFO) << "Empty MOVE operation ("
465 << name << "), skipping";
466 continue;
467 } else {
468 LOG(ERROR) << "Empty non-MOVE operation";
469 return false;
470 }
471 }
472
473 // Now, insert into the list of operations.
474 AnnotatedOperation aop;
475 aop.name = name;
476 if (static_cast<uint64_t>(chunk_blocks) < total_blocks) {
477 aop.name = base::StringPrintf("%s:%" PRIu64,
478 name.c_str(), block_offset / chunk_blocks);
479 }
480 aop.op = operation;
481
482 // Write the data
483 TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file));
484 aops->emplace_back(aop);
485 }
486 return true;
487 }
488
GenerateBestFullOperation(const brillo::Blob & new_data,const PayloadVersion & version,brillo::Blob * out_blob,InstallOperation_Type * out_type)489 bool GenerateBestFullOperation(const brillo::Blob& new_data,
490 const PayloadVersion& version,
491 brillo::Blob* out_blob,
492 InstallOperation_Type* out_type) {
493 if (new_data.empty())
494 return false;
495
496 if (version.OperationAllowed(InstallOperation::ZERO) &&
497 std::all_of(
498 new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) {
499 // The read buffer is all zeros, so produce a ZERO operation. No need to
500 // check other types of operations in this case.
501 *out_blob = brillo::Blob();
502 *out_type = InstallOperation::ZERO;
503 return true;
504 }
505
506 bool out_blob_set = false;
507
508 // Try compressing |new_data| with xz first.
509 if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) {
510 brillo::Blob new_data_xz;
511 if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) {
512 *out_type = InstallOperation::REPLACE_XZ;
513 *out_blob = std::move(new_data_xz);
514 out_blob_set = true;
515 }
516 }
517
518 // Try compressing it with bzip2.
519 if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) {
520 brillo::Blob new_data_bz;
521 // TODO(deymo): Implement some heuristic to determine if it is worth trying
522 // to compress the blob with bzip2 if we already have a good REPLACE_XZ.
523 if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() &&
524 (!out_blob_set || out_blob->size() > new_data_bz.size())) {
525 // A REPLACE_BZ is better or nothing else was set.
526 *out_type = InstallOperation::REPLACE_BZ;
527 *out_blob = std::move(new_data_bz);
528 out_blob_set = true;
529 }
530 }
531
532 // If nothing else worked or it was badly compressed we try a REPLACE.
533 if (!out_blob_set || out_blob->size() >= new_data.size()) {
534 *out_type = InstallOperation::REPLACE;
535 // This needs to make a copy of the data in the case bzip or xz didn't
536 // compress well, which is not the common case so the performance hit is
537 // low.
538 *out_blob = new_data;
539 }
540 return true;
541 }
542
ReadExtentsToDiff(const string & old_part,const string & new_part,const vector<Extent> & old_extents,const vector<Extent> & new_extents,const PayloadVersion & version,brillo::Blob * out_data,InstallOperation * out_op)543 bool ReadExtentsToDiff(const string& old_part,
544 const string& new_part,
545 const vector<Extent>& old_extents,
546 const vector<Extent>& new_extents,
547 const PayloadVersion& version,
548 brillo::Blob* out_data,
549 InstallOperation* out_op) {
550 InstallOperation operation;
551
552 // We read blocks from old_extents and write blocks to new_extents.
553 uint64_t blocks_to_read = BlocksInExtents(old_extents);
554 uint64_t blocks_to_write = BlocksInExtents(new_extents);
555
556 // Disable bsdiff and imgdiff when the data is too big.
557 bool bsdiff_allowed =
558 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) ||
559 version.OperationAllowed(InstallOperation::BSDIFF);
560 if (bsdiff_allowed &&
561 blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) {
562 LOG(INFO) << "bsdiff blacklisted, data too big: "
563 << blocks_to_read * kBlockSize << " bytes";
564 bsdiff_allowed = false;
565 }
566
567 bool imgdiff_allowed = version.OperationAllowed(InstallOperation::IMGDIFF);
568 if (imgdiff_allowed &&
569 blocks_to_read * kBlockSize > kMaxImgdiffDestinationSize) {
570 LOG(INFO) << "imgdiff blacklisted, data too big: "
571 << blocks_to_read * kBlockSize << " bytes";
572 imgdiff_allowed = false;
573 }
574
575 // Make copies of the extents so we can modify them.
576 vector<Extent> src_extents = old_extents;
577 vector<Extent> dst_extents = new_extents;
578
579 // Read in bytes from new data.
580 brillo::Blob new_data;
581 TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
582 new_extents,
583 &new_data,
584 kBlockSize * blocks_to_write,
585 kBlockSize));
586 TEST_AND_RETURN_FALSE(!new_data.empty());
587
588 // Data blob that will be written to delta file.
589 brillo::Blob data_blob;
590
591 // Try generating a full operation for the given new data, regardless of the
592 // old_data.
593 InstallOperation_Type op_type;
594 TEST_AND_RETURN_FALSE(
595 GenerateBestFullOperation(new_data, version, &data_blob, &op_type));
596 operation.set_type(op_type);
597
598 brillo::Blob old_data;
599 if (blocks_to_read > 0) {
600 // Read old data.
601 TEST_AND_RETURN_FALSE(
602 utils::ReadExtents(old_part, src_extents, &old_data,
603 kBlockSize * blocks_to_read, kBlockSize));
604 if (old_data == new_data) {
605 // No change in data.
606 operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
607 ? InstallOperation::SOURCE_COPY
608 : InstallOperation::MOVE);
609 data_blob = brillo::Blob();
610 } else if (bsdiff_allowed || imgdiff_allowed) {
611 // If the source file is considered bsdiff safe (no bsdiff bugs
612 // triggered), see if BSDIFF encoding is smaller.
613 base::FilePath old_chunk;
614 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
615 ScopedPathUnlinker old_unlinker(old_chunk.value());
616 TEST_AND_RETURN_FALSE(utils::WriteFile(
617 old_chunk.value().c_str(), old_data.data(), old_data.size()));
618 base::FilePath new_chunk;
619 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
620 ScopedPathUnlinker new_unlinker(new_chunk.value());
621 TEST_AND_RETURN_FALSE(utils::WriteFile(
622 new_chunk.value().c_str(), new_data.data(), new_data.size()));
623
624 if (bsdiff_allowed) {
625 brillo::Blob bsdiff_delta;
626 TEST_AND_RETURN_FALSE(DiffFiles(
627 kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta));
628 CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
629 if (bsdiff_delta.size() < data_blob.size()) {
630 operation.set_type(
631 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF)
632 ? InstallOperation::SOURCE_BSDIFF
633 : InstallOperation::BSDIFF);
634 data_blob = std::move(bsdiff_delta);
635 }
636 }
637 if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) {
638 brillo::Blob imgdiff_delta;
639 // Imgdiff might fail in some cases, only use the result if it succeed,
640 // otherwise print the extents to analyze.
641 if (DiffFiles(kImgdiffPath,
642 old_chunk.value(),
643 new_chunk.value(),
644 &imgdiff_delta) &&
645 imgdiff_delta.size() > 0) {
646 if (imgdiff_delta.size() < data_blob.size()) {
647 operation.set_type(InstallOperation::IMGDIFF);
648 data_blob = std::move(imgdiff_delta);
649 }
650 } else {
651 LOG(ERROR) << "Imgdiff failed with source extents: "
652 << ExtentsToString(src_extents)
653 << ", destination extents: "
654 << ExtentsToString(dst_extents);
655 }
656 }
657 }
658 }
659
660 size_t removed_bytes = 0;
661 // Remove identical src/dst block ranges in MOVE operations.
662 if (operation.type() == InstallOperation::MOVE) {
663 removed_bytes = RemoveIdenticalBlockRanges(
664 &src_extents, &dst_extents, new_data.size());
665 }
666 // Set legacy src_length and dst_length fields.
667 operation.set_src_length(old_data.size() - removed_bytes);
668 operation.set_dst_length(new_data.size() - removed_bytes);
669
670 // Embed extents in the operation.
671 StoreExtents(src_extents, operation.mutable_src_extents());
672 StoreExtents(dst_extents, operation.mutable_dst_extents());
673
674 // Replace operations should not have source extents.
675 if (IsAReplaceOperation(operation.type())) {
676 operation.clear_src_extents();
677 operation.clear_src_length();
678 }
679
680 *out_data = std::move(data_blob);
681 *out_op = operation;
682
683 return true;
684 }
685
686 // Runs the bsdiff or imgdiff tool in |diff_path| on two files and returns the
687 // resulting delta in |out|. Returns true on success.
DiffFiles(const string & diff_path,const string & old_file,const string & new_file,brillo::Blob * out)688 bool DiffFiles(const string& diff_path,
689 const string& old_file,
690 const string& new_file,
691 brillo::Blob* out) {
692 const string kPatchFile = "delta.patchXXXXXX";
693 string patch_file_path;
694
695 TEST_AND_RETURN_FALSE(
696 utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));
697
698 vector<string> cmd;
699 cmd.push_back(diff_path);
700 cmd.push_back(old_file);
701 cmd.push_back(new_file);
702 cmd.push_back(patch_file_path);
703
704 int rc = 1;
705 brillo::Blob patch_file;
706 string stdout;
707 TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, &stdout));
708 if (rc != 0) {
709 LOG(ERROR) << diff_path << " returned " << rc << std::endl << stdout;
710 return false;
711 }
712 TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
713 unlink(patch_file_path.c_str());
714 return true;
715 }
716
IsAReplaceOperation(InstallOperation_Type op_type)717 bool IsAReplaceOperation(InstallOperation_Type op_type) {
718 return (op_type == InstallOperation::REPLACE ||
719 op_type == InstallOperation::REPLACE_BZ ||
720 op_type == InstallOperation::REPLACE_XZ);
721 }
722
723 // Returns true if |op| is a no-op operation that doesn't do any useful work
724 // (e.g., a move operation that copies blocks onto themselves).
IsNoopOperation(const InstallOperation & op)725 bool IsNoopOperation(const InstallOperation& op) {
726 return (op.type() == InstallOperation::MOVE &&
727 ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
728 }
729
FilterNoopOperations(vector<AnnotatedOperation> * ops)730 void FilterNoopOperations(vector<AnnotatedOperation>* ops) {
731 ops->erase(
732 std::remove_if(
733 ops->begin(), ops->end(),
734 [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
735 ops->end());
736 }
737
InitializePartitionInfo(const PartitionConfig & part,PartitionInfo * info)738 bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) {
739 info->set_size(part.size);
740 HashCalculator hasher;
741 TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) ==
742 static_cast<off_t>(part.size));
743 TEST_AND_RETURN_FALSE(hasher.Finalize());
744 const brillo::Blob& hash = hasher.raw_hash();
745 info->set_hash(hash.data(), hash.size());
746 LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash();
747 return true;
748 }
749
CompareAopsByDestination(AnnotatedOperation first_aop,AnnotatedOperation second_aop)750 bool CompareAopsByDestination(AnnotatedOperation first_aop,
751 AnnotatedOperation second_aop) {
752 // We want empty operations to be at the end of the payload.
753 if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
754 return ((!first_aop.op.dst_extents().size()) <
755 (!second_aop.op.dst_extents().size()));
756 uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
757 uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
758 return first_dst_start < second_dst_start;
759 }
760
761 } // namespace diff_utils
762
763 } // namespace chromeos_update_engine
764