1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "compact_dex_writer.h"
18 
19 #include "android-base/stringprintf.h"
20 #include "base/logging.h"
21 #include "base/time_utils.h"
22 #include "dex/compact_dex_file.h"
23 #include "dex/compact_offset_table.h"
24 #include "dexlayout.h"
25 
26 namespace art {
27 
CompactDexWriter(DexLayout * dex_layout)28 CompactDexWriter::CompactDexWriter(DexLayout* dex_layout)
29     : DexWriter(dex_layout, /*compute_offsets=*/ true) {
30   CHECK(GetCompactDexLevel() != CompactDexLevel::kCompactDexLevelNone);
31 }
32 
GetCompactDexLevel() const33 CompactDexLevel CompactDexWriter::GetCompactDexLevel() const {
34   return dex_layout_->GetOptions().compact_dex_level_;
35 }
36 
Container()37 CompactDexWriter::Container::Container()
38     : data_item_dedupe_(&data_section_) {}
39 
WriteDebugInfoOffsetTable(Stream * stream)40 uint32_t CompactDexWriter::WriteDebugInfoOffsetTable(Stream* stream) {
41   const uint32_t start_offset = stream->Tell();
42   // Debug offsets for method indexes. 0 means no debug info.
43   std::vector<uint32_t> debug_info_offsets(header_->MethodIds().Size(), 0u);
44 
45   static constexpr InvokeType invoke_types[] = {
46     kDirect,
47     kVirtual
48   };
49 
50   for (InvokeType invoke_type : invoke_types) {
51     for (auto& class_def : header_->ClassDefs()) {
52       // Skip classes that are not defined in this dex file.
53       dex_ir::ClassData* class_data = class_def->GetClassData();
54       if (class_data == nullptr) {
55         continue;
56       }
57       for (auto& method : *(invoke_type == InvokeType::kDirect
58                                 ? class_data->DirectMethods()
59                                 : class_data->VirtualMethods())) {
60         const dex_ir::MethodId* method_id = method.GetMethodId();
61         dex_ir::CodeItem* code_item = method.GetCodeItem();
62         if (code_item != nullptr && code_item->DebugInfo() != nullptr) {
63           const uint32_t debug_info_offset = code_item->DebugInfo()->GetOffset();
64           const uint32_t method_idx = method_id->GetIndex();
65           if (debug_info_offsets[method_idx] != 0u) {
66             CHECK_EQ(debug_info_offset, debug_info_offsets[method_idx]);
67           }
68           debug_info_offsets[method_idx] = debug_info_offset;
69         }
70       }
71     }
72   }
73 
74   std::vector<uint8_t> data;
75   debug_info_base_ = 0u;
76   debug_info_offsets_table_offset_ = 0u;
77   CompactOffsetTable::Build(debug_info_offsets,
78                             &data,
79                             &debug_info_base_,
80                             &debug_info_offsets_table_offset_);
81   // Align the table and write it out.
82   stream->AlignTo(CompactOffsetTable::kAlignment);
83   debug_info_offsets_pos_ = stream->Tell();
84   stream->Write(data.data(), data.size());
85 
86   // Verify that the whole table decodes as expected and measure average performance.
87   const bool kMeasureAndTestOutput = dex_layout_->GetOptions().verify_output_;
88   if (kMeasureAndTestOutput && !debug_info_offsets.empty()) {
89     uint64_t start_time = NanoTime();
90     stream->Begin();
91     CompactOffsetTable::Accessor accessor(stream->Begin() + debug_info_offsets_pos_,
92                                           debug_info_base_,
93                                           debug_info_offsets_table_offset_);
94 
95     for (size_t i = 0; i < debug_info_offsets.size(); ++i) {
96       CHECK_EQ(accessor.GetOffset(i), debug_info_offsets[i]);
97     }
98     uint64_t end_time = NanoTime();
99     VLOG(dex) << "Average lookup time (ns) for debug info offsets: "
100               << (end_time - start_time) / debug_info_offsets.size();
101   }
102 
103   return stream->Tell() - start_offset;
104 }
105 
ScopedDataSectionItem(Stream * stream,dex_ir::Item * item,size_t alignment,Deduper * deduper)106 CompactDexWriter::ScopedDataSectionItem::ScopedDataSectionItem(Stream* stream,
107                                                                dex_ir::Item* item,
108                                                                size_t alignment,
109                                                                Deduper* deduper)
110     : stream_(stream),
111       item_(item),
112       alignment_(alignment),
113       deduper_(deduper),
114       start_offset_(stream->Tell()) {
115   stream_->AlignTo(alignment_);
116 }
117 
~ScopedDataSectionItem()118 CompactDexWriter::ScopedDataSectionItem::~ScopedDataSectionItem() {
119   if (deduper_ == nullptr) {
120     return;
121   }
122   // After having written, maybe dedupe the whole section (excluding padding).
123   const uint32_t deduped_offset = deduper_->Dedupe(start_offset_,
124                                                    stream_->Tell(),
125                                                    item_->GetOffset());
126   // If we deduped, only use the deduped offset if the alignment matches the required alignment.
127   // Otherwise, return without deduping.
128   if (deduped_offset != Deduper::kDidNotDedupe && IsAlignedParam(deduped_offset, alignment_)) {
129     // Update the IR offset to the offset of the deduped item.
130     item_->SetOffset(deduped_offset);
131     // Clear the written data for the item so that the stream write doesn't abort in the future.
132     stream_->Clear(start_offset_, stream_->Tell() - start_offset_);
133     // Since we deduped, restore the offset to the original position.
134     stream_->Seek(start_offset_);
135   }
136 }
137 
Written() const138 size_t CompactDexWriter::ScopedDataSectionItem::Written() const {
139   return stream_->Tell() - start_offset_;
140 }
141 
WriteCodeItem(Stream * stream,dex_ir::CodeItem * code_item,bool reserve_only)142 void CompactDexWriter::WriteCodeItem(Stream* stream,
143                                      dex_ir::CodeItem* code_item,
144                                      bool reserve_only) {
145   DCHECK(code_item != nullptr);
146   DCHECK(!reserve_only) << "Not supported because of deduping.";
147   ScopedDataSectionItem data_item(stream,
148                                   code_item,
149                                   CompactDexFile::CodeItem::kAlignment,
150                                   /* deduper= */ nullptr);
151 
152   CompactDexFile::CodeItem disk_code_item;
153 
154   uint16_t preheader_storage[CompactDexFile::CodeItem::kMaxPreHeaderSize] = {};
155   uint16_t* preheader_end = preheader_storage + CompactDexFile::CodeItem::kMaxPreHeaderSize;
156   const uint16_t* preheader = disk_code_item.Create(
157       code_item->RegistersSize(),
158       code_item->InsSize(),
159       code_item->OutsSize(),
160       code_item->TriesSize(),
161       code_item->InsnsSize(),
162       preheader_end);
163   const size_t preheader_bytes = (preheader_end - preheader) * sizeof(preheader[0]);
164 
165   static constexpr size_t kPayloadInstructionRequiredAlignment = 4;
166   const uint32_t current_code_item_start = stream->Tell() + preheader_bytes;
167   if (!IsAlignedParam(current_code_item_start, kPayloadInstructionRequiredAlignment) ||
168       kIsDebugBuild) {
169     // If the preheader is going to make the code unaligned, consider adding 2 bytes of padding
170     // before if required.
171     IterationRange<DexInstructionIterator> instructions = code_item->Instructions();
172     SafeDexInstructionIterator it(instructions.begin(), instructions.end());
173     for (; !it.IsErrorState() && it < instructions.end(); ++it) {
174       // In case the instruction goes past the end of the code item, make sure to not process it.
175       if (std::next(it).IsErrorState()) {
176         break;
177       }
178       const Instruction::Code opcode = it->Opcode();
179       // Payload instructions possibly require special alignment for their data.
180       if (opcode == Instruction::FILL_ARRAY_DATA ||
181           opcode == Instruction::PACKED_SWITCH ||
182           opcode == Instruction::SPARSE_SWITCH) {
183         stream->Skip(
184             RoundUp(current_code_item_start, kPayloadInstructionRequiredAlignment) -
185                 current_code_item_start);
186         break;
187       }
188     }
189   }
190 
191   // Write preheader first.
192   stream->Write(reinterpret_cast<const uint8_t*>(preheader), preheader_bytes);
193   // Registered offset is after the preheader.
194   ProcessOffset(stream, code_item);
195   // Avoid using sizeof so that we don't write the fake instruction array at the end of the code
196   // item.
197   stream->Write(&disk_code_item, OFFSETOF_MEMBER(CompactDexFile::CodeItem, insns_));
198   // Write the instructions.
199   stream->Write(code_item->Insns(), code_item->InsnsSize() * sizeof(uint16_t));
200   // Write the post instruction data.
201   WriteCodeItemPostInstructionData(stream, code_item, reserve_only);
202 }
203 
WriteDebugInfoItem(Stream * stream,dex_ir::DebugInfoItem * debug_info)204 void CompactDexWriter::WriteDebugInfoItem(Stream* stream, dex_ir::DebugInfoItem* debug_info) {
205   ScopedDataSectionItem data_item(stream,
206                                   debug_info,
207                                   SectionAlignment(DexFile::kDexTypeDebugInfoItem),
208                                   data_item_dedupe_);
209   ProcessOffset(stream, debug_info);
210   stream->Write(debug_info->GetDebugInfo(), debug_info->GetDebugInfoSize());
211 }
212 
213 
Deduper(DexContainer::Section * section)214 CompactDexWriter::Deduper::Deduper(DexContainer::Section* section)
215     : dedupe_map_(/*__n=*/ 32,
216                   HashedMemoryRange::HashEqual(section),
217                   HashedMemoryRange::HashEqual(section)) {}
218 
Dedupe(uint32_t data_start,uint32_t data_end,uint32_t item_offset)219 uint32_t CompactDexWriter::Deduper::Dedupe(uint32_t data_start,
220                                            uint32_t data_end,
221                                            uint32_t item_offset) {
222   HashedMemoryRange range {data_start, data_end - data_start};
223   auto existing = dedupe_map_.emplace(range, item_offset);
224   if (!existing.second) {
225     // Failed to insert means we deduped, return the existing item offset.
226     return existing.first->second;
227   }
228   return kDidNotDedupe;
229 }
230 
SortDebugInfosByMethodIndex()231 void CompactDexWriter::SortDebugInfosByMethodIndex() {
232   static constexpr InvokeType invoke_types[] = {
233     kDirect,
234     kVirtual
235   };
236   std::map<const dex_ir::DebugInfoItem*, uint32_t> method_idx_map;
237   for (InvokeType invoke_type : invoke_types) {
238     for (auto& class_def : header_->ClassDefs()) {
239       // Skip classes that are not defined in this dex file.
240       dex_ir::ClassData* class_data = class_def->GetClassData();
241       if (class_data == nullptr) {
242         continue;
243       }
244       for (auto& method : *(invoke_type == InvokeType::kDirect
245                                 ? class_data->DirectMethods()
246                                 : class_data->VirtualMethods())) {
247         const dex_ir::MethodId* method_id = method.GetMethodId();
248         dex_ir::CodeItem* code_item = method.GetCodeItem();
249         if (code_item != nullptr && code_item->DebugInfo() != nullptr) {
250           const dex_ir::DebugInfoItem* debug_item = code_item->DebugInfo();
251           method_idx_map.insert(std::make_pair(debug_item, method_id->GetIndex()));
252         }
253       }
254     }
255   }
256   std::sort(header_->DebugInfoItems().begin(),
257             header_->DebugInfoItems().end(),
258             [&](const std::unique_ptr<dex_ir::DebugInfoItem>& a,
259                 const std::unique_ptr<dex_ir::DebugInfoItem>& b) {
260     auto it_a = method_idx_map.find(a.get());
261     auto it_b = method_idx_map.find(b.get());
262     uint32_t idx_a = it_a != method_idx_map.end() ? it_a->second : 0u;
263     uint32_t idx_b = it_b != method_idx_map.end() ? it_b->second : 0u;
264     return idx_a < idx_b;
265   });
266 }
267 
WriteHeader(Stream * stream)268 void CompactDexWriter::WriteHeader(Stream* stream) {
269   CompactDexFile::Header header;
270   CompactDexFile::WriteMagic(&header.magic_[0]);
271   CompactDexFile::WriteCurrentVersion(&header.magic_[0]);
272   header.checksum_ = header_->Checksum();
273   std::copy_n(header_->Signature(), DexFile::kSha1DigestSize, header.signature_);
274   header.file_size_ = header_->FileSize();
275   // Since we are not necessarily outputting the same format as the input, avoid using the stored
276   // header size.
277   header.header_size_ = GetHeaderSize();
278   header.endian_tag_ = header_->EndianTag();
279   header.link_size_ = header_->LinkSize();
280   header.link_off_ = header_->LinkOffset();
281   header.map_off_ = header_->MapListOffset();
282   header.string_ids_size_ = header_->StringIds().Size();
283   header.string_ids_off_ = header_->StringIds().GetOffset();
284   header.type_ids_size_ = header_->TypeIds().Size();
285   header.type_ids_off_ = header_->TypeIds().GetOffset();
286   header.proto_ids_size_ = header_->ProtoIds().Size();
287   header.proto_ids_off_ = header_->ProtoIds().GetOffset();
288   header.field_ids_size_ = header_->FieldIds().Size();
289   header.field_ids_off_ = header_->FieldIds().GetOffset();
290   header.method_ids_size_ = header_->MethodIds().Size();
291   header.method_ids_off_ = header_->MethodIds().GetOffset();
292   header.class_defs_size_ = header_->ClassDefs().Size();
293   header.class_defs_off_ = header_->ClassDefs().GetOffset();
294   header.data_size_ = header_->DataSize();
295   header.data_off_ = header_->DataOffset();
296   header.owned_data_begin_ = owned_data_begin_;
297   header.owned_data_end_ = owned_data_end_;
298 
299   // Compact dex specific flags.
300   header.debug_info_offsets_pos_ = debug_info_offsets_pos_;
301   header.debug_info_offsets_table_offset_ = debug_info_offsets_table_offset_;
302   header.debug_info_base_ = debug_info_base_;
303   header.feature_flags_ = 0u;
304   // In cases where apps are converted to cdex during install, maintain feature flags so that
305   // the verifier correctly verifies apps that aren't targetting default methods.
306   if (header_->SupportDefaultMethods()) {
307     header.feature_flags_ |= static_cast<uint32_t>(CompactDexFile::FeatureFlags::kDefaultMethods);
308   }
309   stream->Seek(0);
310   stream->Overwrite(reinterpret_cast<uint8_t*>(&header), sizeof(header));
311 }
312 
GetHeaderSize() const313 size_t CompactDexWriter::GetHeaderSize() const {
314   return sizeof(CompactDexFile::Header);
315 }
316 
WriteStringData(Stream * stream,dex_ir::StringData * string_data)317 void CompactDexWriter::WriteStringData(Stream* stream, dex_ir::StringData* string_data) {
318   ScopedDataSectionItem data_item(stream,
319                                   string_data,
320                                   SectionAlignment(DexFile::kDexTypeStringDataItem),
321                                   data_item_dedupe_);
322   ProcessOffset(stream, string_data);
323   stream->WriteUleb128(CountModifiedUtf8Chars(string_data->Data()));
324   stream->Write(string_data->Data(), strlen(string_data->Data()));
325   // Skip null terminator (already zeroed out, no need to write).
326   stream->Skip(1);
327 }
328 
CanGenerateCompactDex(std::string * error_msg)329 bool CompactDexWriter::CanGenerateCompactDex(std::string* error_msg) {
330   static constexpr InvokeType invoke_types[] = {
331     kDirect,
332     kVirtual
333   };
334   std::vector<bool> saw_method_id(header_->MethodIds().Size(), false);
335   std::vector<dex_ir::CodeItem*> method_id_code_item(header_->MethodIds().Size(), nullptr);
336   std::vector<dex_ir::DebugInfoItem*> method_id_debug_info(header_->MethodIds().Size(), nullptr);
337   for (InvokeType invoke_type : invoke_types) {
338     for (auto& class_def : header_->ClassDefs()) {
339       // Skip classes that are not defined in this dex file.
340       dex_ir::ClassData* class_data = class_def->GetClassData();
341       if (class_data == nullptr) {
342         continue;
343       }
344       for (auto& method : *(invoke_type == InvokeType::kDirect
345                                 ? class_data->DirectMethods()
346                                 : class_data->VirtualMethods())) {
347         const uint32_t idx = method.GetMethodId()->GetIndex();
348         dex_ir::CodeItem* code_item = method.GetCodeItem();
349         dex_ir:: DebugInfoItem* debug_info_item = nullptr;
350         if (code_item != nullptr) {
351           debug_info_item = code_item->DebugInfo();
352         }
353         if (saw_method_id[idx]) {
354           if (method_id_code_item[idx] != code_item) {
355             *error_msg = android::base::StringPrintf("Conflicting code item for method id %u",
356                                                      idx);
357             // Conflicting info, abort generation.
358             return false;
359           }
360           if (method_id_debug_info[idx] != debug_info_item) {
361             *error_msg = android::base::StringPrintf("Conflicting debug info for method id %u",
362                                                      idx);
363             // Conflicting info, abort generation.
364             return false;
365           }
366         }
367         method_id_code_item[idx] = code_item;
368         method_id_debug_info[idx] = debug_info_item;
369         saw_method_id[idx] = true;
370       }
371     }
372   }
373   return true;
374 }
375 
Write(DexContainer * output,std::string * error_msg)376 bool CompactDexWriter::Write(DexContainer* output, std::string* error_msg)  {
377   DCHECK(error_msg != nullptr);
378   CHECK(compute_offsets_);
379   CHECK(output->IsCompactDexContainer());
380 
381   if (!CanGenerateCompactDex(error_msg)) {
382     return false;
383   }
384 
385   Container* const container = down_cast<Container*>(output);
386   // For now, use the same stream for both data and metadata.
387   Stream temp_main_stream(output->GetMainSection());
388   CHECK_EQ(output->GetMainSection()->Size(), 0u);
389   Stream temp_data_stream(output->GetDataSection());
390   Stream* main_stream = &temp_main_stream;
391   Stream* data_stream = &temp_data_stream;
392 
393   // We want offset 0 to be reserved for null, seek to the data section alignment or the end of the
394   // section.
395   data_stream->Seek(std::max(
396       static_cast<uint32_t>(output->GetDataSection()->Size()),
397       kDataSectionAlignment));
398   data_item_dedupe_ = &container->data_item_dedupe_;
399 
400   // Starting offset is right after the header.
401   main_stream->Seek(GetHeaderSize());
402 
403   // Based on: https://source.android.com/devices/tech/dalvik/dex-format
404   // Since the offsets may not be calculated already, the writing must be done in the correct order.
405   const uint32_t string_ids_offset = main_stream->Tell();
406   WriteStringIds(main_stream, /*reserve_only=*/ true);
407   WriteTypeIds(main_stream);
408   const uint32_t proto_ids_offset = main_stream->Tell();
409   WriteProtoIds(main_stream, /*reserve_only=*/ true);
410   WriteFieldIds(main_stream);
411   WriteMethodIds(main_stream);
412   const uint32_t class_defs_offset = main_stream->Tell();
413   WriteClassDefs(main_stream, /*reserve_only=*/ true);
414   const uint32_t call_site_ids_offset = main_stream->Tell();
415   WriteCallSiteIds(main_stream, /*reserve_only=*/ true);
416   WriteMethodHandles(main_stream);
417 
418   if (compute_offsets_) {
419     // Data section.
420     data_stream->AlignTo(kDataSectionAlignment);
421   }
422   owned_data_begin_ = data_stream->Tell();
423 
424   // Write code item first to minimize the space required for encoded methods.
425   // For cdex, the code items don't depend on the debug info.
426   WriteCodeItems(data_stream, /*reserve_only=*/ false);
427 
428   // Sort the debug infos by method index order, this reduces size by ~0.1% by reducing the size of
429   // the debug info offset table.
430   SortDebugInfosByMethodIndex();
431   WriteDebugInfoItems(data_stream);
432 
433   WriteEncodedArrays(data_stream);
434   WriteAnnotations(data_stream);
435   WriteAnnotationSets(data_stream);
436   WriteAnnotationSetRefs(data_stream);
437   WriteAnnotationsDirectories(data_stream);
438   WriteTypeLists(data_stream);
439   WriteClassDatas(data_stream);
440   WriteStringDatas(data_stream);
441   WriteHiddenapiClassData(data_stream);
442 
443   // Write delayed id sections that depend on data sections.
444   {
445     Stream::ScopedSeek seek(main_stream, string_ids_offset);
446     WriteStringIds(main_stream, /*reserve_only=*/ false);
447   }
448   {
449     Stream::ScopedSeek seek(main_stream, proto_ids_offset);
450     WriteProtoIds(main_stream, /*reserve_only=*/ false);
451   }
452   {
453     Stream::ScopedSeek seek(main_stream, class_defs_offset);
454     WriteClassDefs(main_stream, /*reserve_only=*/ false);
455   }
456   {
457     Stream::ScopedSeek seek(main_stream, call_site_ids_offset);
458     WriteCallSiteIds(main_stream, /*reserve_only=*/ false);
459   }
460 
461   // Write the map list.
462   if (compute_offsets_) {
463     data_stream->AlignTo(SectionAlignment(DexFile::kDexTypeMapList));
464     header_->SetMapListOffset(data_stream->Tell());
465   } else {
466     data_stream->Seek(header_->MapListOffset());
467   }
468 
469   // Map items are included in the data section.
470   GenerateAndWriteMapItems(data_stream);
471 
472   // Write link data if it exists.
473   const std::vector<uint8_t>& link_data = header_->LinkData();
474   if (link_data.size() > 0) {
475     CHECK_EQ(header_->LinkSize(), static_cast<uint32_t>(link_data.size()));
476     if (compute_offsets_) {
477       header_->SetLinkOffset(data_stream->Tell());
478     } else {
479       data_stream->Seek(header_->LinkOffset());
480     }
481     data_stream->Write(&link_data[0], link_data.size());
482   }
483 
484   // Write debug info offset table last to make dex file verifier happy.
485   WriteDebugInfoOffsetTable(data_stream);
486 
487   data_stream->AlignTo(kDataSectionAlignment);
488   owned_data_end_ = data_stream->Tell();
489   if (compute_offsets_) {
490     header_->SetDataSize(data_stream->Tell());
491     if (header_->DataSize() != 0) {
492       // Offset must be zero when the size is zero.
493       main_stream->AlignTo(kDataSectionAlignment);
494       // For now, default to saying the data is right after the main stream.
495       header_->SetDataOffset(main_stream->Tell());
496     } else {
497       header_->SetDataOffset(0u);
498     }
499   }
500 
501   // Write header last.
502   if (compute_offsets_) {
503     header_->SetFileSize(main_stream->Tell());
504   }
505   WriteHeader(main_stream);
506 
507   // Trim sections to make sure they are sized properly.
508   output->GetMainSection()->Resize(header_->FileSize());
509   output->GetDataSection()->Resize(data_stream->Tell());
510 
511   if (dex_layout_->GetOptions().update_checksum_) {
512     // Compute the cdex section (also covers the used part of the data section).
513     header_->SetChecksum(CompactDexFile::CalculateChecksum(output->GetMainSection()->Begin(),
514                                                            output->GetMainSection()->Size(),
515                                                            output->GetDataSection()->Begin(),
516                                                            output->GetDataSection()->Size()));
517     // Rewrite the header with the calculated checksum.
518     WriteHeader(main_stream);
519   }
520 
521   return true;
522 }
523 
CreateDexContainer() const524 std::unique_ptr<DexContainer> CompactDexWriter::CreateDexContainer() const {
525   return std::unique_ptr<DexContainer>(new CompactDexWriter::Container());
526 }
527 
528 }  // namespace art
529