1 //===- EhFrameReader.cpp --------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
#include "mcld/LD/EhFrameReader.h"

#include "mcld/Fragment/NullFragment.h"
#include "mcld/MC/Input.h"
#include "mcld/LD/LDSection.h"
#include "mcld/Support/MsgHandling.h"
#include "mcld/Support/MemoryArea.h"

#include <llvm/ADT/StringRef.h>
#include <llvm/Support/Dwarf.h>
#include <llvm/Support/LEB128.h>

#include <cstring>
20 
21 namespace mcld {
22 
23 //===----------------------------------------------------------------------===//
24 // Helper Functions
25 //===----------------------------------------------------------------------===//
26 /// skip_LEB128 - skip the first LEB128 encoded value from *pp, update *pp
27 /// to the next character.
28 /// @return - false if we ran off the end of the string.
skip_LEB128(EhFrameReader::ConstAddress * pp,EhFrameReader::ConstAddress pend)29 static bool skip_LEB128(EhFrameReader::ConstAddress* pp,
30                         EhFrameReader::ConstAddress pend) {
31   for (EhFrameReader::ConstAddress p = *pp; p < pend; ++p) {
32     if ((*p & 0x80) == 0x0) {
33       *pp = p + 1;
34       return true;
35     }
36   }
37   return false;
38 }
39 
40 //===----------------------------------------------------------------------===//
41 // EhFrameReader
42 //===----------------------------------------------------------------------===//
43 template <>
scan(ConstAddress pHandler,uint64_t pOffset,llvm::StringRef pData) const44 EhFrameReader::Token EhFrameReader::scan<true>(ConstAddress pHandler,
45                                                uint64_t pOffset,
46                                                llvm::StringRef pData) const {
47   Token result;
48   result.file_off = pOffset;
49 
50   const uint32_t* data = (const uint32_t*)pHandler;
51   size_t cur_idx = 0;
52 
53   // Length Field
54   uint32_t length = data[cur_idx++];
55   if (length == 0x0) {
56     // terminator
57     result.kind = Terminator;
58     result.data_off = 4;
59     result.size = 4;
60     return result;
61   }
62 
63   // Extended Field
64   uint64_t extended = 0x0;
65   if (length == 0xFFFFFFFF) {
66     extended = data[cur_idx++];
67     extended <<= 32;
68     extended |= data[cur_idx++];
69     result.size = extended + 12;
70     result.data_off = 16;
71     // 64-bit obj file still uses 32-bit eh_frame.
72     assert(false && "We don't support 64-bit eh_frame.");
73   } else {
74     result.size = length + 4;
75     result.data_off = 8;
76   }
77 
78   // ID Field
79   uint32_t ID = data[cur_idx++];
80   if (ID == 0x0)
81     result.kind = CIE;
82   else
83     result.kind = FDE;
84 
85   return result;
86 }
87 
88 template <>
read(Input & pInput,EhFrame & pEhFrame)89 bool EhFrameReader::read<32, true>(Input& pInput, EhFrame& pEhFrame) {
90   // Alphabet:
91   //   {CIE, FDE, CIEt}
92   //
93   // Regular Expression:
94   //   (CIE FDE*)+ CIEt
95   //
96   // Autometa:
97   //   S = {Q0, Q1, Q2}, Start = Q0, Accept = Q2
98   //
99   //              FDE
100   //             +---+
101   //        CIE   \ /   CIEt
102   //   Q0 -------> Q1 -------> Q2
103   //    |         / \           ^
104   //    |        +---+          |
105   //    |         CIE           |
106   //    +-----------------------+
107   //              CIEt
108   const State autometa[NumOfStates][NumOfTokenKinds] = {
109       //     CIE     FDE    Term  Unknown
110       {Q1, Reject, Accept, Reject},  // Q0
111       {Q1, Q1, Accept, Reject},      // Q1
112   };
113 
114   const Action transition[NumOfStates][NumOfTokenKinds] = {
115       /*    CIE     FDE     Term Unknown */
116       {addCIE, reject, addTerm, reject},  // Q0
117       {addCIE, addFDE, addTerm, reject},  // Q1
118   };
119 
120   LDSection& section = pEhFrame.getSection();
121   if (section.size() == 0x0) {
122     NullFragment* frag = new NullFragment();
123     pEhFrame.addFragment(*frag);
124     return true;
125   }
126 
127   // get file offset and address
128   uint64_t file_off = pInput.fileOffset() + section.offset();
129   llvm::StringRef sect_reg =
130       pInput.memArea()->request(file_off, section.size());
131   ConstAddress handler = (ConstAddress)sect_reg.begin();
132 
133   State cur_state = Q0;
134   while (Reject != cur_state && Accept != cur_state) {
135     Token token = scan<true>(handler, file_off, sect_reg);
136     llvm::StringRef entry =
137         pInput.memArea()->request(token.file_off, token.size);
138 
139     if (!transition[cur_state][token.kind](pEhFrame, entry, token)) {
140       // fail to scan
141       debug(diag::debug_cannot_scan_eh) << pInput.name();
142       return false;
143     }
144 
145     file_off += token.size;
146     handler += token.size;
147 
148     if (handler == sect_reg.end()) {
149       cur_state = Accept;
150     } else if (handler > sect_reg.end()) {
151       cur_state = Reject;
152     } else {
153       cur_state = autometa[cur_state][token.kind];
154     }
155   }  // end of while
156 
157   if (Reject == cur_state) {
158     // fail to parse
159     debug(diag::debug_cannot_parse_eh) << pInput.name();
160     return false;
161   }
162   return true;
163 }
164 
addCIE(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)165 bool EhFrameReader::addCIE(EhFrame& pEhFrame,
166                            llvm::StringRef pRegion,
167                            const EhFrameReader::Token& pToken) {
168   // skip Length, Extended Length and CIE ID.
169   ConstAddress handler = pRegion.begin() + pToken.data_off;
170   ConstAddress cie_end = pRegion.end();
171   ConstAddress handler_start = handler;
172   uint64_t pr_ptr_data_offset = pToken.data_off;
173 
174   // the version should be 1 or 3
175   uint8_t version = *handler++;
176   if (version != 1 && version != 3) {
177     return false;
178   }
179 
180   // Set up the Augumentation String
181   ConstAddress aug_str_front = handler;
182   ConstAddress aug_str_back = static_cast<ConstAddress>(
183       memchr(aug_str_front, '\0', cie_end - aug_str_front));
184   if (aug_str_back == NULL) {
185     return false;
186   }
187 
188   // skip the Augumentation String field
189   handler = aug_str_back + 1;
190 
191   // skip the Code Alignment Factor
192   if (!skip_LEB128(&handler, cie_end)) {
193     return false;
194   }
195   // skip the Data Alignment Factor
196   if (!skip_LEB128(&handler, cie_end)) {
197     return false;
198   }
199   // skip the Return Address Register
200   if (cie_end - handler < 1) {
201     return false;
202   }
203   ++handler;
204 
205   llvm::StringRef augment((const char*)aug_str_front);
206 
207   // we discard this CIE if the augumentation string is '\0'
208   if (augment.size() == 0) {
209     EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
210     cie->setFDEEncode(llvm::dwarf::DW_EH_PE_absptr);
211     pEhFrame.addCIE(*cie);
212     pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
213     return true;
214   }
215 
216   // the Augmentation String start with 'eh' is a CIE from gcc before 3.0,
217   // in LSB Core Spec 3.0RC1. We do not support it.
218   if (augment.size() > 1 && augment[0] == 'e' && augment[1] == 'h') {
219     return false;
220   }
221 
222   // parse the Augmentation String to get the FDE encodeing if 'z' existed
223   uint8_t fde_encoding = llvm::dwarf::DW_EH_PE_absptr;
224   std::string augdata;
225   std::string pr_ptr_data;
226   if (augment[0] == 'z') {
227     unsigned offset;
228     size_t augdata_size = llvm::decodeULEB128((const uint8_t*)handler, &offset);
229     handler += offset;
230     augdata = std::string((const char*)handler, augdata_size);
231 
232     // parse the Augmentation String
233     for (size_t i = 1; i < augment.size(); ++i) {
234       switch (augment[i]) {
235         // LDSA encoding (1 byte)
236         case 'L': {
237           if (cie_end - handler < 1) {
238             return false;
239           }
240           ++handler;
241           break;
242         }
243         // Two arguments, the first one represents the encoding of the second
244         // argument (1 byte). The second one is the address of personality
245         // routine.
246         case 'P': {
247           // the first argument
248           if (cie_end - handler < 1) {
249             return false;
250           }
251           uint8_t per_encode = *handler;
252           ++handler;
253           // get the length of the second argument
254           uint32_t per_length = 0;
255           if ((per_encode & 0x60) == 0x60) {
256             return false;
257           }
258           switch (per_encode & 7) {
259             default:
260               return false;
261             case llvm::dwarf::DW_EH_PE_udata2:
262               per_length = 2;
263               break;
264             case llvm::dwarf::DW_EH_PE_udata4:
265               per_length = 4;
266               break;
267             case llvm::dwarf::DW_EH_PE_udata8:
268               per_length = 8;
269               break;
270             case llvm::dwarf::DW_EH_PE_absptr:
271               per_length = 4;  // pPkg.bitclass / 8;
272               break;
273           }
274           // skip the alignment
275           if (llvm::dwarf::DW_EH_PE_aligned == (per_encode & 0xf0)) {
276             uint32_t per_align = handler - cie_end;
277             per_align += per_length - 1;
278             per_align &= ~(per_length - 1);
279             if (static_cast<uint32_t>(cie_end - handler) < per_align) {
280               return false;
281             }
282             handler += per_align;
283           }
284           // skip the second argument
285           if (static_cast<uint32_t>(cie_end - handler) < per_length) {
286             return false;
287           }
288           pr_ptr_data_offset += handler - handler_start;
289           pr_ptr_data = std::string((const char*)handler, per_length);
290           handler += per_length;
291           break;
292         }  // end of case 'P'
293 
294         // FDE encoding (1 byte)
295         case 'R': {
296           if (cie_end - handler < 1) {
297             return false;
298           }
299           fde_encoding = *handler;
300           switch (fde_encoding & 7) {
301             case llvm::dwarf::DW_EH_PE_udata2:
302             case llvm::dwarf::DW_EH_PE_udata4:
303             case llvm::dwarf::DW_EH_PE_udata8:
304             case llvm::dwarf::DW_EH_PE_absptr:
305               break;
306             default:
307               return false;
308           }
309           ++handler;
310           break;
311         }
312         default:
313           return false;
314       }  // end switch
315     }    // the rest chars.
316   }      // first char is 'z'
317 
318   // create and push back the CIE entry
319   EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
320   cie->setFDEEncode(fde_encoding);
321   cie->setPersonalityOffset(pr_ptr_data_offset);
322   cie->setPersonalityName(pr_ptr_data);
323   cie->setAugmentationData(augdata);
324   pEhFrame.addCIE(*cie);
325   pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
326   return true;
327 }
328 
addFDE(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)329 bool EhFrameReader::addFDE(EhFrame& pEhFrame,
330                            llvm::StringRef pRegion,
331                            const EhFrameReader::Token& pToken) {
332   if (pToken.data_off == pRegion.size())
333     return false;
334 
335   const int32_t offset =
336       *(const int32_t*)(pRegion.begin() + pToken.data_off - 4);
337   size_t cie_offset =
338       (size_t)((int64_t)(pToken.file_off + 4) - (int32_t)offset);
339 
340   EhFrame::CIEMap::iterator iter = pEhFrame.getCIEMap().find(cie_offset);
341   if (iter == pEhFrame.getCIEMap().end())
342     return false;
343 
344   // create and push back the FDE entry
345   EhFrame::FDE* fde = new EhFrame::FDE(pRegion, *iter->second);
346   pEhFrame.addFDE(*fde);
347   return true;
348 }
349 
addTerm(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)350 bool EhFrameReader::addTerm(EhFrame& pEhFrame,
351                             llvm::StringRef pRegion,
352                             const EhFrameReader::Token& pToken) {
353   return true;
354 }
355 
reject(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)356 bool EhFrameReader::reject(EhFrame& pEhFrame,
357                            llvm::StringRef pRegion,
358                            const EhFrameReader::Token& pToken) {
359   return true;
360 }
361 
362 }  // namespace mcld
363