1 //===- EhFrameReader.cpp --------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "mcld/LD/EhFrameReader.h"
10 
11 #include "mcld/Fragment/NullFragment.h"
12 #include "mcld/MC/Input.h"
13 #include "mcld/LD/LDSection.h"
14 #include "mcld/Support/MsgHandling.h"
15 #include "mcld/Support/MemoryArea.h"
16 
17 #include <llvm/ADT/StringRef.h>
18 #include <llvm/Support/Dwarf.h>
19 #include <llvm/Support/LEB128.h>
20 
21 namespace mcld {
22 
23 //===----------------------------------------------------------------------===//
24 // Helper Functions
25 //===----------------------------------------------------------------------===//
26 /// skip_LEB128 - skip the first LEB128 encoded value from *pp, update *pp
27 /// to the next character.
28 /// @return - false if we ran off the end of the string.
skip_LEB128(EhFrameReader::ConstAddress * pp,EhFrameReader::ConstAddress pend)29 static bool skip_LEB128(EhFrameReader::ConstAddress* pp,
30                         EhFrameReader::ConstAddress pend) {
31   for (EhFrameReader::ConstAddress p = *pp; p < pend; ++p) {
32     if ((*p & 0x80) == 0x0) {
33       *pp = p + 1;
34       return true;
35     }
36   }
37   return false;
38 }
39 
40 //===----------------------------------------------------------------------===//
41 // EhFrameReader
42 //===----------------------------------------------------------------------===//
43 template <>
scan(ConstAddress pHandler,uint64_t pOffset,llvm::StringRef pData) const44 EhFrameReader::Token EhFrameReader::scan<true>(ConstAddress pHandler,
45                                                uint64_t pOffset,
46                                                llvm::StringRef pData) const {
47   Token result;
48   result.file_off = pOffset;
49 
50   const uint32_t* data = (const uint32_t*)pHandler;
51   size_t cur_idx = 0;
52 
53   // Length Field
54   uint32_t length = data[cur_idx++];
55   if (length == 0x0) {
56     // terminator
57     result.kind = Terminator;
58     result.data_off = 4;
59     result.size = 4;
60     return result;
61   }
62 
63   // Extended Field
64   uint64_t extended = 0x0;
65   if (length == 0xFFFFFFFF) {
66     extended = data[cur_idx++];
67     extended <<= 32;
68     extended |= data[cur_idx++];
69     result.size = extended + 12;
70     result.data_off = 16;
71     // 64-bit obj file still uses 32-bit eh_frame.
72     assert(false && "We don't support 64-bit eh_frame.");
73   } else {
74     result.size = length + 4;
75     result.data_off = 8;
76   }
77 
78   // ID Field
79   uint32_t ID = data[cur_idx++];
80   if (ID == 0x0)
81     result.kind = CIE;
82   else
83     result.kind = FDE;
84 
85   return result;
86 }
87 
88 template <>
read(Input & pInput,EhFrame & pEhFrame)89 bool EhFrameReader::read<32, true>(Input& pInput, EhFrame& pEhFrame) {
90   // Alphabet:
91   //   {CIE, FDE, CIEt}
92   //
93   // Regular Expression:
94   //   (CIE FDE*)+ CIEt
95   //
96   // Autometa:
97   //   S = {Q0, Q1, Q2}, Start = Q0, Accept = Q2
98   //
99   //              FDE
100   //             +---+
101   //        CIE   \ /   CIEt
102   //   Q0 -------> Q1 -------> Q2
103   //    |         / \           ^
104   //    |        +---+          |
105   //    |         CIE           |
106   //    +-----------------------+
107   //              CIEt
108   const State autometa[NumOfStates][NumOfTokenKinds] = {
109       //     CIE     FDE    Term  Unknown
110       {Q1, Reject, Accept, Reject},  // Q0
111       {Q1, Q1, Accept, Reject},      // Q1
112   };
113 
114   const Action transition[NumOfStates][NumOfTokenKinds] = {
115       /*    CIE     FDE     Term Unknown */
116       {addCIE, reject, addTerm, reject},  // Q0
117       {addCIE, addFDE, addTerm, reject},  // Q1
118   };
119 
120   LDSection& section = pEhFrame.getSection();
121   if (section.size() == 0x0) {
122     NullFragment* frag = new NullFragment();
123     pEhFrame.addFragment(*frag);
124     return true;
125   }
126 
127   // get file offset and address
128   uint64_t file_off = pInput.fileOffset() + section.offset();
129   llvm::StringRef sect_reg =
130       pInput.memArea()->request(file_off, section.size());
131   ConstAddress handler = (ConstAddress)sect_reg.begin();
132 
133   State cur_state = Q0;
134   while (Reject != cur_state && Accept != cur_state) {
135     Token token = scan<true>(handler, file_off, sect_reg);
136     llvm::StringRef entry =
137         pInput.memArea()->request(token.file_off, token.size);
138 
139     if (!transition[cur_state][token.kind](pEhFrame, entry, token)) {
140       // fail to scan
141       debug(diag::debug_cannot_scan_eh) << pInput.name();
142       return false;
143     }
144 
145     file_off += token.size;
146     handler += token.size;
147 
148     if (handler == sect_reg.end()) {
149       cur_state = Accept;
150     } else if (handler > sect_reg.end()) {
151       cur_state = Reject;
152     } else {
153       cur_state = autometa[cur_state][token.kind];
154     }
155   }  // end of while
156 
157   if (Reject == cur_state) {
158     // fail to parse
159     debug(diag::debug_cannot_parse_eh) << pInput.name();
160     return false;
161   }
162   return true;
163 }
164 
addCIE(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)165 bool EhFrameReader::addCIE(EhFrame& pEhFrame,
166                            llvm::StringRef pRegion,
167                            const EhFrameReader::Token& pToken) {
168   // skip Length, Extended Length and CIE ID.
169   ConstAddress handler = pRegion.begin() + pToken.data_off;
170   ConstAddress cie_end = pRegion.end();
171   ConstAddress handler_start = handler;
172   uint64_t pr_ptr_data_offset = pToken.data_off;
173 
174   // the version should be 1 or 3
175   uint8_t version = *handler++;
176   if (version != 1 && version != 3) {
177     return false;
178   }
179 
180   // Set up the Augumentation String
181   ConstAddress aug_str_front = handler;
182   ConstAddress aug_str_back = static_cast<ConstAddress>(
183       memchr(aug_str_front, '\0', cie_end - aug_str_front));
184   if (aug_str_back == NULL) {
185     return false;
186   }
187 
188   // skip the Augumentation String field
189   handler = aug_str_back + 1;
190 
191   // skip the Code Alignment Factor
192   if (!skip_LEB128(&handler, cie_end)) {
193     return false;
194   }
195   // skip the Data Alignment Factor
196   if (!skip_LEB128(&handler, cie_end)) {
197     return false;
198   }
199   // skip the Return Address Register
200   if (version == 1) {
201     if (cie_end - handler < 1)
202       return false;
203     ++handler;
204   } else {
205     if (!skip_LEB128(&handler, cie_end))
206       return false;
207   }
208 
209   llvm::StringRef augment((const char*)aug_str_front);
210 
211   // we discard this CIE if the augumentation string is '\0'
212   if (augment.size() == 0) {
213     EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
214     cie->setFDEEncode(llvm::dwarf::DW_EH_PE_absptr);
215     pEhFrame.addCIE(*cie);
216     pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
217     return true;
218   }
219 
220   // the Augmentation String start with 'eh' is a CIE from gcc before 3.0,
221   // in LSB Core Spec 3.0RC1. We do not support it.
222   if (augment.size() > 1 && augment[0] == 'e' && augment[1] == 'h') {
223     return false;
224   }
225 
226   // parse the Augmentation String to get the FDE encodeing if 'z' existed
227   uint8_t fde_encoding = llvm::dwarf::DW_EH_PE_absptr;
228   std::string augdata;
229   std::string pr_ptr_data;
230   if (augment[0] == 'z') {
231     unsigned offset;
232     size_t augdata_size = llvm::decodeULEB128((const uint8_t*)handler, &offset);
233     handler += offset;
234     augdata = std::string((const char*)handler, augdata_size);
235 
236     // parse the Augmentation String
237     for (size_t i = 1; i < augment.size(); ++i) {
238       switch (augment[i]) {
239         // LDSA encoding (1 byte)
240         case 'L': {
241           if (cie_end - handler < 1) {
242             return false;
243           }
244           ++handler;
245           break;
246         }
247         // Two arguments, the first one represents the encoding of the second
248         // argument (1 byte). The second one is the address of personality
249         // routine.
250         case 'P': {
251           // the first argument
252           if (cie_end - handler < 1) {
253             return false;
254           }
255           uint8_t per_encode = *handler;
256           ++handler;
257           // get the length of the second argument
258           uint32_t per_length = 0;
259           if ((per_encode & 0x60) == 0x60) {
260             return false;
261           }
262           switch (per_encode & 7) {
263             default:
264               return false;
265             case llvm::dwarf::DW_EH_PE_udata2:
266               per_length = 2;
267               break;
268             case llvm::dwarf::DW_EH_PE_udata4:
269               per_length = 4;
270               break;
271             case llvm::dwarf::DW_EH_PE_udata8:
272               per_length = 8;
273               break;
274             case llvm::dwarf::DW_EH_PE_absptr:
275               per_length = 4;  // pPkg.bitclass / 8;
276               break;
277           }
278           // skip the alignment
279           if (llvm::dwarf::DW_EH_PE_aligned == (per_encode & 0xf0)) {
280             uint32_t per_align = handler - cie_end;
281             per_align += per_length - 1;
282             per_align &= ~(per_length - 1);
283             if (static_cast<uint32_t>(cie_end - handler) < per_align) {
284               return false;
285             }
286             handler += per_align;
287           }
288           // skip the second argument
289           if (static_cast<uint32_t>(cie_end - handler) < per_length) {
290             return false;
291           }
292           pr_ptr_data_offset += handler - handler_start;
293           pr_ptr_data = std::string((const char*)handler, per_length);
294           handler += per_length;
295           break;
296         }  // end of case 'P'
297 
298         // FDE encoding (1 byte)
299         case 'R': {
300           if (cie_end - handler < 1) {
301             return false;
302           }
303           fde_encoding = *handler;
304           switch (fde_encoding & 7) {
305             case llvm::dwarf::DW_EH_PE_udata2:
306             case llvm::dwarf::DW_EH_PE_udata4:
307             case llvm::dwarf::DW_EH_PE_udata8:
308             case llvm::dwarf::DW_EH_PE_absptr:
309               break;
310             default:
311               return false;
312           }
313           ++handler;
314           break;
315         }
316         default:
317           return false;
318       }  // end switch
319     }    // the rest chars.
320   }      // first char is 'z'
321 
322   // create and push back the CIE entry
323   EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
324   cie->setFDEEncode(fde_encoding);
325   cie->setPersonalityOffset(pr_ptr_data_offset);
326   cie->setPersonalityName(pr_ptr_data);
327   cie->setAugmentationData(augdata);
328   pEhFrame.addCIE(*cie);
329   pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
330   return true;
331 }
332 
addFDE(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)333 bool EhFrameReader::addFDE(EhFrame& pEhFrame,
334                            llvm::StringRef pRegion,
335                            const EhFrameReader::Token& pToken) {
336   if (pToken.data_off == pRegion.size())
337     return false;
338 
339   const int32_t offset =
340       *(const int32_t*)(pRegion.begin() + pToken.data_off - 4);
341   size_t cie_offset =
342       (size_t)((int64_t)(pToken.file_off + 4) - (int32_t)offset);
343 
344   EhFrame::CIEMap::iterator iter = pEhFrame.getCIEMap().find(cie_offset);
345   if (iter == pEhFrame.getCIEMap().end())
346     return false;
347 
348   // create and push back the FDE entry
349   EhFrame::FDE* fde = new EhFrame::FDE(pRegion, *iter->second);
350   pEhFrame.addFDE(*fde);
351   return true;
352 }
353 
addTerm(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)354 bool EhFrameReader::addTerm(EhFrame& pEhFrame,
355                             llvm::StringRef pRegion,
356                             const EhFrameReader::Token& pToken) {
357   return true;
358 }
359 
reject(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)360 bool EhFrameReader::reject(EhFrame& pEhFrame,
361                            llvm::StringRef pRegion,
362                            const EhFrameReader::Token& pToken) {
363   return true;
364 }
365 
366 }  // namespace mcld
367