1 //===- EhFrameReader.cpp --------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/LD/EhFrameReader.h>
10 
11 #include <mcld/Fragment/NullFragment.h>
12 #include <mcld/MC/Input.h>
13 #include <mcld/LD/LDSection.h>
14 #include <mcld/Support/MsgHandling.h>
15 #include <mcld/Support/MemoryArea.h>
16 
17 #include <llvm/ADT/StringRef.h>
18 #include <llvm/Support/Dwarf.h>
19 #include <llvm/Support/LEB128.h>
20 
21 using namespace mcld;
22 using namespace llvm::dwarf;
23 
24 //===----------------------------------------------------------------------===//
25 // Helper Functions
26 //===----------------------------------------------------------------------===//
27 /// skip_LEB128 - skip the first LEB128 encoded value from *pp, update *pp
28 /// to the next character.
29 /// @return - false if we ran off the end of the string.
30 /// @ref - GNU gold 1.11, ehframe.h, Eh_frame::skip_leb128.
31 static bool
skip_LEB128(EhFrameReader::ConstAddress * pp,EhFrameReader::ConstAddress pend)32 skip_LEB128(EhFrameReader::ConstAddress* pp, EhFrameReader::ConstAddress pend)
33 {
34   for (EhFrameReader::ConstAddress p = *pp; p < pend; ++p) {
35     if (0x0 == (*p & 0x80)) {
36       *pp = p + 1;
37       return true;
38     }
39   }
40   return false;
41 }
42 
43 //===----------------------------------------------------------------------===//
44 // EhFrameReader
45 //===----------------------------------------------------------------------===//
46 template<> EhFrameReader::Token
scan(ConstAddress pHandler,uint64_t pOffset,llvm::StringRef pData) const47 EhFrameReader::scan<true>(ConstAddress pHandler,
48                           uint64_t pOffset,
49                           llvm::StringRef pData) const
50 {
51   Token result;
52   result.file_off = pOffset;
53 
54   const uint32_t* data = (const uint32_t*)pHandler;
55   size_t cur_idx = 0;
56 
57   // Length Field
58   uint32_t length = data[cur_idx++];
59   if (0x0 == length) {
60     // terminator
61     result.kind = Terminator;
62     result.data_off = 4;
63     result.size = 4;
64     return result;
65   }
66 
67   // Extended Field
68   uint64_t extended = 0x0;
69   if (0xFFFFFFFF == length) {
70     extended = data[cur_idx++];
71     extended <<= 32;
72     extended |= data[cur_idx++];
73     result.size = extended + 12;
74     result.data_off = 16;
75     // 64-bit obj file still uses 32-bit eh_frame.
76     assert (false && "We don't support 64-bit eh_frame.");
77   }
78   else {
79     result.size = length + 4;
80     result.data_off = 8;
81   }
82 
83   // ID Field
84   uint32_t ID = data[cur_idx++];
85   if (0x0 == ID)
86     result.kind = CIE;
87   else
88     result.kind = FDE;
89 
90   return result;
91 }
92 
93 template<>
read(Input & pInput,EhFrame & pEhFrame)94 bool EhFrameReader::read<32, true>(Input& pInput, EhFrame& pEhFrame)
95 {
96   // Alphabet:
97   //   {CIE, FDE, CIEt}
98   //
99   // Regular Expression:
100   //   (CIE FDE*)+ CIEt
101   //
102   // Autometa:
103   //   S = {Q0, Q1, Q2}, Start = Q0, Accept = Q2
104   //
105   //              FDE
106   //             +---+
107   //        CIE   \ /   CIEt
108   //   Q0 -------> Q1 -------> Q2
109   //    |         / \           ^
110   //    |        +---+          |
111   //    |         CIE           |
112   //    +-----------------------+
113   //              CIEt
114   const State autometa[NumOfStates][NumOfTokenKinds] = {
115   //     CIE     FDE    Term  Unknown
116     {     Q1, Reject, Accept, Reject }, // Q0
117     {     Q1,     Q1, Accept, Reject }, // Q1
118   };
119 
120   const Action transition[NumOfStates][NumOfTokenKinds] = {
121    /*    CIE     FDE     Term Unknown */
122     { addCIE, reject, addTerm, reject}, // Q0
123     { addCIE, addFDE, addTerm, reject}, // Q1
124   };
125 
126   LDSection& section = pEhFrame.getSection();
127   if (section.size() == 0x0) {
128     NullFragment* frag = new NullFragment();
129     pEhFrame.addFragment(*frag);
130     return true;
131   }
132 
133   // get file offset and address
134   uint64_t file_off = pInput.fileOffset() + section.offset();
135   llvm::StringRef sect_reg =
136       pInput.memArea()->request(file_off, section.size());
137   ConstAddress handler = (ConstAddress)sect_reg.begin();
138 
139   State cur_state = Q0;
140   while (Reject != cur_state && Accept != cur_state) {
141 
142     Token token = scan<true>(handler, file_off, sect_reg);
143     llvm::StringRef entry = pInput.memArea()->request(token.file_off, token.size);
144 
145     if (!transition[cur_state][token.kind](pEhFrame, entry, token)) {
146       // fail to scan
147       debug(diag::debug_cannot_scan_eh) << pInput.name();
148       return false;
149     }
150 
151     file_off += token.size;
152     handler += token.size;
153 
154     if (handler == sect_reg.end())
155       cur_state = Accept;
156     else if (handler > sect_reg.end()) {
157       cur_state = Reject;
158     }
159     else
160       cur_state = autometa[cur_state][token.kind];
161   } // end of while
162 
163   if (Reject == cur_state) {
164     // fail to parse
165     debug(diag::debug_cannot_parse_eh) << pInput.name();
166     return false;
167   }
168   return true;
169 }
170 
addCIE(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)171 bool EhFrameReader::addCIE(EhFrame& pEhFrame,
172                            llvm::StringRef pRegion,
173                            const EhFrameReader::Token& pToken)
174 {
175   // skip Length, Extended Length and CIE ID.
176   ConstAddress handler = pRegion.begin() + pToken.data_off;
177   ConstAddress cie_end = pRegion.end();
178   ConstAddress handler_start = handler;
179   uint64_t pr_ptr_data_offset = pToken.data_off;
180 
181   // the version should be 1 or 3
182   uint8_t version = *handler++;
183   if (1 != version && 3 != version) {
184     return false;
185   }
186 
187   // Set up the Augumentation String
188   ConstAddress aug_str_front = handler;
189   ConstAddress aug_str_back  = static_cast<ConstAddress>(
190                          memchr(aug_str_front, '\0', cie_end - aug_str_front));
191   if (NULL == aug_str_back) {
192     return false;
193   }
194 
195   // skip the Augumentation String field
196   handler = aug_str_back + 1;
197 
198   // skip the Code Alignment Factor
199   if (!skip_LEB128(&handler, cie_end)) {
200     return false;
201   }
202   // skip the Data Alignment Factor
203   if (!skip_LEB128(&handler, cie_end)) {
204     return false;
205   }
206   // skip the Return Address Register
207   if (cie_end - handler < 1) {
208     return false;
209   }
210   ++handler;
211 
212   llvm::StringRef augment((const char*)aug_str_front);
213 
214   // we discard this CIE if the augumentation string is '\0'
215   if (0 == augment.size()) {
216     EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
217     cie->setFDEEncode(llvm::dwarf::DW_EH_PE_absptr);
218     pEhFrame.addCIE(*cie);
219     pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
220     return true;
221   }
222 
223   // the Augmentation String start with 'eh' is a CIE from gcc before 3.0,
224   // in LSB Core Spec 3.0RC1. We do not support it.
225   if (augment.size() > 1 && augment[0] == 'e' && augment[1] == 'h') {
226     return false;
227   }
228 
229   // parse the Augmentation String to get the FDE encodeing if 'z' existed
230   uint8_t fde_encoding = llvm::dwarf::DW_EH_PE_absptr;
231   std::string augdata;
232   std::string pr_ptr_data;
233   if ('z' == augment[0]) {
234     unsigned offset;
235     size_t augdata_size = llvm::decodeULEB128((const uint8_t*)handler, &offset);
236     handler += offset;
237     augdata = std::string((const char*)handler, augdata_size);
238 
239     // parse the Augmentation String
240     for (size_t i = 1; i < augment.size(); ++i) {
241       switch (augment[i]) {
242         // LDSA encoding (1 byte)
243         case 'L': {
244           if (cie_end - handler < 1) {
245             return false;
246           }
247           ++handler;
248           break;
249         }
250         // Two arguments, the first one represents the encoding of the second
251         // argument (1 byte). The second one is the address of personality
252         // routine.
253         case 'P': {
254           // the first argument
255           if (cie_end - handler < 1) {
256             return false;
257           }
258           uint8_t per_encode = *handler;
259           ++handler;
260           // get the length of the second argument
261           uint32_t per_length = 0;
262           if (0x60 == (per_encode & 0x60)) {
263             return false;
264           }
265           switch (per_encode & 7) {
266             default:
267               return false;
268             case llvm::dwarf::DW_EH_PE_udata2:
269               per_length = 2;
270               break;
271             case llvm::dwarf::DW_EH_PE_udata4:
272               per_length = 4;
273               break;
274             case llvm::dwarf::DW_EH_PE_udata8:
275               per_length = 8;
276               break;
277             case llvm::dwarf::DW_EH_PE_absptr:
278               per_length = 4; // pPkg.bitclass / 8;
279               break;
280           }
281           // skip the alignment
282           if (llvm::dwarf::DW_EH_PE_aligned == (per_encode & 0xf0)) {
283             uint32_t per_align = handler - cie_end;
284             per_align += per_length - 1;
285             per_align &= ~(per_length -1);
286             if (static_cast<uint32_t>(cie_end - handler) < per_align) {
287               return false;
288             }
289             handler += per_align;
290           }
291           // skip the second argument
292           if (static_cast<uint32_t>(cie_end - handler) < per_length) {
293             return false;
294           }
295           pr_ptr_data_offset += handler - handler_start;
296           pr_ptr_data = std::string((const char*)handler, per_length);
297           handler += per_length;
298           break;
299         } // end of case 'P'
300 
301         // FDE encoding (1 byte)
302         case 'R': {
303           if (cie_end - handler < 1) {
304             return false;
305           }
306           fde_encoding = *handler;
307           switch (fde_encoding & 7) {
308             case llvm::dwarf::DW_EH_PE_udata2:
309             case llvm::dwarf::DW_EH_PE_udata4:
310             case llvm::dwarf::DW_EH_PE_udata8:
311             case llvm::dwarf::DW_EH_PE_absptr:
312               break;
313             default:
314               return false;
315           }
316           ++handler;
317           break;
318         }
319         default:
320           return false;
321       } // end switch
322     } // the rest chars.
323   } // first char is 'z'
324 
325   // create and push back the CIE entry
326   EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
327   cie->setFDEEncode(fde_encoding);
328   cie->setPersonalityOffset(pr_ptr_data_offset);
329   cie->setPersonalityName(pr_ptr_data);
330   cie->setAugmentationData(augdata);
331   pEhFrame.addCIE(*cie);
332   pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
333   return true;
334 }
335 
addFDE(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)336 bool EhFrameReader::addFDE(EhFrame& pEhFrame,
337                            llvm::StringRef pRegion,
338                            const EhFrameReader::Token& pToken)
339 {
340   if (pToken.data_off == pRegion.size())
341     return false;
342 
343   const int32_t offset = *(const int32_t*) (pRegion.begin() + pToken.data_off
344                                             - 4);
345   size_t cie_offset = (size_t) ((int64_t) (pToken.file_off + 4) -
346                                 (int32_t) offset);
347 
348   EhFrame::CIEMap::iterator iter = pEhFrame.getCIEMap().find(cie_offset);
349   if (iter == pEhFrame.getCIEMap().end())
350     return false;
351 
352   // create and push back the FDE entry
353   EhFrame::FDE* fde = new EhFrame::FDE(pRegion, *iter->second);
354   pEhFrame.addFDE(*fde);
355   return true;
356 }
357 
addTerm(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)358 bool EhFrameReader::addTerm(EhFrame& pEhFrame,
359                             llvm::StringRef pRegion,
360                             const EhFrameReader::Token& pToken)
361 {
362   return true;
363 }
364 
reject(EhFrame & pEhFrame,llvm::StringRef pRegion,const EhFrameReader::Token & pToken)365 bool EhFrameReader::reject(EhFrame& pEhFrame,
366                            llvm::StringRef pRegion,
367                            const EhFrameReader::Token& pToken)
368 {
369   return true;
370 }
371