1 /*
2  * This file is part of ltrace.
3  * Copyright (C) 2012,2013 Petr Machata, Red Hat Inc.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18  * 02110-1301 USA
19  */
20 
21 #ifndef BACKEND_H
22 #define BACKEND_H
23 
24 #include "forward.h"
25 #include "sysdep.h"
26 
27 #include <gelf.h>
28 
29 enum process_status {	/* Answered by process_status() for a task PID.  */
30 	PS_INVALID,	/* Failure.  */
31 	PS_STOP,	/* Job-control stop.  */
32 	PS_TRACING_STOP,	/* NOTE(review): presumably a stop caused by tracing; confirm.  */
33 	PS_SLEEPING,	/* NOTE(review): presumably a sleeping task; confirm.  */
34 	PS_ZOMBIE,	/* NOTE(review): presumably an exited, unreaped task; confirm.  */
35 	PS_OTHER,	/* Necessary other states can be added as needed.  */
36 };
37 
38 /*
39  * This file contains documentation of back end interface.  Some of
40  * these may be implemented on an OS level (i.e. they are the same
41  * e.g. on all Linux architectures), some may differ per architecture
42  * on the same OS (e.g. a way to insert a breakpoint into the process
43  * image is a likely candidate).
44  */
45 
46 /* Convert a PID to a path to the corresponding binary.  */
47 char *pid2name(pid_t pid);
48 
49 /* Given a PID, find a leader of thread group.  */
50 pid_t process_leader(pid_t pid);
51 
52 /* Given a PID of leader thread, fill in PIDs of all the tasks.  The
53  * function will initialize the pointer *RET_TASKS to a
54  * newly-allocated array, and will store number of elements in that
55  * array to *RET_N.  You have to free that buffer when you don't need
56  * it anymore.  */
57 int process_tasks(pid_t pid, pid_t **ret_tasks, size_t *ret_n);
58 
59 /* Answer whether the process PID is stopped.  Returns 0 when not
60  * stopped, 1 when stopped, or -1 when there was an error.  */
61 int process_stopped(pid_t pid);
62 
63 /* Answer a status of the task PID.  See enum process_status.  */
64 enum process_status process_status(pid_t pid);
65 
66 /* Wait for PID to be ready for tracing.  */
67 int wait_for_proc(pid_t pid);
68 
69 /* Send a signal SIG to the task PID.  */
70 int task_kill(pid_t pid, int sig);
71 
72 /* Called after PID is attached, but before it is continued.  */
73 void trace_set_options(struct process *proc);
74 
75 /* Called after ltrace forks.  Should attach the newly created child,
76  * in whose context this function is called.  */
77 void trace_me(void);
78 
79 /* Called when ltrace needs to attach to PID, such as when it attaches
80  * to a running process, whose PID is given on the command line.  */
81 int trace_pid(pid_t pid);
82 
83 /* Stop tracing PID.  */
84 void untrace_pid(pid_t pid);
85 
86 /* The back end may need to store arbitrary data to a process.  This
87  * is a place where it can initialize PROC->arch_dep.  XXX this should
88  * be dropped in favor of arch_process_init on pmachata/libs.  */
89 void get_arch_dep(struct process *proc);
90 
91 /* Return current instruction pointer of PROC.
92  *
93  * XXX note that the IP must fit into an arch pointer.  This prevents
94  * us from using 32-bit ltrace to trace a 64-bit process, even on
95  * arches that would otherwise support this.  Above we have a
96  * definition of arch_addr_t.  This should be converted to an integral
97  * type and used for target addresses throughout.  */
98 void *get_instruction_pointer(struct process *proc);
99 
100 /* Set instruction pointer of PROC to ADDR.  XXX see above.  */
101 void set_instruction_pointer(struct process *proc, void *addr);
102 
103 /* Return current stack pointer of PROC.  XXX see above.  */
104 void *get_stack_pointer(struct process *proc);
105 
106 /* Find and return caller address, i.e. the address where the current
107  * function returns.  */
108 void *get_return_addr(struct process *proc, void *stack_pointer);
109 
110 /* Enable breakpoint SBP in process PROC.  */
111 void enable_breakpoint(struct process *proc, struct breakpoint *sbp);
112 
113 /* Disable breakpoint SBP in process PROC.  */
114 void disable_breakpoint(struct process *proc, struct breakpoint *sbp);
115 
116 /* Determine whether the event that we have just seen (and that is
117  * recorded in STATUS) was a syscall.  If it was, return 1.  If it was
118  * a return from syscall, return 2.  In both cases, set *SYSNUM to the
119  * number of said syscall.  If it wasn't a syscall, return 0.  If
120  * there was an error, return -1.  */
121 int syscall_p(struct process *proc, int status, int *sysnum);
122 
123 /* Continue execution of the process with given PID.  */
124 void continue_process(pid_t pid);
125 
126 /* Called after we received a signal SIGNUM.  Should do whatever
127  * book-keeping is necessary and continue the process if
128  * necessary.  */
129 void continue_after_signal(pid_t pid, int signum);
130 
131 /* Called after we received a system call SYSNUM.  RET_P is 0 if this
132  * is a system call, otherwise it's a return from a system call.  The
133  * callback should do whatever book-keeping is necessary and continue
134  * the process if necessary.  */
135 void continue_after_syscall(struct process *proc, int sysnum, int ret_p);
136 
137 /* Called after we hit a breakpoint SBP.  Should do whatever
138  * book-keeping is necessary and then continue the process.  */
139 void continue_after_breakpoint(struct process *proc, struct breakpoint *sbp);
140 
141 /* Called after we received a vfork.  Should do whatever book-keeping
142  * is necessary and continue the process if necessary.  N.B. right
143  * now, with Linux/GNU the only back end, this is not necessary.  I
144  * imagine other systems may be different.  */
145 void continue_after_vfork(struct process *proc);
146 
147 /* Called after the process exec's.  Should do whatever book-keeping
148  * is necessary and then continue the process.  */
149 void continue_after_exec(struct process *proc);
150 
151 /* Called when trace_me or primary trace_pid fail.  This may plug in
152  * any platform-specific knowledge of why it could be so.  */
153 void trace_fail_warning(pid_t pid);
154 
155 /* A pair of functions called to initiate a detachment request when
156  * ltrace is about to exit.  Their job is to undo any effects that
157  * tracing had and eventually detach process, perhaps by way of
158  * installing a process handler.
159  *
160  * OS_LTRACE_EXITING_SIGHANDLER is called from a signal handler
161  * context right after the signal was captured.  It returns 1 if the
162  * request was handled or 0 if it wasn't.
163  *
164  * If the call to OS_LTRACE_EXITING_SIGHANDLER didn't handle the
165  * request, OS_LTRACE_EXITING is called when the next event is
166  * generated.  Therefore it's called in "safe" context, without
167  * re-entrancy concerns, but it's only called after an event is
168  * generated.  */
169 int os_ltrace_exiting_sighandler(void);
170 void os_ltrace_exiting(void);
171 
172 /* Should copy COUNT bytes from address ADDR of process PROC to local
173  * buffer BUF.  */
174 size_t umovebytes(struct process *proc, arch_addr_t addr,
175 		  void *buf, size_t count);
176 
177 /* Find out an address of symbol SYM in process PROC, and return it.
178  * Returning NULL delays breakpoint insertion and enables heaps of
179  * arch-specific black magic that we should clean up some day.
180  *
181  * XXX the same points as for get_instruction_pointer apply. */
182 void *sym2addr(struct process *proc, struct library_symbol *sym);
183 
184 /* Obtain address of PLT entry corresponding to relocation RELA in
185  * file LTE.  This is NDX-th PLT entry in the file.
186  *
187  * XXX should this return arch_addr_t?  */
188 GElf_Addr arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela);
189 
190 /* Called at some point after we have attached to PROC.  This callback
191  * should insert an introspection breakpoint for handling dynamic
192  * linker library loads.  */
193 int linkmap_init(struct process *proc, arch_addr_t dyn_addr);
194 
195 /* This should produce and return the next event of one of the traced
196  * processes.  The returned pointer will not be freed by the core and
197  * should be either statically allocated, or the management should be
198  * done some other way.  */
199 struct Event *next_event(void);
200 
201 /* Called when process PROC was removed.  */
202 void process_removed(struct process *proc);
203 
204 /* This should extract entry point address and interpreter (dynamic
205  * linker) bias if possible.  Returns 0 if there were no errors, -1
206  * otherwise.  Sets *ENTRYP and *INTERP_BIASP to non-zero values if
207  * the corresponding value is known, or zero otherwise; this is not
208  * done for pointers that are NULL.  */
209 int process_get_entry(struct process *proc,
210 		      arch_addr_t *entryp,
211 		      arch_addr_t *interp_biasp);
212 
213 
214 /* Optional callbacks
215  *
216  * Some callbacks are only available if backend (arch.h) has a certain
217  * define.  If such a define is not present, default implementation
218  * (most often doing nothing at all) is used instead.  This is used
219  * for gradual extensions of ltrace, so that backends that are not
220  * fully up to date, or that don't need certain functionality, keep
221  * working, while other backends take advantage of the optional
222  * features.  */
223 
224 /* The following callbacks have to be implemented in backend if arch.h
225  * defines ARCH_HAVE_LTELF_DATA.  Those are used to init and destroy
226  * LTE->arch.  arch_elf_init returns 0 on success or a negative value
227  * on failure.  */
228 int arch_elf_init(struct ltelf *lte, struct library *lib);
229 void arch_elf_destroy(struct ltelf *lte);
230 
231 /* The following callbacks have to be implemented in OS backend if
232  * os.h defines OS_HAVE_BREAKPOINT_DATA.  Those are used to init,
233  * destroy, and clone SBP->os.  os_breakpoint_init and
234  * os_breakpoint_clone return 0 on success or a negative value on
235  * failure.  */
236 int os_breakpoint_init(struct process *proc, struct breakpoint *sbp);
237 void os_breakpoint_destroy(struct breakpoint *sbp);
238 int os_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp);
239 
240 /* The following callbacks have to be implemented in backend if arch.h
241  * defines ARCH_HAVE_BREAKPOINT_DATA.  Those are used to init,
242  * destroy, and clone SBP->arch.  arch_breakpoint_init and
243  * arch_breakpoint_clone return 0 on success or a negative value on
244  * failure.  */
245 int arch_breakpoint_init(struct process *proc, struct breakpoint *sbp);
246 void arch_breakpoint_destroy(struct breakpoint *sbp);
247 int arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp);
248 
249 /* The following callbacks have to be implemented in OS backend if
250  * os.h defines OS_HAVE_LIBRARY_DATA.  Those are used to init, destroy
251  * and clone LIB->os.  os_library_init and os_library_clone return 0
252  * on success or a negative value on failure.  */
253 int os_library_init(struct library *lib);
254 void os_library_destroy(struct library *lib);
255 int os_library_clone(struct library *retp, struct library *lib);
256 
257 /* The following callbacks have to be implemented in backend if arch.h
258  * defines ARCH_HAVE_LIBRARY_DATA.  Those are used to init, destroy
259  * and clone LIB->arch.  arch_library_init and arch_library_clone
260  * return 0 on success or a negative value on failure.  */
261 int arch_library_init(struct library *lib);
262 void arch_library_destroy(struct library *lib);
263 int arch_library_clone(struct library *retp, struct library *lib);
264 
265 /* The following callbacks have to be implemented in OS backend if
266  * os.h defines OS_HAVE_LIBRARY_SYMBOL_DATA.  Those are used to init,
267  * destroy and clone LIBSYM->os.  os_library_symbol_init and
268  * os_library_symbol_clone return 0 on success or a negative value on
269  * failure.  */
270 int os_library_symbol_init(struct library_symbol *libsym);
271 void os_library_symbol_destroy(struct library_symbol *libsym);
272 int os_library_symbol_clone(struct library_symbol *retp,
273 			    struct library_symbol *libsym);
274 
275 /* The following callbacks have to be implemented in backend if arch.h
276  * defines ARCH_HAVE_LIBRARY_SYMBOL_DATA.  Those are used to init,
277  * destroy and clone LIBSYM->arch.  arch_library_symbol_init and
278  * arch_library_symbol_clone return 0 on success or a negative value
279  * on failure.  */
280 int arch_library_symbol_init(struct library_symbol *libsym);
281 void arch_library_symbol_destroy(struct library_symbol *libsym);
282 int arch_library_symbol_clone(struct library_symbol *retp,
283 			      struct library_symbol *libsym);
284 
285 /* The following callbacks have to be implemented in OS backend if
286  * os.h defines OS_HAVE_PROCESS_DATA.  The protocol is same as for,
287  * respectively, arch_process_init, arch_process_destroy,
288  * arch_process_clone and arch_process_exec.  */
289 int os_process_init(struct process *proc);
290 void os_process_destroy(struct process *proc);
291 int os_process_clone(struct process *retp, struct process *proc);
292 int os_process_exec(struct process *proc);
293 
294 /* The following callbacks have to be implemented in backend if arch.h
295  * defines ARCH_HAVE_PROCESS_DATA.  Those are used to init, destroy
296  * and clone PROC->arch.  arch_process_exec is called to update
297  * PROC->arch in case that PROC underwent an exec.  See notes at
298  * process_init, process_destroy, process_clone and process_exec in
299  * proc.h.  */
300 int arch_process_init(struct process *proc);
301 void arch_process_destroy(struct process *proc);
302 int arch_process_clone(struct process *retp, struct process *proc);
303 int arch_process_exec(struct process *proc);
304 
305 /* The following callback has to be implemented in backend if arch.h
306  * defines ARCH_HAVE_GET_SYM_INFO.
307  *
308  * This is called for every PLT relocation RELA in ELF file LTE (which
309  * is named FILENAME), that ltrace is about to add.  The corresponding
310  * PLT entry is for SYM_INDEX-th relocation in the file.  This call is
311  * supposed to initialize SYM and RELA.  It returns 0 if there were no
312  * errors and given symbol should be used, 1 if the symbol should not
313  * be used, or a negative value if there were errors.  */
314 int arch_get_sym_info(struct ltelf *lte, const char *filename, size_t sym_index,
315 		      GElf_Rela *rela, GElf_Sym *sym);
316 
317 enum plt_status {	/* Result of the *_elf_add_{plt,func}_entry callbacks.  */
318 	PLT_FAIL,	/* NOTE(review): presumably reports an error; confirm.  */
319 	PLT_OK,		/* Symbols passed back in RET are added to the library.  */
320 	PLT_DEFAULT,	/* Defer to the next callback or the default handling.  */
321 };
322 
323 /* The following callback has to be implemented in OS backend if os.h
324  * defines OS_HAVE_ADD_PLT_ENTRY.
325  *
326  * This is called for every PLT relocation R in ELF file LTE, that
327  * ltrace is about to add to a library constructed in process PROC.
328  * The corresponding PLT entry is for symbol called NAME, and it's
329  * I-th relocation in the file.
330  *
331  * If this function returns PLT_DEFAULT, PLT address is obtained by
332  * calling arch_plt_sym_val, and symbol is allocated.  If PLT_OK or
333  * PLT_DEFAULT are returned, the chain of symbols passed back in RET
334  * is added to library under construction.  */
335 enum plt_status os_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
336 				     const char *name, GElf_Rela *rela,
337 				     size_t i, struct library_symbol **ret);
338 
339 /* Like os_elf_add_plt_entry, but tied to ARCH_HAVE_ADD_PLT_ENTRY in
340  * arch.h.  The arch callback is called first.  If it returns
341  * PLT_DEFAULT, the os callback is called next.  */
342 enum plt_status arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
343 				       const char *name, GElf_Rela *rela,
344 				       size_t i, struct library_symbol **ret);
345 
346 /* The following callback has to be implemented in OS backend if os.h
347  * defines OS_HAVE_ADD_FUNC_ENTRY.
348  *
349  * This is called for every symbol that ltrace is about to add to the
350  * library constructed for LTE in process PROC.
351  *
352  * If this function returns PLT_DEFAULT, then if there is a
353  * pre-existing symbol, its name may be updated if the newly-found
354  * name is shorter.  Otherwise a new symbol is created.
355  *
356  * If PLT_OK or PLT_DEFAULT are returned, the chain of symbols passed
357  * back in RET is added to library under construction.  */
358 enum plt_status os_elf_add_func_entry(struct process *proc, struct ltelf *lte,
359 				      const GElf_Sym *sym,
360 				      arch_addr_t addr, const char *name,
361 				      struct library_symbol **ret);
362 
363 /* Like os_elf_add_func_entry, but tied to ARCH_HAVE_ADD_FUNC_ENTRY in
364  * arch.h.  The arch callback is called first.  If it returns
365  * PLT_DEFAULT, the os callback is called next.  */
366 enum plt_status arch_elf_add_func_entry(struct process *proc, struct ltelf *lte,
367 					const GElf_Sym *sym,
368 					arch_addr_t addr, const char *name,
369 					struct library_symbol **ret);
370 
371 /* This callback needs to be implemented if arch.h defines
372  * ARCH_HAVE_DYNLINK_DONE.  It is called after the dynamic linker is
373  * done with the process start-up.  */
374 void arch_dynlink_done(struct process *proc);
375 
376 /* This callback needs to be implemented if arch.h defines
377  * ARCH_HAVE_SYMBOL_RET.  It is called after a traced call returns.  */
378 void arch_symbol_ret(struct process *proc, struct library_symbol *libsym);
379 
380 
381 /* This callback needs to be implemented if arch.h defines
382  * ARCH_HAVE_FIND_DL_DEBUG.
383  * It is called by generic code to find the address of the dynamic
384  * linker's debug structure.
385  * DYN_ADDR holds the address of the dynamic section.
386  * If the debug area is found, return 0 and fill in the address in *RET.
387  * If the debug area is not found, return a negative value.  */
388 int arch_find_dl_debug(struct process *proc, arch_addr_t dyn_addr,
389 		       arch_addr_t *ret);
390 
391 /* This is called to obtain a list of directories to search when
392  * loading config files.  The callback sets *RETP to a pointer to the
393  * first element of a NULL-terminated array of directory names.  It's
394  * legitimate to set *RETP to NULL to indicate there are no
395  * directories.  The function returns 0 on success or a negative value
396  * on a failure.
397  *
398  * If PRIVATE is set, the list in *RETP should contain only user's own
399  * directories (presumably under HOME if there's any such thing on the
400  * given OS).  Otherwise only system directories should be reported.
401  *
402  * The directories don't have to exist.  Directories passed in -F are
403  * handled separately by the caller and this callback shouldn't
404  * concern itself with it.  */
405 int os_get_config_dirs(int private, const char ***retp);
406 
407 /* This is called to obtain list of legacy config files to import, if
408  * any.  A reference to initialized vector of char* is passed in.
409  *
410  * This returns 0 on success, in which case strings from *RETP (if
411  * any) are interpreted as file names.  These files belong to the
412  * caller and will eventually be freed.
413  *
414  * Returns a negative value for failure, in which case *RETP contents
415  * are not consulted in any way.  */
416 int os_get_ltrace_conf_filenames(struct vect *retp);
417 
418 /* If arch.h defines ARCH_HAVE_FETCH_ARG, the following callbacks have
419  * to be implemented: arch_fetch_arg_init, arch_fetch_arg_clone,
420  * arch_fetch_arg_done, arch_fetch_arg_next and arch_fetch_retval.
421  * See fetch.h for details.  */
422 
423 /* If arch.h defines both ARCH_HAVE_FETCH_ARG and
424  * ARCH_HAVE_FETCH_PACK, the following callbacks have to be
425  * implemented: arch_fetch_param_pack_start,
426  * arch_fetch_param_pack_end.  See fetch.h for details.  */
427 
428 enum sw_singlestep_status {	/* Answered by arch_sw_singlestep.  */
429 	SWS_FAIL,	/* Software singlestep was needed, but setting it up failed.  */
430 	SWS_OK,		/* Software singlestep was set up and the process continued.  */
431 	SWS_HW,		/* No software singlestep needed; do it in hardware.  */
432 };
433 struct sw_singlestep_data;
434 
435 /* The following callback has to be implemented in backend if arch.h
436  * defines ARCH_HAVE_SW_SINGLESTEP.
437  *
438  * This is called before the OS backend requests hardware singlestep.
439  * arch_sw_singlestep should consider whether a singlestep needs to be
440  * done in software.  If not, it returns SWS_HW.  Otherwise it needs
441  * to add one or several breakpoints by calling ADD_CB.  When it is
442  * done, it continues the process as appropriate, and answers either
443  * SWS_OK, or SWS_FAIL, depending on how it went.
444  *
445  * PROC is the process that should perform the singlestep, BP the
446  * breakpoint that we are singlestepping over.  ADD_CB is a callback
447  * to request adding breakpoints that should trap the process after
448  * it's continued.  The arguments to ADD_CB are the address where the
449  * breakpoint should be added, and DATA.  ADD_CB returns 0 on success
450  * or a negative value on failure.  It is expected that
451  * arch_sw_singlestep returns SWS_FAIL if ADD_CB returns error.  */
452 enum sw_singlestep_status arch_sw_singlestep(struct process *proc,
453 					     struct breakpoint *bp,
454 					     int (*add_cb)(arch_addr_t addr,
455 						   struct sw_singlestep_data *),
456 					     struct sw_singlestep_data *data);
457 
458 #endif /* BACKEND_H */
459